一、前言
之前推荐的讯飞OCR API额度已经用完,于是转向百度OCR。不得不说还是百度大气——调用量:50000次/天;QPS/并发:2 QPS。下面以PHP为例,介绍如何基于百度OCR构建API。
二、API构建
新建index.php、access.ini(存放access_token)、expires.ini(存放token过期时间戳)文件,提前申请好你的百度API Key和Secret Key。
在index.php文件内写入以下代码:
<?php
/**
 * Download a remote image to a local file using cURL.
 *
 * Only http:// and https:// URLs (including redirect targets) are fetched,
 * because the URL comes from the caller and other cURL protocols such as
 * file:// must not be reachable through it.
 *
 * When the transfer fails the (possibly empty) local file is left in place;
 * the caller owns the file and is responsible for removing it.
 *
 * @param string $url      Full URL of the image.
 * @param string $filename Local path the image is written to.
 * @return bool True when the transfer completed; false when an argument is
 *              empty, the local file cannot be opened, or cURL reports an
 *              error (including an HTTP status of 400 or above).
 */
function getImg($url = "", $filename = "")
{
    if (empty($url) || empty($filename)) {
        return false;
    }

    // Open the target first so an unwritable path never reaches cURL.
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        return false;
    }

    $hander = curl_init();
    curl_setopt($hander, CURLOPT_URL, $url);
    curl_setopt($hander, CURLOPT_FILE, $fp);
    curl_setopt($hander, CURLOPT_HEADER, 0);
    curl_setopt($hander, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($hander, CURLOPT_MAXREDIRS, 5);
    curl_setopt($hander, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    // Treat HTTP >= 400 as a failure instead of saving the error page as an image.
    curl_setopt($hander, CURLOPT_FAILONERROR, true);
    curl_setopt($hander, CURLOPT_TIMEOUT, 60);

    // With CURLOPT_FILE set, curl_exec() returns true/false rather than the body.
    $ok = curl_exec($hander);
    curl_close($hander);
    fclose($fp);

    return $ok !== false;
}

/**
 * Send an HTTP POST request (REST API) and return the response body.
 *
 * TLS peer verification is left at cURL's default (enabled).
 *
 * @param string       $url   Request URL.
 * @param string|array $param POST body; passed unchanged to CURLOPT_POSTFIELDS.
 * @return string|false Response body on success; false when an argument is
 *                      empty or the request fails.
 */
function request_post($url = '', $param = '')
{
    if (empty($url) || empty($param)) {
        return false;
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // Return the response as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Bound the request so a stalled upstream cannot hang the script forever.
    curl_setopt($curl, CURLOPT_TIMEOUT, 60);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $param);

    $data = curl_exec($curl);
    curl_close($curl);

    return $data;
}

/**
 * Requests an OAuth access token from the Baidu token endpoint.
 */
class Sample
{
    /**
     * POST to the token endpoint with the client credentials in the query string.
     *
     * The API Key / Secret Key placeholders in the URL must be replaced with
     * real credentials before use.
     *
     * @return string|false Raw response body, or false when the request fails.
     */
    public function run()
    {
        $curl = curl_init();
        curl_setopt_array($curl, array(
            CURLOPT_URL => "https://aip.baidubce.com/oauth/2.0/token?client_id=【你的API Key】&client_secret=【你的Secret Key】&grant_type=client_credentials",
            CURLOPT_TIMEOUT => 30,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST => 'POST',
            CURLOPT_HTTPHEADER => array(
                'Content-Type: application/json',
                'Accept: application/json'
            ),
        ));
        $response = curl_exec($curl);
        curl_close($curl);

        return $response;
    }
}

/**
 * Fetch a fresh access token and cache it on disk.
 *
 * On success the token is written to access.ini and its expiry timestamp
 * (request time + expires_in) to expires.ini. When the token request fails or
 * the response carries no access_token, nothing is written, so a failed
 * refresh never overwrites the cache with an empty token.
 *
 * @return string The access token, or an empty string when none was obtained.
 */
function get_token()
{
    $time = time();
    $rtn = (new Sample())->run();
    $res = is_string($rtn) ? json_decode($rtn, true) : null;

    if (!is_array($res) || empty($res["access_token"]) || !isset($res["expires_in"])) {
        return '';
    }

    $access_token = $res["access_token"];
    $expires_time = $time + (int) $res["expires_in"];
    file_put_contents("expires.ini", $expires_time);
    file_put_contents("access.ini", $access_token);

    return $access_token;
}
// Request handler: takes ?pic=<image URL>, downloads the image, sends it to
// the Baidu general_basic OCR endpoint and prints a JSON object with
// "code" ('200' when text was recognised, '202' otherwise), "src" and "dst".
header('Access-Control-Allow-Origin:*');
header('Content-type: application/json');

$pic = isset($_GET['pic']) ? $_GET['pic'] : null;
// A non-string value (e.g. ?pic[]=x) is treated the same as a missing one.
if (empty($pic) || !is_string($pic)) {
    die("请传入图片网址参数");
}

// The extension is whatever follows the last dot; compare case-insensitively
// so ".JPG" is accepted, and reject URLs that yield no extension at all.
$extension = '';
if (preg_match('/[^.]+$/', $pic, $matches)) {
    $extension = strtolower($matches[0]);
}
$array = ["jpg", "jpeg", "bmp", "png"];
if (!in_array($extension, $array, true)) {
    die("当前图片格式不支持");
}

// Token cache: these are the same files get_token() writes
// (expires.ini = expiry timestamp, access.ini = token).
$expires_time = is_file("expires.ini") ? (int) file_get_contents("expires.ini") : 0;
$token = '';
if (time() < $expires_time && is_file("access.ini")) {
    $token = trim((string) file_get_contents("access.ini"));
}
if ($token === '') {
    // Cache missing, expired or empty: request a new token.
    $token = (string) get_token();
}

// Temp file name: UTC+8 time of day plus a unique suffix so that requests
// arriving within the same second do not overwrite or delete each other's file.
$file_time = gmdate('H-i-s', time() + 3600 * 8);
$jpg = $file_time . '-' . uniqid() . '.' . $extension;
getImg($pic, $jpg);

$img = is_file($jpg) ? file_get_contents($jpg) : false;
if (is_file($jpg)) {
    // The bytes are in memory now; remove the temp file on every path.
    unlink($jpg);
}

$words = "";
if ($img !== false && $img !== '') {
    $url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=' . rawurlencode($token);
    $bodys = array(
        'image' => base64_encode($img)
    );
    $str = request_post($url, $bodys);
    $res = is_string($str) ? json_decode($str, true) : null;

    // An error response has no words_result; leave $words empty in that case.
    if (is_array($res) && isset($res['words_result']) && is_array($res['words_result'])) {
        $lines = array();
        foreach ($res['words_result'] as $item) {
            $lines[] = isset($item['words']) ? $item['words'] : '';
        }
        $words = implode("\n", $lines);
    }
}

$code = ($words !== '') ? '200' : '202';
$json_return = array(
    "code" => $code,
    "src" => $pic,
    "dst" => $words
);
echo json_encode($json_return, JSON_UNESCAPED_UNICODE);
三、API调用
请求示例:
https://api.szfx.top/bdocr/general/?pic=https://api.szfx.top/bdocr/demo.jpg
返回数据:
{ "code": "200", "src": "https://api.szfx.top/bdocr/demo.jpg", "dst": "关注【技术松鼠】公众号\n微信搜一搜\n获取优质资源,紧跟时代潮流\nQ技术松鼠" }
四、构建识别网站
这个比较简单,直接看效果:https://tool.szfx.top/bdocr
五、参考资料
一、通用文字识别(标准版):https://cloud.baidu.com/doc/OCR/s/zk3h7xz52
二、OCR文字识别 – 松鼠API:https://api.szfx.top/doc/bdocr/general.html