一、前言
之前推荐的讯飞OCR API额度已经用完,于是转向百度OCR。不得不说还是百度大气:调用量 50000次/天,QPS/并发 2qps。下面以PHP为例,介绍如何基于百度OCR构建自己的API。
二、API构建
新建index.php、access.ini(存放access_token)、expires.ini(存放token过期时间戳)文件,提前申请好你的百度API Key和Secret Key。
在index.php文件内写入以下代码:
<?php
/**
 * Download a remote image to a local file using cURL.
 *
 * The response body is streamed straight into $filename. Only http/https
 * URLs are fetched (also after redirects), so a caller-supplied URL cannot
 * make cURL read local files or speak other protocols.
 *
 * @param string $url      Full URL of the image to fetch.
 * @param string $filename Local path the downloaded bytes are written to.
 * @return bool true when the transfer completed successfully; false when an
 *              argument is empty, the file cannot be opened, or the transfer
 *              fails (including HTTP status >= 400).
 */
function getImg($url = "", $filename = ""){
    // Reject unusable arguments up front instead of letting fopen()/cURL fail.
    if (!is_string($url) || $url === '' || !is_string($filename) || $filename === '') {
        return false;
    }

    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        return false;
    }

    $hander = curl_init();
    if ($hander === false) {
        fclose($fp);
        return false;
    }

    curl_setopt($hander, CURLOPT_URL, $url);
    curl_setopt($hander, CURLOPT_FILE, $fp);
    curl_setopt($hander, CURLOPT_HEADER, 0);
    curl_setopt($hander, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($hander, CURLOPT_MAXREDIRS, 5);
    curl_setopt($hander, CURLOPT_TIMEOUT, 60);
    // Treat HTTP error responses as failures so an error page is not saved as the image.
    curl_setopt($hander, CURLOPT_FAILONERROR, true);
    // The URL comes from the caller: limit both the initial request and any
    // redirect target to plain web protocols.
    curl_setopt($hander, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    // With CURLOPT_FILE set, curl_exec() returns true on success and false on failure.
    $ok = curl_exec($hander) !== false;

    curl_close($hander);
    fclose($fp);

    return $ok;
}
/**
 * Send an HTTP POST request (REST API) and return the response body.
 *
 * @param string       $url   Target URL.
 * @param string|array $param POST body. It is handed to CURLOPT_POSTFIELDS
 *                            unchanged, so a string is sent as-is and an
 *                            array is encoded by cURL as form fields.
 * @return string|bool The HTTP response body on success; false when $url or
 *                     $param is empty or the transfer fails.
 */
function request_post($url = '', $param = '')
{
    if (empty($url) || empty($param)) {
        return false;
    }

    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // Return the response as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Keep TLS certificate and host verification on: the request URL may
    // carry an access token, which must not be sent to an unverified peer.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
    // Bound the request so a stalled server cannot hang the script forever.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 60);
    // Submit as POST.
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $param);

    // Run the request; curl_exec() yields false on failure.
    $data = curl_exec($curl);
    curl_close($curl);

    return $data;
}
/**
 * Small client for the OAuth 2.0 token endpoint on aip.baidubce.com.
 */
class Sample {
    /**
     * POST a client_credentials request to the token endpoint.
     *
     * The API Key and Secret Key placeholders in the URL must be replaced
     * with real credentials before use.
     *
     * @return string|bool Raw response body as returned by curl_exec(),
     *                     or false when the transfer fails.
     */
    public function run() {
        $endpoint = "https://aip.baidubce.com/oauth/2.0/token?client_id=【你的API Key】&client_secret=【你的Secret Key】&grant_type=client_credentials";
        $headers = [
            'Content-Type: application/json',
            'Accept: application/json',
        ];

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $endpoint);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        // Hand the body back to the caller instead of echoing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $body = curl_exec($ch);
        curl_close($ch);

        return $body;
    }
}
/**
 * Request a fresh access token and cache it on disk.
 *
 * On success the token is written to access.ini and its absolute expiry
 * (Unix timestamp) to expires.ini, both in the current working directory.
 * When the token request fails or the response lacks the expected fields,
 * the cache files are left untouched so a previously cached token is not
 * overwritten with an empty value.
 *
 * @return string|null The new access token, or null when none was obtained.
 */
function get_token(){
    // Take the timestamp before the request so the stored expiry errs on the early side.
    $time = time();
    $rtn = (new Sample())->run();

    // run() returns false when the transfer fails; only decode real bodies.
    $res = is_string($rtn) ? json_decode($rtn, true) : null;
    if (!is_array($res) || empty($res["access_token"]) || !isset($res["expires_in"])) {
        return null;
    }

    $access_token = (string) $res["access_token"];
    $expires_time = $time + (int) $res["expires_in"];

    // Write the token before its expiry, each under an exclusive lock, so a
    // concurrent reader never pairs a new expiry with a half-written token.
    file_put_contents("access.ini", $access_token, LOCK_EX);
    file_put_contents("expires.ini", $expires_time, LOCK_EX);

    return $access_token;
}
// ---------------------------------------------------------------------------
// Request handler: OCR the image referenced by ?pic=<url> and reply with JSON
// of the form {"code": "200"|"202", "src": <url>, "dst": <recognized text>}.
// ---------------------------------------------------------------------------
header('Access-Control-Allow-Origin:*');
header('Content-type: application/json');

$pic = isset($_GET['pic']) ? $_GET['pic'] : null;
if (empty($pic) || !is_string($pic)) { die("请传入图片网址参数"); }

// Only plain web URLs are accepted; anything else is not an image URL.
$parts = parse_url($pic);
if (!is_array($parts)) { $parts = []; }
$scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : '';
if ($scheme !== 'http' && $scheme !== 'https') { die("请传入图片网址参数"); }

// Take the extension from the URL path only, so a query string or an
// upper-case extension does not cause a supported image to be rejected.
$path = isset($parts['path']) ? $parts['path'] : '';
$extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));
$array = ["jpg", "jpeg", "bmp", "png"];
if (!in_array($extension, $array, true)) { die("当前图片格式不支持"); }

// Reuse the cached token while it is still valid. The file names must match
// the ones get_token() writes (access.ini / expires.ini).
$token = null;
if (is_file("expires.ini") && is_file("access.ini")) {
    $expires_time = (int) trim((string) file_get_contents("expires.ini"));
    if (time() < $expires_time) {
        $cached = trim((string) file_get_contents("access.ini"));
        if ($cached !== '') {
            $token = $cached;
        }
    }
}
if ($token === null) {
    $token = get_token();
}

$words = "";

// Download into a unique temp file so concurrent requests cannot clobber
// each other's image.
$jpg = tempnam(sys_get_temp_dir(), 'ocr');
if ($jpg !== false) {
    if (getImg($pic, $jpg)) {
        $img = file_get_contents($jpg);
        if ($img !== false && $img !== '') {
            $url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=' . $token;
            $bodys = array(
                'image' => base64_encode($img)
            );
            $str = request_post($url, $bodys);
            $res = is_string($str) ? json_decode($str, true) : null;

            // An error response carries no words_result; treat it as "no text".
            if (is_array($res) && isset($res['words_result']) && is_array($res['words_result'])) {
                $lines = [];
                foreach ($res['words_result'] as $item) {
                    if (is_array($item) && isset($item['words'])) {
                        $lines[] = $item['words'];
                    }
                }
                // One recognized line per row, no trailing newline.
                $words = implode("\n", $lines);
            }
        }
    }
    // Always remove the temp image, whether or not recognition succeeded.
    if (is_file($jpg)) {
        unlink($jpg);
    }
}

// Compare against '' rather than relying on truthiness, so a result of "0"
// still counts as recognized text.
$code = ($words !== '') ? '200' : '202';

$json_return = array(
    "code" => $code,
    "src" => $pic,
    "dst" => $words
);
echo json_encode($json_return, JSON_UNESCAPED_UNICODE);
三、API调用
请求示例:
https://api.szfx.top/bdocr/general/?pic=https://api.szfx.top/bdocr/demo.jpg
返回数据:
{
"code": "200",
"src": "https://api.szfx.top/bdocr/demo.jpg",
"dst": "关注【技术松鼠】公众号\n微信搜一搜\n获取优质资源,紧跟时代潮流\nQ技术松鼠"
}
四、构建识别网站
这个比较简单,直接看效果:https://tool.szfx.top/bdocr
五、参考资料
一、通用文字识别(标准版):https://cloud.baidu.com/doc/OCR/s/zk3h7xz52
二、OCR文字识别 – 松鼠API:https://api.szfx.top/api/bdocr.html