本文以科大讯飞开放平台的通用文字识别(intsig)API 为例,介绍如何通过 PHP 调用 Python 脚本来构建一个文字识别 API。
还没有使用讯飞星火的朋友可以参考此文(附机器翻译API源码)
新建instig.py文件并写入内容:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from datetime import datetime
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import requests
import sys
class AssembleHeaderException(Exception):
    """Error raised when a request URL cannot be split into host and path.

    The text passed to the constructor is available both through the
    ``message`` attribute and through the standard ``args`` / ``str()``
    machinery of ``Exception``.
    """

    def __init__(self, msg):
        # Initialise the base class explicitly so ``args`` and ``str()`` carry
        # the message even when the exception is constructed by keyword.
        super().__init__(msg)
        self.message = msg
class Url:
    """Plain container for the three parts of a split request URL."""

    def __init__(self, host, path, schema):
        """Store ``host``, ``path`` and ``schema`` unchanged as attributes."""
        self.host, self.path, self.schema = host, path, schema
class universalOcr(object):
    """Helper for calling the ``hh_ocr_recognize_doc`` OCR endpoint.

    Holds the account credentials and the endpoint URL, splits request URLs
    into their parts, and builds the JSON request body for an image file.
    """

    def __init__(self, app_id=None, api_key=None, api_secret=None):
        """Store the credentials and the endpoint URL.

        Every argument is optional. When one is omitted, the value is read
        from the module-level ``appid`` / ``apikey`` / ``apisecret`` name,
        so the existing no-argument construction keeps working.
        """
        self.appid = appid if app_id is None else app_id
        self.apikey = apikey if api_key is None else api_key
        self.apisecret = apisecret if api_secret is None else api_secret
        self.url = 'http://api.xf-yun.com/v1/private/hh_ocr_recognize_doc'

    def parse_url(self, requset_url):
        """Split ``requset_url`` into schema, host and path.

        Returns:
            A ``Url`` whose ``schema`` includes the trailing ``://``, whose
            ``host`` is the text between ``://`` and the next ``/``, and
            whose ``path`` starts at that ``/``.

        Raises:
            AssembleHeaderException: if the URL has no ``://`` separator,
                an empty host, or no path.
        """
        schema, separator, remainder = requset_url.partition("://")
        host, slash, tail = remainder.partition("/")
        # partition() reports a missing separator as an empty string rather
        # than raising, so every malformed URL reaches the same error path.
        if not separator or not slash or not host:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        return Url(host, slash + tail, schema + separator)

    def get_body(self, file_path):
        """Build the request body for the image stored at ``file_path``.

        The file is read as bytes and embedded base64-encoded under
        ``payload.image.image``.

        Returns:
            A dict with ``header``, ``parameter`` and ``payload`` sections,
            ready to be serialised as JSON.
        """
        # Context manager guarantees the handle is closed after reading.
        with open(file_path, 'rb') as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Replace the payload contents with the data for the actual
        # capability; see the request "payload" section of that capability's
        # API documentation.
        return {
            "header": {
                "app_id": self.appid,
                "status": 3,
            },
            "parameter": {
                "hh_ocr_recognize_doc": {
                    "recognizeDocumentRes": {
                        "encoding": "utf8",
                        "compress": "raw",
                        "format": "json",
                    },
                },
            },
            "payload": {
                "image": {
                    "encoding": "jpg",
                    "image": encoded_image,
                    "status": 3,
                },
            },
        }
def assemble_ws_auth_url(requset_url, method="GET", api_key="", api_secret=""):
    """Return ``requset_url`` with signed authentication query parameters.

    The URL is split with ``universalOcr.parse_url``. An HMAC-SHA256
    signature over the host, the current date and the request line is
    computed with ``api_secret`` and packed together with ``api_key`` into a
    base64 "authorization" value. ``host``, ``date`` and ``authorization``
    are then appended to the URL as a URL-encoded query string.
    """
    parts = universalOcr.parse_url(requset_url)

    # Current time as an HTTP date, e.g. "Mon, 22 Aug 2022 03:26:45 GMT".
    date = format_date_time(mktime(datetime.now().timetuple()))

    # Text that gets signed: host line, date line, then the request line.
    string_to_sign = "\n".join((
        f"host: {parts.host}",
        f"date: {date}",
        f"{method} {parts.path} HTTP/1.1",
    ))
    digest = hmac.new(
        api_secret.encode('utf-8'),
        string_to_sign.encode('utf-8'),
        digestmod=hashlib.sha256,
    ).digest()
    signature = base64.b64encode(digest).decode(encoding='utf-8')

    authorization_origin = (
        f'api_key="{api_key}", algorithm="hmac-sha256", '
        f'headers="host date request-line", signature="{signature}"'
    )
    authorization = base64.b64encode(
        authorization_origin.encode('utf-8')
    ).decode(encoding='utf-8')

    query = urlencode({
        "host": parts.host,
        "date": date,
        "authorization": authorization,
    })
    return requset_url + "?" + query
def get_result():
    """Send the image at ``file_path`` to the OCR endpoint and print the text.

    Reads the module-level ``universalOcr`` instance, ``apisecret`` and
    ``file_path``. When the response ``header.code`` equals 0, the
    base64-decoded ``payload.recognizeDocumentRes.text`` is printed to
    stdout. Otherwise the raw response is written to stderr, so stdout stays
    empty for whatever process captures it.
    """
    request_url = assemble_ws_auth_url(universalOcr.url, "POST", universalOcr.apikey, apisecret)
    headers = {'content-type': "application/json", 'host': 'api.xf-yun.com', 'appid': 'APPID'}
    body = universalOcr.get_body(file_path=file_path)

    # Bound the wait so a stalled connection cannot hang the caller forever.
    response = requests.post(request_url, data=json.dumps(body), headers=headers, timeout=30)
    raw_response = response.content.decode('utf8')
    result = json.loads(raw_response)

    if 'header' in result and result['header']['code'] == 0:
        encoded_text = result['payload']['recognizeDocumentRes']['text']
        print(str(base64.b64decode(encoded_text), 'utf-8'))
    else:
        # Surface the failure for diagnosis without touching stdout.
        print("OCR request failed: " + raw_response, file=sys.stderr)
if __name__ == "__main__":
    # Fill in the APPID, APIKey and APISecret obtained from the open platform.
    appid = "xxxxxx"
    apisecret = "xxxxxx"
    apikey = "xxxxxx"
    # The image path is the only command-line argument; exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python {} <image_path>".format(sys.argv[0]))
    file_path = sys.argv[1]
    # NOTE: this rebinds the class name to an instance; the module-level
    # functions access ``universalOcr`` as that instance.
    universalOcr = universalOcr()
    get_result()
新建 index.php:获取待识别的图片并存储到本地,供 Python 脚本调用,识别完成后删除该图片。
<?php
/**
 * Download an image with cURL and save it to a local file.
 *
 * @param string $url      Full URL of the image.
 * @param string $filename Path of the local file to write.
 * @return bool True when the transfer completed; false when the target file
 *              could not be opened or cURL reported an error.
 */
function getImg($url = "", $filename = ""){
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        // Nothing to write into, so do not start the transfer at all.
        return false;
    }
    $hander = curl_init();
    curl_setopt($hander, CURLOPT_URL, $url);
    curl_setopt($hander, CURLOPT_FILE, $fp);
    curl_setopt($hander, CURLOPT_HEADER, 0);
    curl_setopt($hander, CURLOPT_FOLLOWLOCATION, 1);
    // The URL is supplied by the request, so only allow HTTP(S) - both for
    // the initial fetch and for any redirect - to keep schemes such as
    // file:// out of reach.
    curl_setopt($hander, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_TIMEOUT, 60);
    $ok = curl_exec($hander);
    curl_close($hander);
    fclose($fp);
    return $ok !== false;
}
// Allow cross-origin callers and declare a JSON response.
header('Access-Control-Allow-Origin:*');
header('Content-type: application/json');
$pic=isset($_GET['pic'])? $_GET['pic'] :null;
if(empty($pic)){die("请传入图片参数");}
// Time of day shifted by +8 hours, plus a unique suffix so that concurrent
// requests arriving within the same second do not share one temporary file.
$time = gmdate('H-i-s', time() + 3600 * 8);
$png = uniqid($time.'-', true).'.png';
getImg($pic,$png);
// Run the recogniser script; exec() returns the last line of its output.
$str = exec("python instig.py ".escapeshellarg($png));
$res = json_decode($str, true);// decode into an associative array
// The decode result is null when the script printed nothing or non-JSON,
// so guard the lookup instead of indexing into null.
$text = (is_array($res) && isset($res['whole_text'])) ? $res['whole_text'] : null;
if($text) {$code = '200';}
else{$code = '202';}
$json_return = array(
    "code" => $code,
    "src" => $pic,
    "dst" => $text
);
echo json_encode($json_return, JSON_UNESCAPED_UNICODE);
// Remove the temporary image; it may not exist if the download could not be written.
if (is_file($png)) { unlink($png); }
?>