本文以科大讯飞开放平台的通用文字识别(intsig)API 为例,演示如何通过 PHP 调用 Python 脚本来构建一个文字识别 API。
还没有使用讯飞星火的朋友可以参考此文(附机器翻译API源码)
新建instig.py文件并写入内容:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Command-line client for the iFlytek ``hh_ocr_recognize_doc`` OCR endpoint.

Usage::

    python instig.py <image_path>

The image is base64-encoded, posted to the service through an HMAC-SHA256
signed URL, and the decoded recognition result is printed to stdout.  Nothing
is written to stdout when the request fails, so a caller that captures stdout
receives an empty string in that case (diagnostics go to stderr).
"""
from datetime import datetime
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import sys

# Fill in the APPID, APIKey and APISecret obtained from the open platform.
APPID = "xxxxxx"
APISECRET = "xxxxxx"
APIKEY = "xxxxxx"

# Seconds to wait for the OCR service before giving up on the request.
REQUEST_TIMEOUT = 30


class AssembleHeaderException(ValueError):
    """Raised when a request URL cannot be split into scheme, host and path.

    Derives from ``ValueError`` so that code which caught the ``ValueError``
    formerly raised by ``str.index`` on malformed URLs keeps working.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.message = msg


class Url:
    """Plain holder for the three parts of a request URL."""

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema


class universalOcr(object):
    """Credentials, endpoint and request-body builder for the OCR service."""

    def __init__(self, appid=None, apikey=None, apisecret=None):
        """Store the credentials, defaulting to the module-level constants."""
        self.appid = APPID if appid is None else appid
        self.apikey = APIKEY if apikey is None else apikey
        self.apisecret = APISECRET if apisecret is None else apisecret
        self.url = 'http://api.xf-yun.com/v1/private/hh_ocr_recognize_doc'

    @staticmethod
    def parse_url(requset_url):
        """Split ``requset_url`` into a :class:`Url` (host, path, schema).

        ``schema`` keeps the trailing ``://`` and ``path`` keeps its leading
        slash.  Callable on the class or on an instance.

        Raises:
            AssembleHeaderException: if the URL has no ``://``, an empty
                host, or no path component.
        """
        scheme_end = requset_url.find("://")
        if scheme_end < 0:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        schema = requset_url[:scheme_end + 3]
        remainder = requset_url[scheme_end + 3:]
        path_start = remainder.find("/")
        # -1: no path at all; 0: the host part is empty.
        if path_start <= 0:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        return Url(remainder[:path_start], remainder[path_start:], schema)

    def get_body(self, file_path):
        """Return the request body dict for the image stored at ``file_path``.

        The file is read in binary mode and embedded as base64 text.
        """
        with open(file_path, 'rb') as image_file:
            buf = image_file.read()
        return {
            "header": {
                "app_id": self.appid,
                "status": 3
            },
            "parameter": {
                "hh_ocr_recognize_doc": {
                    "recognizeDocumentRes": {
                        "encoding": "utf8",
                        "compress": "raw",
                        "format": "json"
                    }
                }
            },
            "payload": {
                "image": {
                    "encoding": "jpg",
                    "image": str(base64.b64encode(buf), 'utf-8'),
                    "status": 3
                }
            }
        }


def assemble_ws_auth_url(requset_url, method="GET", api_key="", api_secret="",
                         date=None):
    """Return ``requset_url`` with the signed authentication query appended.

    Args:
        requset_url: Endpoint URL; must contain a scheme, a host and a path.
        method: HTTP method that appears in the signed request line.
        api_key: API key placed in the authorization string.
        api_secret: Secret used as the HMAC-SHA256 key.
        date: HTTP-date string to sign.  Defaults to the current time; pass
            a fixed value to obtain a reproducible URL.

    Raises:
        AssembleHeaderException: if ``requset_url`` cannot be parsed.
    """
    u = universalOcr.parse_url(requset_url)
    if date is None:
        date = format_date_time(mktime(datetime.now().timetuple()))

    # The signature covers the host, the date and the HTTP request line.
    signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
        u.host, date, method, u.path)
    digest = hmac.new(api_secret.encode('utf-8'),
                      signature_origin.encode('utf-8'),
                      digestmod=hashlib.sha256).digest()
    signature_sha = base64.b64encode(digest).decode(encoding='utf-8')

    authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
        api_key, "hmac-sha256", "host date request-line", signature_sha)
    authorization = base64.b64encode(
        authorization_origin.encode('utf-8')).decode(encoding='utf-8')

    values = {
        "host": u.host,
        "date": date,
        "authorization": authorization
    }
    return requset_url + "?" + urlencode(values)


def get_result(file_path=None, ocr=None):
    """Send one image to the OCR service and print the decoded result.

    Args:
        file_path: Path of the image to recognise; defaults to ``sys.argv[1]``.
        ocr: ``universalOcr`` instance to use; a default one is created when
            omitted.

    Returns:
        The decoded result text (also printed to stdout), or ``None`` when
        the service did not answer with ``header.code == 0``.  In the failure
        case the raw response is written to stderr and stdout stays empty.
    """
    # Imported here so the signing helpers stay importable on machines where
    # the HTTP client library is not installed.
    import requests

    if file_path is None:
        file_path = sys.argv[1]
    if ocr is None:
        ocr = universalOcr()

    request_url = assemble_ws_auth_url(ocr.url, "POST", ocr.apikey, ocr.apisecret)
    # 'appid' is sent as the literal string "APPID", exactly as before; the
    # real application id travels in the request body header.
    headers = {'content-type': "application/json", 'host': 'api.xf-yun.com', 'appid': 'APPID'}
    body = ocr.get_body(file_path=file_path)
    response = requests.post(request_url, data=json.dumps(body),
                             headers=headers, timeout=REQUEST_TIMEOUT)

    raw = response.content.decode('utf8')
    try:
        parsed = json.loads(raw)
    except ValueError:
        parsed = None
    header = parsed.get('header') if isinstance(parsed, dict) else None
    if not isinstance(header, dict) or header.get('code') != 0:
        sys.stderr.write("OCR request failed: " + raw + "\n")
        return None

    renew_text = parsed['payload']['recognizeDocumentRes']['text']
    text = str(base64.b64decode(renew_text), 'utf-8')
    print(text)
    return text


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python instig.py <image_path>\n")
        sys.exit(2)
    sys.exit(0 if get_result(sys.argv[1]) is not None else 1)
index.php 负责下载待识别的图片并保存到本地,供 Python 脚本读取;识别完成后删除该图片:
<?php
/**
 * OCR endpoint: downloads the image named by the `pic` query parameter,
 * runs instig.py on the local copy and answers with a JSON object
 * {code, src, dst}, where `dst` is the `whole_text` field of the script
 * output (null when recognition failed).
 */

/**
 * Download an image over HTTP(S) with cURL and store it locally.
 *
 * @param string $url      Full URL of the image.
 * @param string $filename Local path the image is written to.
 * @return bool True when the transfer succeeded, false otherwise.
 */
function getImg($url = "", $filename = "")
{
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        return false;
    }

    $hander = curl_init();
    curl_setopt($hander, CURLOPT_URL, $url);
    curl_setopt($hander, CURLOPT_FILE, $fp);
    curl_setopt($hander, CURLOPT_HEADER, 0);
    curl_setopt($hander, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($hander, CURLOPT_TIMEOUT, 60);
    // The URL is client-supplied: allow web protocols only, so schemes such
    // as file:// cannot be used to read local files, including via redirects.
    // NOTE(review): internal HTTP hosts are still reachable; add a host
    // allow-list if this endpoint is exposed publicly.
    curl_setopt($hander, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $ok = curl_exec($hander);
    curl_close($hander);
    fclose($fp);

    return $ok !== false;
}

header('Access-Control-Allow-Origin:*');
header('Content-type: application/json');

$pic = isset($_GET['pic']) ? $_GET['pic'] : null;
if (empty($pic) || !is_string($pic)) {
    die("请传入图片参数");
}

// A unique name per request: a clock-based name collides when two requests
// arrive within the same second.
$png = uniqid('ocr_', true) . '.png';
$text = null;

if (getImg($pic, $png)) {
    // Collect every output line; exec() alone returns only the last one,
    // which breaks decoding when the script prints multi-line JSON.
    $output = array();
    exec('python instig.py ' . escapeshellarg($png), $output);
    $res = json_decode(implode("\n", $output), true); // decode to an array
    if (is_array($res) && isset($res['whole_text'])) {
        $text = $res['whole_text'];
    }
}

// Remove the temporary image whether or not recognition succeeded.
if (file_exists($png)) {
    unlink($png);
}

$code = $text ? '200' : '202';
$json_return = array(
    "code" => $code,
    "src" => $pic,
    "dst" => $text
);
echo json_encode($json_return, JSON_UNESCAPED_UNICODE);
?>