科大讯飞通用文字识别 intsig API接口接入

本文以科大讯飞开放平台的通用文字识别(intsig)API 为例,通过 PHP 调用 Python 脚本的方式构建一个文字识别 API。

还没有使用讯飞星火的朋友可以参考此文(附机器翻译API源码)

讯飞星火大模型API矩阵

新建instig.py文件并写入内容:

#!/usr/bin/python
# -*- coding: UTF-8 -*-
from datetime import datetime
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import requests
import sys

class AssembleHeaderException(Exception):
    """Error raised when a request URL cannot be prepared for signing.

    Attributes:
        message: The description text passed to the constructor.
    """

    def __init__(self, msg):
        # Hand the text to Exception as well, so ``args`` and ``str()`` carry
        # it no matter whether ``msg`` was passed positionally or by keyword.
        super().__init__(msg)
        self.message = msg


class Url:
    """Plain container for the three pieces of a split request URL.

    Attributes:
        host: Host part of the URL.
        path: Path part of the URL.
        schema: Scheme part of the URL, stored exactly as supplied.
    """

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema


class universalOcr(object):
    """Helper for the ``hh_ocr_recognize_doc`` text-recognition endpoint.

    Stores the credentials and the endpoint URL, splits URLs for signing and
    builds the JSON request body for one image file.
    """

    def __init__(self):
        # NOTE: the credentials are read from the module-level names
        # ``appid``, ``apikey`` and ``apisecret``; they must be assigned
        # before this class is instantiated.
        self.appid = appid
        self.apikey = apikey
        self.apisecret = apisecret
        self.url = 'http://api.xf-yun.com/v1/private/hh_ocr_recognize_doc'

    def parse_url(self, requset_url):
        """Split ``requset_url`` into schema, host and path.

        Args:
            requset_url: Absolute URL such as ``http://host/some/path``.

        Returns:
            A ``Url`` whose ``schema`` includes the trailing ``://``, whose
            ``host`` is the text up to the first ``/`` after the schema and
            whose ``path`` is everything from that ``/`` onwards.

        Raises:
            ValueError: If the URL contains no ``://``.
            AssembleHeaderException: If the host is empty or the URL has no
                path after the host.
        """
        schema_end = requset_url.index("://") + 3
        schema = requset_url[:schema_end]
        remainder = requset_url[schema_end:]
        # find() rather than index(): a URL without any "/" after the host
        # must reach the explicit check below instead of raising ValueError.
        path_start = remainder.find("/")
        if path_start <= 0:
            raise AssembleHeaderException("invalid request url:" + requset_url)
        return Url(remainder[:path_start], remainder[path_start:], schema)

    def get_body(self, file_path):
        """Build the JSON-serialisable request body for one image file.

        Args:
            file_path: Path of the image file to send.

        Returns:
            A dict with ``header``, ``parameter`` and ``payload`` sections;
            the image bytes are embedded as a base64 text string.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # The context manager closes the file even if reading fails.
        with open(file_path, 'rb') as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode('utf-8')

        # Replace the payload data with the actual capability content; see
        # the "payload" part of the request data in each capability's API doc.
        return {
            "header": {
                "app_id": self.appid,
                "status": 3
            },
            "parameter": {
                "hh_ocr_recognize_doc": {
                    "recognizeDocumentRes": {
                        "encoding": "utf8",
                        "compress": "raw",
                        "format": "json"
                    }
                }
            },
            "payload": {
                "image": {
                    # NOTE(review): always declared as "jpg", whatever the
                    # real type of the file is - confirm the service accepts it.
                    "encoding": "jpg",
                    "image": image_b64,
                    "status": 3
                }
            }
        }


# Build the signed request URL (authentication is passed as query parameters).
def assemble_ws_auth_url(requset_url, method="GET", api_key="", api_secret=""):
    """Return ``requset_url`` with HMAC-SHA256 authentication query parameters.

    The signature covers the host, the current date and the request line
    (``<method> <path> HTTP/1.1``).

    Args:
        requset_url: Absolute endpoint URL, e.g. ``http://host/path``.
        method: HTTP method that will be used for the request.
        api_key: API key placed into the authorization value.
        api_secret: Secret used as the HMAC key.

    Returns:
        ``requset_url`` followed by ``?`` and the url-encoded ``host``,
        ``date`` and ``authorization`` parameters.

    Raises:
        ValueError: If the URL contains no ``://``.
        AssembleHeaderException: If the host is empty or the URL has no path.
    """
    # Split the URL locally. Going through ``universalOcr.parse_url`` only
    # works once that global name has been rebound to an instance.
    host_and_path = requset_url[requset_url.index("://") + 3:]
    path_start = host_and_path.find("/")
    if path_start <= 0:
        raise AssembleHeaderException("invalid request url:" + requset_url)
    host = host_and_path[:path_start]
    path = host_and_path[path_start:]

    # format_date_time() renders the timestamp as an RFC 1123 GMT date,
    # e.g. "Mon, 22 Aug 2022 03:26:45 GMT".
    date = format_date_time(mktime(datetime.now().timetuple()))

    # Text that gets signed: host line, date line, request line.
    signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(host, date, method, path)
    digest = hmac.new(api_secret.encode('utf-8'), signature_origin.encode('utf-8'),
                      digestmod=hashlib.sha256).digest()
    signature = base64.b64encode(digest).decode(encoding='utf-8')

    authorization_origin = 'api_key="{}", algorithm="{}", headers="{}", signature="{}"'.format(
        api_key, "hmac-sha256", "host date request-line", signature)
    authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')

    values = {
        "host": host,
        "date": date,
        "authorization": authorization
    }
    return requset_url + "?" + urlencode(values)



def get_result():
    """Send the image to the OCR endpoint and print the recognised text.

    Relies on module-level state prepared by the ``__main__`` section: the
    global name ``universalOcr`` (bound to an instance by then) and
    ``file_path``.

    On success (response header code 0) the base64-decoded result text is
    printed to stdout. Otherwise a diagnostic goes to stderr, so stdout stays
    empty for the calling process.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    request_url = assemble_ws_auth_url(
        universalOcr.url, "POST", universalOcr.apikey, universalOcr.apisecret)
    # NOTE(review): 'appid' carries the literal placeholder 'APPID', kept
    # unchanged - confirm whether the service expects the real app id here.
    headers = {'content-type': "application/json", 'host': 'api.xf-yun.com', 'appid': 'APPID'}
    body = universalOcr.get_body(file_path=file_path)

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(request_url, data=json.dumps(body), headers=headers, timeout=60)
    result = json.loads(response.content.decode('utf8'))

    header = result.get('header') if isinstance(result, dict) else None
    if isinstance(header, dict) and header.get('code') == 0:
        encoded_text = result['payload']['recognizeDocumentRes']['text']
        print(str(base64.b64decode(encoded_text), 'utf-8'))
    else:
        # Report the failure instead of exiting silently; stderr keeps the
        # stdout contract (recognised text only) intact.
        print("OCR request failed: {}".format(header if header is not None else result),
              file=sys.stderr)


if __name__ == "__main__":
    # Fill in the APPID, APIKey and APISecret obtained from the open platform.
    appid = "xxxxxx"
    apisecret = "xxxxxx"
    apikey = "xxxxxx"

    # The image path is the single required command-line argument; fail with
    # a usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python instig.py <image_path>")
    file_path = sys.argv[1]

    # NOTE: this deliberately rebinds the class name to an instance;
    # get_result() reads the instance through this global name.
    universalOcr = universalOcr()
    get_result()

index.php获取待识别的图片,存储到本地供python调用,识别完成后删除图片

<?php
/**
 * Download an image with cURL and store it in a local file.
 *
 * Only http:// and https:// URLs (including redirect targets) are fetched,
 * because the URL may come straight from a request parameter.
 *
 * @param string $url      Full URL of the image.
 * @param string $filename Name of the local file to write.
 * @return bool True when the transfer succeeded; false when the local file
 *              could not be opened or the transfer failed.
 */
function getImg($url = "", $filename = ""){
    // Open the target first: without a writable file there is nothing to do.
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        return false;
    }

    $hander = curl_init();
    curl_setopt($hander, CURLOPT_URL, $url);
    curl_setopt($hander, CURLOPT_FILE, $fp);
    curl_setopt($hander, CURLOPT_HEADER, 0);
    curl_setopt($hander, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($hander, CURLOPT_TIMEOUT, 60);
    // Block file://, gopher:// etc. for the initial URL and for redirects.
    curl_setopt($hander, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($hander, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    // With CURLOPT_FILE set, curl_exec() returns true/false rather than data.
    $ok = curl_exec($hander);
    curl_close($hander);
    fclose($fp);
    return $ok !== false;
}

// Allow cross-origin callers and declare the JSON response type.
header('Access-Control-Allow-Origin:*');
header('Content-type: application/json');

$pic = isset($_GET['pic']) ? $_GET['pic'] : null;
if (empty($pic)) { die("请传入图片参数"); }

// Temporary image name: the UTC+8 time of day plus a unique suffix, so that
// requests arriving within the same second do not overwrite each other.
$time = gmdate('H-i-s', time() + 3600 * 8);
$png = uniqid($time . '-', true) . '.png';

$text = null;
// Only fetch http(s) URLs; anything else is reported as a failed recognition.
if (preg_match('#^https?://#i', $pic) && getImg($pic, $png)) {
    // exec() returns only the LAST output line, so collect every line and
    // join them before decoding the JSON printed by the Python script.
    $lines = array();
    exec('python instig.py ' . escapeshellarg($png), $lines);
    $res = json_decode(implode("\n", $lines), true); // decode into an array
    if (is_array($res) && isset($res['whole_text'])) {
        $text = $res['whole_text'];
    }
}

// Remove the temporary image whether or not recognition succeeded.
if (is_file($png)) {
    unlink($png);
}

if ($text) { $code = '200'; }
else { $code = '202'; }
$json_return = array(
    "code" => $code,
    "src" => $pic,
    "dst" => $text
);
echo json_encode($json_return, JSON_UNESCAPED_UNICODE);
?>

Demo:https://api.szfx.top/api/xfocr.html

本文采用 CC BY-NC-SA 3.0 Unported 许可,转载请以超链接注明出处。
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
Source: Telegram @AmashiroNatsukiEars_NoWord Sticker
Source: Github @zhheo/Sticker-Heo
Source: github.com/k4yt3x/flowerhd
颜文字
AmashiroNatsukiEars
Heo
小恐龙
花!
上一篇
下一篇