100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > Python进行图片文字识别(表格图片)(使用阿里OCR接口)

Python进行图片文字识别(表格图片)(使用阿里OCR接口)

时间:2022-06-19 22:20:03

相关推荐

Python进行图片文字识别(表格图片)(使用阿里OCR接口)

使用阿里OCR接口进行图片识别

这里仅记录代码,方便以后快速复用,不会做太多讲解。如果你也刚好需要,建议结合阿里云的 OCR 文档一起阅读,上手会更快。

阿里OCR链接:(原文链接在转载时丢失,请参阅阿里云“文字识别 OCR”官方文档)

# Recognize table images with the Alibaba Cloud OCR API and export each
# result as an Excel spreadsheet.
import json
import os
import sys
from collections import defaultdict
from typing import List

import pandas as pd
from alibabacloud_darabonba_stream.client import Client as StreamClient
from alibabacloud_ocr_api0707 import models as ocr_api_0707_models
from alibabacloud_ocr_api0707.client import Client as ocr_api0707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient

# CHANGE ME: root directory whose sub-directories contain the pictures.
# Each spreadsheet is written next to the picture it was generated from.
path = '你的目录'


def create_client(
    access_key_id: str,
    access_key_secret: str,
) -> ocr_api0707Client:
    """Build an OCR API client from an AccessKey pair.

    Args:
        access_key_id: Alibaba Cloud AccessKey ID.
        access_key_secret: Alibaba Cloud AccessKey secret.

    Returns:
        A client bound to the OCR service endpoint.
    """
    config = open_api_models.Config(
        access_key_id=access_key_id,
        access_key_secret=access_key_secret,
    )
    # Service domain.
    # NOTE(review): the endpoint in the scraped original was truncated to
    # 'ocr--'; restored to the public OCR endpoint listed in Alibaba
    # Cloud's documentation. Confirm it matches your region.
    config.endpoint = 'ocr-api.cn-hangzhou.aliyuncs.com'
    return ocr_api0707Client(config)


def _cells_to_rows(cells, fill='N'):
    """Group cell texts into equally long table rows.

    Cells are grouped by their ``yec`` value (presumably the index of the
    table row the cell ends on -- confirm against the API schema), rows
    are emitted in ascending ``yec`` order, and short rows are padded with
    ``fill`` so every row has the same number of columns.
    """
    grouped = defaultdict(list)
    for cell in cells:
        grouped[cell['yec']].append(cell['word'])
    width = max((len(words) for words in grouped.values()), default=0)
    return [
        grouped[row_key] + [fill] * (width - len(grouped[row_key]))
        for row_key in sorted(grouped)
    ]


def pic_table(pic_path):
    """Run table OCR on one picture and save the result as ``.xlsx``.

    The raw service response is also dumped to ``body.json`` in the
    current working directory so it can be inspected afterwards.

    Args:
        pic_path: Path of the picture to recognize. The spreadsheet is
            written to the same path with the extension replaced by
            ``table.xlsx``.

    Raises:
        ValueError: If the response contains no recognized table.
    """
    # CHANGE ME: credentials. Environment variables take precedence so the
    # secrets do not have to be committed with the script.
    client = create_client(
        os.environ.get('ALIBABA_CLOUD_ACCESS_KEY_ID', '输入你的AccessKey ID'),
        os.environ.get(
            'ALIBABA_CLOUD_ACCESS_KEY_SECRET', '输入你的AccessKey Secret'
        ),
    )
    body_stream = StreamClient.read_from_file_path(pic_path)
    request = ocr_api_0707_models.RecognizeTableOcrRequest(body=body_stream)
    runtime = util_models.RuntimeOptions()
    response = client.recognize_table_ocr_with_options(request, runtime)
    payload = response.body.to_map()

    # Keep a copy of the raw response (now valid JSON) for debugging.
    with open('body.json', 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False)

    # 'Data' is itself a JSON document encoded as a string. Parse it with
    # json.loads: eval() breaks on true/false/null and would execute
    # whatever the service returned.
    data = json.loads(payload['Data'])
    tables = data.get('prism_tablesInfo') or []
    if not tables:
        raise ValueError(f'no table recognized in {pic_path!r}')

    # Only the first recognized table is exported.
    rows = _cells_to_rows(tables[0]['cellInfos'])
    width = len(rows[0]) if rows else 0
    table = pd.DataFrame(rows, columns=range(width))

    # splitext handles extensions of any length (.jpg, .jpeg, ...).
    out_path = os.path.splitext(pic_path)[0] + 'table.xlsx'
    table.to_excel(out_path, index=False)


def main():
    """Run ``pic_table`` on every picture in the selected sub-directories."""
    # Sorted so the positional selection below is deterministic;
    # os.listdir returns entries in arbitrary order.
    sub_dirs = sorted(os.path.join(path, name) for name in os.listdir(path))
    # Only the 11th..18th sub-directories are processed, as in the original
    # script; slicing tolerates a shorter listing instead of raising.
    for sub_dir in sub_dirs[10:18]:
        for name in os.listdir(sub_dir):
            pic_s = os.path.join(sub_dir, name)
            try:
                pic_table(pic_s)
            except Exception as exc:
                # Best effort: report the failing picture and carry on.
                print('error', pic_s, exc)


if __name__ == '__main__':
    main()

# Recognition only: send one picture to the Alibaba Cloud table OCR API and
# print the response.
import sys
import os
import json
from typing import List

from alibabacloud_ocr_api0707.client import Client as ocr_api0707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_darabonba_stream.client import Client as StreamClient
from alibabacloud_ocr_api0707 import models as ocr_api_0707_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient

path = '图片的路径'
accessKeyId = '你的ID'
accessKeySecret = '你的Secret'
# dirs = os.listdir(path)


class OCRStart:
    """Minimal synchronous and asynchronous table-OCR example."""

    # Picture used when no path is given on the command line.
    DEFAULT_IMAGE = 'photo/5.0埋点v1.2.22.JPG'

    def __init__(self):
        pass

    @staticmethod
    def create_client(
        access_key_id: str,
        access_key_secret: str,
    ) -> ocr_api0707Client:
        """Build an OCR API client from an AccessKey pair."""
        config = open_api_models.Config(
            access_key_id=access_key_id,
            access_key_secret=access_key_secret,
        )
        # NOTE(review): the endpoint in the scraped original was truncated
        # to 'ocr--'; restored to the public OCR endpoint listed in Alibaba
        # Cloud's documentation. Confirm it matches your region.
        config.endpoint = 'ocr-api.cn-hangzhou.aliyuncs.com'
        return ocr_api0707Client(config)

    @staticmethod
    def _build_request(args: List[str]):
        """Return the OCR request for ``args[0]``, or the default picture."""
        image_path = args[0] if args else OCRStart.DEFAULT_IMAGE
        body_stream = StreamClient.read_from_file_path(image_path)
        return ocr_api_0707_models.RecognizeTableOcrRequest(body=body_stream)

    @staticmethod
    def main(
        args: List[str],
    ) -> None:
        """Recognize one picture and print the response.

        Args:
            args: Command-line arguments; the first one, if present, is
                the picture path. Otherwise ``DEFAULT_IMAGE`` is used.
        """
        client = OCRStart.create_client(accessKeyId, accessKeySecret)
        # The SDK packages imported at the top of the file must be installed.
        recognize_table_ocr_request = OCRStart._build_request(args)
        runtime = util_models.RuntimeOptions()
        try:
            # reqData is the final recognition result returned by the service.
            reqData = client.recognize_table_ocr_with_options(
                recognize_table_ocr_request, runtime
            )
            print(reqData)
        except Exception as error:
            # Not every exception carries ``.message``; fall back to the
            # exception itself instead of raising AttributeError here.
            print(getattr(error, 'message', error))

    @staticmethod
    async def main_async(
        args: List[str],
    ) -> None:
        """Asynchronous counterpart of ``main``.

        Args:
            args: Command-line arguments; the first one, if present, is
                the picture path. Otherwise ``DEFAULT_IMAGE`` is used.
        """
        # The original referenced an undefined ``Sample`` class here.
        client = OCRStart.create_client(accessKeyId, accessKeySecret)
        recognize_table_ocr_request = OCRStart._build_request(args)
        runtime = util_models.RuntimeOptions()
        try:
            # Await the SDK's async variant so the event loop is not blocked.
            reqData = await client.recognize_table_ocr_with_options_async(
                recognize_table_ocr_request, runtime
            )
            print(reqData)
        except Exception as error:
            print(getattr(error, 'message', error))


if __name__ == '__main__':
    OCRStart.main(sys.argv[1:])

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。