通过图像文字识别(OCR)提取文字,使用openpyxl将识别出的文字存放到excel

通过摄像头拍照后识别文字,并将有用的信息存放到excel

# -*- coding: utf-8 -*-
# @Software : PyCharm

import cv2
import requests
import base64
import json
import re
import openpyxl,random
import os
import win32api,win32con
import glob



# Open the results workbook when it already exists on disk, otherwise start
# a fresh one. In both cases the header row of the active sheet is rewritten.
if os.path.lexists('数据库.xlsx'):
    wb = openpyxl.load_workbook('数据库.xlsx')
else:
    wb = openpyxl.Workbook()
ws = wb.active
ws['A1'].value, ws['B1'].value, ws['C1'].value = (
    '图片名称',
    '付款金额',
    '转账-交易单号',
)

# Module-level accumulators shared by the functions below.
litq = []      # amounts appended by tupian()
lith = []      # order numbers appended by tupian()
litn = []      # file names appended by get_filelist()
stu_num = []   # column B values loaded by chuli()
stu_num2 = []  # column C values loaded by chuli()
# Module-level defaults; sxtou() assigns local variables of the same names
# and never rebinds these globals.
jinge = ''
dh = ''

# Load an image file
def get_file_content(file):
    """Return the complete contents of *file* as bytes.

    Args:
        file: Path of the file to read; it is opened in binary mode.

    Returns:
        The raw bytes read from the file.
    """
    with open(file, mode='rb') as handle:
        payload = handle.read()
    return payload

# Fetch the access_token
def gettoken():
    """Request an OAuth token from the Baidu AIP token endpoint.

    Returns:
        The decoded JSON body of the token response. The caller reads its
        ``'access_token'`` entry.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            request times out.
    """
    # client_id is the API key (AK) and client_secret the secret key (SK)
    # obtained from the vendor console.
    # NOTE(review): these credentials are hard-coded in source; they should be
    # moved to configuration or environment variables and rotated.
    host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=PPKWbCWxkIh5kPhaYtoaZ5jr&client_secret=6f2adCZkDY6EOr8Z8FKhjceDwPRr89kQ'
    # requests never times out by default, so an unreachable server would
    # hang the program forever without an explicit timeout.
    response = requests.get(host, timeout=10)
    return response.json()

# Recognize the text contained in an image
def img_to_str(filename):
    """Send an image file to the OCR endpoint and return the decoded reply.

    The file is read as bytes, base64-encoded and posted as form data to the
    high-accuracy endpoint (``accurate_basic``). A new access token is
    requested through ``gettoken()`` on every call.

    Args:
        filename: Path of the image file to recognize.

    Returns:
        The decoded JSON body of the OCR response.

    Raises:
        requests.exceptions.HTTPError: If the OCR endpoint answers with an
            HTTP error status.
        requests.exceptions.RequestException: On connection failure/timeout.
        KeyError: If the token response has no ``'access_token'`` entry.
    """
    # High-accuracy general OCR. Alternatives on the same host are
    # .../ocr/v1/general_basic (standard) and .../ocr/v1/webimage (web images).
    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

    img = base64.b64encode(get_file_content(filename))

    params = {"image": img}
    access_token = gettoken()
    request_url = request_url + "?access_token=" + access_token['access_token']
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(
        request_url, data=params, headers=headers, timeout=30
    )
    # A Response is falsy for 4xx/5xx statuses. Previously that path fell
    # through to an unbound local variable; raise a descriptive error instead.
    response.raise_for_status()
    return response.json()
def main():
    """Show a live camera preview and OCR a snapshot on demand.

    Frames from the local camera (device 0) are displayed in a window named
    "camera". Pressing Esc (key code 27) leaves the loop; pressing Space
    (key code 32) saves the current frame to ``frames.jpg``, runs it through
    ``img_to_str`` and ``sxtou`` and prints the OCR result.

    The camera and the window are released even if recognition raises.
    """
    # Create the preview window
    cv2.namedWindow("camera", 1)
    # Local camera; an IP camera URL could be passed to VideoCapture instead.
    capture = cv2.VideoCapture(0)

    try:
        while True:
            success, img = capture.read()
            if not success:
                # No frame was delivered (camera missing or disconnected);
                # passing the empty frame to imshow would raise.
                print('无法从摄像头读取画面')
                break
            cv2.imshow("camera", img)

            key = cv2.waitKey(10)
            if key == 27:    # Esc: quit
                break
            if key == 32:    # Space: take a snapshot and recognize it
                filename = 'frames.jpg'  # where the snapshot is stored
                cv2.imwrite(filename, img)
                s = img_to_str(filename)
                sxtou(s)
                print(s)
    finally:
        # Release the camera and close the window on every exit path.
        capture.release()
        cv2.destroyWindow("camera")
def sxtou(ss):
    """Pull amount and order number out of an OCR result and verify them.

    The recognized lines are scanned from index 1. The line preceding
    "当前状态" is taken as the amount, and the line following "交易单号" or
    "转账单号" as the order number. When an order number is found, both values
    are handed to ``chuli`` and the scan stops.

    Args:
        ss: Decoded OCR response; its ``'words_result'`` entry is a list of
            dicts that each carry a ``'words'`` string.

    Returns:
        None. Nothing happens if no order-number label (with a following
        line) is present or the response has no ``'words_result'``.
    """
    words = [entry['words'] for entry in ss.get('words_result') or []]
    # Default used when the order label appears before any "当前状态" line.
    jinge = ''
    # Start at 1: the amount is read from the line *before* the label, so
    # index 0 can never be a valid label position. Bounding the loop by the
    # real length avoids the IndexError of the former fixed 1..999 range.
    for m in range(1, len(words)):
        if words[m] == "当前状态":
            jinge = words[m - 1]
        if words[m] in ("交易单号", "转账单号"):
            if m + 1 < len(words):
                chuli(jinge, words[m + 1])
            break
def tupian(tex):
    """Collect amount and order number from one OCR result.

    The recognized lines are scanned from index 1. The line preceding
    "当前状态" is taken as the amount, and the line following "交易单号" or
    "转账单号" as the order number; the scan stops at that label.

    Exactly one entry is appended to each of the module-level lists ``litq``
    (amount) and ``lith`` (order number) per call. An empty string is used
    for a value that was not found, so both lists stay the same length and
    stay aligned with a per-image list kept by the caller. If "当前状态"
    occurs several times before the order label, the last occurrence wins.

    Args:
        tex: Decoded OCR response; its ``'words_result'`` entry is a list of
            dicts that each carry a ``'words'`` string.
    """
    words = [entry['words'] for entry in tex.get('words_result') or []]
    jine = ''
    danhao = ''
    # Bounded by the real number of lines; the former fixed 1..999 range
    # raised IndexError whenever the order label was missing.
    for i in range(1, len(words)):
        if words[i] == "当前状态":
            jine = words[i - 1]
        if words[i] in ("交易单号", "转账单号"):
            if i + 1 < len(words):
                danhao = words[i + 1]
            break
    litq.append(jine)
    lith.append(danhao)
def to_excel(n, y, m):
    """Write three parallel lists into the worksheet and save the workbook.

    Row ``i + 2`` of the module-level sheet ``ws`` receives ``m[i]`` in
    column 1, ``n[i]`` in column 2 and ``y[i]`` in column 3; the workbook
    ``wb`` is then saved to ``./数据库.xlsx``.

    Only positions present in all three lists are written. Previously,
    lists of unequal length raised IndexError after a partial write and the
    workbook was never saved.

    Args:
        n: Values for column 2.
        y: Values for column 3.
        m: Values for column 1.
    """
    for row, (first, second, third) in enumerate(zip(m, n, y), start=2):
        ws.cell(row=row, column=1, value=first)
        ws.cell(row=row, column=2, value=second)
        ws.cell(row=row, column=3, value=third)
    wb.save('./数据库.xlsx')
# Walk the directory where the mailbox images are stored
def get_filelist(dir):
    """Recursively OCR every file below *dir* and record the results.

    Sub-directory names are printed as they are encountered. For each file
    the name is printed and appended to ``litn``, the file is recognized with
    ``img_to_str``, the result is passed to ``tupian``, the workbook is
    rewritten through ``to_excel`` and finally the full path is printed.

    Args:
        dir: Root directory to walk.
    """
    for current_root, subdir_names, file_names in os.walk(dir):
        for subdir in subdir_names:
            print(subdir)
        for file_name in file_names:
            print(file_name)
            litn.append(file_name)
            full_path = os.path.join(current_root, file_name)
            recognized = img_to_str(full_path)
            tupian(recognized)
            to_excel(litq, lith, litn)
            print(full_path)
def chuli(str_a, str_b):
    """Check whether an (amount, order number) pair exists in the workbook.

    ``数据库.xlsx`` is reloaded on every call. Column B and column C of the
    active sheet are compared row by row against *str_a* and *str_b*, and a
    message box reports whether a row matched both values.

    The module-level lists ``stu_num`` (column B) and ``stu_num2``
    (column C) are refreshed in place with the values just loaded.
    Previously they were extended on every call, so they grew without bound
    and kept rows from earlier versions of the workbook.

    Args:
        str_a: Amount to look for in column B.
        str_b: Order number to look for in column C.
    """
    workbook1 = openpyxl.load_workbook('数据库.xlsx')
    # Work on the active sheet
    worksheet1 = workbook1.active
    # Slice assignment keeps the same list objects for any other code that
    # holds a reference to them, while discarding stale content.
    stu_num[:] = [cell.value for cell in worksheet1['B']]
    stu_num2[:] = [cell.value for cell in worksheet1['C']]
    py2 = any(
        str_a == amount and str_b == order_no
        for amount, order_no in zip(stu_num, stu_num2)
    )
    if py2:
        win32api.MessageBox(0, "数据库里有完全相同的数据,检验通过!!", "提示", win32con.MB_OK)
    else:
        win32api.MessageBox(0, "数据库没有完全相同的数据,检验不通过!!", "提示", win32con.MB_OK)
if __name__ == '__main__':
    # Re-scan the image directory only when the number of image files differs
    # from the number of data rows already stored in the sheet (row 1 is the
    # header). The former check compared an int with the list returned by
    # glob.glob(), which is never equal, so every image was sent to the OCR
    # service again on each start.
    image_dir = 'D:\\修改\\邮箱图片存放'
    image_count = sum(len(files) for _, _, files in os.walk(image_dir))
    if ws.max_row - 1 != image_count:
        get_filelist(image_dir)
    main()




上一篇:5 python 装饰器方式的添加路由


下一篇:java中如何打印出一个类中所有变量呢?