用AutoHotkey调用百度ocr接口提取增值税发票相关字段并写到Excel

功能介绍:

提取指定文件夹下所有增值税发票(支持 jpg、png、pdf 格式,pdf 暂时只处理第1页)的信息,从Excel表当前选中的单元格开始逐行写入;同时按可自定义的规则重命名原始发票,并复制到新文件夹。

由于要用到百度的接口,所以需要注册百度智能云+实名认证+创建应用+领取资源

  1. https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (产品服务→人工智能→文字识别)
  2. 实名认证
  3. 创建应用→随便输入应用名称→立即创建→查看应用详情→记录 appid apikey secretkey
  4. 领取相应的资源:文字识别→概览→右侧【领取免费资源】→选中【财务票据OCR】→全部→0元领取
  5. 等待资源到账:右键【资源列表】→已领取资源→核实是否拥有资源

使用步骤:

  1. 打开任意Excel表,选中第一个要填的单元格
  2. 阅读脚本说明,修改相应内容后运行即可
; Prerequisites: Baidu AI Cloud account + real-name verification + an application + claimed OCR quota (finance)
;   https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (Products -> AI -> Text Recognition)
;   Complete real-name verification
;   Create an application -> any name -> create -> open its details -> note appid / apikey / secretkey
;   Claim the quota:
;       Text Recognition -> Overview -> "claim free resources" -> select "financial bill OCR" -> all -> claim
;       Wait for it to arrive: resource list -> claimed resources
; Search for "hymodify" to find every value that must be adapted
; What the script does:
;   Reads every invoice in folder dn0 (first page only for pdf), writes one row per invoice into the
;   currently open Excel sheet starting at the selected cell, and copies each file into dn1
;   under a new name derived from the OCR result.
#SingleInstance force

if (!ProcessExist("Excel.exe")) {
    msgbox("请打开Excel并选中第一个要写入单元格",,0x40000)
    ExitApp
}

if (0) {
    dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source invoice folder
    dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify destination folder (renamed copies go here)
} else {
    dn0 := DirSelect(, 2, "选择【旧】发票文件夹")
    dn1 := DirSelect(, 2, "选择【新】发票文件夹")
}
; DirSelect yields an empty string when the dialog is cancelled. Without this guard the
; file loop below would run on "\*.*", i.e. the root of the current drive.
dn0 := RTrim(dn0, "\")
dn1 := RTrim(dn1, "\")
if (!strlen(dn0) || !strlen(dn1) || !DirExist(dn0)) {
    msgbox("未选择有效的发票文件夹,脚本已退出",,0x40000)
    ExitApp
}
; The destination must exist in both configuration branches, otherwise every copy fails.
if !DirExist(dn1)
    DirCreate(dn1)

; Column title -> key in the OCR "words_result" object. Order defines the column order.
arrOcr := [
    ["发票代码","InvoiceCode"],
    ["发票号码","InvoiceNum"],
    ["开票日期","InvoiceDate"],
    ["校验码","CheckCode"],
    ["机器编号","MachineCode"],
    ["金额","AmountInFiguers"],
    ["服务名称1","CommodityName"],
    ["税率1","CommodityTaxRate"],
    ["税额1","CommodityTax"],
    ["大写金额","AmountInWords"],
    ["销售方名称","SellerName"],
    ["销售方纳税人识别号","SellerRegisterNum"],
    ["销售方地址","SellerAddress"],
    ["销售方开户行","SellerBank"],
    ["购买方名称","PurchaserName"],
    ["购买方纳税人识别号","PurchaserRegisterNum"],
    ["购买方地址","PurchaserAddress"],
    ["购买方开户行","PurchaserBank"],
]
; Extra columns appended after the OCR columns.
arrOther := [
    "新文件名", ;derived from the OCR result
    "原文件名",
    "序号",
]

csOcr := arrOcr.length()
cs := csOcr+arrOther.length()
arrA := ComObjArray(12, 1, cs) ;1 x cs SAFEARRAY of VARIANT, written to a whole row at once
xl := ox()
st := xl.ActiveSheet
ac := xl.ActiveCell
r := 0 ;number of data rows written so far
arrError := [] ;names of files that could not be copied
if (ac.row == 1) { ;selection is on row 1: format the sheet as text and write the header row
    st.cells.NumberFormat := "@"
    for _, arr in arrOcr
        arrA[0,A_Index-1] := arr[1]
    for _, v in arrOther
        arrA[0,csOcr+A_Index-1] := v
    ac.resize(1,cs).value := arrA
    rng1 := ac.offset(1).resize(1,cs) ;first data row is below the header
} else
    rng1 := ac.resize(1,cs)
loop files, dn0 . "\*.*", "RF" { ;hymodify "R" also walks sub-folders
    if (A_LoopFileAttrib ~= "[HS]") ;skip hidden and system files
        continue
    ; Anchored with $ so that only the real extension is tested ("a.pdf.bak" is skipped).
    if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify accepted file types
        continue
    tooltip(A_Index . "`n" . A_LoopFileName)
    ; A failed request must not abort the whole batch: treat it like an unrecognised invoice,
    ; which is still listed in the sheet and copied with the "__" prefix below.
    try
        objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
    catch
        objOcr := ""
    ; hyf_objView(objOcr)
    arrA := ComObjArray(12, 1, cs) ;fresh row buffer for every file
    noExt := ""
    if (isobject(objOcr) && objOcr["TotalAmount"]) { ;recognition succeeded
        for _, arr in arrOcr {
            res := objOcr[arr[2]]
            if (isobject(res)) { ;per-line fields arrive as an array of {word: ...}; keep the first line
                if (res.length())
                    arrA[0,A_Index-1] := res[1]["word"]
            } else
                arrA[0,A_Index-1] := res
        }
        noExt := format("{1}-{2}", objOcr["AmountInFiguers"],objOcr["InvoiceNum"]) ;hymodify new file name rule, default is (amount-invoice number)
        arrA[0,csOcr] := noExt
    }
    ; Columns that do not depend on OCR
    arrA[0,csOcr+1] := A_LoopFileName
    arrA[0,csOcr+2] := r+1
    rng1.offset(r).value := arrA
    r++
    ; Copy the file: recognised invoices get the new name, the others keep theirs with a "__" prefix.
    SplitPath(A_LoopFileFullPath,,, ext, nameNoExt)
    if (strlen(noExt))
        target := format("{1}\{2}.{3}", dn1,noExt,ext)
    else
        target := format("{1}\__{2}.{3}", dn1,nameNoExt,ext)
    ; Failures are collected for both kinds of copy and reported once at the end.
    try
        FileCopy(A_LoopFileFullPath, target)
    catch
        arrError.push(A_LoopFileName)
}
; Application.Hwnd is the Excel main window handle (st.parent is a Workbook object, not a handle).
WinActivate("ahk_id " . xl.Hwnd)
tooltip
if arrError.length()
    hyf_objView(arrError, "以下文件复制时出错了,请核实")
else
    msgbox("已完成",,0x40000)
return

; Returns the Excel Application COM object of the window matching winTitle.
; The object is obtained from the workbook pane (control class EXCEL71) through
; AccessibleObjectFromWindow with OBJID_NATIVEOM. Exits the script when the control
; or the native object model cannot be reached.
ox(winTitle:="ahk_class XLMAIN") {
    ctlID := ControlGetHwnd("EXCEL71", winTitle)
    if !ctlID
        ExitApp
    ; IID_IDispatch = {00020400-0000-0000-C000-000000000046}, laid out as two little-endian int64.
    VarSetCapacity(IID, 16, 0)
    NumPut(0x0000000000020400, IID, 0, "int64")
    NumPut(0x46000000000000C0, IID, 8, "int64")
    pacc := 0
    ; 4294967280 = (uint)-16 = OBJID_NATIVEOM
    if dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",4294967280, "ptr",&IID, "ptr*",pacc) != 0
        ExitApp ;without the window object the retry loop below could never succeed
    win := ComObject(9, pacc, 1)
    xl := ""
    ; Reading .application fails while the COM call is rejected; the original workaround is to
    ; send Escape and retry (presumably to leave cell edit mode). Retries are bounded so the
    ; script cannot hang forever sending Escape.
    loop 50 {
        try
            xl := win.application
        catch {
            ControlSend("{escape}", "EXCEL71", winTitle)
            sleep(100)
        }
    }
    until isobject(xl)
    if !isobject(xl)
        ExitApp
    return xl
}
return

; Renders an object as indented text, one "key<TAB>value" entry per item.
;   obj   - value to render; a non-object yields a short "not an object" message
;   char  - separator appended after every entry (default: newline)
;   level - nesting depth, i.e. number of tabs put in front of every entry
; Returns the rendered string. Nested objects are listed under their key, one tab deeper.
; The function keeps no state between calls, so it is safe to call repeatedly or with a
; non-zero level.
hyf_obj2Str(obj, char:="`n", level:=0) {
    if !isobject(obj)
        return "非对象,值为`n" . obj
    indent := ""
    loop level
        indent .= A_Tab
    out := ""
    try { ;enumeration can fail for some objects; keep whatever was rendered so far
        for k, v in obj {
            if isobject(v)
                out .= indent . k . char . hyf_obj2Str(v, char, level + 1)
            else
                out .= indent . k . A_Tab . v . char
        }
    }
    if (char != "`n") ;always terminate a level with a line break
        out .= "`n"
    return out
}

; Shows an object in an always-on-top message box.
;   obj  - object to display (rendered with hyf_obj2Str)
;   str  - optional heading placed on its own line above the rendered object
;   char - entry separator passed through to hyf_obj2Str
;   n    - extra msgbox option bits added to 0x40000 (always on top)
; Returns whatever msgbox returns.
hyf_objView(obj, str:="", char:="`n", n:=0) {
    text := hyf_obj2Str(obj,char)
    if strlen(str)
        text := str . "`n" . text
    return msgbox(text,,0x40000+n)
}

; Converts one image to a pdf next to it by running an external Python script (hidden window)
; and waits for it to finish.
;   fp - full path of the image; the pdf path is fp with its trailing word characters
;        (the extension) replaced by "pdf"
; Returns the pdf path that was passed to the converter.
_pic1ToPdf(fp) {
    fpPdf := RegExReplace(fp, "\w+$", "pdf")
    ; The command line is single-quoted so both paths can be wrapped in double quotes.
    RunWait(format('python d:\AA\tool\python\pdf\images2pdf.py "{1}" "{2}"', fp,fpPdf),, "hide")
    return fpPdf
}

; Small HTTP helper built on WinHttp.WinHttpRequest.5.1 plus the Baidu OCR calls used by the script.
; All methods are called on the class itself, e.g. _Web.baiduOcr_vatInvoice(path).
class _Web {

    ; Blocking HTTP GET.
    ; Returns the response text, or an empty string when sending fails.
    get(url) {
        rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
        rst.open("GET", url)
        try {
            rst.send()
            return rst.ResponseText
        }
        return ""
    }

    ; HTTP POST with an application/x-www-form-urlencoded body.
    ;   url      - target address
    ;   postData - object of field name -> value; every field is sent, joined with "&",
    ;              values are URL-encoded. The optional key "timeout" is not sent: it is the
    ;              argument for WaitForResponse (default -1).
    ;              A non-empty string is sent as the body unchanged.
    ;   Encoding - when set, the raw response bytes are decoded with this charset through
    ;              ADODB.Stream; otherwise ResponseText is returned
    ;   headers  - optional object of extra request headers (empty values are skipped)
    ; https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
    post(url, postData:="", Encoding:="", headers:="") {
        rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
        rst.open("POST", url)
        if isobject(headers) {
            for k, v in headers {
                if v
                    rst.SetRequestHeader(k, v)
            }
        }
        rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
        if isobject(postData) {
            body := ""
            for k, v in postData {
                if (k = "timeout") ;control value, not a form field
                    continue
                ; Accumulate every field; assigning instead of appending would send only the last one.
                body .= (strlen(body) ? "&" : "") . format("{1}={2}", k,this.UrlEncode(v))
            }
            rst.send(body)
            rst.WaitForResponse(postData.haskey("timeout") ? postData.timeout : -1)
        } else if strlen(postData) {
            rst.send(postData)
        } else {
            rst.send()
        }
        if Encoding && rst.ResponseBody {
            oADO := ComObjCreate("adodb.stream")
            oADO.Type := 1 ;binary
            oADO.Mode := 3 ;read/write
            oADO.Open()
            oADO.Write(rst.ResponseBody)
            oADO.Position := 0
            oADO.Type := 2 ;text
            oADO.Charset := Encoding
            res := oADO.ReadText()
            oADO.Close()
            return res
        }
        return rst.ResponseText
    }

    ; Requests an OAuth access token for the configured application.
    ; The token is remembered for the lifetime of the script so that a batch of invoices
    ; needs a single token request instead of one per file.
    ; Throws a string when the endpoint does not return an access_token.
    baiduToken() {
        static token := ""
        if strlen(token)
            return token
        appid := "修改" ;hymodify
        apikey := "修改" ;hymodify
        secretkey := "修改" ;hymodify
        host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
        res := this.get(host)
        if !strlen(res)
            throw "baiduToken`nno response from the token endpoint"
        obj := json.load(res)
        if !(isobject(obj) && obj.haskey("access_token"))
            throw "baiduToken`n" . res
        token := obj["access_token"]
        return token
    }

    ; Runs Baidu VAT-invoice OCR.
    ;   fp   - path of a pdf/jpg/png file, or (when longer than 256 characters) data that is
    ;          already base64-encoded
    ;   bArr - unused, kept for compatibility with existing callers
    ; Returns the "words_result" object of the response.
    ; Throws "error_code`nerror_msg" when the service reports an error.
    ; _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
    baiduOcr_vatInvoice(fp, bArr:=false) {
        ; One flag for both decisions, so a 256-character path is handled consistently.
        isPath := strlen(fp) <= 256
        b64 := isPath ? _toBase64(fp) : fp
        request_url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
        access_token := this.baiduToken()
        request_url := format("{1}?access_token={2}", request_url,access_token)
        if (isPath && (fp ~= "i)\.pdf$"))
            params := {"pdf_file":b64}
        else
            params := {"image":b64}
        response := this.post(request_url, params, "utf-8")
        obj := json.load(response)
        ; hyf_objView(obj)
        if (obj.haskey("error_code"))
            throw obj["error_code"] . "`n" . obj["error_msg"]
        else
            return obj["words_result"]
        ; Reads a file and returns its content as base64 without line breaks.
        _toBase64(fp) {
            f := FileOpen(fp, "r")
            if !isobject(f)
                throw "baiduOcr_vatInvoice`ncannot open " . fp
            size := f.length
            f.RawRead(Bin, size)
            f.Close()
            ; First call asks for the required length, second call fills the buffer. 0x01 = CRYPT_STRING_BASE64
            B64Len := 0
            DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",0, "UIntP",B64Len)
            VarSetCapacity(b64, B64Len << 1, 0)
            DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",&b64, "UIntP",B64Len)
            Bin := ""
            VarSetCapacity(Bin, 0)
            VarSetCapacity(b64, -1) ;let the variable pick up the length written by the DllCall
            res := StrReplace(b64, "`r`n")
            return res
        }
    }

    ; Percent-encodes a string for use in a URL or form body
    ; (original byte-by-byte version adapted from 万年书妖).
    ;   str - text to encode
    ;   enc - encoding the text is converted to before its bytes are escaped
    ; Unreserved characters (A-Z a-z 0-9 - . _ ~) are copied as they are, every other byte
    ; becomes %XX. Leaving unreserved characters alone keeps a base64 payload close to its
    ; original size instead of tripling it.
    UrlEncode(str, enc:="UTF-8") {
        r := ""
        VarSetCapacity(buff, strput(str, enc))
        strput(str, &buff, enc)
        while (code := numget(buff, A_Index - 1, "UChar")) { ;stops at the terminating zero byte
            if ((code >= 0x30 && code <= 0x39) || (code >= 0x41 && code <= 0x5A) || (code >= 0x61 && code <= 0x7A)
                || code = 0x2D || code = 0x2E || code = 0x5F || code = 0x7E)
                r .= chr(code)
            else
                r .= format("%{1:02X}", code)
        }
        return r
    }

}

; JSON parser/serializer used to decode the web service responses.
; JSON.Load(text) and JSON.Dump(value) are nested classes deriving from JSON.Functor;
; Functor.__call forwards a call on the class to the call() method of a fresh instance.
class JSON {
    /**
    * Method: Load
    *     Parses a JSON string into an AHK value
    * Syntax:
    *     value := JSON.Load( Text [, reviver ] )
    * Parameter(s):
    *     value      [retval] - parsed value
    *     Text    [In, ByRef] - JSON formatted string
    *     reviver   [In, opt] - function Object, similar to JavaScript's
    *                           JSON.Parse() 'reviver' parameter
    */
    class Load extends JSON.Functor {
        call(self, ByRef Text, reviver:="") {
            this.rev := isobject(reviver) ? reviver : False
            ; Object keys(And Array indices) are temporarily stored In arrays so that
            ; we can enumerate them In the order they appear In the Document/Text instead
            ; of alphabetically. Skip if No reviver function Is specified.
            this.keys := this.rev ? {} : False

            ; Each of these strings lists the characters that may legally come next.
            static quot := chr(34), bashq := "\" . quot
                , json_value := quot . "{[01234567890-tfn"
                , json_value_or_array_closing := quot . "{[]01234567890-tfn"
                , object_key_or_object_closing := quot . "}"

            key := ""
            is_key := False
            ; The parsed document is stored under the empty key of root; stack[1] is
            ; always the container currently being filled.
            root := {}
            stack := [root]
            next := json_value
            Pos := 0

            While((ch:=SubStr(Text, ++Pos, 1)) != "") {
                ; Whitespace between tokens is skipped
                if InStr(" `t`r`n", ch)
                    Continue
                if !InStr(next, ch, 1)
                    this.ParseError(next, Text, Pos)

                holder := stack[1]
                is_array := holder.IsArray

                if InStr(",:", ch) {
                    next := (is_key := !is_array && ch == ",") ? quot : json_value

                } else if InStr("}]", ch) {
                    stack.RemoveAt(1)
                    next := stack[1]==root ? "" : stack[1].IsArray ? ",]" : ",}"

                } else {
                    if InStr("{[", ch) {
                        ; Check if Array() Is overridden And if its return value has
                        ; the 'IsArray' property. if so, Array() will be called normally,
                        ; otherwise, use a custom base Object For arrays
                        static json_array := Func("Array").IsBuiltIn || ![].IsArray ? {IsArray: True} : 0

                        ; sacrifice readability For minor(actually negligible) performance gain
                        (ch == "{")
                            ? ( is_key := True
                            , value := {}
                            , next := object_key_or_object_closing )
                        ; ch == "["
                            : ( value := json_array ? new json_array : []
                            , next := json_value_or_array_closing )

                        stack.insertat(1, value)

                        if (this.keys)
                            this.keys[value] := []

                    } else {
                        if (ch == quot) {
                            ; Find the closing quote: a quote whose preceding text ends
                            ; in a backslash (after "\\" was neutralised) is escaped.
                            i := Pos
                            While(i:=InStr(Text, quot,, i+1)) {
                                value := StrReplace(SubStr(Text, Pos+1, i-Pos-1), "\\", "\u005c")
                                static tail := A_AhkVersion<"2" ? 0 : -1
                                if (SubStr(value, tail) != "\")
                                    Break
                            }

                            if (!i)
                                this.ParseError("‘", Text, Pos)

                            value := StrReplace(value,  "\/",  "/")
                                , value := StrReplace(value, bashq, quot)
                                , value := StrReplace(value,  "\b", "`b")
                                , value := StrReplace(value,  "\f", "`f")
                                , value := StrReplace(value,  "\n", "`n")
                                , value := StrReplace(value,  "\r", "`r")
                                , value := StrReplace(value,  "\t", "`t")

                            Pos := i ; update Pos

                            ; Any backslash still present must start a \uXXXX escape
                            i := 0
                            While(i:=InStr(value, "\",, i+1)) {
                                if !(SubStr(value, i+1, 1) == "u")
                                    this.ParseError("\", Text, Pos - strlen(SubStr(value, i+1)))
                                uffff := Abs("0x" . SubStr(value, i+2, 4))
                                if (A_IsUnicode || uffff < 0x100)
                                    value := SubStr(value, 1, i-1) . chr(uffff) . SubStr(value, i+6)
                            }

                            if (is_key) {
                                key := value, next := ":"
                                Continue
                            }

                        } else {
                            ; Bare token: number, true, false or null; it runs up to the
                            ; next delimiter, whitespace or end of text
                            value := SubStr(Text, Pos, i := RegExMatch(Text, "[\]\},\s]|$",, Pos)-Pos)

                            static Number := "Number", Integer :="Integer"
                            if value Is %Number% {
                                if value Is %Integer%
                                    value += 0
                            } else if (value == "True" || value == "False")
                                value := %value% + 0
                            else if (value == "null")
                                value := ""
                            else
                                ; we can do more here to pinpoint the actual culprit
                                ; but thats just too much extra work.
                                this.ParseError(next, Text, Pos, i)
                            Pos += i-1
                        }

                        next := holder==root ? "" : is_array ? ",]" : ",}"
                    } ; if InStr("{[", ch) { ... } else
                    ; Store the value in its container: appended for arrays, under key otherwise
                    is_array? key := holder.push(value) : holder[key] := value
                    if (this.keys && this.keys.haskey(holder))
                        this.keys[holder].Push(key)
                }

            } ; While ( ... )
            return this.rev ? this.Walk(root, "") : root[""]
        }

        ; Builds a message from what was expected at position Pos (line, column, character
        ; offset) and throws it as an Exception carrying the offending text.
        ParseError(expect, ByRef Text, Pos, len:=1) {
            static quot := chr(34), qurly := quot . "}"

            line := StrSplit(SubStr(Text, 1, Pos), "`n", "`r").Length()
            col := Pos - InStr(Text, "`n",, -(strlen(Text)-Pos+1))
            msg := Format("{1}`n`nLine:`t{2}`nCol:`t{3}`nChar:`t{4}"
                ,     (expect == "")     ? "Extra data"
                : (expect == "‘")    ? "Unterminated string starting at"
                : (expect == "\")    ? "Invalid \Escape"
                : (expect == ":")    ? "Expecting ‘:‘ Delimiter"
                : (expect == quot)   ? "Expecting Object key enclosed In double quotes"
                : (expect == qurly)  ? "Expecting Object key enclosed In double quotes Or Object closing ‘}‘"
                : (expect == ",}")   ? "Expecting ‘,‘ Delimiter Or Object closing ‘}‘"
                : (expect == ",]")   ? "Expecting ‘,‘ Delimiter Or Array closing ‘]‘"
                : InStr(expect, "]") ? "Expecting JSON value Or Array closing ‘]‘"
                :                      "Expecting JSON value(string, Number, True, False, null, Object Or Array)"
                , line, col, Pos)

            static offset := A_AhkVersion<"2" ? -3 : -4
            Throw Exception(msg, offset, SubStr(Text, Pos, len))
        }

        ; Applies the reviver depth-first: children are revived before their container.
        ; A child for which the reviver returns JSON.Undefined is removed.
        Walk(holder, key) {
            value := holder[key]
            if isobject(value) {
                For i, k In this.keys[value] { ; Check if ObjHasKey(value, k) ??
                    v := this.Walk(value, k)
                    if (v != JSON.Undefined)
                        value[k] := v
                    else
                        value.delete(k)
                }
            }

            return this.rev.call(holder, key, value)
        }
    }

    /**
    * Method: Dump
    *     Converts an AHK value into a JSON string
    * Syntax:
    *     str := JSON.Dump( value [, replacer, Space ] )
    * Parameter(s):
    *     str        [retval] - JSON representation of an AHK value
    *     value          [In] - any value(Object, string, Number)
    *     replacer  [In, opt] - function Object, similar to JavaScript's
    *                           JSON.stringify() 'replacer' parameter
    *     Space     [In, opt] - similar to JavaScript's JSON.stringify()
    *                           'Space' parameter
    */
    class Dump extends JSON.Functor {

        call(self, value, replacer:="", Space:="") {
            this.rep := isobject(replacer) ? replacer : ""
            this.gap := ""
            if (Space) {
                static Integer := "Integer"
                ; A numeric Space means that many blanks (at most 10); otherwise the
                ; first 10 characters of Space are used as the indent unit
                if (Space ~= "^\d+$") {
                    Loop(((n:=Abs(Space))>10 ? 10 : n))
                        this.gap .= " "
                } else {
                    this.gap := SubStr(Space, 1, 10)
                }
                this.indent := "`n"
            }
            return this.Str({"": value}, "")
        }

        ; Serializes holder[key]; recurses into objects and arrays.
        Str(holder, key) {
            value := holder[key]
            if (this.rep)
                value := this.rep.call(holder, key, holder.haskey(key) ? value : JSON.Undefined)

            if isobject(value) {
                ; Check Object Type, skip serialization For other Object types such as
                ; ComObject, Func, BoundFunc, FileObject, RegExMatchObject, Property, etc.
                static Type := A_AhkVersion<"2" ? "" : Func("Type")
                ;if (Type ? Type.call(value) == "Object" : ObjGetCapacity(value) != "")
                if (isobject(value)) { ;hy
                    if (this.gap) {
                        stepback := this.indent
                        this.indent .= this.gap
                    }

                    is_array := value.IsArray
                    ; Array() Is Not overridden, rollback to old method of
                    ; identifying Array-like objects. Due to the use of a For-Loop
                    ; sparse arrays such as '[1,,3]' are detected as objects({}).
                    if (!is_array) {
                        For i In value
                            is_array := i == A_Index
                        Until !is_array
                    }

                    str := ""
                    if (is_array) {
                        Loop(value.Length()) {
                            if (this.gap)
                                str .= this.indent
                            v := this.Str(value, A_Index)
                            str .= (v != "") ? v . "," : "null,"
                        }
                    } else {
                        colon := this.gap ? ": " : ":"
                        For k In value {
                            v := this.Str(value, k)
                            if (v != "") {
                                if (this.gap)
                                    str .= this.indent
                                str .= this.Quote(k) . colon . v . ","
                            }
                        }
                    }
                    if (str != "") {
                        str := RTrim(str, ",")
                        if (this.gap)
                            str .= stepback
                    }
                    if (this.gap)
                        this.indent := stepback
                    return is_array ? "[" . str . "]" : "{" . str . "}"
                }
            } else ; is_number ? value : "value"
                return type(value) != "String" ? value : this.Quote(value)
            ;return ObjGetCapacity([value])=="" ? value : this.Quote(value) ;hy
        }

        ; Wraps str in double quotes after escaping backslash, quote and control characters.
        Quote(str) {
            static quot := chr(34), bashq := "\" . quot
            if (str != "") {
                str := StrReplace(str,  "\",  "\\")
                ; , str := StrReplace(str,  "/",  "\/") ; optional In ECMAScript
                    , str := StrReplace(str, quot, bashq)
                    , str := StrReplace(str, "`b",  "\b")
                    , str := StrReplace(str, "`f",  "\f")
                    , str := StrReplace(str, "`n",  "\n")
                    , str := StrReplace(str, "`r",  "\r")
                    , str := StrReplace(str, "`t",  "\t")

                static rx_escapable := A_AhkVersion<"2" ? "O)[^\x20-\x7e]" : "[^\x20-\x7e]"
                ;While RegExMatch(str, rx_escapable, m) ;hy: disabled so that Chinese text is not turned into \u escapes
                ;str := StrReplace(str, m.Value, Format("\u{1:04x}", Ord(m.Value)))
            }
            return quot . str . quot
        }
    }

    /**
    * Property: Undefined
    *     Proxy For 'undefined' Type
    * Syntax:
    *     undefined := JSON.Undefined
    * Remarks:
    *     For use with reviver And replacer functions since AutoHotkey does Not
    *     have an 'undefined' Type. Returning blank("") Or 0 won't work since these
    *     can't be distinguished from actual JSON values. this leaves us with objects.
    *     Replacer() - the caller may return a non-serializable AHK objects such as
    *     ComObject, Func, BoundFunc, FileObject, RegExMatchObject, And Property to
    *     mimic the behavior of returning 'undefined' In JavaScript but For the sake
    *     of code readability And convenience, it's better to do 'return JSON.Undefined'.
    *     Internally, the property returns a ComObject with the variant Type of VT_EMPTY.
    */
    Undefined[] {
        get {
            static empty := {}, vt_empty := ComObject(0, &empty, 1)
            return vt_empty
        }
    }

    ; Base class that lets JSON.Load(...) and JSON.Dump(...) be called like functions.
    class Functor {
        __call(method, ByRef arg, args*) {
            ; When casting to call(), use a new instance of the "function Object"
            ; so as to avoid directly storing the properties(used across sub-methods)
            ; into the "function Object" itself.
            if isobject(method)
                return (new this).call(method, arg, args*)
            else if (method == "")
                return (new this).call(arg, args*)
        }
    }
}

 

用AutoHotkey调用百度ocr接口提取增值税发票相关字段并写到Excel

上一篇:C#中的多态性


下一篇:Codeforces Global Round 15 D. Array Differentiation