基于百度AI实现的语音识别搜索文件

网络安全课设:语音识别搜索文件

通过查询网上的方法,这里采用百度AI的方式来实现。百度语音识别链接:https://ai.baidu.com/tech/speech/asr 自己注册即可使用。
先上效果图:
基于百度AI实现的语音识别搜索文件
百度AI需要区分中文和英文,故添加了一个下拉框用于选择识别语言。搜索方式按照课设要求支持模糊搜索和精确搜索。“保存路径”按钮下面会有介绍,主要作用是把搜索路径下的所有文件存储起来,这里就不详细说明了。
要实现语音识别搜索文件第一步肯定就是语音识别了,实现代码如下:

import pyaudio
import wave
from aip import AipSpeech
import threading


class Audio:
    """Record microphone audio to ``test.wav`` and transcribe it via Baidu ASR.

    Typical use: ``start()`` begins capturing on a background thread,
    ``stop()`` ends the capture and writes ``test.wav``, and
    ``recognition()`` uploads that file and stores the outcome in
    ``self.result``.
    """

    def __init__(self, chunk=1024, channels=1, rate=16000):
        """Store the capture parameters.

        Args:
            chunk: Number of frames read from the input stream per call.
            channels: Number of input channels (1 = mono).
            rate: Sampling rate in Hz.
        """
        self.CHUNK = chunk
        self.FORMAT = pyaudio.paInt16  # sample format (16-bit integers)
        self.CHANNELS = channels  # mono by default
        self.RATE = rate  # 16000 Hz by default
        self.running = True
        self.frames = []
        self.result = ""
        self.language = 1537  # dev_pid for Chinese
        self._thread = None  # background capture thread, if one is active

    def start(self):
        """Start capturing audio on a background thread.

        Does nothing if a capture started by a previous ``start()`` is still
        running, so a double click cannot spawn two threads that append to
        the same frame list.
        """
        if self._thread is not None and self._thread.is_alive():
            return
        # Set the state here, before the thread exists, so that a stop()
        # issued immediately afterwards cannot be overwritten by the thread.
        self.running = True
        self.frames = []
        self._thread = threading.Thread(target=self._capture)
        self._thread.start()

    def recording(self):
        """Capture audio in the calling thread until ``self.running`` is False."""
        self.running = True
        self.frames = []
        self._capture()

    def _capture(self):
        """Read chunks from the default input device into ``self.frames``."""
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)
            try:
                while self.running:
                    self.frames.append(stream.read(self.CHUNK))
            finally:
                # Release the stream even if read() raises.
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

    def stop(self):
        """Stop capturing and write the recorded frames to ``test.wav``."""
        self.running = False
        worker = self._thread
        # Wait for the capture thread to finish its last read so the file
        # is not written while frames are still being appended.
        if worker is not None and worker is not threading.current_thread():
            worker.join()
        self._thread = None
        with wave.open('test.wav', 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            # Module-level helper: no PyAudio instance has to be created
            # (and then leaked) just to look up the sample width.
            wf.setsampwidth(pyaudio.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(self.frames))
        print("Saved")

    def change_language(self, str_language):
        """Select the recognition language.

        Args:
            str_language: ``"Chinese"`` or ``"English"``. Any other value
                leaves the current language unchanged and prints a warning.
        """
        if str_language == "Chinese":
            self.language = 1537
        elif str_language == "English":
            self.language = 1737
        else:
            print("选择语言异常")

    @staticmethod
    def get_file_content():
        """Return the raw bytes of ``test.wav``."""
        with open('test.wav', 'rb') as fp:
            return fp.read()

    def recognition(self):
        """Send ``test.wav`` to Baidu ASR and store the outcome in ``self.result``.

        On success ``self.result`` is the value of the response's
        ``'result'`` entry; otherwise it is the whole response object so the
        caller can inspect the failure.
        """
        # NOTE(security): credentials are hard-coded here; replace them with
        # the values of your own Baidu AI application and keep real keys out
        # of published source.
        app_id = '24040013'
        api_key = 'QsS4t8bfRDE6e9BIpDyZBaaV'
        secret_key = '8BugGc4o6UGStaXAGTHRdsGlrccxs96b'

        client = AipSpeech(app_id, api_key, secret_key)
        # Report the rate the audio was actually recorded at instead of a
        # literal 16000, so a non-default ``rate`` is not mislabelled.
        result = client.asr(self.get_file_content(), 'wav', self.RATE, {
            'dev_pid': self.language,
        })
        print(result)
        if 'result' in result:
            self.result = result['result']
        else:
            self.result = result

然后是搜索文件,首先是模糊搜索:

def fuzzy_search(start_path, filename):
    """List every file under ``start_path`` whose name contains ``filename``.

    Each matching full path is printed and appended to the result pane
    ``text1``; a completion notice is appended once the walk is finished.
    """
    text1.delete(1.0, END)
    for directory, _subdirs, names in os.walk(start_path):
        # Substring match on the bare file name, not on the directory part.
        for name in (candidate for candidate in names if filename in candidate):
            hit = os.path.join(directory, name)
            print(hit)
            text1.insert(END, hit)
            text1.insert(tkinter.INSERT, '\n')
    text1.insert(END, '查询完成!')

精确搜索需要提供文件后缀,而语音识别一般识别不出后缀。可以先把所有文件名的后缀去掉再进行比较,但是我当时为了省事没有这样做,后来也就忘记了……
精确搜索我想的是通过二分法查找文件名,进而取出其所有路径。

def binary_search(files, filename):
    """Return True if ``filename`` occurs in the sorted list ``files``.

    Args:
        files: File names sorted in ascending order. The result is
            undefined for unsorted input.
        filename: The exact name to look for.

    Returns:
        bool: True when an equal entry exists, otherwise False.
    """
    # Local import so this function is self-contained wherever it is pasted.
    from bisect import bisect_left

    # bisect works on indices, so no list slices are copied at each step
    # (the recursive slicing version copies half the list every level).
    idx = bisect_left(files, filename)
    return idx < len(files) and files[idx] == filename


def accurate_search(path, name):
    """Show every stored location of the file named exactly ``name``.

    The lookup uses ``Allfiles.json``, the index written earlier by the
    "save paths" button, which maps each file name to the list of
    directories containing it.

    Args:
        path: Search root from the UI. It is not consulted here; results
            come solely from the saved index.
        name: Exact file name, including its extension.
    """
    text1.delete(1.0, END)
    try:
        with open('Allfiles.json', 'r', encoding='utf-8') as f:
            file_dict = json.load(f)
    except FileNotFoundError:
        # Without an index there is nothing to search; tell the user instead
        # of raising inside the Tk callback where the error is invisible.
        text1.insert(END, '未找到 Allfiles.json,请先保存路径!')
        return

    # A dict lookup already answers "does this name exist?", so a separate
    # binary search over the keys is unnecessary.
    roots = file_dict.get(name)
    if not roots:
        text1.insert(END, '查询无结果!')
        return

    for root_path in roots:
        final_path = os.path.join(root_path, name)
        print(final_path)
        text1.insert(END, final_path + '\n')

所有代码如下:

import tkinter
from tkinter import *
from tkinter import scrolledtext
from tkinter import ttk
from create_fname import Audio   # 这里的create_fname即上文语音识别
import os
import json
import threading

# Shared recorder/recogniser instance used by the button callbacks below.
audio = Audio()
# Main application window: title and fixed initial size.
root = Tk()
root.title('语音识别搜索文件')
root.geometry('500x400')
# Tk variable bound to the language drop-down created further down.
language = StringVar()


def change_language(event):
    """Forward the language picked in the drop-down to the recorder.

    Bound to the combobox selection event; ``event`` itself is not used.
    """
    selected = choose_language.get()
    print(selected)
    audio.change_language(selected)


def start():
    """Begin recording and announce it in the result pane."""
    audio.start()
    # Append the notice and its newline together at END. Writing the newline
    # at the INSERT cursor would misplace it whenever the user has clicked
    # somewhere inside the result pane.
    text1.insert(END, '录音开始!' + '\n')


def over():
    """Stop recording, run recognition and put the text into the query box.

    Full stops (both '。' and '.') are stripped from the recognised text
    before it is shown. If recognition fails, the query box is cleared and
    the failure is reported in the result pane instead.
    """
    audio.stop()
    audio.recognition()
    recognised = audio.result
    print(recognised)
    text.delete(1.0, END)

    if isinstance(recognised, dict):
        # On failure Audio.recognition stores the whole response object.
        # Joining a dict would put its concatenated keys into the query box.
        # NOTE(review): 'err_msg' is presumably the service's error text;
        # the whole response is shown when that key is absent.
        detail = recognised.get('err_msg', recognised)
        text1.insert(END, '识别失败: {}\n'.format(detail))
        return

    result = ''.join(recognised).replace('。', '').replace('.', '')
    print(result)
    text.insert(END, result)


def search_start():
    """Run the file search on a background thread so the window stays responsive."""
    # Pass the function object itself. Writing ``target=file_search()`` would
    # run the whole search right here on the GUI thread and hand its return
    # value (None) to Thread, so the thread would do nothing.
    # daemon=True lets the program exit even if a long directory walk is
    # still in progress when the window is closed.
    thread = threading.Thread(target=file_search, daemon=True)
    thread.start()


def file_search():
    """Read the search settings from the UI and dispatch to the chosen search.

    The mode comes from the search-method drop-down, the root directory from
    the path entry and the file name from the recognition-result box.
    Nothing happens if the mode is neither of the two known values.
    """
    choice = search.get()
    print(choice)
    start_path = text_path.get()
    # The Text widget returns its content with trailing newline(s) attached.
    filename = text.get('0.0', 'end').strip('\n')
    print(choice)
    print(filename)

    handlers = {
        '模糊搜索': fuzzy_search,
        '精确搜索': accurate_search,
    }
    handler = handlers.get(choice)
    if handler is not None:
        handler(start_path, filename)


def fuzzy_search(start_path, filename):
    """List every file under ``start_path`` whose name contains ``filename``.

    Each matching full path is printed and appended to the result pane
    ``text1``; a completion notice is appended once the walk is finished.
    """
    text1.delete(1.0, END)
    for directory, _subdirs, names in os.walk(start_path):
        # Substring match on the bare file name, not on the directory part.
        for name in (candidate for candidate in names if filename in candidate):
            hit = os.path.join(directory, name)
            print(hit)
            text1.insert(END, hit)
            text1.insert(tkinter.INSERT, '\n')
    text1.insert(END, '查询完成!')


def binary_search(files, filename):
    """Return True if ``filename`` occurs in the sorted list ``files``.

    Args:
        files: File names sorted in ascending order. The result is
            undefined for unsorted input.
        filename: The exact name to look for.

    Returns:
        bool: True when an equal entry exists, otherwise False.
    """
    # Local import keeps the file's import block untouched.
    from bisect import bisect_left

    # bisect works on indices, so no list slices are copied at each step
    # (the recursive slicing version copies half the list every level).
    idx = bisect_left(files, filename)
    return idx < len(files) and files[idx] == filename


def accurate_search(path, name):
    """Show every stored location of the file named exactly ``name``.

    The lookup uses ``Allfiles.json``, the index that ``file_save`` writes,
    which maps each file name to the list of directories containing it.

    Args:
        path: Search root from the UI. It is not consulted here; results
            come solely from the saved index.
        name: Exact file name, including its extension.
    """
    text1.delete(1.0, END)
    try:
        with open('Allfiles.json', 'r', encoding='utf-8') as f:
            file_dict = json.load(f)
    except FileNotFoundError:
        # Without an index there is nothing to search; tell the user instead
        # of raising inside the Tk callback where the error is invisible.
        text1.insert(END, '未找到 Allfiles.json,请先保存路径!')
        return

    # A dict lookup already answers "does this name exist?", so a separate
    # binary search over the keys is unnecessary.
    roots = file_dict.get(name)
    if not roots:
        text1.insert(END, '查询无结果!')
        return

    for root_path in roots:
        final_path = os.path.join(root_path, name)
        print(final_path)
        text1.insert(END, final_path + '\n')


def file_save():
    """Index every file under the search path into ``Allfiles.json``.

    The index maps each file name to the list of directories that contain a
    file of that name, with the names stored in ascending order. A
    confirmation line is written to the result pane afterwards.
    """
    base_dir = text_path.get()
    text1.delete(0.0, END)

    locations = {}
    for folder, _subdirs, names in os.walk(base_dir):
        for name in names:
            if name not in locations:
                locations[name] = []
            locations[name].append(folder)

    # Rebuild the mapping in sorted key order; dicts keep insertion order,
    # so the JSON file is written with its keys sorted.
    ordered = {name: locations[name] for name in sorted(locations)}
    with open('Allfiles.json', 'w', encoding='utf-8') as json_file:
        json.dump(ordered, json_file, ensure_ascii=False, indent=4)

    text1.insert(END, '路径保存完成!')
    text1.insert(tkinter.INSERT, '\n')

# ---- Widget construction -------------------------------------------------
# Every widget is positioned with absolute coordinates via place(). The
# names bound here (choose_language, search, text, text_path, text1) are
# the globals that the callback functions above read and write.

# Recognition-language drop-down; a selection triggers change_language().
choose_language = ttk.Combobox(root, textvariable=language, font=('隶书', 12), width=10)
choose_language["value"] = ("Chinese", "English")
choose_language.current(0)  # preselect the first entry ("Chinese")
choose_language.place(x=80, y=20)
choose_language.bind("<<ComboboxSelected>>", change_language)
label_language = Label(root, text='选择语言', font=('隶书', 12))
label_language.place(x=10, y=20)
# Earlier radio-button variant of the language selector, kept disabled:
# Chinese = Radiobutton(root, text='Chinese', variable=language, font=('隶书', 12),
#                       value='Chinese', command=change_language, width=7)
# English = Radiobutton(root, text="English", variable=language, font=('隶书', 12),
#                       value='English', command=change_language, width=7)
# Chinese.place(x=100, y=20)
# English.place(x=300, y=20)
# Buttons that start and stop the recording.
start_button = Button(root, text='开始录音', font=('隶书', 12), width=7, height=1, command=start)
start_button.place(x=100, y=60)
over_button = Button(root, text='结束录音', font=('隶书', 12), width=7, height=1, command=over)
over_button.place(x=300, y=60)
# Search-mode drop-down; its value is read by file_search() through `search`.
search = StringVar()
search_method = ttk.Combobox(root, textvariable=search, font=('隶书', 12), width=10)
search_method["value"] = ("模糊搜索", "精确搜索")
search_method.current(1)  # preselect the second entry (exact search)
search_method.place(x=300, y=20)
# search_method.bind("<<ComboboxSelected>>", file_search)
label_search = Label(root, text='搜索方式', font=('隶书', 12))
label_search.place(x=230, y=20)
# Earlier radio-button variant of the search-mode selector, kept disabled:
# fuzzy = Radiobutton(root, text='模糊搜索', variable=search, font=('隶书', 12),
#                     value='fuzzy', width=7)
# accurate = Radiobutton(root, text='精确搜索', variable=search, font=('隶书', 12),
#                        value='accurate', width=7)
# fuzzy.place(x=100, y=100)
# accurate.place(x=300, y=100)
# One-line box holding the recognised text; it doubles as the file name
# that file_search() looks for.
label = Label(root, text='识别结果:', font=('隶书', 12))
label.place(x=40, y=110)
text = Text(root, font=('隶书', 12), width=20, height=1)
text.place(x=140, y=110)
# Entry for the root directory of the search, with a default value.
label_path = Label(root, text='搜索路径:', font=('隶书', 12))
label_path.place(x=40, y=160)
addr = StringVar()
addr.set('D:\\python')
text_path = Entry(root, textvariable=addr, font=('隶书', 12), width=20)
text_path.place(x=140, y=160)
# Buttons that trigger the search and the path-index save.
search_button = Button(root, text='搜索', font=('隶书', 12), width=7, command=search_start)
search_button.place(x=340, y=110)
save_button = Button(root, text='保存路径', font=('隶书', 12), width=7, command=file_save)
save_button.place(x=340, y=160)
# Scrollable pane that receives search results and status messages.
label1 = Label(root, text='搜索结果:', font=('隶书', 12))
label1.place(x=10, y=210)
text1 = scrolledtext.ScrolledText(root, font=('隶书', 12), width=40, height=10)
text1.place(x=90, y=210)
# Enter the Tk event loop; this call blocks until the window is closed.
root.mainloop()

写完整个课设之后才忽然想起来:二分查找虽然比顺序查找效率更高,但在这里它只用来判断文件名是否存在,最终还是要去字典中取出路径;而字典按键查找的耗时远低于二分查找,因此完全没有必要再进行二分查找。(字典是根据哈希值进行查找的,效率很高:单纯论查找复杂度,对于无冲突的哈希表而言为 O(1),而二分查找为 O(log n)。)
写这篇博客距离课设结束已经过去了很久,很多问题也许考虑的不够周全,欢迎各位指正。

上一篇:idea 创建Android项目Language Level太低,修改语言级别


下一篇:2021-04-14