# -*- encoding:utf-8 -*-
import os
import queue
import time
import threading
# import commands
def compute_time(f):
    """Decorator that prints the local start and end time of each call to ``f``.

    The wrapper forwards all positional and keyword arguments to ``f`` and
    returns whatever ``f`` returns, so it can decorate any callable rather
    than only zero-argument functions whose result is ignored.

    Args:
        f: The callable to time.

    Returns:
        A wrapper with the same name/docstring as ``f`` that prints
        ``start_time: ..., end_time: ...`` after a successful call.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # time.asctime() with no argument formats the current local time.
        start_time = time.asctime()
        result = f(*args, **kwargs)
        end_time = time.asctime()
        print('start_time: %s, end_time: %s' % (start_time, end_time))
        return result

    return wrapper
def source_get(source_file):
    """Collect every ``.align`` file found under ``source_file``.

    The directory tree rooted at ``source_file`` is walked recursively with
    ``os.walk``; each matching file is returned as ``<dir><os.sep><name>``.

    Args:
        source_file: Root directory to search.

    Returns:
        A list of paths to the ``.align`` files, in ``os.walk`` order.
    """
    return [
        os.sep.join((folder, name))
        for folder, _subdirs, names in os.walk(source_file)
        for name in names
        if name.endswith('.align')
    ]
def read_fvlist(fvlist_path='./mfcc.txt'):
    """Build a lookup from utterance name to its mfcc path.

    Each non-blank line of the list file is treated as one path. The key is
    the final ``/``-separated component up to its first ``.``; the value is
    the path with surrounding whitespace (including the line terminator)
    removed, so it can be embedded in a quoted string without splitting the
    closing quote onto the next line.

    Args:
        fvlist_path: Path of the list file to read. Defaults to the
            ``./mfcc.txt`` file the script has always used.

    Returns:
        dict mapping utterance name -> stripped path. If a name occurs more
        than once, the last occurrence wins.
    """
    mfcc_path_dict = {}
    with open(fvlist_path, 'r') as f:
        for line in f:
            path = line.strip()
            if not path:
                # A blank line would otherwise register a bogus '' key.
                continue
            key = path.split('/')[-1].split('.')[0]
            mfcc_path_dict[key] = path
    return mfcc_path_dict
def replace_str(align_file, mfcc_path_dict):
    """Rewrite the ``.rec`` path lines of ``align_file`` in place.

    Every line whose stripped text ends with ``.rec"`` is looked up by its
    file name (last ``/`` component without the trailing ``.rec"``) in
    ``mfcc_path_dict``. On a hit the line is replaced by the mapped path,
    with ``.mfcc`` changed to ``.rec``, wrapped in double quotes. All other
    lines, and ``.rec`` lines without a mapping, are copied unchanged.

    The result is written to ``<align_file>.bak`` first and then moved over
    the original file. Prints ``Complete`` when done.

    Args:
        align_file: Path of the alignment file to rewrite.
        mfcc_path_dict: Mapping of utterance name -> mfcc path. Values may
            carry surrounding whitespace or a trailing newline.
    """
    bak_file = '%s.bak' % align_file
    # 'w' rather than 'a': a stale .bak left by an interrupted run must not
    # be prepended to the rewritten content.
    with open(align_file, 'r') as f1, open(bak_file, 'w') as f2:
        for line in f1:
            stripped = line.strip()
            if stripped.endswith('.rec"'):
                # Drop the 5-character '.rec"' suffix to get the lookup key.
                key = stripped.split('/')[-1][:-5]
                mfcc_path = mfcc_path_dict.get(key)
                if mfcc_path:
                    # Strip first so a newline kept in the mapping value
                    # cannot end up inside the quoted path.
                    rec = mfcc_path.strip().replace('.mfcc', '.rec')
                    line = '"' + rec + '"' + '\n'
            f2.write(line)
    # Single-step replacement once both files are closed; it also overwrites
    # an existing destination on platforms where os.rename would refuse to.
    os.replace(bak_file, align_file)
    print('Complete')
# @compute_time
def main():
    """Rewrite the ``.rec`` paths of every align file found under ``./align.txt``.

    Collects the align files with ``source_get``, loads the mfcc lookup with
    ``read_fvlist``, then runs ``replace_str`` on each file in turn.
    """
    # NOTE(review): './align.txt' is passed to a directory walk; confirm it
    # really names a directory, otherwise nothing will be processed.
    targets = source_get('./align.txt')
    lookup = read_fvlist()
    for target in targets:
        replace_str(target, lookup)
# Run the rewrite only when executed as a script, not when imported.
if __name__ == '__main__':
    main()