# replace_align_path.py

# -*- encoding:utf-8 -*-
import os
import queue
import time
import threading
# import commands


def compute_time(f):
    """Decorator that prints the local start and end time of each call to ``f``.

    The wrapper forwards all positional and keyword arguments to ``f`` and
    returns whatever ``f`` returns, so it can decorate any callable (not only
    zero-argument functions whose result is ignored).

    Args:
        f: The callable to time.

    Returns:
        A wrapper with the same name/docstring as ``f``.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        start_time = time.asctime()
        result = f(*args, **kwargs)
        end_time = time.asctime()
        print('start_time: %s, end_time: %s' % (start_time, end_time))
        return result

    return wrapper


def source_get(source_file):
    """Collect every ``.align`` file found under ``source_file``.

    The directory tree is walked recursively with ``os.walk``; each match is
    returned as the walked directory joined to the file name with ``os.sep``.

    Args:
        source_file: Root directory to search.

    Returns:
        A list of paths to the ``.align`` files, in walk order.
    """
    return [
        os.sep.join([folder, name])
        for folder, _subdirs, names in os.walk(source_file)
        for name in names
        if name.endswith('.align')
    ]


def read_fvlist(fvlist_path='./mfcc.txt'):
    """Build a lookup from utterance name to its mfcc path.

    Each non-blank line of the list file is treated as a path. The key is the
    last ``/``-separated component up to its first ``.``; the value is the
    line with surrounding whitespace (including the trailing newline) removed,
    so it can be embedded in other text without carrying a line break.

    Args:
        fvlist_path: Path of the list file to read. Defaults to
            ``./mfcc.txt``, the location the script has always used.

    Returns:
        dict mapping utterance name -> stripped path line.
    """
    mfcc_path_dict = {}
    with open(fvlist_path, 'r') as f:
        for line in f:
            path = line.strip()
            if not path:
                # A blank line would otherwise register a bogus '' key.
                continue
            key = path.split('/')[-1].split('.')[0]
            mfcc_path_dict[key] = path
    return mfcc_path_dict


def replace_str(align_file, mfcc_path_dict):
    """Rewrite the quoted ``.rec`` paths of an align file in place.

    Every line that (ignoring surrounding whitespace) ends with ``.rec"`` is
    looked up by its bare file name in ``mfcc_path_dict``. When a non-empty
    entry exists, the line is replaced by that entry with ``.mfcc`` changed to
    ``.rec``, wrapped in double quotes. All other lines are copied unchanged.

    The result is written to ``<align_file>.bak`` and then moved over the
    original file.

    Args:
        align_file: Path of the align file to rewrite.
        mfcc_path_dict: Mapping of utterance name -> mfcc path, as produced by
            ``read_fvlist``. Values may or may not carry a trailing newline.
    """
    bak_file = '%s.bak' % align_file
    # 'w' rather than 'a': a stale .bak left by an interrupted run must not
    # be prepended to the new content.
    with open(align_file, 'r') as src, open(bak_file, 'w') as dst:
        for line in src:
            stripped = line.strip()
            if stripped.endswith('.rec"'):
                # '"*/utt001.rec"' -> 'utt001'
                key = stripped.strip('"').split('/')[-1][:-len('.rec')]
                mfcc_path = mfcc_path_dict.get(key)
                if mfcc_path:
                    # Strip first: a newline inside the value would push the
                    # closing quote onto its own line.
                    rec = mfcc_path.strip().replace('.mfcc', '.rec')
                    line = '"' + rec + '"' + '\n'
            dst.write(line)

    # Single-step replacement; the original file is never missing in between.
    os.replace(bak_file, align_file)
    print('Complete')


# @compute_time  # uncomment to print the start/end timestamps of the run
def main():
    """Rewrite the .rec paths of every align file found under ./align.txt."""
    targets = source_get('./align.txt')
    path_lookup = read_fvlist()
    for target in targets:
        replace_str(target, path_lookup)


if __name__ == '__main__':
    main()

# 上一篇:python使用matplotlib可视化线图(line plot)、在可视化图像中的指定位置添加横线、自定义线条的颜色、自定义线条的形式(add horizontal line)


# 下一篇:解决TypeError: tf__update_state() got an unexpected keyword argument 'sample_weight'