# Toy reproduction of the chained data-processing pipeline that the official WeNet code (https://github.com/wenet-e2e/wenet) added in its latest UIO mode.
import time
import random
class Process:
    """Lazy pipeline stage that applies ``f`` to an iterator over ``data``.

    Stages are chained by passing one ``Process`` as the ``data`` of the
    next one; no work is done until the outermost stage is iterated.

    Args:
        data: Any iterable, e.g. a list or another ``Process``.
        f: Callable that receives an iterator as its first argument and
            returns an iterator (typically a generator function).
        *args: Extra positional arguments forwarded to ``f``.
        **kwargs: Extra keyword arguments forwarded to ``f``.

    Raises:
        TypeError: If ``f`` is not callable.
    """

    def __init__(self, data, f, *args, **kwargs):
        if not callable(f):
            raise TypeError(
                "f must be callable, got {}".format(type(f).__name__)
            )
        self.data = data
        self.f = f
        # Stored so that stage parameters can be set when the chain is built.
        self.args = args
        self.kwargs = kwargs

    def __iter__(self):
        """Return the iterator produced by ``f`` over a fresh ``iter(data)``."""
        return self.f(iter(self.data), *self.args, **self.kwargs)
# Demo samples: 'a0'..'a9', 'b0'..'b9', 'c0'..'c9' as one flat list.
data = [prefix + str(idx) for prefix in ('a', 'b', 'c') for idx in range(10)]
def travel(d):
    """Yield every item of ``d`` unchanged, in order (identity stage)."""
    yield from d
def shuffle(d, sf_size=15):
    """Yield the items of ``d`` in locally shuffled order.

    Items are collected into a buffer; whenever the buffer holds
    ``sf_size`` items it is shuffled with ``random.shuffle`` and emitted.
    The final, partially filled buffer is shuffled as well, so inputs
    shorter than ``sf_size`` are still randomised.

    Args:
        d: Iterable of samples.
        sf_size: Number of samples per shuffle window.

    Yields:
        Every item of ``d`` exactly once; items never leave their window.
    """
    buf = []
    for item in d:
        buf.append(item)
        if len(buf) >= sf_size:
            random.shuffle(buf)
            yield from buf
            buf = []
    # Leftover samples: shuffle them too instead of emitting them in order.
    random.shuffle(buf)
    yield from buf
def sort(d, sort_size=5, key=None):
    """Yield the items of ``d`` sorted within consecutive windows.

    Items are buffered; each time the buffer holds ``sort_size`` items it
    is sorted and emitted. The final partial buffer is sorted and emitted
    too. Ordering is only local to each window, not global.

    Args:
        d: Iterable of samples.
        sort_size: Number of samples per sort window.
        key: Optional one-argument function used to extract the comparison
            key, passed to ``list.sort``.

    Yields:
        Every item of ``d`` exactly once, ascending within each window.
    """
    buf = []
    for item in d:
        buf.append(item)
        if len(buf) >= sort_size:
            buf.sort(key=key)
            yield from buf
            buf = []
    # Flush whatever is left once the input is exhausted.
    buf.sort(key=key)
    yield from buf
def batch(d, batch_size=4):
    """Release the items of ``d`` in groups of ``batch_size``.

    Items are buffered and emitted once ``batch_size`` of them have
    accumulated. Items are still yielded one at a time, not as lists.
    The trailing partial group is flushed at the end so no sample is lost.

    Args:
        d: Iterable of samples.
        batch_size: Number of samples buffered before they are released.

    Yields:
        Every item of ``d`` exactly once, in the original order.
    """
    buf = []
    for item in d:
        buf.append(item)
        if len(buf) >= batch_size:
            yield from buf
            buf = []
    # Emit the incomplete last group instead of silently dropping it.
    yield from buf
# Compose the chain innermost-first: travel -> shuffle -> batch.
# (The sort stage is left out here; wrap with Process(..., sort) to add it.)
p = Process(Process(Process(data, travel), shuffle), batch)

# Pull samples through the lazy chain, as a training loop would.
for i in p:
    print(i, 'train')