#!/bin/bash

# subset_lat_dir.sh

 
 

# Copyright 2018 Jarvan Wang

# Copyright 2017 Vimal Manohar

# Apache 2.0.

 
 

# Default settings. The usage text advertises --cmd as a command-line
# option; presumably utils/parse_options.sh (sourced below) is what maps
# such options onto these variables -- confirm against that helper.
nj=40
cmd=run.pl

# Pick up the environment setup when a path.sh is present in the
# current directory.
if [ -f ./path.sh ]; then
  . ./path.sh
fi

. ./utils/parse_options.sh

 
 

# Exactly three positional arguments are required; anything else prints
# the usage message and exits with status 1.
if [ $# -ne 3 ]; then
  cat <<EOF
This script creates a lattice directory containing the subset of
utterances listed in <subset-data-dir>, taken from the original
lattice directory <lat-dir> (which holds lattices for a larger set
of utterances).

The number of split jobs in the output lattice directory is set by
--nj; it does not depend on the number of jobs in the original
lattice directory. If the subset data directory has too few speakers,
pass a smaller --nj.

Usage: $0 [options] <subset-data-dir> <lat-dir> <subset-lat-dir>
 e.g.: $0 data/train exp/tri3_lat_sp exp/tri3_lat
Options:
  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs.
  --nj <nj>                                         # number of jobs (splits) in <subset-lat-dir>.
EOF
  exit 1
fi

 
 

# Positional arguments.
subset_data="$1"   # data directory whose utterances are to be kept
lat_dir="$2"       # existing lattice directory that is read from
dir="$3"           # lattice directory that is written to

 
 

# Number of jobs the source lattice directory was written with; it
# determines how many lat.JOB.gz archives are read later on.
ori_nj=$(cat "$lat_dir/num_jobs") || exit 1

mkdir -p "$dir" || exit 1

# Best-effort copy of the model-related files: not every one of them has
# to exist in the source directory, so failures here are deliberately
# ignored.
cp "$lat_dir"/{final.mdl,*.mat,*_opts,tree} "$dir"/ || true
cp -r "$lat_dir/phones" "$dir" 2>/dev/null || true

 
 

# Unpack every lat.JOB.gz of the source directory into an uncompressed
# archive plus an scp index in $dir, one pair per original job.
# The continuation lines must follow each other directly: a blank line
# after a trailing backslash ends the command.
# $cmd is left unquoted on purpose, since it may carry extra options
# (e.g. "queue.pl <queue opts>").
$cmd JOB=1:$ori_nj "$dir/log/copy_lattices.JOB.log" \
  lattice-copy "ark:gunzip -c $lat_dir/lat.JOB.gz |" \
  "ark,scp:$dir/lat_tmp.JOB.ark,$dir/lat_tmp.JOB.scp" || exit 1

 
 

# Concatenate the per-job scp indexes into a single index. A missing
# per-job index is fatal, otherwise the merged index would be silently
# incomplete.
for ((n = 1; n <= ori_nj; n++)); do
  cat "$dir/lat_tmp.$n.scp" || exit 1
done > "$dir/lat_tmp.scp"

# Collapse duplicate utterance ids (the last entry for an id wins) and
# write the index back sorted by id. Perl's default sort compares the
# keys as plain strings. The unsorted, possibly duplicated index is kept
# as lat_tmp.scp.bak.
mv "$dir/lat_tmp.scp" "$dir/lat_tmp.scp.bak" || exit 1
perl -e 'my %hash;while(<>){chomp;($key,$ark)=split;$hash{$key}=$ark};for $key (sort keys %hash){printf("%s %s\n",$key,$hash{$key})}' \
  "$dir/lat_tmp.scp.bak" > "$dir/lat_tmp.scp" || exit 1

 
 

# Split the subset data directory into $nj pieces; stop if that fails,
# because the filtering jobs below read the per-piece utt2spk files.
utils/split_data.sh "$subset_data" "$nj" || exit 1

# For each piece, keep only the index entries whose utterance id appears
# in that piece's utt2spk and write the selected lattices to a gzipped
# archive. The continuation lines must follow each other directly: a
# blank line after a trailing backslash ends the command.
# $cmd is left unquoted on purpose, since it may carry extra options.
$cmd JOB=1:$nj "$dir/log/filter_lattices.JOB.log" \
  lattice-copy \
  "scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/lat_tmp.scp |" \
  "ark:| gzip -c > $dir/lat.JOB.gz" || exit 1

# Record how many archives the output directory contains.
echo "$nj" > "$dir/num_jobs"

# Cleanup of the intermediate archives and indexes is currently disabled,
# so the lat_tmp.* files stay in $dir.
#rm $dir/lat_tmp.*.{ark,scp} $dir/lat_tmp.scp

exit 0

 
 

# Previous post: "Sword Offer" -- arrange an array into the smallest number


# Next post: jQuery navigation animation