RNN

2024-04-11 08:36:01
import numpy as np
import os, pickle, random, datetime

from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM

# Input folders, each tagged with the class label assigned to every file in it.
FOLDERS = [
    # Positive data files.
    {
        "class": 1,
        "folder": "/data1/linzn/data/ch_g729a_100_10000ms_FEAT",
    },
    # Negative data files.
    {
        "class": 0,
        "folder": "/data1/linzn/data/ch_g729a_0_10000ms_FEAT",
    },
]

# Length of one sample, in milliseconds.
SAMPLE_LENGTH = 10000
# Mini-batch size used while fitting the model.
BATCH_SIZE = 32
# Number of training iterations; each one fits for a single epoch.
ITER = 30
# Total number of samples divided by the number of testing samples,
# i.e. 1/FOLD of the shuffled data is held out for testing.
FOLD = 5

'''
Get the paths of all files in the folder
'''


def get_file_list(folder):
    """Return the path of every entry found directly inside ``folder``.

    Each name reported by ``os.listdir`` is joined onto ``folder``. Entries
    are not filtered and keep the order in which ``os.listdir`` yields them.
    """
    return [os.path.join(folder, entry) for entry in os.listdir(folder)]


'''
Read a codeword file
--------------------
input
    file_path
        The path to an ASCII text file.
        Each line holds the three integer codewords of one frame, separated by tabs: x1 x2 x3.
        The file has one line per frame.
output
    A list with one entry per line; each entry is the list of that line's codewords (kept as strings).
'''


def parse_sample(file_path):
    """Read a codeword file into a list of per-line field lists.

    Parameters
    ----------
    file_path
        Path to a text file whose lines hold tab-separated fields.

    Returns
    -------
    list
        One entry per line of the file, in file order. Each entry is the
        list of strings obtained by stripping surrounding whitespace from
        the line and splitting it on tabs. Values are not converted to
        numbers.
    """
    # The context manager closes the handle even if reading fails; the
    # previous version opened the file and never closed it.
    with open(file_path, 'r') as sample_file:
        # A bare strip() removes the same characters as the former
        # strip("\r\n\t").strip() chain.
        return [line.strip().split("\t") for line in sample_file]


'''
Save variable in pickle
'''


def save_variable(file_name, variable):
    """Serialize ``variable`` with pickle into the file ``file_name``.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten. Returns ``None``.
    """
    # Using a context manager guarantees the file is closed even when
    # pickle.dump raises (e.g. on an unpicklable object).
    with open(file_name, "wb") as file_object:
        pickle.dump(variable, file_object)

'''
Pruned RNN-SM training and testing
'''
if __name__ == '__main__':
    # Pair every file path with the class label of the folder it came from.
    all_files = []
    for folder in FOLDERS:
        for path in get_file_list(folder["folder"]):
            all_files.append((path, folder["class"]))

    # Shuffle in place, then persist the shuffled order.
    random.shuffle(all_files)
    save_variable('all_files.pkl', all_files)

    # Parse every file into its sample and collect the matching labels.
    np_all_samples_x = np.asarray([parse_sample(path) for path, _ in all_files])
    np_all_samples_y = np.asarray([label for _, label in all_files])

    save_variable('np_all_samples_x.pkl', np_all_samples_x)
    save_variable('np_all_samples_y.pkl', np_all_samples_y)

    # The first 1/FOLD of the shuffled data is held out for testing,
    # everything after it is used for training.
    sub_file_num = len(all_files) // FOLD

    x_test = np_all_samples_x[:sub_file_num]
    y_test = np_all_samples_y[:sub_file_num]

    x_train = np_all_samples_x[sub_file_num:]
    y_train = np_all_samples_y[sub_file_num:]

    print("Building model")
    model = Sequential()
    layers = [
        # First recurrent layer: SAMPLE_LENGTH / 10 time steps with 3 features
        # each; it returns the full sequence so the next LSTM can consume it.
        LSTM(50, input_length=SAMPLE_LENGTH // 10, input_dim=3, return_sequences=True),
        # Second recurrent layer.
        LSTM(50),
        # Single output unit followed by a sigmoid activation.
        Dense(1),
        Activation('sigmoid'),
    ]
    for layer in layers:
        model.add(layer)

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=["accuracy"])

    print("Training")
    # Fit one epoch at a time so a model snapshot can be saved after each
    # iteration; the held-out test split doubles as validation data.
    for round_no in range(1, ITER + 1):
        model.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1, validation_data=(x_test, y_test))
        model.save('model_%d.h5' % round_no)
码农公寓

相关文章