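# When the dataset is too large to be loaded into memory all at once, trying to
# do so will exhaust memory (out-of-memory, not a memory leak). In that case the
# model can be trained with fit_generator, which pulls batches from a generator.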
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras import layers
import numpy as np
import random
from sklearn.metrics import f1_score, accuracy_score
# Vocabulary cap handed to imdb.load_data(num_words=...) and to the Embedding layer.
max_features = 100
# Target sequence length handed to pad_sequences.
maxlen = 50
# Number of samples drawn per generated batch.
batch_size = 320

# load_data returns a (train, test) pair of (samples, labels) tuples.
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

# Bring every review to the same length so the samples form a 2-D array.
x_train, x_test = (
    pad_sequences(seqs, maxlen=maxlen) for seqs in (x_train, x_test)
)
def generator(features=None, labels=None, size=None):
    """Yield random mini-batches forever.

    Every batch is built by sampling ``size`` row indices uniformly at random
    (with replacement, via ``np.random.randint``) and indexing both arrays
    with the same indices, so samples and labels stay aligned.

    Args:
        features: Array of samples, indexed along axis 0. Defaults to the
            module-level ``x_train``.
        labels: Array of targets aligned with ``features`` along axis 0.
            Defaults to the module-level ``y_train``.
        size: Number of rows per batch. Defaults to the module-level
            ``batch_size``.

    Yields:
        An ``(x, y)`` tuple holding ``size`` rows of ``features`` and the
        matching ``size`` entries of ``labels``.
    """
    # Fall back to the script's globals so a bare ``generator()`` call keeps
    # working exactly as before.
    if features is None:
        features = x_train
    if labels is None:
        labels = y_train
    if size is None:
        size = batch_size

    # Never terminates on its own: the consumer decides how many batches to
    # pull (e.g. through steps_per_epoch).
    while True:
        rows = np.random.randint(0, len(features), size=size)
        yield features[rows], labels[rows]
# generator()
# Embedding -> two stacked GRU layers -> single sigmoid unit, trained with
# binary cross-entropy (binary classification).
model = Sequential()
model.add(layers.Embedding(max_features, 32, input_length=maxlen))
# return_sequences=True makes the first GRU emit its output at every timestep,
# which is what the second recurrent layer needs as input.
model.add(layers.GRU(64, return_sequences=True))
model.add(layers.GRU(32))
# model.add(layers.Flatten())
# model.add(layers.Dense(32,activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()
# history = model.fit(x_train, y_train, epochs=1,batch_size=32, validation_split=0.2)
# fit() receives x_train and y_train as arrays that are already fully loaded in
# memory. That is convenient, but with a very large dataset it is not possible
# to hold everything in memory at once (the process would run out of memory).
# fit_generator() instead takes a generator that produces the training batches.
# NOTE: newer Keras/TensorFlow releases deprecate fit_generator(); there,
# fit() accepts a generator directly.
history = model.fit_generator(
    generator(),
    epochs=1,
    # The generator is infinite, so one "epoch" is defined as this many batches.
    steps_per_epoch=len(x_train) // batch_size,
)
print(model.evaluate(x_test, y_test))
# predict_classes() is gone from newer Keras/TensorFlow releases. For a single
# sigmoid output it was equivalent to thresholding the predicted probability
# at 0.5, which works on every version. ravel() flattens the (n, 1) output to
# match the 1-D y_test.
y = (model.predict(x_test) > 0.5).astype('int32').ravel()
print(accuracy_score(y_test, y))
# Original article:
# https://blog.csdn.net/mlp750303040/article/details/89207658