A two-layer LSTM is used, with tanh as the activation function, mean_squared_error as the loss function, and Adam as the optimizer.
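For orientation, here is a minimal, self-contained sketch of just that architecture; the 50-unit layer sizes match the full training code further down, while timesteps and n_features are placeholder values standing in for the window length and the number of input columns:

from keras.models import Sequential
from keras.layers import LSTM, Dense

timesteps, n_features = 30, 5   # placeholder window length and feature count
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(timesteps, n_features)))  # first LSTM layer passes its full sequence on
model.add(LSTM(50))                                                              # second LSTM layer keeps only its final hidden state
model.add(Dense(1, activation='tanh'))                                           # single tanh output for the scaled close price
model.compile(loss='mean_squared_error', optimizer='adam')                       # MSE loss, Adam optimizer
model.summary()

The complete data preparation, windowing, and training code follows.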
Import the libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=20,10
from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
import time
import os
from keras.layers import BatchNormalization
from sklearn.preprocessing import MinMaxScaler
import math
from keras import backend
from sklearn.metrics import explained_variance_score as e_v
from sklearn.model_selection import learning_curve
Import the data and clean it:
epoch=10          # number of training epochs
train_len=0.7     # fraction of the data used for training
rf=0.04           # annual risk-free rate, used in the Sharpe ratio
cycle=252         # trading days per year
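The constants rf and cycle feed the Sharpe ratio calculation at the end of the loop: the daily excess return is the daily return minus rf/cycle, and the annualized Sharpe ratio is sqrt(cycle) times the mean of the excess returns divided by their standard deviation. A standalone sketch of that formula on made-up numbers (the daily_returns values here are purely illustrative):

import numpy as np

rf, cycle = 0.04, 252                                         # annual risk-free rate, trading days per year
daily_returns = np.array([0.001, -0.002, 0.0015, 0.003])      # illustrative daily returns
excess = daily_returns - rf / cycle                           # daily excess return over the risk-free rate
sharpe = np.sqrt(cycle) * excess.mean() / excess.std(ddof=1)  # annualized Sharpe ratio (sample std, as pandas uses)
print(sharpe)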
# Load the Shanghai Composite daily data and scale it to [0, 1]
df=pd.read_csv("D://999999.csv")
df["index"]=pd.to_datetime(df["index"],format='%Y-%m-%d')
# plt.plot(df['close'],label="close price history")
data=df.filter(['open','close','high','low','vol'])
dataset=data.values
data1=df.filter(['close'])
dataset1=data1.values
training_data_len = math.ceil( len(dataset) * train_len)
scaler=MinMaxScaler(feature_range=(0,1))
scaled_data=scaler.fit_transform(dataset)
scaler1=MinMaxScaler(feature_range=(0,1))
scaled_data1=scaler1.fit_transform(dataset1)
train_data=scaled_data[0:training_data_len,:]
train_data1=train_data.tolist()
time1=time.strftime('%Y-%m-%d')
sv_path='F:/pred_data/'+time1
os.makedirs(sv_path,exist_ok=True)
Since an LSTM is used, the choice of window length matters a lot, so a loop is used to compare different window lengths (a sketch of how the saved metrics could be collected and ranked follows after the loop):
for needlen in range(10,80):
    x_test=[]
    x_train=[]
    y_train=[]
    temp_train=[]
    temp1_train=[]
    temp_test=[]
    temp1_test=[]
    # slice out the training windows
    for j in range(needlen,training_data_len):
        for z in range(j-needlen,j-1):
            temp_train.append(train_data1[z])
        temp1_train=np.array(temp_train)
        x_train.append(temp1_train)
        temp_train=[]
        y_train.append(train_data[j,1])   # target: the scaled close price (column 1)
        if j<=needlen:                     # debug: print the very first window
            print(x_train)
            print(y_train)
    # for f in x_train[:50]:
    #     print(len(f))
    x_train,y_train=np.array(x_train),np.array(y_train)
    x_train=backend.cast_to_floatx(x_train)
    y_train=backend.cast_to_floatx(y_train)
    # Build the Sequential model by adding layers, then train it
    model=Sequential()
    model.add(LSTM(50,return_sequences=True,input_shape=(x_train.shape[1],x_train.shape[2])))
    model.add(LSTM(50))
    model.add(Dense(1,activation='tanh'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train,y_train,epochs=epoch)
    # slice out the test data
    test_data=scaled_data[training_data_len-needlen:,:]
    test_data1=test_data.tolist()
    y_test=dataset[training_data_len:,1]
    for i in range(needlen,len(test_data)):
        for g in range(i-needlen,i-1):
            temp_test.append(test_data1[g])
        temp1_test=np.array(temp_test)
        x_test.append(temp1_test)
        temp_test=[]
    x_test=np.array(x_test)
    # predict
    pred=model.predict(x_test)
    pred=np.reshape(pred,(pred.shape[0],pred.shape[1]))
    prediction=scaler1.inverse_transform(pred)   # map the scaled output back to prices
Compute the relevant metrics and save the comparison plot (this code continues inside the loop above):
    ac=e_v(y_test,prediction)   # explained variance between actual and predicted close prices
    print("explained variance: %.2f%%"%(ac*100.0))
    # plot the predictions against the original data
    train = data[:training_data_len]
    valid = data[training_data_len:].copy()
    valid['prediction'] = prediction.flatten()
    valid['return']=(valid['close'].shift(-1)-valid['close'])/valid['close']     # actual daily return
    valid['exreturn']=valid['return']-rf/cycle                                   # actual daily excess return
    sharperatio=math.sqrt(cycle)*valid['exreturn'].mean()/valid['exreturn'].std()
    valid['preturn']=(valid['prediction'].shift(-1)-valid['prediction'])/valid['prediction']   # predicted daily return
    valid['pexreturn']=valid['preturn']-rf/cycle                                 # predicted daily excess return
    sharperatio_p=math.sqrt(cycle)*valid['pexreturn'].mean()/valid['pexreturn'].std()
    plt.figure(figsize=(16,8))
    plt.title('2016-2021')
    plt.xlabel('date', fontsize=18)
    plt.ylabel('close price', fontsize=18)
    plt.plot(train['close'])
    plt.plot(valid[['close','prediction']])
    plt.legend(['Train','Val','Predictions'], loc='lower right')
    plt.savefig(f'{sv_path}/predict{needlen} {sharperatio:.2f} {sharperatio_p:.2f} {ac*100.0:.2f}.jpg')
    plt.close()
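Each pass of the loop only records its metrics in the saved file name. As mentioned above, one optional extension (not part of the original code) is to also collect the metrics in a list inside the loop and rank the window lengths afterwards; a sketch, assuming a hypothetical list called results that is appended to once per needlen with the values computed above, and a hypothetical output file name window_comparison.csv:

# hypothetical collection step inside the loop, right after the metrics are computed:
# results.append({'needlen': needlen, 'explained_variance': ac,
#                 'sharpe_actual': sharperatio, 'sharpe_predicted': sharperatio_p})

results = []   # would be filled as sketched in the comment above
summary = pd.DataFrame(results, columns=['needlen', 'explained_variance',
                                         'sharpe_actual', 'sharpe_predicted'])
summary = summary.sort_values('explained_variance', ascending=False)
summary.to_csv(sv_path + '/window_comparison.csv', index=False)
print(summary.head())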