机器学习预测股票,保存对比图,计算夏普比率以及得分

运用两层LSTM,输出层激活函数为tanh,损失函数为mean_squared_error,优化器(optimizer)为Adam

导入库:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=20,10
from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
import time
import os
from keras.layers.normalization import BatchNormalization
from sklearn.preprocessing import MinMaxScaler
import math
from keras import backend
from sklearn.metrics import explained_variance_score as e_v
from sklearn.model_selection import learning_curve

导入数据,数据清洗

# Run configuration.
epoch = 10        # passed to model.fit(epochs=...)
train_len = 0.7   # fraction of rows that go into the training split
rf = 0.04         # risk-free rate; divided by `cycle` before it is subtracted from per-row returns
cycle = 252       # rows per year used to annualise the Sharpe ratio (presumably trading days)

# Load the data (per the original note: daily bars of the Shanghai index).
df = pd.read_csv("D://999999.csv")
df["index"] = pd.to_datetime(df["index"], format='%Y-%m-%d')

# All five columns feed the model; 'close' alone gets a second scaler so that
# predictions can be mapped back to price units.
data = df.filter(['open', 'close', 'high', 'low', 'vol'])
dataset = data.values
data1 = df.filter(['close'])
dataset1 = data1.values
training_data_len = math.ceil(len(dataset) * train_len)

# NOTE(review): both scalers are fitted on the full series, so the training
# rows are scaled with min/max values that include the later test period.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
scaler1 = MinMaxScaler(feature_range=(0, 1))
scaled_data1 = scaler1.fit_transform(dataset1)
train_data = scaled_data[0:training_data_len, :]
train_data1 = train_data.tolist()

# Output directory named after today's date. The backslash is escaped
# explicitly: the unescaped form relied on the invalid escape sequence "\p".
time1 = time.strftime('%Y-%m-%d')
sv_path = 'F:\\pred_data/' + time1
os.makedirs(sv_path, exist_ok=True)

由于运用的是LSTM,窗口长度的选取很重要,所以采用循环来对比不同的窗口长度

for needlen in range(10,80):
    # A fresh model is built for every window length; release the Keras
    # state left over from the previous iteration first.
    backend.clear_session()

    # Training samples: each input is the `needlen` scaled rows immediately
    # before row j (all feature columns); the target is column 1 of row j,
    # which is 'close' in the column order used to build the dataset.
    x_train = [train_data[j-needlen:j] for j in range(needlen, training_data_len)]
    y_train = train_data[needlen:training_data_len, 1]
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = backend.cast_to_floatx(x_train)
    y_train = backend.cast_to_floatx(y_train)

    # Two stacked LSTM layers followed by a single tanh output unit.
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(LSTM(50))
    model.add(Dense(1, activation='tanh'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=epoch)

    # Test samples: start `needlen` rows before the split so that the first
    # test target has a full window of history. The windows are cut from
    # `test_data` itself, not from the training rows.
    test_data = scaled_data[training_data_len-needlen:, :]
    y_test = dataset[training_data_len:, 1]
    x_test = np.array([test_data[i-needlen:i] for i in range(needlen, len(test_data))])

    # Predict, then map the scaled output back to price units with the
    # close-only scaler.
    pred = model.predict(x_test)
    predtion = scaler1.inverse_transform(pred)

计算相关指标,保存对比图(代码接在上面循环里面)

	ac=e_v(y_test,predtion)
    print("accuracy:%.2f"%(ac*100.0))
    #可视化预测和原始数据
    train = data[:training_data_len]
    valid = data[training_data_len:]
    valid['predtion'] = predtion
    valid['return']=(valid['close'].shift(-1)-valid['close'])/valid['close']
    valid['exreturn']=valid['return']-rf/cycle
    sharperatio=math.sqrt(cycle)*valid['exreturn'].mean()/valid['exreturn'].std()
    valid['preturn']=(valid['predtion'].shift(-1)-valid['predtion'])/valid['predtion']
    valid['pexreturn']=valid['preturn']-rf/cycle
    sharperatio_p=math.sqrt(cycle)*valid['pexreturn'].mean()/valid['pexreturn'].std()
    plt.figure(figsize=(16,8))
    plt.title('2016——2021')
    plt.xlabel('date', fontsize=18)
    plt.ylabel('close prise', fontsize=18)
    plt.plot(train['close'])
    plt.plot(valid[['close','predtion']])
    plt.legend(['Train','Val','Predictions'], loc='lower right')
    plt.savefig(f'{sv_path}/predict%d %.2f %.2f %.2f.jpg'%(needlen,sharperatio,sharperatio_p,ac*100.0))
    plt.close()
上一篇:实时获取雪球某代码数据


下一篇:JDBC开发流程