Python基于dtw实现股票预测【多线程】

 # -*- coding: utf-8 -*-
"""
Created on Tue Dec 4 08:53:08 2018 @author: zhen
"""
from dtw import fastdtw
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import threading
import time
from datetime import datetime


def normalization(x):
    """Return the z-score of ``x``: ``(x - mean(x)) / std(x)``.

    ``np.std`` is the standard deviation (square root of the variance).
    A series whose standard deviation is exactly zero is returned as the
    centered values (all zeros) instead of the NaNs that 0/0 would produce.
    """
    centered = x - np.mean(x)
    spread = np.std(x)
    if spread == 0:
        # Constant series: dividing would give 0/0 = NaN for every point.
        return centered
    return centered / spread


def corrcoef(a, b):
    """Turn the Pearson correlation of ``a`` and ``b`` into a distance.

    The Pearson coefficient r lies in [-1, 1]; the value returned is
    ``16 * (1 - r) / (1 + r)``: 0 for r = 1, 16 for r = 0, and infinity
    for r = -1 (returned explicitly rather than through a division by zero).
    """
    r = np.corrcoef(a, b)[0, 1]
    if r == -1:
        return np.inf
    return 16 * ((1 - r) / (1 + r))


print("begin Main Thread")
startTimeStamp = datetime.now()  # current time; start of the run-time measurement

# Load the data
filename = 'C:/Users/zhen/.spyder-py3/sh000300_2017.csv'
# Columns 0 and 1 are read as strings.
# NOTE(review): presumably the date and the time of each sample - confirm
# against the CSV header.
all_date = pd.read_csv(filename, usecols=[0, 1], dtype='str')
all_date = np.array(all_date)
data = all_date[:, 0]
times = all_date[:, 1]

# Column 3 holds the numeric series that is searched.
data_points = pd.read_csv(filename, usecols=[3])
data_points = np.array(data_points)
data_points = data_points[:, 0]  # flatten to a 1-D array

topk = 10  # show only the top 10
baselen = 100  # length of the base segment; candidate lengths vary around it (50 to 150)
basebegin = 361  # index of the first point of the base segment
# Legend label for the base segment: "<first label>~<last label>".
basedata = (data[basebegin] + ' ' + times[basebegin] + '~'
            + data[basebegin + baselen - 1] + ' ' + times[basebegin + baselen - 1])
length = len(data_points)  # number of data points

# Define the custom thread class
class Thread_Local(threading.Thread):
    """Worker thread that runs ``split_data`` for one candidate window length.

    Attributes:
        thread_id: identifier supplied by the caller (only stored).
        name: thread name, printed when the thread starts.
        counter: value handed to ``split_data`` as its ``split_len`` argument.
    """

    def __init__(self, thread_id, name, counter):
        super().__init__()
        self.thread_id = thread_id
        self.name = name
        self.counter = counter
        # Flag that is set while the worker is considered running and
        # cleared by stop(); nothing in this file waits on it.
        self.__running = threading.Event()
        self.__running.set()

    def run(self):
        """Announce the thread and run the matching logic."""
        print("starting %s" % self.name)
        split_data(self, self.counter)

    def stop(self):
        """Clear the running flag."""
        self.__running.clear()


# Split the series into segments and run the matching (one call per thread)
def split_data(self, split_len):
    """Collect every candidate window of ``split_len`` points and search it.

    Windows are taken from the module-level ``data_points`` series. A window
    is a candidate when it lies entirely before the base segment
    ``[basebegin, basebegin + baselen)`` or starts at or after its end, so
    the base is never matched against a piece of itself.

    Args:
        self: the worker object, passed through to ``search``.
        split_len: number of points in each candidate window.
    """
    base = data_points[basebegin:basebegin + baselen]  # segment to match
    base_end = basebegin + baselen
    subseries = []
    dateseries = []
    # Start positions stop before length - split_len (same bound as before).
    for j in range(length - split_len):
        before_base = j < basebegin - split_len
        # The exclusion zone ends where the base ends (baselen), not at
        # basebegin + split_len: with split_len < baselen the old bound let
        # windows inside the base through, with split_len > baselen it
        # rejected windows that do not overlap the base at all.
        after_base = j >= base_end
        if before_base or after_base:
            subseries.append(data_points[j:j + split_len])
            dateseries.append(j)  # start position of the window
    search(self, subseries, base, dateseries)  # run the pattern matching


# Define the result variables
result = []      # best-matching window found by each search() call
base_list = []   # base segment each entry of `result` was matched against
date_list = []   # start position (index into the series) of each entry of `result`
# search() runs in several threads; the lock keeps the three lists aligned
# index-by-index by making each group of appends atomic.
_result_lock = threading.Lock()


def search(self, subseries, base, dateseries):
    """Find the window closest to ``base`` and record it in the result lists.

    The distance is the first value returned by ``fastdtw`` with the
    euclidean point distance. (``corrcoef(base, tt)`` is an alternative
    distance that is defined in this file but not used here.)

    Args:
        self: worker object; its ``stop()`` is called once the result is stored.
        subseries: candidate windows.
        base: segment to match against.
        dateseries: start position of each candidate, parallel to ``subseries``.

    Raises:
        ValueError: if ``subseries`` is empty.
    """
    if len(subseries) == 0:
        raise ValueError("search() needs at least one candidate window")
    # Segment search
    listdistance = []
    for segment in subseries:
        tt = np.array(segment)
        dist, _cost, _acc, _path = fastdtw(base, tt, dist='euclidean')
        listdistance.append(dist)
    # Sort and keep the index of the smallest distance.
    best = np.argsort(listdistance, kind='quicksort')[0]
    with _result_lock:
        result.append(subseries[best])
        print("result length is %d" % len(result))
        base_list.append(base)
        date_list.append(dateseries[best])
    # Mark the thread as finished
    self.stop()


# Vary the window length (shrink or stretch) from 0.5x to 1.5x baselen in steps of 10
loc = 0
threads = []
for split_len in range(round(0.5 * baselen), round(1.5 * baselen), 10):
    # One worker per candidate window length; loc doubles as a unique id.
    thread = Thread_Local(loc, "Thread" + str(loc), split_len)
    loc += 1
    # Start the thread
    thread.start()
    threads.append(thread)

# Wait until every worker has finished. Checking the threads themselves
# (instead of counting results against a hard-coded 10) also ends the wait
# when a worker dies, and guarantees all result lists are fully written.
boo = 1  # number of one-second polls so far
while any(worker.is_alive() for worker in threads):
    if boo % 100 == 0:
        print("has running %d s" % boo)
    boo += 1
    time.sleep(1)
boo = 0

# Segment search
# Re-score the winner of every worker against its base segment and rank them.
if not result:
    raise RuntimeError("no match was produced by the worker threads")
listdistance = []
for base_series, match in zip(base_list, result):
    tt = np.array(match)
    dist, _cost, _acc, _path = fastdtw(base_series, tt, dist='euclidean')
    listdistance.append(dist)
# Final ranking: indices of the matches ordered by increasing distance
index = np.argsort(listdistance, kind='quicksort')
print("closed Main Thread")
endTimeStamp = datetime.now()

# Compare the base segment with the best match
best = index[0]
match_start = date_list[best]
# Kept in its own name so the module-level `length` is not overwritten.
match_len = len(result[best])

plt.figure(0)
# linewidth='' is not a number, so the default line width is used instead.
plt.plot(normalization(base_list[best]), label=basedata)
begin = data[match_start] + ' ' + times[match_start]
end = data[match_start + match_len - 1] + ' ' + times[match_start + match_len - 1]
label = begin + '~' + end
plt.plot(normalization(result[best]), label=label)
plt.legend(loc='lower right')
plt.title('normal similarity search')
plt.show()
# total_seconds() includes whole days; .seconds alone wraps after 24 hours.
print('run time', int((endTimeStamp - startTimeStamp).total_seconds()), "s")

结果:

Python基于dtw实现股票预测【多线程】

has running 100 s
has running 200 s
has running 300 s
has running 400 s
has running 500 s
has running 600 s
has running 700 s
has running 800 s
has running 900 s
has running 1000 s
has running 1100 s
has running 1200 s
has running 1300 s
has running 1400 s
has running 1500 s
has running 1600 s
has running 1700 s
has running 1800 s
has running 1900 s
has running 2000 s
has running 2100 s
has running 2200 s
has running 2300 s
has running 2400 s
has running 2500 s
has running 2600 s
has running 2700 s
has running 2800 s
has running 2900 s
has running 3000 s
has running 3100 s
has running 3200 s
has running 3300 s
has running 3400 s
has running 3500 s
has running 3600 s
has running 3700 s
has running 3800 s
has running 3900 s
has running 4000 s
has running 4100 s
has running 4200 s
has running 4300 s
has running 4400 s
has running 4500 s
has running 4600 s
has running 4700 s
has running 4800 s
has running 4900 s
has running 5000 s
has running 5100 s
has running 5200 s
has running 5300 s
has running 5400 s
has running 5500 s
has running 5600 s
has running 5700 s
has running 5800 s
has running 5900 s
has running 6000 s
has running 6100 s
has running 6200 s
has running 6300 s
has running 6400 s
has running 6500 s
has running 6600 s
has running 6700 s
has running 6800 s
has running 6900 s
has running 7000 s
has running 7100 s
has running 7200 s
has running 7300 s
has running 7400 s
has running 7500 s
has running 7600 s
has running 7700 s
has running 7800 s
has running 7900 s
has running 8000 s
has running 8100 s
has running 8200 s
has running 8300 s
has running 8400 s
has running 8500 s
has running 8600 s
has running 8700 s
has running 8800 s
has running 8900 s
has running 9000 s
has running 9100 s
has running 9200 s
has running 9300 s
has running 9400 s
has running 9500 s
has running 9600 s
has running 9700 s
has running 9800 s
has running 9900 s
has running 10000 s
has running 10100 s
has running 10200 s
has running 10300 s
has running 10400 s
has running 10500 s
has running 10600 s
has running 10700 s
has running 10800 s
has running 10900 s
has running 11000 s
has running 11100 s
has running 11200 s
has running 11300 s
has running 11400 s
has running 11500 s
has running 11600 s
has running 11700 s
has running 11800 s
has running 11900 s
has running 12000 s
has running 12100 s
has running 12200 s
has running 12300 s
has running 12400 s
has running 12500 s
has running 12600 s
has running 12700 s
has running 12800 s
has running 12900 s
has running 13000 s
has running 13100 s
has running 13200 s
has running 13300 s
has running 13400 s
has running 13500 s
has running 13600 s
has running 13700 s
has running 13800 s
has running 13900 s
has running 14000 s
has running 14100 s
has running 14200 s
has running 14300 s
has running 14400 s
result length is 1
result length is 2
has running 14500 s
has running 14600 s
has running 14700 s
has running 14800 s
result length is 3
has running 14900 s
has running 15000 s
result length is 4
has running 15100 s
has running 15200 s
has running 15300 s
has running 15400 s
result length is 5
has running 15500 s
has running 15600 s
has running 15700 s
has running 15800 s
has running 15900 s
has running 16000 s
has running 16100 s
has running 16200 s
result length is 6
has running 16300 s
has running 16400 s
has running 16500 s
has running 16600 s
result length is 7
result length is 8
has running 16700 s
result length is 9
result length is 10
closed Main Thread

Python基于dtw实现股票预测【多线程】

上一篇:php中引用&的真正理解-变量引用、函数引用、对象引用


下一篇:学生各门课程成绩统计SQL语句大全