import os
import pandas as pd
from sklearn import linear_model

# Market-model event study.
# For every workbook in `path`, fit an ordinary-least-squares regression of
# each stock column on the index column over the estimation window, then use
# it to compute expected and abnormal returns over the event window.

# Input folder: one Excel workbook of daily returns per file.
path = r'D:\新数据\每日收益率'
# Output folders; each result is written under the same file name as its input.
expected_dir = r'D:\新数据\日预期收益率'    # expected (model-predicted) returns
abnormal_dir = r'D:\新数据\日超额收益率'    # abnormal (actual - expected) returns

MARKET_COLUMN = '000300'   # CSI 300 index column, used as the regressor
ESTIMATION_ROWS = 110      # leading rows forming the estimation window

filenames = os.listdir(path)
for filename in filenames:
    print(filename)

# to_excel() does not create missing parent folders, so make sure they exist.
os.makedirs(expected_dir, exist_ok=True)
os.makedirs(abnormal_dir, exist_ok=True)

# Apply the same processing to every workbook found in the input folder.
for i in filenames:
    excel_path = os.path.join(path, i)
    # Skip sub-directories and the "~$" lock files Excel leaves next to an
    # open workbook; neither can be parsed by read_excel.
    if i.startswith('~$') or not os.path.isfile(excel_path):
        continue

    # The context manager closes the handle once the workbook is parsed.
    with open(excel_path, 'rb') as f:
        data = pd.read_excel(f)

    # Keep the index (as read) in a 'time' column, then renumber rows from 0
    # so the positional window split below is unambiguous.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:ESTIMATION_ROWS]   # actual returns, estimation window
    data2 = data.iloc[ESTIMATION_ROWS:]   # actual returns, event window

    # Every column except 'time' and the index itself is regressed on the index.
    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove(MARKET_COLUMN)

    dfR = pd.DataFrame(data2['time'])     # expected returns, one column per stock
    dfAR = pd.DataFrame(data2['time'])    # abnormal returns, one column per stock

    # The index returns are identical for every stock: reshape them once.
    market_estimation = data1[MARKET_COLUMN].values.reshape(-1, 1)
    market_event = data2[MARKET_COLUMN].values.reshape(-1, 1)

    for m in feature:
        regr = linear_model.LinearRegression()
        regr.fit(market_estimation, data1[m].values.reshape(-1, 1))
        # Expected return over the event window.
        y_pred1 = regr.predict(market_event)
        # Abnormal return = actual return - expected return (event window).
        AR = data2[m].values.reshape(-1, 1) - y_pred1
        # Flatten the (n, 1) arrays so each is stored as a plain column.
        dfR[m] = y_pred1.ravel()
        dfAR[m] = AR.ravel()

    save_path1 = os.path.join(expected_dir, i)
    save_path2 = os.path.join(abnormal_dir, i)
    dfR.to_excel(save_path1, index=False)
    dfAR.to_excel(save_path2, index=False)