使用 Python 读取某文件夹下的所有 Excel 文件

2024-02-03 21:44:22
import os

import pandas as pd

from sklearn import linear_model

# Batch event-study script.
#
# For every workbook of daily returns found in `path`:
#   1. split the rows into an estimation window (first ESTIMATION_ROWS rows)
#      and an event window (all remaining rows);
#   2. for each stock column, regress the stock's return on the market index
#      return over the estimation window (ordinary least squares);
#   3. use the fitted line to predict the expected return over the event
#      window, and take actual - expected as the abnormal return;
#   4. write the expected and abnormal returns to workbooks of the same name
#      in two separate output folders.

path = r'D:\新数据\每日收益率'

# Output folders. Raw strings avoid relying on invalid escape sequences such
# as '\新', which newer Python versions warn about.
expected_dir = r'D:\新数据\日预期收益率'
abnormal_dir = r'D:\新数据\日超额收益率'

MARKET_COLUMN = '000300'   # CSI 300 index, used as the market return
ESTIMATION_ROWS = 110      # number of leading rows in the estimation window

# Make sure the output folders exist before writing into them.
os.makedirs(expected_dir, exist_ok=True)
os.makedirs(abnormal_dir, exist_ok=True)

filenames = os.listdir(path)

for filename in filenames:
    print(filename)

for i in filenames:
    excel_path = os.path.join(path, i)

    # Skip sub-folders and the '~$...' lock files Office creates while a
    # workbook is open; neither can be parsed as a workbook.
    if i.startswith('~$') or not os.path.isfile(excel_path):
        continue

    # The context manager closes the handle even if parsing fails.
    with open(excel_path, 'rb') as f:
        data = pd.read_excel(f)

    # Everything below is the identical processing applied to each workbook.

    # NOTE(review): read_excel is called without index_col, so the index is
    # presumably the default 0..n-1 range and 'time' is just the row position.
    # Confirm whether a date column was intended here.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:ESTIMATION_ROWS]   # actual returns, estimation window
    data2 = data.iloc[ESTIMATION_ROWS:]   # actual returns, event window

    # Stock columns: everything except the helper column and the market index.
    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove(MARKET_COLUMN)

    dfR = pd.DataFrame(data2['time'])
    dfAR = pd.DataFrame(data2['time'])

    # The regressor is the same for every stock, so build it once per file.
    market_estimation = data1[MARKET_COLUMN].values.reshape(-1, 1)
    market_event = data2[MARKET_COLUMN].values.reshape(-1, 1)

    for m in feature:
        regr = linear_model.LinearRegression()
        # A 1-D target makes predict() return a 1-D array, which can be
        # assigned to a DataFrame column directly.
        regr.fit(market_estimation, data1[m].values)

        y_pred1 = regr.predict(market_event)   # expected return, event window
        AR = data2[m].values - y_pred1         # abnormal = actual - expected

        dfR[m] = y_pred1    # expected return
        dfAR[m] = AR        # abnormal return

    save_path1 = os.path.join(expected_dir, i)
    save_path2 = os.path.join(abnormal_dir, i)

    dfR.to_excel(save_path1, index=False)
    dfAR.to_excel(save_path2, index=False)
码农公寓

相关文章