# Name of the factor under study; also the column name used downstream.
factor = "return_on_invested_capital"

# ---------------- 1. Prepare factor data ----------------
# 1.1 Fetch factor data

# Trading dates for the year under study.
date_data = get_trading_dates(start_date="2020-01-01", end_date="2021-01-01")

# The query does not depend on the date, so it is built once outside the loop.
q = query(
    fundamentals.financial_indicator.return_on_invested_capital
)

# Collect one cross-section per trading day and concatenate once at the end;
# concatenating inside the loop re-copies the accumulated frame every pass.
daily_frames = []
for date in date_data:
    # Cross-section for this day. The three-axis .iloc implies a panel-like
    # return value; position 0 on the second axis is selected.
    fund = get_fundamentals(q, entry_date=date).iloc[:, 0, :]
    # Tag every row with the day it belongs to.
    fund["date"] = date
    daily_frames.append(fund)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_data = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

# Two-level index: "date" first, then the original row index of each
# cross-section, giving a MultiIndex DataFrame.
multiindex_df = all_data.set_index(["date", all_data.index])
# 1.2 Process factor data (winsorize, standardize)
def mad(factor):
    """Winsorize factor values at 3 scaled median absolute deviations.

    Values above ``median + 3 * 1.4826 * MAD`` or below
    ``median - 3 * 1.4826 * MAD`` are replaced by the respective bound.
    NaN entries are ignored when computing the median and the MAD and are
    passed through unchanged, so missing data no longer disables clipping.

    Args:
        factor: One-dimensional array-like of numeric factor values
            (list, ndarray, pandas Series, ...).

    Returns:
        numpy.ndarray of floats with the same length as ``factor``; any
        pandas index on the input is not carried over.
    """
    values = np.asarray(factor, dtype=float)
    # nanmedian: a single NaN would otherwise turn both bounds into NaN and
    # make every comparison false, leaving the data unclipped.
    center = np.nanmedian(values)
    # Median absolute deviation from the median.
    spread = np.nanmedian(np.abs(values - center))
    # 1.4826 scales the MAD to be comparable to a standard deviation for
    # normally distributed data; 3 is the number of deviations to keep.
    half_width = 3 * 1.4826 * spread
    return np.clip(values, center - half_width, center + half_width)
def stand(factor):
    """Standardize factor values to z-scores: ``(factor - mean) / std``.

    Args:
        factor: Object exposing ``mean()`` and ``std()`` and supporting
            element-wise arithmetic (e.g. a pandas Series or numpy array).

    Returns:
        The standardized values, of whatever type the arithmetic on
        ``factor`` produces.
    """
    # The original call had a stray URL pasted in as an argument; mean()
    # takes no argument here.
    mean = factor.mean()
    std = factor.std()
    return (factor - mean) / std
# Winsorize, then standardize, the factor column in place. The column holds
# every date fetched above, so both steps use statistics pooled across dates.
# NOTE(review): per-date (cross-sectional) processing may have been intended — confirm.
multiindex_df[factor] = mad(multiindex_df[factor])
# The column is re-read from the DataFrame here, so stand() receives the
# DataFrame column rather than mad()'s raw return value.
multiindex_df[factor] = stand(multiindex_df[factor])
# ---------------- 2. Prepare close-price data ----------------
# Take the processed factor column as a Series.
singlefactor_series = multiindex_df[factor]

# Basic information for all stocks (instrument type "CS").
stocks = all_instruments("CS")

# Contract codes.
stocks_list = stocks["order_book_id"]

# Close prices over the same date window used for the factor data.
price = get_price(
    stocks_list,
    start_date="2020-01-01",
    end_date="2021-01-01",
    fields="close",
)

# ---------------- 3. Build the generic Alphalens input ----------------
# Both inputs are cast to float before being handed to Alphalens.
factor_return = utils.get_clean_factor_and_forward_returns(
    singlefactor_series.astype(float),
    price.astype(float),
)

# ---------------- 4. Compute the information coefficient (IC) ----------------
# IC values; per the original note, the default result is one IC per day.
return_invested_capital_IC = performance.factor_information_coefficient(factor_return)