jqdata 的基础行情数据(get_price)中不包含换手率,需要另行获取。本文从财务数据(valuation 表)中取出换手率等字段,合并到日线数据中;30分钟数据则借助日线的复权因子来处理成交量。
话不多说,直接上代码:
import os
import sys

import jqdatasdk as JQ
import pandas as pd
# Path prefix (directory) under which get_day_data writes each stock's daily CSV.
stock_data_day_file = './data/day/'
# Path prefix (directory) under which get_m30_data writes each stock's 30-minute CSV.
stock_data_m30_file = './data/m30/'
# Fetch the daily bars and the valuation data for one security
def get_day_data(stock, start_date, end_date):
    """Download daily bars plus valuation fields for ``stock`` and save them as CSV.

    Daily bars are requested from ``JQ.get_price`` with ``fq='post'``; the
    valuation fields (``circulating_cap``, ``market_cap``, ``turnover_ratio``)
    come from ``JQ.get_fundamentals_continuously``. Both frames are joined on
    ``code``/``date`` and written to ``stock_data_day_file + stock + '.csv'``.

    Args:
        stock: Security code string; the part before the first ``.`` is
            stored as the integer ``code`` column.
        start_date: Forwarded to ``JQ.get_price``.
        end_date: Forwarded to ``JQ.get_price`` and to the valuation query.

    Returns:
        ``(save_path, factor_frame)`` where ``factor_frame`` holds the
        ``code``, ``date`` and ``factor`` columns of the merged data.
        ``(None, <empty DataFrame>)`` is returned when fewer than 100 daily
        rows remain after ``dropna()`` or when the valuation result has no
        entry for ``stock``.
    """
    # ---- Daily bars ---------------------------------------------------------
    price_fields = ['open', 'high', 'low', 'close', 'avg', 'volume', 'money', 'high_limit', 'low_limit',
                    'pre_close', 'factor', 'paused']
    daily = JQ.get_price(security=stock, start_date=start_date, end_date=end_date, frequency='1d',
                         fields=price_fields, fq='post').dropna()
    # Securities with fewer than 100 rows are discarded.
    if daily.shape[0] < 100:
        return None, pd.DataFrame({})

    # Turn the date index into a column, then derive the integer
    # ``date`` (YYYYMMDD) and ``code`` columns.
    daily = daily.reset_index()
    daily = daily.assign(
        date=pd.to_datetime(daily['index'].values).strftime(date_format='%Y%m%d'),
        code=stock.split('.')[0],
    ).astype({'date': int, 'code': int})

    # Keep the volume exactly as fetched in ``volume_fq``; ``volume`` becomes
    # fetched volume * factor / 100 (the original note: shares -> lots).
    daily['volume_fq'] = daily['volume']
    daily['volume'] = daily['volume_fq'] * daily['factor'] / 100
    # Turnover amount divided by 1000 (the original note: yuan -> thousand yuan).
    daily['money'] = daily['money'].div(1000)
    # Percentage change of close versus previous close.
    daily['pct_change'] = (daily['close'].div(daily['pre_close']) - 1) * 100

    daily = daily[['code', 'date', 'open', 'high', 'low', 'close', 'avg', 'pre_close', 'pct_change', 'volume',
                   'money', 'high_limit', 'low_limit', 'volume_fq', 'factor', 'paused']]

    # ---- Valuation data -----------------------------------------------------
    # Original notes: circulating_cap is in units of 10k shares and
    # turnover_ratio is a percentage.
    # NOTE(review): the original notes describe circulating_market_cap, but the
    # query selects market_cap - confirm which one is intended.
    valuation_query = JQ.query(
        JQ.valuation.circulating_cap,
        JQ.valuation.market_cap,
        JQ.valuation.turnover_ratio,
    ).filter(JQ.valuation.code.in_([stock]))
    # NOTE(review): the code below uses minor_axis / minor_xs, i.e. it expects
    # a pandas Panel back - confirm the installed pandas/jqdatasdk provide one.
    valuation_panel = JQ.get_fundamentals_continuously(valuation_query, end_date=end_date,
                                                       count=daily.shape[0])
    # A security missing from the panel has no valuation data (the original
    # note mentions bonds); without this check the lookup below would fail.
    if stock not in valuation_panel.minor_axis.values:
        return None, pd.DataFrame({})

    valuation = valuation_panel.minor_xs(stock).reset_index()
    valuation = valuation.assign(
        date=pd.to_datetime(valuation['day'].values).strftime(date_format='%Y%m%d'),
        code=stock.split('.')[0],
    ).astype({'date': int, 'code': int})
    valuation = valuation[['code', 'date', 'circulating_cap', 'market_cap', 'turnover_ratio']]

    # ---- Merge, save, return ------------------------------------------------
    merged = daily.merge(valuation, on=['code', 'date'])
    merged = merged[['code', 'date', 'open', 'high', 'low', 'close', 'avg', 'pre_close',
                     'pct_change', 'volume', 'money', 'turnover_ratio', 'high_limit', 'low_limit',
                     'volume_fq', 'circulating_cap', 'market_cap', 'factor', 'paused']]
    save_path = stock_data_day_file + stock + '.csv'
    merged.to_csv(save_path, index=False)

    # The per-day adjustment factor is handed back so the 30-minute volumes
    # can be scaled with it.
    return save_path, merged[['code', 'date', 'factor']]
# Fetch the 30-minute bars for one security
def get_m30_data(stock, stock_factor, start_date, end_date):
    """Download 30-minute bars for ``stock``, scale volume by factor, save as CSV.

    Args:
        stock: Security code string; the part before the first ``.`` is
            stored as the integer ``code`` column.
        stock_factor: DataFrame with ``code``, ``date`` and ``factor`` columns
            (as returned by ``get_day_data``); it is joined onto the bars on
            ``code``/``date``.
        start_date: Forwarded to ``JQ.get_price``.
        end_date: Date string; ``' 23:59:59'`` is appended before it is
            forwarded to ``JQ.get_price``.

    Returns:
        The path of the written file,
        ``stock_data_m30_file + stock + '_m30.csv'``.
    """
    bars = JQ.get_price(security=stock, start_date=start_date, end_date=end_date + ' 23:59:59',
                        frequency='30m', fields=['open', 'high', 'low', 'close', 'volume', 'money'],
                        fq='post')
    # Turn the timestamp index into a regular column.
    bars = bars.reset_index()
    bar_stamps = pd.to_datetime(bars['index'].values)

    # ``date`` is YYYYMMDD; ``time`` is the bar timestamp shifted back by
    # 30 minutes (the original note: 10:00-15:00 becomes 9:30-14:30), as HHMM.
    bars = bars.assign(
        date=bar_stamps.strftime(date_format='%Y%m%d'),
        time=(bar_stamps - pd.Timedelta(minutes=30)).strftime(date_format='%H%M'),
        code=stock.split('.')[0],
    ).astype({'date': int, 'time': int, 'code': int})
    bars = bars[['code', 'date', 'time', 'open', 'high', 'low', 'close', 'volume', 'money']]

    # Attach each day's adjustment factor and scale the volume with it;
    # /100 per the original note converts shares to lots.
    adjusted = bars.merge(stock_factor, on=['code', 'date'])
    adjusted['volume'] = adjusted['volume'] * adjusted['factor'] / 100
    # Turnover amount divided by 1000 (the original note: yuan -> thousand yuan).
    adjusted['money'] = adjusted['money'].div(1000)

    save_path = stock_data_m30_file + stock + '_m30.csv'
    adjusted.to_csv(save_path, index=False)
    return save_path
def query_spare():
    """Return the ``'spare'`` value reported by ``JQ.get_query_count()``.

    The original note describes this as the remaining query quota for the
    current day. When the value is below 50000 it is printed and the process
    is terminated through ``sys.exit()``.

    Returns:
        The ``'spare'`` value when it is at least 50000.
    """
    quota = JQ.get_query_count()
    remaining = quota['spare']
    running_low = remaining < 50000
    if running_low:
        print('spare', remaining)
        sys.exit()
    return remaining
def main(start_date, end_date):
    """Download daily and 30-minute data for every stock not yet on disk.

    A stock counts as already downloaded when ``<code>.csv`` exists in
    ``stock_data_day_file``. For each remaining stock the query quota is
    checked, the daily data is fetched and saved, and - when that produced an
    adjustment-factor table - the 30-minute data is fetched and saved too.

    Args:
        start_date: First date to download, forwarded to the fetch functions.
        end_date: Last date to download, forwarded to the fetch functions.
    """
    # NOTE(review): placeholder credentials - replace them with real ones and
    # avoid committing real credentials to source control.
    JQ.auth(username='1300000000', password='000000')

    # Create both output directories up front. Otherwise a missing m30
    # directory would only fail after the daily CSV was written, and that
    # stock would be skipped as "already downloaded" on the next run.
    for folder in (stock_data_day_file, stock_data_m30_file):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Codes whose daily CSV already exists.
    downloaded = {name[:-4] for name in os.listdir(stock_data_day_file) if name.endswith('.csv')}

    # All stock codes, minus the ones already downloaded. Filtering the list
    # (instead of a set difference) keeps the download order deterministic.
    stocks_all_list = list(JQ.get_all_securities(['stock']).index)
    # stocks_all_list = ['600631.XSHG']
    stocks_list = [stock for stock in stocks_all_list if stock not in downloaded]

    nums = 1  # running count of successfully saved stocks
    for stock in stocks_list:
        # query_spare() terminates the process when the quota runs low.
        spare = query_spare()
        day_save_path, stock_factor = get_day_data(stock, start_date, end_date)
        # An empty factor table means the daily data was unusable.
        if stock_factor.shape[0] == 0:
            print(stock, ' data error...')
            continue
        m30_save_path = get_m30_data(stock, stock_factor, start_date, end_date)
        print(nums, len(stocks_list), day_save_path, m30_save_path, spare)
        nums += 1
if __name__ == '__main__':
    import json
    import os
    import sys

    # The end date is the only command-line argument. Exit with a usage hint
    # instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: python ' + sys.argv[0] + ' END_DATE  (format: %Y-%m-%d, e.g. 2018-12-28)')
    end_date = sys.argv[1]  # format : %Y-%m-%d
    start_date = '2010-01-01'
    main(start_date, end_date)