# Reference: https://blog.csdn.net/jj8999999/article/details/108845791
import requests
import pandas as pd
# Scrape bullet comments (danmaku) for one video from the hitv bullet endpoint
# and load them into a pandas DataFrame for analysis.
#
# Module-level results:
#   headers -- HTTP headers sent with every request (browser-like User-Agent)
#   datas   -- raw comment dicts accumulated from every fetched segment
#   df      -- DataFrame with the selected columns plus a derived '时间' column
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36",
}
datas = []
# Each episode runs a little over 90 minutes, so 100 segments is enough
# (presumably the endpoint serves one JSON segment per minute -- TODO confirm).
for i in range(100):
    # Single-line progress counter: '\r' rewinds to the start of the line.
    print(f'\r{i}', end='')
    # Fetch the i-th bullet-comment segment.
    url = f'https://bullet-ali.hitv.com/bullet/2021/08/17/192249/13137070/{i}.json'
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    if r.status_code == 200:
        data = r.json()
        data = data['data']['items']
        datas.extend(data)
    else:
        # A non-200 response is treated as "no more segments": stop fetching.
        break
# Terminate the '\r' progress line so later output starts on a fresh line.
print()

df = pd.DataFrame(datas)
# Per-column count of missing values; printed explicitly because a bare
# expression is only displayed in an interactive/notebook session.
print(df.isnull().sum())
# Keep only the columns of interest and replace missing values with 0.
# reindex() yields the same frame as df[[...]] when every column exists, but
# does not raise KeyError when nothing was fetched or a column is absent from
# every row (the missing column is created and then filled with 0).
df = df.reindex(columns=['ids', 'uid', 'content', 'time', 'v2_up_count']).fillna(0)
# '时间' means "time". 60000 ms per minute, so this presumably buckets each
# comment's `time` offset into whole minutes -- confirm `time` is milliseconds.
df['时间'] = df['time'] // 60000
# data analysis
df.info()