材料:
豆瓣电影排行榜
- 通过抓包工具获取请求的 url,可知请求方法为 GET,爬取时需要携带查询参数
- 返回的数据为 JSON 格式
#!/usr/bin/python3
import requests
import json
# Fetch one page of the Douban movie chart as JSON and save it to
# 'douban.json' in the current working directory.

# `param` holds the query-string parameters that `url` requires.
url = 'https://movie.douban.com/j/chart/top_list'
param = {
    'type': '24',
    'interval_id': '100:90',
    'action': '',
    'start': '0',  # starting position within the collection
    'limit': '20',  # number of records to fetch
}
# Spoof a browser User-Agent.
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
}
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the script forever.
response = requests.get(url=url, headers=header, params=param, timeout=10)
# Stop on an HTTP error status instead of parsing/saving an error response.
response.raise_for_status()
dic_obj = response.json()
# `with` ensures the file is flushed and closed even if json.dump raises.
with open('douban.json', 'w', encoding='utf-8') as fp:
    # ensure_ascii=False writes non-ASCII text as-is rather than \uXXXX escapes.
    json.dump(dic_obj, fp=fp, ensure_ascii=False)
print('over!!!!')