文章目录
使用Post方式爬虫
注意如果Request Payload 是json格式
则请求头需要设置:'Content-Type' : 'application/json'
常用的方法是:如果要把字典(dict)转换为 JSON 字符串,调用 json.dumps 方法即可。
反过来,如果要把 JSON 字符串(str)解析为 Python 对象(例如 dict),就用 json.loads 即可。
一个实例:
爬虫爬取leetcode的每日一题。
这里直接调用 LeetCode 获取数据的接口:因为 LeetCode 的题目是通过 XHR 请求加载后再展示的,所以只要找到对应的请求,然后直接模拟该请求获取数据即可。
比较显然。
def question_per_day_by_leetcode():
    """Fetch LeetCode CN's question of the day and format it as a message.

    Posts the ``questionOfToday`` GraphQL query as a JSON request body to
    ``https://leetcode-cn.com/graphql/`` and reads the first entry of
    ``data.todayRecord`` from the reply.

    Returns:
        str: A multi-line message with the date, translated title,
        difficulty, acceptance rate (rounded to 2 decimals), number of
        solutions, comma-joined topic tags and the problem URL.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the reply lacks the expected fields or
            ``todayRecord`` is empty.
    """
    url = 'https://leetcode-cn.com/graphql/'
    headers = {
        # NOTE(review): `f` is defined outside this block; presumably a
        # Faker-like object that yields a random User-Agent -- confirm.
        'user-agent': f.user_agent(),
        'referer': 'https://leetcode-cn.com/problemset/all/',
        # The request payload is JSON, so the server must be told so.
        'Content-Type': 'application/json'
    }
    data = {
        "query": "\n query questionOfToday {\n todayRecord {\n date\n userStatus\n question {\n "
                 "questionId\n frontendQuestionId: questionFrontendId\n difficulty\n title\n "
                 "titleCn: translatedTitle\n titleSlug\n paidOnly: isPaidOnly\n freqBar\n "
                 "isFavor\n acRate\n status\n solutionNum\n hasVideoSolution\n topicTags "
                 "{\n "
                 " name\n nameTranslated: translatedName\n id\n }\n extra {\n "
                 "topCompanyTags {\n imgUrl\n slug\n numSubscribed\n }\n "
                 "}\n "
                 " }\n lastSubmission {\n id\n }\n }\n}\n ",
        "variables": {}, "operationName": "questionOfToday"}

    # The context manager releases the connection even when a later step
    # raises; the timeout keeps a stalled server from blocking forever.
    with requests.post(url, headers=headers, data=json.dumps(data),
                       timeout=10) as r:
        r.raise_for_status()
        # Parse the raw bytes: json.loads detects UTF-8/16/32 itself, which
        # avoids mojibake from a wrong `apparent_encoding` guess.
        j = json.loads(r.content)

    record = j['data']['todayRecord'][0]
    date = record['date']
    question = record['question']
    # e.g. https://leetcode-cn.com/problems/find-peak-element/
    question_url = 'https://leetcode-cn.com/problems/' + question['titleSlug']
    title_cn = question['titleCn']
    difficulty = question['difficulty']
    acRate = round(question['acRate'], 2)
    solutionNum = question['solutionNum']  # number of published solutions

    # Fall back to the untranslated name when the translation is null or
    # empty, so that str.join never receives a non-string item.
    tag_lists = ','.join(
        tag['nameTranslated'] or tag['name'] for tag in question['topicTags']
    )
    # Echo the joined tags to stdout, as the original implementation did.
    print(tag_lists)

    msg = '每日一题 ({})\n\n标题: {}\n难度: {}\n通过率: {}\n题解个数: {}\n标签: {}\n链接: {}\n' \
        .format(date, title_cn, difficulty, acRate, solutionNum, tag_lists, question_url)
    return msg