爬取超级鹰当前用户的个人详情页数据

2023-11-22 19:20:10
 1 from Codeclass1 import Chaojiying_Client
 2 from lxml import etree
 3 import requests
 4 
 5 # session = requests.Session()
 6 
 7 # 将验证码图片下载到本地
 8 headers = {
 9     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
10 }
11 url = "https://www.chaojiying.com/user/login/"
12 page_text = requests.get(url=url, headers=headers).text
13 
14 # 解析验证码图片img中src的属性值
15 tree = etree.HTML(page_text)
16 code_img_src = 'https://www.chaojiying.com' + tree.xpath('/html/body/div[3]/div/div[3]/div[1]/form/div/img/@src')[0]
17 img_data = requests.get(url=code_img_src, headers=headers).content
18 # 将验证码图片保存到了本地
19 with open('./a.jpg', 'wb') as fp:
20     fp.write(img_data)
21 
22 #调用打码平台的示例程序进行验证码图片数据识别
23 chaojiying = Chaojiying_Client('1294541754', 'liusi0719', '920917')
24 im = open('./a.jpg', 'rb').read()
25 code_text = chaojiying.PostPic(im, 8001)['pic_str']
26 print(code_text)
27 
28 login_url = "https://www.chaojiying.com/user/login/"
29 data = {
30     "user": "1294541754",
31     "pass": "liusi0719",
32     "imgtxt": code_text,
33     "act": "1"
34 }
35 response = requests.post(url=login_url,headers=headers,data=data)
36 print(response.status_code)
37 
38 detail_url = "https://www.chaojiying.com/user/collect/day/"
39 headers = {
40     'Cookie':'__51cke__=; PHPSESSID=c27lmpe026ksc3qnk0fc289q2b; __tins__16851773=%7B%22sid%22%3A%201628657273012%2C%20%22vd%22%3A%201%2C%20%22expires%22%3A%201628659073012%7D; __51laig__=24'
41 }
42 detail_page_text = requests.get(url=detail_url,headers=headers).text
43 with open('chaojiying.html','w',encoding='utf-8') as fp:
44     fp.write(detail_page_text)
码农公寓

相关文章