验证码识别
反爬机制:验证码。只有识别出验证码图片中的数据,才能完成模拟登录操作。
验证码识别的方式:推荐使用第三方打码平台(如超级鹰)进行自动识别。
实战一:古诗文网登录页面中的验证码。
使用打码平台识别验证码的编码流程:
-将验证码图片进行本地下载
-调用平台提供的示例代码进行图片数据识别
import os
from hashlib import md5
from urllib.parse import urljoin

import requests
from lxml import etree

# Seconds to wait on any single HTTP request, so that a stalled server
# cannot hang the script indefinitely.
REQUEST_TIMEOUT = 30


class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service."""

    def __init__(self, username, password, soft_id):
        """Remember the account data that is sent with every request.

        username: account name, sent as the ``user`` form field.
        password: plain-text password; only its MD5 hex digest is stored
            and sent (as the ``pass2`` form field).
        soft_id: software ID, sent as the ``softid`` form field.
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes
        codetype: captcha type, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=REQUEST_TIMEOUT,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image and return the JSON response.

        im_id: ID of the image whose recognition result was wrong
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=REQUEST_TIMEOUT,
        )
        return r.json()


def PicText(picname, chaojiying):
    """Recognise the captcha image stored at *picname* and print its text.

    picname: path of the image file to upload.
    chaojiying: a ``Chaojiying_Client`` used to perform the upload.

    Returns the recognised text (the ``pic_str`` field of the response) so
    callers can use it in addition to seeing it printed.
    """
    # Read through a context manager so the file handle is always closed.
    with open(picname, 'rb') as image_file:
        im = image_file.read()
    # 1004 is the codetype sent to the platform; its meaning is listed on
    # the Chaojiying price page referenced in PostPic.
    pic_str = chaojiying.PostPic(im, 1004)['pic_str']
    print(pic_str)
    return pic_str


def _credentials_from_env():
    """Return (username, password, soft_id) read from environment variables."""
    names = ('CHAOJIYING_USERNAME', 'CHAOJIYING_PASSWORD', 'CHAOJIYING_SOFT_ID')
    missing = [name for name in names if name not in os.environ]
    if missing:
        raise SystemExit(
            'missing environment variable(s): {}'.format(', '.join(missing))
        )
    return tuple(os.environ[name] for name in names)


def main():
    """Download the gushiwen login captcha and print its recognised text."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
        'Cookie': 'Hm_lvt_649f268280b553df1f778477ee743752=1613016932; key_kw=; key_cate=zuozhe; Hm_lpvt_649f268280b553df1f778477ee743752=1613016981'
    }
    url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'

    # Fetch the login page HTML.
    page_response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    page_response.raise_for_status()
    page_text = page_response.text

    # Parse the HTML so it can be queried with XPath.
    tree = etree.HTML(page_text)

    # Locate the captcha image; fail with a clear message instead of an
    # IndexError when the element is not on the page.
    sources = tree.xpath('//*[@id="imgCode"]/@src')
    if not sources:
        raise SystemExit('captcha image (id="imgCode") not found on the login page')
    # urljoin resolves root-relative, relative and absolute src values alike.
    code_img_src = urljoin(url, sources[0])

    # Download the captcha image.
    img_response = requests.get(url=code_img_src, headers=headers, timeout=REQUEST_TIMEOUT)
    img_response.raise_for_status()
    img_data = img_response.content

    # Save the captcha image locally.
    with open('古诗文验证码.jpg', "wb") as fp:
        fp.write(img_data)

    # Run the recognition. The account data comes from the environment
    # rather than being hard-coded in the script.
    username, password, soft_id = _credentials_from_env()
    chaojiying = Chaojiying_Client(username, password, soft_id)
    PicText('古诗文验证码.jpg', chaojiying)


if __name__ == "__main__":
    main()