爬虫学习06:用 Selenium 爬取 QQ 空间

用 Selenium 爬取 QQ 空间
import os
import time

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Script: log in on the QQ login page, scroll the resulting page to the bottom
# several times, then print every text node found under <div class="f-info">.

# The password is read from the environment instead of being committed in
# plain text.  QQ_USERNAME may override the account number used for the login.
QQ_USERNAME = os.environ.get('QQ_USERNAME', '1355144989')
QQ_PASSWORD = os.environ.get('QQ_PASSWORD')
if not QQ_PASSWORD:
    raise SystemExit('Set the QQ_PASSWORD environment variable before running this script.')

SCROLL_ROUNDS = 5   # how many times the page is scrolled to the bottom
PAUSE_SECONDS = 2   # fixed wait after the login click and after every scroll

pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe')
try:
    pro.get(url='https://i.qq.com/?s_url=http%3A%2F%2Fuser.qzone.qq.com%2F1355144989%2Finfocenter')

    # The login widgets live inside the 'login_frame' iframe, so the driver
    # has to enter that frame before it can locate them.
    pro.switch_to.frame('login_frame')
    # Presumably this switches the widget to the account/password form --
    # the 'u' and 'p' inputs are filled in right after the click.
    pro.find_element(By.ID, 'switcher_plogin').click()

    # Type the account and password, then submit the form.
    pro.find_element(By.ID, 'u').send_keys(QQ_USERNAME)
    pro.find_element(By.ID, 'p').send_keys(QQ_PASSWORD)
    pro.find_element(By.ID, 'login_button').click()
    time.sleep(PAUSE_SECONDS)

    # Scroll to the bottom repeatedly, pausing after each scroll (presumably
    # so that lazily loaded entries have time to appear -- confirm on the page).
    js = 'window.scrollTo(0, document.body.scrollHeight)'
    for _ in range(SCROLL_ROUNDS):
        pro.execute_script(js)
        time.sleep(PAUSE_SECONDS)

    # Grab the source of the page as currently rendered and extract the text.
    page_text = pro.page_source
    tree = etree.HTML(page_text)
    text = tree.xpath('//div[@class="f-info"]//text()')
    print(text)
finally:
    # Always shut the browser down, even when a step above raised.
    pro.quit()

# --- Next section: headless browser (PhantomJS) ---
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Script: drive Baidu through PhantomJS -- type a query, click the search
# button, save a screenshot before and after, and print the final page source.
# NOTE(review): PhantomJS support is deprecated in later Selenium 3 releases
# and absent from Selenium 4; this script needs a Selenium version that still
# ships webdriver.PhantomJS.

PAUSE_SECONDS = 2  # fixed wait between the individual UI steps

pro = webdriver.PhantomJS(executable_path=r'C:\Users\古月蜀黍\Desktop\文件汇总\爬虫\phantomjs\bin\phantomjs.exe')
try:
    pro.get(url='https://www.baidu.com')

    # Locate the element with id 'kw' and type the query into it.
    my_input = pro.find_element(By.ID, 'kw')
    time.sleep(PAUSE_SECONDS)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./1.jpg')

    # Locate the element with id 'su' and click it.
    my_button = pro.find_element(By.ID, 'su')
    time.sleep(PAUSE_SECONDS)
    my_button.click()

    # Wait, then capture the page as currently displayed.
    time.sleep(PAUSE_SECONDS)
    pro.save_screenshot('./2.jpg')
    page_text = pro.page_source
    print(page_text)
finally:
    # Always close the browser, even when a step above raised.
    pro.quit()

# --- Next section: headless Google Chrome configuration ---
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Script: the same Baidu walkthrough, run in Chrome started with the
# --headless flag: type a query, click the search button, save a screenshot
# before and after, and print the final page source.

PAUSE_SECONDS = 2  # fixed wait between the individual UI steps

# Headless browser configuration.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# `options=` replaces the deprecated `chrome_options=` keyword.
pro = webdriver.Chrome(
    executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe',
    options=chrome_options,
)
try:
    pro.get('https://www.baidu.com')

    # Locate the element with id 'kw' and type the query into it.
    my_input = pro.find_element(By.ID, 'kw')
    time.sleep(PAUSE_SECONDS)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./111.png')

    # Locate the element with id 'su' and click it.
    my_button = pro.find_element(By.ID, 'su')
    time.sleep(PAUSE_SECONDS)
    my_button.click()

    # Wait, then capture the page as currently displayed.
    time.sleep(PAUSE_SECONDS)
    pro.save_screenshot('./222.png')
    page_text = pro.page_source
    print(page_text)
finally:
    # Always close the browser, even when a step above raised.
    pro.quit()

  

上一篇:飞桨学院-Python从小白逆袭大神-Day3-《青春有你2》选手数据分析


下一篇:Design Pattern - Chain of Responsibility(Java)