1、运用
#!/usr/bin/python
# encoding=utf-8
"""Render a page in PhantomJS and save the resulting HTML source.

The page is loaded through Selenium's PhantomJS driver, its source is
printed to stdout and written (UTF-8 encoded) to ``aaaa1.txt``.
"""
import sys

from selenium import webdriver

if sys.version_info[0] == 2:
    # Python 2 only: make implicit unicode <-> str conversions use UTF-8 so
    # that printing non-ASCII page content does not raise UnicodeEncodeError.
    reload(sys)
    sys.setdefaultencoding("utf-8")

driver = webdriver.PhantomJS(executable_path='/home/lhy/phantomjs-1.9.8-linux-x86_64/bin/phantomjs')
try:
    driver.get("http://item.jd.com/2914823.html")
    # Example of interacting with a page before capturing it:
    #   driver.find_element_by_id('search_form_input_homepage').send_keys("Nirvana")
    #   driver.find_element_by_id("search_button_homepage").click()
    page_source = driver.page_source
    print(page_source)
    # Encode explicitly: the file is opened in binary mode.
    with open("aaaa1.txt", "wb") as fo:
        fo.write(page_source.encode("utf-8"))
finally:
    # Always shut the PhantomJS process down, even if loading the page failed.
    driver.quit()
2、抓取下拉加载的页面
# coding=utf-8
"""Fetch pages whose content only appears after the page is scrolled.

``getHtml`` renders a page in PhantomJS, scrolls it with JavaScript and
returns the resulting HTML; ``getHtml2`` is a plain ``requests`` fetch that
does not execute JavaScript. Running this file fetches the local test page
and saves its rendered source to ``taobao2.txt``.
"""
import re
import sys
import time

import requests
from bs4 import BeautifulSoup
from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

if sys.version_info[0] == 2:
    # Python 2 only: make implicit unicode <-> str conversions use UTF-8 so
    # that printing non-ASCII page content does not raise UnicodeEncodeError.
    reload(sys)
    sys.setdefaultencoding("utf-8")

# User-Agent string sent by both fetch helpers.
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.5; windows NT)"

# URLs collected by getUrlsFromFile().
urls = []


def getHtml2(url):
    """Fetch ``url`` with ``requests`` (no JavaScript execution).

    The raw response body is also written to ``phonesinfo1.txt``.

    Args:
        url: Address to request.

    Returns:
        The response body (``Response.content``).
    """
    headers = {'User-Agent': USER_AGENT}
    # NOTE(review): this issues a POST, not a GET - confirm that is intended.
    r = requests.post(url, headers=headers)
    with open("phonesinfo1.txt", "wb") as fo:
        fo.write(r.content)
    return r.content


def getHtml(url, scroll_top=1000, settle_seconds=0):
    """Render ``url`` in PhantomJS, scroll it down and return the page source.

    Args:
        url: Address to load.
        scroll_top: Pixel offset assigned to ``document.body.scrollTop``
            (default 1000, the value the original script used).
        settle_seconds: Optional pause after scrolling, to give scroll
            handlers time to run before the source is read. Defaults to 0
            (no pause), which matches the original behaviour.

    Returns:
        The page source as reported by the driver after scrolling.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = USER_AGENT
    # Other drivers can be substituted here, e.g. webdriver.Chrome() or
    # webdriver.PhantomJS(executable_path=...).
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        driver.get(url)
        # Key step: move the scroll bar down so scroll-triggered content loads.
        driver.execute_script("document.body.scrollTop=%d" % scroll_top)
        # implicitly_wait only sets the element-lookup timeout; it does not
        # pause the script. Use settle_seconds for an actual delay.
        driver.implicitly_wait(30)
        if settle_seconds:
            time.sleep(settle_seconds)
        return driver.page_source
    finally:
        # Always shut the PhantomJS process down, even on failure.
        driver.quit()


def getPqHtml(html):
    """Wrap ``html`` in a PyQuery object and return it."""
    return pq(html)


def getUrlsFromFile(fileUrl):
    """Read URLs, one per line, from ``fileUrl`` into the global ``urls`` list.

    Each URL is stripped of surrounding whitespace, printed and appended to
    ``urls``. Blank lines are skipped.

    Args:
        fileUrl: Path of the text file to read.
    """
    with open(fileUrl, 'r') as f:
        for line in f:
            url_one = line.strip()
            if not url_one:
                continue
            print(url_one)
            urls.append(url_one)


url = "http://localhost:8080/pro/html.html"
text = getHtml(url)
# Encode explicitly: the file is opened in binary mode.
with open("taobao2.txt", "wb") as fo:
    fo.write(text.encode("utf-8"))
print(text)
HTML 页面
<!-- Test page: a tall body with a hidden div that is shown once the page
     has been scrolled down by at least 300px. -->
<html>
<head>
</head>
<body style="height:5000px">
<div id="top_div" style="display:none">ffffffffffffffffffffff</div>
<script>
//document.body.scrollTop=10000;
window.onscroll = function(){
    // Read the scroll offset from whichever element reports it.
    var t = document.documentElement.scrollTop || document.body.scrollTop;
    var top_div = document.getElementById( "top_div" );
    if( t >= 300 ) {
        // alert(t);
        top_div.style.display = "block";
    }
    // else { top_div.style.display = "none";}
}
</script>
</body>
</html>
3、模拟登录
# coding=utf-8
"""Log in to a local web app through Firefox, then open a protected page.

The script fills in the login form (user name, password and the captcha
text shown on the page), submits it, and then requests a second page with
the same browser object.
"""
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("http://localhost:8080/pro")

# Clear whatever is in the password box before typing.
password_box = browser.find_element_by_name("password")
password_box.clear()
browser.find_element_by_name("username").send_keys("test")
password_box.send_keys("123")

# The captcha is read as text from a <span> inside the "yzm-img" element.
yzm = browser.find_element_by_class_name("yzm-img").find_element_by_tag_name("span").text
yzm = yzm.replace(' ', '')  # drop the spaces between the characters
browser.find_element_by_class_name("yzm-sr").send_keys(yzm)

# Click the button to submit the form.
browser.find_element_by_id("tijiao").click()
print(browser.current_url)

# The same browser object keeps the session cookies after logging in, so a
# page that requires authentication can be requested directly.
browser.get("http://localhost:8080/pro/test.jsp")
print(browser.page_source)

# browser.quit() is left disabled, as in the original script, so the Firefox
# window stays open after the script ends; enable it to close the browser.
# browser.quit()
4、百度搜索
# coding=utf-8
"""Run a Baidu search for "selenium" through Firefox and print the URL."""
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("http://www.baidu.com")

# "kw" is the search input; clear it before typing the query.
search_box = browser.find_element_by_id("kw")
search_box.clear()
search_box.send_keys("selenium")

# "su" is the search button.
browser.find_element_by_id("su").click()

# URL reported by the browser after the click.
print(browser.current_url)

# browser.quit() is left disabled, as in the original script, so the Firefox
# window stays open after the script ends; enable it to close the browser.
# browser.quit()