想用python模拟浏览器访问web的方法测试些东西,有哪几种方法呢?
一类:单纯的访问web,不解析其js,css等。
1. urllib2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# -*- coding: utf-8 -*-
import contextlib
import urllib2


def Furllib2(ip, port, url, timeout):
    """Fetch ``url`` through an HTTP proxy with urllib2 and print the response.

    Prints the proxy mapping, then the final URL, status code, headers and
    body of the response.

    Args:
        ip: Proxy host. When ``ip`` or ``port`` is empty no proxy is configured.
        port: Proxy port (string or int; it is only formatted into a URL).
        url: Address to request.
        timeout: Timeout in seconds passed to ``urllib2.urlopen``.

    Returns:
        ``True`` when the request succeeded, ``0`` when any error occurred.
    """
    # The original built an empty dict and never used ip/port, so the
    # request silently bypassed the proxy it was supposed to test.
    proxydict = {}
    if ip and port:
        proxydict['http'] = 'http://%s:%s' % (ip, port)
    print(proxydict)

    proxy_handler = urllib2.ProxyHandler(proxydict)
    opener = urllib2.build_opener(proxy_handler)
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    # Kept from the original: this installs the opener process-wide, so
    # later urllib2.urlopen() calls elsewhere also go through it.
    urllib2.install_opener(opener)

    try:
        # closing() guarantees the socket is released even if printing fails.
        with contextlib.closing(
                urllib2.urlopen(url, timeout=timeout)) as response:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        return True
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print('some errors occored' + '-' * 50)
        return 0
def main():
    """Run ``Furllib2`` once against a hard-coded proxy and print its result."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    timeout = 4
    # NOTE(review): `url` is not defined in this function or anywhere in the
    # visible snippet; it must exist as a module-level name before main()
    # is called, otherwise this line raises NameError.
    print(Furllib2(proxyip, proxyport, url, timeout))
# Run the demo only when executed as a script (the original `= =` was a
# syntax error).
if __name__ == "__main__":
    main()
|
2. mechanize(与网站的自动化交互)
http://wwwsearch.sourceforge.net/mechanize/doc.html
1
2
3
4
5
6
7
8
9
10
11
12
|
def Fmechanize(url):
    """Open ``url`` with a cookie-aware mechanize opener and print the result.

    Prints the final URL and the response headers. Relies on a module-level
    ``mechanize`` import, which the visible snippet does not show.

    Args:
        url: Address to request with GET.

    Returns:
        ``True`` when the request succeeded, ``0`` when any error occurred.
    """
    import traceback

    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    try:
        r = opener.open(url)  # GET
        # r = opener.open("http://example.com/", data)  # POST
        try:
            print(r.geturl())
            print(r.info())
        finally:
            # Release the connection even if printing fails.
            r.close()
        return True
    except Exception:
        # The original swallowed every error silently; report it so a
        # failed run can be diagnosed, but keep the 0 return for callers.
        traceback.print_exc()
        return 0
|
二类:模拟浏览器,使用firefox等的浏览器引擎,支持js,css等。
1. selenium 的firefox或者chrome等驱动,但是由于要打开一个浏览器,所以会比较慢(浏览器驱动可以到selenium官网上下载,也可以到firefox插件处搜索)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
def Fselenium_firefox(ip, port, url, timeout):
    """Load ``url`` in Firefox through an HTTP proxy and print its cookies.

    Relies on module-level names ``webdriver``, ``time`` and ``traceback``,
    which the visible snippet does not import.

    Args:
        ip: Proxy host written to ``network.proxy.http``.
        port: Proxy port; converted to ``int`` for ``network.proxy.http_port``.
        url: Page to load.
        timeout: Passed to ``webdriver.Firefox`` as ``timeout``.

    Returns:
        ``1`` when the page was loaded and cookies were read, ``0`` on any
        error (the traceback is printed).
    """
    try:
        profile = webdriver.FirefoxProfile()
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', ip)
        # http_port is an integer preference; callers in this file pass the
        # port as a string ('80'), so coerce it here.
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        # print_exc() already writes the traceback; the original additionally
        # printed its None return value.
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        time.sleep(5)  # fixed wait after load, kept from the original
        cookies = driver.get_cookies()
        print(cookies)
        return 1
    except Exception:
        traceback.print_exc()
        # print 'not have Union allianceid'
        return 0
    finally:
        # Single exit point for closing the browser on success and failure.
        driver.quit()
|
2. selenium :headless test使用selenium+ phantomjs驱动,无需打开浏览器,但是支持js的模拟浏览器动作,也就是说和你手工打开是没有区别的。
http://selenium.googlecode.com/git/docs/api/py/api.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def Fselenium_phantomjs(ip, port, url, timeout):
    """Load ``url`` headlessly with PhantomJS through an HTTP proxy.

    Prints the PhantomJS service arguments and the driver's current URL.
    Relies on module-level names ``webdriver``, ``time`` and ``traceback``,
    which the visible snippet does not import.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: Page to load.
        timeout: Page-load timeout passed to ``set_page_load_timeout``.

    Returns:
        ``True`` when the current URL could be read from the driver (even if
        the page load itself raised, as in the original), ``0`` otherwise.
    """
    service_args = [
        '--proxy=%s:%s' % (ip, port),
        '--proxy-type=http',
    ]
    print(service_args)

    try:
        driver = webdriver.PhantomJS(service_args=service_args)
    except Exception:
        # Without a driver there is nothing to query; the original fell
        # through and hit a NameError on `driver` before returning 0.
        traceback.print_exc()
        return 0

    try:
        try:
            driver.set_page_load_timeout(timeout)
            driver.get(url)
            time.sleep(4)  # fixed wait after load, kept from the original
        except Exception:
            # A failed or timed-out load is reported but not fatal.
            traceback.print_exc()
        print(driver.current_url)
        return True
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # The original never quit the driver, leaving a PhantomJS process
        # behind on every call.
        driver.quit()
|
3. qt,网上抢来的代码
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork


class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar that is pre-loaded from the parent window's settings.

    On construction it reads the value stored under ``cookiesKey`` in
    ``parent.settings`` and, if that value is truthy, parses it into cookies
    and installs them in the jar.
    """

    def __init__(self, cookiesKey, parent=None):
        """Create the jar and restore any cookies stored under ``cookiesKey``.

        Args:
            cookiesKey: Settings key the cookies are read from.
            parent: Owner object exposing a ``settings`` attribute; also
                passed to the Qt base class as parent.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        # `parent` defaults to None; the original then crashed on
        # `None.settings`. With no window there is nothing to restore.
        if parent is None:
            return

        cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
        if cookiesValue:
            cookiesList = QtNetwork.QNetworkCookie.parseCookies(cookiesValue)
            self.setAllCookies(cookiesList)

    # NOTE: the override below is disabled in the original. While it stays
    # disabled, nothing in the visible code writes cookies to the settings.
    # def setCookiesFromUrl (self, cookieList, url):
    #     cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
    #     cookiesArray = cookiesValue if cookiesValue else QtCore.QByteArray()
    #     for cookie in cookieList:
    #         cookiesArray.append(cookie.toRawForm() + "\n")
    #     self.mainWindow.settings.setValue(self.cookiesKey, cookiesArray)
    #     return super(cookieJar, self).setCookiesFromUrl(cookieList, url)

    def deleteCookie(self, cookieList):
        # NOTE(review): this ignores `cookieList`, passes an empty list as
        # the settings key and discards the result, so it deletes nothing.
        # Left unchanged because the intended behavior cannot be determined
        # from this file.
        cookie = []
        self.mainWindow.settings.value(cookie)
class webView(QtWebKit.QWebView):
    """Web view whose network access manager uses a ``cookieJar``."""

    def __init__(self, cookiesKey, url, parent=None):
        """Create the view and attach a cookie jar to its page.

        Args:
            cookiesKey: Settings key forwarded to ``cookieJar``.
            url: Accepted but not used here; the caller loads the URL itself.
            parent: Qt parent, also forwarded to ``cookieJar`` as its parent.
        """
        super(webView, self).__init__(parent)
        self.cookieJar = cookieJar(cookiesKey, parent)
        self.page().networkAccessManager().setCookieJar(self.cookieJar)
class myWindow(QtGui.QMainWindow):
    """Minimal tabbed browser window with an address field and "Add Tab"."""

    def __init__(self, parent=None):
        """Build the tab widget, the toolbar and the settings object."""
        super(myWindow, self).__init__(parent)

        # Settings key under which each tab's cookieJar looks for cookies.
        self.cookiesKey = "cookies"

        self.centralwidget = QtGui.QWidget(self)

        self.tabWidget = QtGui.QTabWidget(self.centralwidget)
        self.tabWidget.setTabsClosable(True)

        self.verticalLayout = QtGui.QVBoxLayout(self.centralwidget)
        self.verticalLayout.addWidget(self.tabWidget)

        self.actionTabAdd = QtGui.QAction(self)
        self.actionTabAdd.setText("Add Tab")
        self.actionTabAdd.triggered.connect(self.on_actionTabAdd_triggered)

        self.lineEdit = QtGui.QLineEdit(self)

        self.toolBar = QtGui.QToolBar(self)
        self.toolBar.addAction(self.actionTabAdd)
        self.toolBar.addWidget(self.lineEdit)

        self.addToolBar(
            QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea), self.toolBar)
        # NOTE(review): the tab widget, not `self.centralwidget`, is set as
        # the central widget, although it was just placed in that widget's
        # layout. Kept as in the original; confirm this is intended.
        self.setCentralWidget(self.tabWidget)

        # Read by cookieJar via `parent.settings` when a tab is created.
        self.settings = QtCore.QSettings()

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print every cookie held by the current tab in raw form."""
        # Named `view` so the local does not shadow the webView class.
        view = self.tabWidget.currentWidget()
        listCookies = view.page().networkAccessManager().cookieJar().allCookies()
        for cookie in listCookies:
            print(cookie.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open a tab for the address field's text, or a blank page."""
        url = self.lineEdit.text()
        self.addNewTab(url if url else 'about:blank')

    def addNewTab(self, url):
        """Create a ``webView`` tab, start loading ``url`` and select it."""
        tabName = u"Tab {0}".format(self.tabWidget.count())

        view = webView(self.cookiesKey, url, self)
        view.loadFinished.connect(self.on_tabWidget_loadFinished)
        view.load(QtCore.QUrl(url))

        tabIndex = self.tabWidget.addTab(view, tabName)
        self.tabWidget.setCurrentIndex(tabIndex)

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookies value once a tab has finished loading."""
        # The original assigned this to an unused local; the read is kept
        # and the result is discarded.
        self.settings.value(self.cookiesKey)
# Start the demo browser only when executed as a script (the original `= =`
# was a syntax error).
if __name__ == "__main__":
    import sys

    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('myWindow')

    window = myWindow()
    window.resize(666, 333)
    window.show()

    # Hand the event loop's exit status back to the shell.
    sys.exit(app.exec_())
|
4. qt-headless
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
import sys

from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *


class Render(QWebPage):
    """Load a URL in an off-screen page and keep its main frame.

    The constructor blocks: it runs the Qt event loop until the page emits
    ``loadFinished``, after which the frame is available as ``self.frame``.
    """

    def __init__(self, url):
        """Start loading ``url`` and run the event loop until it finishes."""
        # The application object is created before the page is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        self.app.exec_()

    def _loadFinished(self, result):
        """Store the main frame and stop the event loop.

        ``result`` is not inspected, so the frame is stored whatever value
        the signal delivers.
        """
        self.frame = self.mainFrame()
        self.app.quit()


# NOTE(review): `url` is not defined anywhere in the visible snippet; it
# must be assigned before this line or the script raises NameError.
r = Render(url)
html = r.frame.toHtml()
print(html)
|
5. splinter :打开浏览器,模拟操作,python的
http://splinter.cobrateam.info/docs/tutorial.html
1
2
3
4
|
>>> from
splinter import
Browser
>>> browser =
Browser()
>>> browser.visit(url) |
具体用哪个要看你有什么具体的需求了