2021-10-15

import urllib3
import json

def write_to_file(filename, html):
    """Write ``html`` to ``filename`` as UTF-8 text, replacing any existing file.

    Args:
        filename: Path of the file to create or overwrite.
        html: Text to write.
    """
    # The context manager guarantees the handle is flushed and closed, even
    # when the write raises (the old ``f.close`` was never actually called).
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)

def load_page(filename, res):
    """Decode a response body, echo it to stdout and save it to ``filename``.

    Args:
        filename: Path handed to ``write_to_file`` for the decoded body.
        res: Response object whose ``data`` attribute holds the raw body
            bytes (the callers in this file pass urllib3 responses).

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    html = res.data.decode('utf-8')
    print(html)
    write_to_file(filename, html)

def sent_url():
    """Fetch http://www.baidu.com with a plain GET request.

    Prints the HTTP status code, then passes the response to ``load_page``
    with ``send_url.html`` as the output file name.
    """
    urlstr = "http://www.baidu.com"

    http = urllib3.PoolManager()
    res = http.request("GET", urlstr)

    print("status:%d" % res.status)
    load_page('send_url.html', res)

def send_url_with_headers():
    """Fetch http://www.baidu.com with a custom request header.

    Sends ``x-something: value`` along with the GET request, prints the HTTP
    status code, then passes the response to ``load_page`` with
    ``send_url_with_headers.html`` as the output file name.
    """
    urlstr = "http://www.baidu.com"
    headers = {
        "x-something": "value",
    }
    http = urllib3.PoolManager()
    res = http.request("GET", urlstr, headers=headers)
    print("status:%d" % res.status)
    load_page('send_url_with_headers.html', res)

def send_get_with_param001():
    """Send a GET request with two parameters to http://httpbin.org/get.

    The parameters are supplied through the ``fields`` argument of
    ``PoolManager.request``; the response is passed to ``load_page`` with
    ``send_get_with_param001.html`` as the output file name.
    """
    urlstr = "http://httpbin.org/get"
    param = {
        'arg1': 'value1',
        'arg2': 'value2',
    }
    http = urllib3.PoolManager()
    res = http.request("GET", urlstr, fields=param)
    load_page("send_get_with_param001.html", res)

def send_get_with_param002():
    """Send a search query to http://cn.bing.com/search with a custom User-Agent.

    The ``q`` parameter is supplied through the ``fields`` argument and a
    Chrome-style ``User-Agent`` header is sent with the GET request. The
    response is passed to ``load_page`` with ``send_get_with_param002.html``
    as the output file name.
    """
    urlstr = "http://cn.bing.com/search"
    word = {"q": "Python网络爬虫"}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
    }
    http = urllib3.PoolManager()
    res = http.request("GET", urlstr, headers=headers, fields=word)
    load_page("send_get_with_param002.html", res)

def send_post_with_field():
    """POST two form fields to http://httpbin.org/post.

    The fields are supplied through the ``fields`` argument of
    ``PoolManager.request``. Prints the HTTP status code, then passes the
    response to ``load_page`` with ``send_post_with_field.html`` as the
    output file name.
    """
    urlstr = "http://httpbin.org/post"
    param = {
        'arg1': 'value1',
        'arg2': 'value2',
    }
    http = urllib3.PoolManager()
    res = http.request("POST", urlstr, fields=param)
    print("status:%d" % res.status)
    load_page('send_post_with_field.html', res)

def send_post_with_body():
    """POST a JSON-serialized dict as the raw request body to http://httpbin.org/post.

    The dict is serialized with ``json.dumps`` and encoded to UTF-8 bytes
    before being sent through the ``body`` argument. The response is passed
    to ``load_page`` with ``send_post_with_body.html`` as the output file
    name.
    """
    urlstr = "http://httpbin.org/post"
    body = {
        'arg1': 'value1',
        'arg2': 'value2',
    }
    encode_data = json.dumps(body).encode('utf-8')
    http = urllib3.PoolManager()
    res = http.request("POST", urlstr, body=encode_data)
    load_page('send_post_with_body.html', res)

def send_post_with_file():
    """Upload the contents of ``example.txt`` to http://httpbin.org/post.

    The file is read as text from the current working directory and sent as
    the ``filefields`` field, given as a ``(filename, data, mime_type)``
    tuple. The response is passed to ``load_page``.

    Raises:
        FileNotFoundError: If ``example.txt`` does not exist.
    """
    with open('example.txt') as fp:
        file_data = fp.read()

    urlstr = "http://httpbin.org/post"
    http = urllib3.PoolManager()
    res = http.request(
        "POST",
        urlstr,
        fields={
            'filefields': ('example.txt', file_data, 'text/plain'),
        },
    )
    # NOTE(review): the output name ends in "_html" rather than ".html" as
    # most other functions in this file use - confirm whether intentional.
    load_page('send_post_with_file_html', res)

def send_url_with_proxy():
    """Request http://httpbin.org/ip directly and then through an HTTP proxy.

    The direct response is saved via ``load_page`` as
    ``send_url_with_proxy1_html``. The second request goes through a
    ``ProxyManager`` pointed at ``http://50.233.137.33:80`` with a
    ``connection: keep-alive`` header and a 4 second timeout, and is saved
    as ``send_url_with_proxy2_html``. A ``MaxRetryError`` from either
    request is printed instead of propagating.
    """
    try:
        urlstr = "http://httpbin.org/ip"
        http = urllib3.PoolManager()
        res001 = http.request("GET", urlstr)
        # NOTE(review): both output names end in "_html" rather than ".html"
        # - confirm whether intentional.
        load_page('send_url_with_proxy1_html', res001)

        proxy_http = urllib3.ProxyManager(
            'http://50.233.137.33:80',
            headers={
                'connection': 'keep-alive',
            },
        )
        res = proxy_http.request("GET", urlstr, timeout=4.0)
        load_page('send_url_with_proxy2_html', res)
    except urllib3.exceptions.MaxRetryError as e:
        print(e)

def send_url_with_timeout001():
    """GET http://httpbin.org/delay/3 with a single 4 second timeout.

    Prints the HTTP status code, then passes the response to ``load_page``
    with ``send_url_with_timeout001.html`` as the output file name.
    """
    urlstr = "http://httpbin.org/delay/3"
    http = urllib3.PoolManager()
    res = http.request("GET", urlstr, timeout=4.0)
    print("status:%d" % res.status)
    load_page('send_url_with_timeout001.html', res)

def send_url_with_timeout002():
    """GET http://httpbin.org/delay/3 with separate connect/read timeouts.

    The request uses ``urllib3.Timeout(connect=1.0, read=2.0)``. Timeout
    errors are not caught here and propagate to the caller. On success the
    HTTP status code is printed and the response is passed to ``load_page``.
    """
    urlstr = "http://httpbin.org/delay/3"
    http = urllib3.PoolManager()
    res = http.request(
        "GET",
        urlstr,
        timeout=urllib3.Timeout(connect=1.0, read=2.0),
    )
    print("status:%d" % res.status)
    # NOTE(review): writes to the "timeout001" file name, same as
    # send_url_with_timeout001 - looks copy-pasted; confirm intended name.
    load_page('send_url_with_timeout001.html', res)

def send_url_with_timeout003():
    """GET http://httpbin.org/delay/3 using a pool-wide timeout.

    The ``PoolManager`` is built with
    ``urllib3.Timeout(connect=1.0, read=2.0)`` so the timeout applies to the
    request without passing it per call. ``ReadTimeoutError`` and
    ``MaxRetryError`` are printed instead of propagating. On success the
    HTTP status code is printed and the response is passed to ``load_page``.
    """
    try:
        urlstr = "http://httpbin.org/delay/3"
        http = urllib3.PoolManager(
            timeout=urllib3.Timeout(connect=1.0, read=2.0),
        )
        res = http.request("GET", urlstr)
        print("status:%d" % res.status)
        # NOTE(review): writes to the "timeout001" file name - looks
        # copy-pasted; confirm intended name.
        load_page('send_url_with_timeout001.html', res)
    except urllib3.exceptions.ReadTimeoutError as e:
        print(e)
    except urllib3.exceptions.MaxRetryError as e:
        print(e)

def send_url_with_exception():
    """Request http://www.xiangbaoerzi.com with retries disabled and print any urllib3 error.

    The pool uses ``urllib3.Timeout(connect=1.0, read=4.0)`` and the request
    passes ``retries=False``. Any urllib3 ``HTTPError`` (including
    ``NewConnectionError`` and ``MaxRetryError``) is printed instead of
    propagating. On success the HTTP status code is printed and the response
    is passed to ``load_page``.
    """
    try:
        urlstr = "http://www.xiangbaoerzi.com"
        http = urllib3.PoolManager(
            timeout=urllib3.Timeout(connect=1.0, read=4.0),
        )
        res = http.request("GET", urlstr, retries=False)
        print("status:%d" % res.status)
        # NOTE(review): writes to the "timeout001" file name - looks
        # copy-pasted; confirm intended name.
        load_page('send_url_with_timeout001.html', res)
    # Handlers run most-specific first: HTTPError is the base class of the
    # other two, so listing it first would make them unreachable.
    except urllib3.exceptions.NewConnectionError as e:
        print(e)
    except urllib3.exceptions.MaxRetryError as e:
        print(e)
    except urllib3.exceptions.HTTPError as e:
        print(e)
# Script entry point: run the selected demos only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    # Demos currently switched off; uncomment to run them.
    # sent_url()
    # send_url_with_headers()
    # send_get_with_param001()
    # send_get_with_param002()
    # send_post_with_field()
    # send_post_with_body()
    send_post_with_file()
    send_url_with_proxy()
    send_url_with_timeout001()
    send_get_with_param002()
    send_url_with_timeout003()
    send_url_with_exception()

上一篇:urllib3


下一篇:2021-10-15