一个简单的爬虫 头部构造

# -*- coding:utf-8 -*-
import urllib.request
import random
# Single fixed header set. It is not referenced by the script below (which
# picks from ``head_list`` instead) but is kept for code that may use it.
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
}

# Pool of User-Agent strings; one is chosen at random each run.
# NOTE: the two Firefox entries originally read "rv2.0.1"; the real token
# is "rv:2.0.1", so the missing colon has been restored.
head_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
]
user_agent = random.choice(head_list)

url = r'https://www.baidu.com//'


def build_request(target_url, agent):
    """Return a ``urllib.request.Request`` for *target_url* with *agent* as its User-Agent."""
    req = urllib.request.Request(target_url)
    req.add_header('User-Agent', agent)
    return req


def decode_body(raw, charset=None):
    """Decode response bytes *raw* to ``str``.

    *charset* is the encoding declared by the server (or ``None``). UTF-8 is
    used when no charset is given or the declared one is unknown to Python.
    Undecodable bytes are replaced rather than raising, so a single bad byte
    does not abort the whole dump.
    """
    try:
        return raw.decode(charset or 'utf-8', errors='replace')
    except LookupError:
        # The server declared an encoding name Python does not recognise.
        return raw.decode('utf-8', errors='replace')


def main():
    """Fetch ``url`` with the randomly chosen User-Agent and print the page."""
    request = build_request(url, user_agent)

    # urllib stores header names capitalised as "User-agent".
    print(request.get_header('User-agent'))

    print(request.headers, request.type, request.data)

    # ``with`` closes the connection; the timeout keeps the script from
    # hanging forever on an unresponsive server.
    with urllib.request.urlopen(request, timeout=10) as response:
        charset = response.headers.get_content_charset()
        raw = response.read()

    # Decode with the charset the server reported instead of a hard-coded
    # 'gb2312', which raises UnicodeDecodeError on pages in other encodings.
    print(decode_body(raw, charset))


if __name__ == '__main__':
    main()

 

上一篇:创建UA池和代理IP列表


下一篇:常见浏览器