1、requests方式
(1)无头部信息
import requests

# Fetch a page with a plain GET, sending no custom request headers.
url = "https://www.cnblogs.com/dearvee/p/6558571.html"

# Always pass a timeout: without one, requests can wait indefinitely on a
# server that never answers.
response = requests.get(url, timeout=10)

# Set the encoding explicitly so response.text is decoded as UTF-8.
response.encoding = 'utf-8'
print(response.text)
(2)有头部信息
import requests

# Fetch a page with a GET that carries a browser-like User-Agent header.
url = "https://www.cnblogs.com/dearvee/p/6558571.html"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}

# Always pass a timeout: without one, requests can wait indefinitely on a
# server that never answers.
response = requests.get(url, headers=headers, timeout=10)

# Set the encoding explicitly so response.text is decoded as UTF-8.
response.encoding = 'utf-8'
print(response.text)
2、urllib.request方式
(1)直接传入URL(不构造Request对象)
from urllib import request

# Fetch a page by handing the URL string straight to urlopen.
url = "https://www.cnblogs.com/dearvee/p/6558571.html"

# The with-block closes the HTTP response (and its socket) once the body has
# been read; the timeout keeps the call from blocking forever.
with request.urlopen(url, timeout=10) as response:
    print(response.read().decode('utf-8'))
(2)构造Request请求
from urllib import request

# Fetch a page through an explicit Request object so custom headers can be sent.
url = "https://www.cnblogs.com/dearvee/p/6558571.html"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
req = request.Request(url, headers=headers)

# The with-block closes the HTTP response (and its socket) once the body has
# been read; the timeout keeps the call from blocking forever.
with request.urlopen(req, timeout=10) as response:
    print(response.read().decode('utf-8'))
3、捕获错误信息
from urllib import request, error

# Fetch a page and report request failures instead of crashing.
url = "https://www.douban.com"
try:
    req = request.Request(url)
    # The with-block closes the HTTP response once the body has been read.
    with request.urlopen(req, timeout=10) as response:
        print(response.read().decode('utf-8'))
except error.HTTPError as e:
    # The server replied, but with an error status. HTTPError is a subclass
    # of URLError, so this clause must come first.
    print(e)
except error.URLError as e:
    # No HTTP response was obtained at all (e.g. DNS failure or refused
    # connection).
    print(e)
4、随机获取User-Agent头部信息
from fake_useragent import UserAgent

# Print randomly chosen User-Agent strings, one per browser family.
ua = UserAgent()

print(ua.ie)       # random Internet Explorer User-Agent, any version
print(ua.firefox)  # random Firefox User-Agent, any version
print(ua.chrome)   # random Chrome User-Agent, any version
print(ua.random)   # random User-Agent from any browser vendor