网页采集器练习

import re

import requests
 2 if __name__ == "__main__":
 3     header = {
 4         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3868.400 QQBrowser/10.8.4394.400"
 5     }
 6 # UA伪装,将对应的User-Agent封装到一个字典中
 7     url = "https://www.baidu.com/s"
 8 # 指定url
 9     kw = input("请输入您要搜索的内容:")
10     param = {
11         "wd": kw
12     }
13 # 对指定的url发起的请求对应的url是携带参数的,并且请求过程中处理了参数
14     response = requests.get(url=url, params=param, headers=header)
15     page_text = response.text
16     fileName = kw+".html"
17     with open(fileName, "w", encoding="utf-8") as fp:
18         fp.write(page_text)
19     print(fileName, "保存成功!!!")

 

上一篇:业务爆发式增长,音视频服务如何做好质量监控与优化?


下一篇:编程范式的思考