import requests
import json
from bs4 import BeautifulSoup  # HTML parsing / data extraction
import sys
import re
import urllib.request, urllib.error  # build the request, fetch page data
import sqlite3
import xlwt  # Excel output


def get_ten():
    """Scrape movie links and titles from Tencent Video's movie channel.

    Fetches the movie listing page, finds every ``<a class="figure">``
    element, extracts its ``href`` and ``title`` attributes, prints each
    extracted item, and returns them.

    Returns:
        list: alternating single-element lists, ``[link]`` then ``[title]``,
        one pair per matched anchor element.
    """
    url = "https://v.qq.com/channel/movie?_all=1&channel=movie&listpage=1&sort=18"
    headers = {
        # Spoof a desktop Chrome UA so the server returns the full page
        # instead of blocking the default urllib user agent.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/90.0.4430.85 Safari/537.36'
    }
    request = urllib.request.Request(url=url, headers=headers)  # build request
    response = urllib.request.urlopen(request).read().decode()  # fetch + decode
    html = BeautifulSoup(response, "html.parser")  # parse

    data_res = []
    find_link = re.compile(r'href="(.*?)"')   # capture the link attribute
    find_name = re.compile(r'title="(.*?)"')  # capture the movie title
    for anchor in html.find_all("a", class_="figure"):
        words = str(anchor)
        data_res.append(re.findall(find_link, words))  # add link
        data_res.append(re.findall(find_name, words))  # add movie title
    for item in data_res:
        print(item)
    # BUG FIX: previously returned `res` (the Request object), not the
    # scraped results. Return the extracted data instead.
    return data_res


if __name__ == '__main__':
    get_ten()