python--爬取豆瓣电影名和评分

# 爬取豆瓣网电影排名
import re
import urllib.request
# HTTP headers sent with the request: a desktop-Chrome User-Agent string is
# supplied in place of urllib's default one.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
}
# Other pages of the same listing differ only in the page_start query
# parameter (page_limit stays at 20), for example:
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=20
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=40
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=60
url = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=0"
req = urllib.request.Request(url, headers=header)
# Use the response as a context manager so the connection is always closed,
# and set a timeout so a stalled connection cannot block the script forever.
with urllib.request.urlopen(req, timeout=10) as resp:
    data = resp.read().decode("utf-8")
# Debugging aid: print(data) here to inspect the raw response text.
# Fields being extracted look like: "rate":"8.8"  "title":"阿凡达"
pat1 = r'"rate":"(.*?)"'
pat2 = r'"title":"(.*?)"'
pattern1 = re.compile(pat1, re.I)
pattern2 = re.compile(pat2, re.I)
data1 = pattern1.findall(data)  # ratings, in order of appearance
data2 = pattern2.findall(data)  # titles, in order of appearance
print(data1, data2)
# Pair titles with ratings positionally. zip() stops at the shorter list, so
# a mismatch between the two match counts no longer raises IndexError.
# NOTE(review): assumes every entry carries both fields in the same order,
# so that position N in each list refers to the same movie -- confirm.
for rank, (title, rate) in enumerate(zip(data2, data1), start=1):
    print("排名:"+str(rank)+",电影名:"+title+",豆瓣评分:"+rate)


上一篇:实战SQL优化之时间从1831s到2.7s的征途


下一篇:hdu1232