''''''
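'''
1. Request URL
https://www.wandoujia.com/category/6001
2. Request method
GET
3. Request headers
None are set explicitly; the defaults of the requests library are sent.
'''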
# The three steps of a web scraper
# 1. Send the request
import requests
def get_page(url, timeout=10):
    """Download ``url`` with an HTTP GET request and return the response.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole script.

    Returns:
        The ``requests.Response`` object returned by ``requests.get``. The
        HTTP status code is not checked here; callers read ``.text``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    return requests.get(url, timeout=timeout)
# 2. Parse the data
'''
<a href="(.*?)" title="捕鱼大作战" class="name">(.*?)</a></h2><div class="meta"> <span class="install-count">(.*?)</span> .*?<span title="37.31MB">(.*?)</span>
</div><div class="app-desc"><h2 class="app-title-h2"><a href="https://www.wandoujia.com/apps/com.tuyoo.fish.uc" title="捕鱼大作战" class="name">捕鱼大作战</a></h2><div class="meta"> <span class="install-count">13.9万人安装</span> <span class="dot">・</span> <span title="37.31MB">37.31MB</span></div><div class="comment"> 捕鱼大作战,经典街机新体验 </div></div> <a class="tag-link" href="https://www.wandoujia.com/category/6001?pos=w/cardtag/gamecategory_com.tuyoo.fish.uc">休闲益智</a>
'''
import re
# One app entry on the category page.  Every wildcard is bounded (attribute
# values stop at the closing quote, text stops at the next tag, and the gap
# between the title link and the install count may not run into the next
# entry's <h2>), so a match can never stitch together pieces of two apps.
_APP_ENTRY = re.compile(
    r'<h2 class="app-title-h2"><a href="([^"]*)" title="([^"]*)" class="name">'
    r'(?:(?!</a>).)*</a>'
    r'(?:(?!<h2 class="app-title-h2">).)*?'
    r'<span class="install-count">([^<]*?)万人安装</span>'
    r' <span class="dot">・</span> '
    r'<span title="([^"]*?)MB">[^<]*?MB</span>',
    re.S,
)


def parse_index(html):
    """Extract the app entries from the HTML of a category page.

    Args:
        html: Page source as text.

    Returns:
        A list of ``(detail_url, app_name, download_num, app_size)`` string
        tuples, in page order.  ``download_num`` is the text in front of
        ``万人安装`` and ``app_size`` is the text in front of ``MB``.
        Entries whose install count or size is written with a different
        suffix are skipped instead of being merged with the entry that
        follows them.  An empty list is returned when nothing matches.
    """
    return _APP_ENTRY.findall(html)
# 3. Save the data
def save_data(movie, path='wandoujia.txt'):
    """Print one app record and append it to a UTF-8 text file.

    Args:
        movie: Sequence of exactly four items, unpacked as
            ``(detail_url, app_name, download_num, app_size)``.
        path: File the record is appended to.  Defaults to
            ``wandoujia.txt`` in the current working directory.

    Raises:
        ValueError: If ``movie`` does not unpack into four items.
    """
    detail_url, app_name, download_num, app_size = movie
    # The labels are runtime output and stay in Chinese: app name,
    # detail-page URL, install count (in units of 10,000) and size in MB.
    # Each record starts with a newline and ends with five of them.
    record = (
        f'\n游戏名称:{app_name}'
        f'\n详情页url:{detail_url}'
        f'\n下载人数:{download_num}万人'
        f'\napp大小:{app_size}MB'
        '\n\n\n\n\n'
    )
    print(record)
    # Append mode keeps the records written by earlier calls.
    with open(path, 'a', encoding='utf-8') as out:
        out.write(record)
if __name__ == '__main__':
    # Category listing page to scrape.
    category_url = 'https://www.wandoujia.com/category/6001'
    # Step 1: download the listing page.
    listing = get_page(category_url)
    # Step 2: pull the app entries out of the HTML.
    # Step 3: print and store each entry in turn.
    for entry in parse_index(listing.text):
        save_data(entry)