爬取酷狗音乐Top500榜单

import requests
from bs4 import BeautifulSoup
# Request headers passed to every page fetch in get_info: a desktop Chrome
# User-Agent string, so the request identifies itself like a regular browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/90.0.4430.93 Safari/537.36'
    ),
}
# Goal: scrape the Kugou TOP500 chart.
# Fields needed: rank, song title, song duration, artist.


def _split_artist_title(text):
    """Split an "artist - title" link text into a stripped (artist, title) pair."""
    text = text.strip()
    # Prefer the spaced separator so a hyphen inside a name or title survives;
    # fall back to a bare hyphen, splitting only at the first occurrence.
    for separator in (' - ', '-'):
        artist, found, title = text.partition(separator)
        if found:
            return artist.strip(), title.strip()
    # No separator at all: keep the whole text as the title instead of raising.
    return '', text


def get_info(url):
    """Fetch one ranking page, print one dict per song, and return the dicts.

    Args:
        url: Address of the ranking page to download.

    Returns:
        A list of dicts with the keys '排名' (rank), '歌手' (artist),
        '歌名' (title) and '时长' (duration), in the order found on the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Retrieving a page is a GET; the timeout keeps one stalled connection
    # from hanging the whole crawl.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an error response rather than parsing an error page
    # and silently producing no rows.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content.decode('utf-8'), 'lxml')

    rankings = soup.select('span.pc_temp_num')  # rank
    songs = soup.select('div.pc_temp_songlist > ul > li> a')  # artist and title
    durations = soup.select('span.pc_temp_tips_r > span')  # duration

    records = []
    # zip() stops at the shortest list, so rows are only emitted while all
    # three selectors still have a match.
    for rank, song, duration in zip(rankings, songs, durations):
        artist, title = _split_artist_title(song.get_text())
        data = {
            '排名': rank.get_text().strip(),
            '歌手': artist,
            '歌名': title,
            '时长': duration.get_text().strip(),
        }
        print(data)
        records.append(data)
    return records
if __name__ == '__main__':
    # Walk ranking pages 1 through 23 in order and scrape each one.
    # NOTE(review): presumably 23 pages are enough to cover the top 500
    # entries -- confirm against the site's page size.
    page_template = 'https://www.kugou.com/yy/rank/home/{}-8888.html?from=rank'
    for page in range(1, 24):
        get_info(page_template.format(page))
上一篇:[EntLib]微软企业库5.0 学习之路——第五步、介绍EntLib.Validation模块信息、验证器的实现层级及内置的各种验证器的使用方法——下篇


下一篇:Page Rank 算法