from urllib.parse import urljoin

import requests
from lxml import etree
class WangyiMusic:
    """Crawl the music.163.com artist directory and print the artist names.

    Constructing an instance runs the whole crawl: the entry page is
    downloaded and stored in ``self.tree``, then ``parse_html`` walks every
    artist category and every initial-letter page below it, printing what it
    finds to stdout.
    """

    # Site root against which the hrefs found in the pages are resolved.
    BASE_URL = 'https://music.163.com'

    # Value passed as ``timeout`` to ``requests.get`` (seconds); without it a
    # stalled connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 10

    # Browser-like user agent sent with every request.
    HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
    }

    def __init__(self, url):
        """Download the page at *url* and immediately crawl it.

        Args:
            url: Address of the artist directory entry page.
        """
        self.tree = self.request_html(url)
        self.parse_html()

    def request_html(self, url):
        """Fetch *url* and return its body parsed with ``etree.HTML``.

        Args:
            url: Absolute address of the page to download.

        Returns:
            The value returned by ``etree.HTML`` for the response text.

        Raises:
            requests.RequestException: Propagated from ``requests.get``,
                including a timeout after ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(
            url=url,
            headers=self.HEADERS,
            timeout=self.REQUEST_TIMEOUT,
        )
        return etree.HTML(response.text)

    def parse_html(self):
        """Walk categories and initial letters, printing artist names.

        For every category link found in ``self.tree`` a banner line with the
        category name is printed; then, for every initial-letter link of that
        category (the first ``li`` of the selector is skipped by the XPath),
        the list of artist names on that page is printed.
        """
        for category in self.tree.xpath('//div[@id="singer-cat-nav"]/div'):
            names = category.xpath('.//a/text()')
            hrefs = category.xpath('.//a/@href')
            for name, href in zip(names, hrefs):
                print('===================={}======================='.format(name))
                # Second request: one page per singer category.
                # urljoin (rather than string concatenation) also copes with
                # hrefs that are absolute or lack a leading slash.
                category_tree = self.request_html(urljoin(self.BASE_URL, href))
                letter_hrefs = category_tree.xpath(
                    '//ul[@id="initial-selector"]/li[position()>1]/a/@href'
                )
                for letter_href in letter_hrefs:
                    # Third request: one page per initial letter of the
                    # singers' names within the category.
                    letter_tree = self.request_html(
                        urljoin(self.BASE_URL, letter_href)
                    )
                    singer_names = letter_tree.xpath(
                        '//ul[@id="m-artist-box"]//a[@class="nm nm-icn f-thide s-fc0"]/text()'
                    )
                    print(singer_names)
if __name__ == '__main__':
    # Script entry point: building the crawler downloads the artist
    # directory page and runs the full crawl from its constructor.
    WangyiMusic(url='https://music.163.com/discover/artist/')