爬虫-网易云音乐

from urllib.parse import urljoin

import requests
from lxml import etree

class WangyiMusic:
    """Crawl the NetEase Cloud Music artist index and print artist names.

    Constructing an instance fetches the start page and immediately walks
    three levels of pages: the artist-category navigation on the start page,
    the initial-letter selector on every category page, and the artist list
    on every letter page. Category names and the artist names found on each
    letter page are printed to stdout; nothing is returned or stored.
    """

    # Site root against which the links found in the pages are resolved.
    BASE_URL = 'https://music.163.com'
    # Browser-like user agent sent with every request.
    HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
    }
    # Seconds to wait on the server. Without a timeout requests.get() can
    # block indefinitely on a stalled connection and freeze the whole crawl.
    TIMEOUT = 10

    def __init__(self, url):
        """Fetch the start page and run the full crawl.

        Args:
            url: Address of the artist discovery page to start from.
        """
        self.tree = self.request_html(url)
        self.parse_html()

    def request_html(self, url):
        """Download ``url`` and return its body parsed by ``etree.HTML``.

        Args:
            url: Absolute URL of the page to fetch.

        Returns:
            Whatever ``etree.HTML`` produces for the response text.
            NOTE(review): lxml appears to yield ``None`` for an empty
            document, so callers should be prepared for that.

        Raises:
            requests.exceptions.RequestException: Propagated unchanged from
                ``requests.get`` (connection errors, timeouts, ...).
        """
        response = requests.get(url=url, headers=self.HEADERS, timeout=self.TIMEOUT)
        return etree.HTML(response.text)

    def parse_html(self):
        """Walk category -> initial letter -> artist pages and print results.

        Reads ``self.tree`` (the parsed start page). For every category link
        a banner line with the category name is printed, then for every
        initial-letter link of that category the list of artist names found
        on the letter page is printed.
        """
        for group in self.tree.xpath('//div[@id="singer-cat-nav"]/div'):
            names = group.xpath('.//a/text()')
            links = group.xpath('.//a/@href')
            for name, link in zip(names, links):
                print('===================={}======================='.format(name))
                # Second request: the page of this artist category.
                category_tree = self.request_html(urljoin(self.BASE_URL, link))
                if category_tree is None:
                    # Nothing parseable came back; one bad page should not
                    # abort the remaining categories.
                    continue
                # position()>1 skips the first entry of the selector
                # (presumably a non-letter entry -- verify against the page).
                letter_links = category_tree.xpath('//ul[@id="initial-selector"]/li[position()>1]/a/@href')
                for letter_link in letter_links:
                    # Third request: artists of this category filtered by
                    # the initial letter.
                    letter_tree = self.request_html(urljoin(self.BASE_URL, letter_link))
                    if letter_tree is None:
                        continue
                    singer_name = letter_tree.xpath('//ul[@id="m-artist-box"]//a[@class="nm nm-icn f-thide s-fc0"]/text()')
                    print(singer_name)

if __name__ == '__main__':
    # Script entry point: constructing the crawler fetches the artist
    # discovery page and runs the whole crawl, printing as it goes.
    start_url = 'https://music.163.com/discover/artist/'
    WangyiMusic(url=start_url)




上一篇:423. 从英文中重建数字


下一篇:用Python新建用户并产生随机密码