使用爬虫来爬取QQ音乐上的歌曲

近几天正在学习爬虫,于是做了一个这样的小东西。由于是初学者,写得不是很好,还请见谅!

以下直接上代码:

使用爬虫来爬取QQ音乐上的歌曲

这个是运行的界面,有点儿简陋,没有办法,个人技术有限。

以下是代码:

import requests
from lxml import etree
import json
import time
import random
import os
import re


# Module-level accumulator: QQMusicList() appends one dict per chart entry
# with the keys 'mid', 'name' and 'media_mid'.
songMidAndName=[]
def QQMusicList():
    """Download the songs on the hot-songs chart and record their metadata.

    Requests the top-list detail (the URL below fixes topId=26, num=100 and
    period "2019_31"), then for every entry passes ``mid`` and ``name`` to
    ``getPurl`` for downloading and appends a dict with the keys ``mid``,
    ``name`` and ``media_mid`` to the module-level ``songMidAndName`` list.

    Raises:
        requests.exceptions.RequestException: if the chart request fails or
            does not answer within the timeout.
        KeyError: if the response JSON lacks one of the expected keys.
    """
    url='https://u.y.qq.com/cgi-bin/musicu.fcg?-=getUCGI6133160755874543&g_tk=5381&loginUin=1533182258&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=%7B%22detail%22%3A%7B%22module%22%3A%22musicToplist.ToplistInfoServer%22%2C%22method%22%3A%22GetDetail%22%2C%22param%22%3A%7B%22topId%22%3A26%2C%22offset%22%3A0%2C%22num%22%3A100%2C%22period%22%3A%222019_31%22%7D%7D%2C%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A0%7D%7D'
    headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        "Referer": "https://y.qq.com/n/yqq/toplist/26.html"
    }
    # requests has no default timeout; without one a stalled server would
    # block the program forever.
    response=requests.get(url,headers=headers,timeout=10)
    payload=json.loads(response.content.decode('utf-8'))
    for song in payload['detail']['data']['songInfoList']:
        mid=song['mid']
        name=song['name']
        getPurl(mid,name)
        media_mid=song['file']['media_mid']
        songMidAndName.append({'mid':mid,'name':name,'media_mid':media_mid})

def getKey():
    """Return a random string of 17 decimal digits.

    getPurl appends the result to the ``-=getplaysongvkey`` query parameter
    of its request URL.

    The value is the fractional part of ``random.random()``.  It is formatted
    as fixed-point because ``str()`` of a float below 1e-4 switches to
    scientific notation (e.g. ``'1e-05'``), which would leak ``e-`` into the
    key instead of digits.
    """
    # "0." followed by exactly 17 digits; drop the leading "0.".
    return format(random.random(),'.17f')[2:]

def getPurl(mid,name):
    """Look up the stream path ("purl") for a song and download it.

    Queries the vkey endpoint for ``mid``, reads the ``purl`` of the first
    ``midurlinfo`` entry and, when it is non-empty, passes the full stream
    URL to ``downland``.  When no purl is returned the song is reported as
    not downloadable and nothing is fetched.

    Args:
        mid: song mid string inserted into the request's ``songmid`` list.
        name: song title, used for messages and as the output file name.

    Raises:
        requests.exceptions.RequestException: if the lookup request fails or
            times out.
        KeyError: if the response JSON lacks the expected keys.
    """
    url=('https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey'+getKey()+'&g_tk=142976560&loginUin='
         '1450133075&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&need'
         'NewCode=0&data=%7B"req_0"%3A%7B"module"%3A"vkey.GetVkeyServer"%2C"method"%3A"CgiGetVkey"%2C"para'
         'm"%3A%7B"guid"%3A"2422934060"%2C"songmid"%3A%5B"'+mid+'"%5D%2C"songtype"%3A%5B0%5D%2C"uin"%3A"14'
         '50133075"%2C"loginflag"%3A1%2C"platform"%3A"20"%7D%7D%2C"comm"%3A%7B"uin"%3A1450133075%2C"format'
         '"%3A"json"%2C"ct"%3A24%2C"cv"%3A0%7D%7D')

    # requests has no default timeout; set one so a stalled server cannot
    # hang the whole download run.
    response=requests.get(url,timeout=10)
    payload=json.loads(response.content.decode('utf-8'))
    midurlinfo=payload['req_0']['data']['midurlinfo']
    # An empty list or an empty/None purl both mean there is nothing to
    # download; test the purl itself rather than the concatenated URL.
    purl=midurlinfo[0]['purl'] if midurlinfo else ''
    if not purl:
        print("名为%s的歌曲只能通过客端下载"%(name))
        return
    downland("http://isure.stream.qqmusic.qq.com/"+purl,name)

def downland(songURL,name):
    """Download ``songURL`` and save it as ``./QQ音乐/<name>.mp3``.

    Characters that are not allowed in file names (``/ \\ > < ? " : * |``)
    are stripped from ``name`` first.  The request is made before the file
    is created and the HTTP status is checked, so a failed request neither
    leaves an empty file behind nor stores an error page as audio.

    Args:
        songURL: full URL of the audio stream.
        name: song title used to build the file name.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out.
        OSError: if the directory or file cannot be written.
    """
    basefile='./QQ音乐/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(basefile,exist_ok=True)
    name=re.sub(r'[/\\><?":*|]','',name)
    # stream=True lets iter_content read the body in chunks instead of
    # buffering the whole file in memory first.
    with requests.get(songURL,stream=True,timeout=30) as response:
        if not response.ok:
            print("名为%s.mp3的音乐下载失败(HTTP %s)"%(name,response.status_code))
            return
        with open(basefile+'%s.mp3'%(name),'wb') as file:
            for chunk in response.iter_content(10240):
                file.write(chunk)
    print("名为%s.mp3的音乐下载成功!"%(name))


def songList(id=874601481):
    """Download every song of one playlist.

    Fetches the playlist whose id is ``id`` and passes each song's ``mid``
    and ``name`` to ``getPurl``.

    Args:
        id: numeric playlist id (the number before ``.html`` in the
            playlist page URL).  Defaults to 874601481.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out.
        KeyError, IndexError: if the response does not contain a playlist.
    """
    playlist_id=str(id)
    # No space between the id and '&g_tk': a stray space there would become
    # part of the disstid value.
    url='https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&new_format=1&disstid='+playlist_id+'&g_tk=322671599&loginUin=1450133075&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0'
    headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        # Refer to the page of the playlist actually being requested.
        "Referer":"https://y.qq.com/n/yqq/playlist/"+playlist_id+".html"
    }
    # requests has no default timeout; set one so the call cannot hang.
    response=requests.get(url,headers=headers,timeout=10)
    playlist=json.loads(response.content.decode('utf-8'))['cdlist'][0]
    for song in playlist['songlist']:
        getPurl(song['mid'],song['name'])
def inputKey(num=200,name='初音未来'):
    """Search for ``name`` and download the songs in the result list.

    Args:
        num: how many results to request (sent as the ``n`` query
            parameter).
        name: search keyword, typically the singer's name (sent as the ``w``
            query parameter).

    Raises:
        requests.exceptions.RequestException: if the search request fails or
            times out.
        KeyError: if the response JSON lacks the expected keys.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # Percent-encode the keyword so characters such as '&', '#', '+' or
    # spaces cannot break or truncate the query string.
    keyword=quote(name,safe='')
    url='https://c.y.qq.com/soso/fcgi-bin/client_search_cp?ct=24&qqmusic_ver=1298&new_json=1&remoteplace=txt.yqq.song&searchid=59046815302372957&t=0&aggr=1&cr=1&catZhida=1&lossless=0&flag_qc=0&p=1&n='+str(num)+'&w='+keyword+'&g_tk=322671599&loginUin=1450133075&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0'
    headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        "Referer":"https://y.qq.com/portal/search.html"
    }
    # requests has no default timeout; set one so the call cannot hang.
    response=requests.get(url,headers=headers,timeout=10)
    results=json.loads(response.content.decode("utf-8"))['data']['song']['list']
    print()
    # Separate loop variable: the original reused (shadowed) ``name`` here.
    for song in results:
        getPurl(song['mid'], song['name'])



def _read_int(prompt):
    """Prompt with ``prompt`` until the user types a valid integer."""
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            # Re-prompt rather than crash the whole program on a typo.
            print("输入无效,请输入一个整数")


def main():
    """Run the interactive menu loop.

    Each round prints the instructions, waits three seconds, and reads a
    choice: 1 downloads the hot chart (``QQMusicList``), 2 downloads a
    playlist by id (``songList``), 3 searches by keyword and downloads the
    results (``inputKey``).  Afterwards the user can type ``#`` to quit;
    any other input starts another round.
    """
    while True:
        print("请输入1,2,3等三个数字来选择下载的方式")
        print('1代表下载热榜歌单,2代表下载所需要的歌单,3代表通过搜索来下载')
        print('下载的顺序都是按照QQ音乐的默认排名顺序下载的,暂时没有通过歌曲名来下载的功能')
        print('本程序下载的歌曲不会进行分类,请慎重!')
        print("请认真阅读上面的文字,三秒后开始执行程序")
        time.sleep(3)
        choice = _read_int("请输入数字来选取下载方式:")
        if choice == 1:
            QQMusicList()  # download the hot chart directly
        elif choice == 2:
            print("歌单ID举例https://y.qq.com/n/yqq/playsquare/874601481.html#stat=y_new.index.playlist.pic"
                  "看到那个.html前面的一串数字了嘛?在网页上打开你所需下载的歌单,把地址栏上面的在这儿的同样的数字复制进来就可以了")
            print("现在请输入,记得不要输错,程序报错后,请重新执行!")
            print("请认真阅读上面的文字,三秒后开始执行程序")
            time.sleep(3)
            playlist_id = _read_int("请输入歌单的ID")
            songList(playlist_id)
        elif choice == 3:
            print('请输入两个值,第一个是数字,代表下载多少首,第二个是歌手的名字')
            print("请认真阅读上面的文字,三秒后开始执行程序")
            time.sleep(3)
            count = _read_int('请输入第一个值(输完后回车):')
            singer = input('请输入歌手的名字:')
            inputKey(count, singer)
        else:
            # Previously an unknown choice was ignored without any feedback.
            print("无效的选项,请输入1,2或3")

        print('请问你要继续嘛 ?(按#键结束)')
        endInput=input('结请输入#,否则任意输入都行')
        if endInput=='#':
            break
            
# Start the interactive loop only when executed as a script, so importing
# this module does not block waiting for user input.
if __name__ == "__main__":
    main()

 

以上是代码,下面附上我的文件:

 

上一篇:[爬虫基础]爬取瓜子二手车车辆信息


下一篇:爬取猫眼和纵横中文网的榜单信息