# Author's note: I wrote this code myself. If you see anything that could be
# improved, please don't hold back -- tell me!
import json
import os
import subprocess
import time
import requests
import re
class BLBL(object):
    """Download a single Bilibili video page and merge its audio/video streams.

    The page embeds a ``window.__playinfo__`` JSON blob that lists separate
    audio and video stream URLs. Both streams are downloaded to the current
    working directory and then muxed into one file with ``ffmpeg``, which must
    be installed and available on ``PATH``.
    """

    # Characters that are illegal in file names on common file systems
    # (notably Windows), plus whitespace; they are stripped from the title.
    _UNSAFE_TITLE_CHARS = re.compile(r'[\\/:*?"<>|\s]')
    # Size of each chunk written to disk while streaming a download.
    _CHUNK_SIZE = 1024 * 1024
    # Seconds to wait for a connection / for the next bytes of a response.
    _TIMEOUT = 30

    def __init__(self, url, cookie, referer):
        """Store the target page and the request-header values.

        Args:
            url: Address of the video playback page.
            cookie: Value sent in the ``Cookie`` header of the page request.
            referer: Value sent in the ``Referer`` header of every request.
        """
        self.base_url = url
        self.cookie = cookie
        self.referer = referer
        # Request-header values shared by the page and download requests.
        self.accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
        self.accept_Encoding = 'gzip, deflate, br'
        self.accept_Language = 'zh-CN,zh;q=0.9,en;q=0.8'
        # Header *value* only: the header name must not be repeated inside it.
        self.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko)"

    def html(self):
        """Fetch the playback page and return its HTML source as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # Author's note: without these headers the site does not return the
        # complete page source (anti-scraping measure).
        base_headers = {
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
            'Cache-Control': 'no-cache',
            'Cookie': self.cookie,
            'Referer': self.referer,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent,
        }
        base_response = requests.get(
            self.base_url, headers=base_headers, timeout=self._TIMEOUT)
        # Fail early on an error page instead of trying to parse it later.
        base_response.raise_for_status()
        return base_response.text

    def xin_xi(self, html):
        """Extract the title and the stream URLs from the page HTML.

        Args:
            html: HTML source of the playback page.

        Returns:
            A ``(title, audio_url, video_url)`` tuple. The title has
            whitespace and file-name-unsafe characters removed so it can be
            used directly as a file name.

        Raises:
            IndexError: If the play-info script or the title element is not
                present in ``html``.
        """
        playinfo_match = re.search(
            r'<script>window\.__playinfo__=(.*?)</script>', html, re.S)
        if playinfo_match is None:
            raise IndexError(
                'window.__playinfo__ script not found in the page HTML')
        result = playinfo_match.group(1)
        print(result)

        title_match = re.search(r'<span class="tit">(.*?)</span>', html)
        if title_match is None:
            raise IndexError(
                'title element <span class="tit"> not found in the page HTML')
        title = self._UNSAFE_TITLE_CHARS.sub('', title_match.group(1))

        html_data = json.loads(result)
        dash = html_data['data']['dash']
        # The first entry of each stream list and its first backup URL are used.
        audio_url = dash['audio'][0]['backupUrl'][0]
        video_url = dash['video'][0]['backupUrl'][0]
        return title, audio_url, video_url

    def _download(self, url, headers, path):
        """Stream the response body of ``url`` into the file at ``path``."""
        # stream=True avoids holding a whole media file in memory at once.
        with requests.get(url, headers=headers, stream=True,
                          timeout=self._TIMEOUT) as response:
            response.raise_for_status()
            with open(path, mode='wb') as f:
                for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                    f.write(chunk)

    def video(self, html):
        """Download the audio and video streams of the page, then merge them.

        The streams are saved as ``<title>.mp3`` and ``<title>.mp4`` in the
        current working directory before being merged.

        Args:
            html: HTML source of the playback page.

        Raises:
            requests.HTTPError: If a stream request returns an error status.
        """
        title, audio_url, video_url = self.xin_xi(html)
        # Headers required when requesting the stream download addresses.
        download_headers = {
            'User-Agent': self.user_agent,
            'Referer': self.referer,
            'Origin': 'https://www.bilibili.com',
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
        }
        print('正在保存:', title)
        self._download(audio_url, download_headers, title + '.mp3')
        self._download(video_url, download_headers, title + '.mp4')
        self.video_audio_merge_single(title)

    def run(self):
        """Fetch the page, download both streams and merge them."""
        html = self.html()
        self.video(html)
        print('爬取成功')

    def video_audio_merge_single(self, video_name):
        """Merge ``<video_name>.mp4`` and ``<video_name>.mp3`` with ffmpeg.

        The download yields two files (one audio, one video) that have to be
        combined to get a complete video. The result is written to
        ``<video_name>(合).mp4``; the two inputs are deleted only after ffmpeg
        has finished successfully.

        Args:
            video_name: Shared base name (without extension) of the inputs.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with an error; the
                input files are left in place in that case.
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        """
        print("视频合成开始:", video_name)
        video_path = f'{video_name}.mp4'
        audio_path = f'{video_name}.mp3'
        # An argument list (no shell) keeps names with spaces, parentheses or
        # other shell metacharacters intact.
        command = [
            'ffmpeg',
            '-i', video_path,
            '-i', audio_path,
            '-vcodec', 'copy',
            '-acodec', 'copy',
            video_name + '(合)' + '.mp4',
        ]
        # Block until ffmpeg is done instead of sleeping for a fixed time, so
        # the inputs are never removed while they are still being read.
        subprocess.run(command, check=True)
        print("视频合成结束:", video_name)
        os.remove(audio_path)
        os.remove(video_path)
if __name__ == '__main__':
    # Address of the video playback page to download,
    # e.g. 'https://www.bilibili.com/video/BV1yy4y1i766'.
    url = 'https://www.bilibili.com/video/BV1yy4y1i766'
    referer = 'https://space.bilibili.com/'
    # TODO: paste the Cookie header value of a logged-in session here.
    cookie = ''
    blbl = BLBL(url, cookie, referer)
    blbl.run()