代码如下
亲测有效
# encoding: utf-8
import requests # 模拟发送请求
import json
import re
import os
# Default browser-like HTTP request headers shared at module level.
headers = {
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    # Split across lines for readability; adjacent literals are concatenated.
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.84 Safari/537.36"
    ),
}
class BilibiliVideoSpider(object):
    """Download the video and audio streams of one Bilibili page and merge them.

    Workflow (see ``run``): fetch the page HTML, extract the two JSON blobs
    embedded in it (``__playinfo__`` and ``__INITIAL_STATE__``), download the
    first listed DASH video and audio streams to temporary files under
    ``output_root``, then mux them with the external ``ffmpeg`` program.
    """

    def __init__(self, url, output_root):
        """Store the page URL, the scratch directory and the request headers.

        Args:
            url: Address of the video page to download.
            output_root: Directory in which the temporary ``video.mp4`` and
                ``audio.mp4`` files are written.
        """
        self.url = url
        self.output_root = output_root
        # Browser-like request headers sent with every request.
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.5',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
        }

    def _match(self, text, pattern):
        """Return the JSON value captured by group 1 of ``pattern`` in ``text``.

        Args:
            text: Text to search (the page HTML).
            pattern: Regular expression whose first group captures a JSON
                document.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If ``pattern`` does not match, or (as
                ``json.JSONDecodeError``, a ``ValueError`` subclass) if the
                captured text is not valid JSON.
        """
        match = re.search(pattern, text)
        if match is None:
            # Previously this only printed a message and then crashed with an
            # AttributeError on ``match.group``; fail with a clear error.
            raise ValueError(f'this pattern was not matched: {pattern!r}')
        return json.loads(match.group(1))

    def getHtml(self):
        """Fetch the video page.

        Returns:
            The ``requests`` response when the server answers with status
            200; ``None`` for any other status or when the request itself
            fails.
        """
        try:
            response = requests.get(url=self.url, headers=self.headers)
        except requests.exceptions.RequestException:
            # The bare name ``RequestException`` was never imported, so the
            # old handler raised NameError instead of handling the failure.
            print('html reques error !')
            return None
        print(f'status_code: {response.status_code}')
        if response.status_code == 200:
            return response
        return None

    def parseHtml(self, response):
        """Extract the stream URLs and the title from the fetched page.

        Args:
            response: Object with a ``text`` attribute holding the page HTML
                (the value returned by ``getHtml``).

        Returns:
            Tuple ``(video_url, audio_url, video_name)``.

        Raises:
            ValueError: If either embedded JSON blob cannot be located.
            KeyError, IndexError: If the JSON lacks the expected fields.
        """
        # JSON describing the playable streams.
        play_info = self._match(
            response.text, r'__playinfo__=(.*?)</script><script>')
        # JSON describing the page content (title etc.).
        initial_state = self._match(
            response.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
        dash = play_info['data']['dash']
        # NOTE(review): the original author states the first video entry is
        # the highest-resolution rendition (1080p) - not verifiable from here.
        video_url = dash['video'][0]['baseUrl']
        audio_url = dash['audio'][0]['baseUrl']
        video_name = initial_state['videoData']['title']
        return video_url, audio_url, video_name

    def _download_stream(self, url, dst_path, headers, size_label):
        """Stream ``url`` into ``dst_path``, resuming via ``Range`` if cut short.

        Args:
            url: Address of the media stream.
            dst_path: File to (over)write with the stream's bytes.
            headers: Base request headers; never mutated.
            size_label: Text printed in front of the reported content length.

        Raises:
            requests.exceptions.HTTPError: On a 4xx/5xx answer.
            KeyError: If the server sends no ``content-length`` header.
            OSError: If the server ignores a resume request or an attempt
                yields no data (which would otherwise loop forever).
        """
        received = 0
        total = None
        # 'wb' rather than 'ab': leftovers from an earlier failed run must not
        # end up in front of the fresh data.
        with open(dst_path, 'wb') as output:
            while total is None or received < total:
                request_headers = dict(headers)
                if received:
                    request_headers['Range'] = 'bytes=%d-' % received
                received_before = received
                with requests.get(url, headers=request_headers,
                                  stream=True) as response:
                    response.raise_for_status()
                    if total is None:
                        print(size_label, response.headers['content-length'])
                        total = int(response.headers['content-length'])
                    elif response.status_code != 206:
                        # A full (200) body here would be appended after the
                        # bytes already written and corrupt the file.
                        raise OSError(
                            'server ignored the Range request for %s' % url)
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        output.write(chunk)
                        received += len(chunk)
                if received < total and received == received_before:
                    raise OSError('no data received from %s' % url)

    def downloadVideo(self, video_url, audio_url, video_name):
        """Download both streams, merge them with ffmpeg, delete the temp files.

        The temporary ``video.mp4`` / ``audio.mp4`` files are written under
        ``self.output_root``. The merged file is written as ``download.mp4``
        in the directory of this source file (cut at ``'shippingSchedule'``
        when that text occurs in the path).

        Args:
            video_url: Address of the video stream.
            audio_url: Address of the audio stream.
            video_name: Title, used only in the progress messages.
        """
        # Per-call copy: the module-level ``headers`` dict is no longer
        # mutated, so no Referer/Range value leaks into other requests.
        request_headers = dict(self.headers, Referer=self.url)
        print('开始下载视频: ')
        video = os.path.join(self.output_root, 'video.mp4')
        audio = os.path.join(self.output_root, 'audio.mp4')
        # Each stream is now fetched once instead of being downloaded in full
        # just to read its content-length and then downloaded again.
        self._download_stream(
            video_url, video, request_headers, '%s视频大小:' % video_name)
        self._download_stream(
            audio_url, audio, request_headers, '%s音频大小:' % video_name)
        print('视频下载完成')
        # NOTE(review): 'shippingSchedule' looks like a directory name from
        # the author's own project; kept so the output location is unchanged.
        root_path = os.path.abspath(
            os.path.dirname(__file__)).split('shippingSchedule')[0]
        video_dst = os.path.join(root_path, 'download.mp4')
        self.video_audio_merge(video, audio, video_dst)
        print(f'下载的视频: {video_dst}')
        # Only reached when ffmpeg succeeded; on failure the inputs are kept.
        os.remove(video)
        os.remove(audio)

    def video_audio_merge(self, video_src, audio_src, video_dst):
        """Mux one video file and one audio file into ``video_dst`` with ffmpeg.

        All three arguments are complete file paths. Earlier code appended
        ``_video.mp4`` / ``_audio.mp4`` / ``.mp4`` to them, which did not
        match the full file names that ``downloadVideo`` passes in.

        Args:
            video_src: Path of the video-only input file.
            audio_src: Path of the audio-only input file.
            video_dst: Path of the merged output file (overwritten).

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable is not found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed through verbatim.
        command = [
            'ffmpeg', '-y', '-loglevel', 'quiet',
            '-i', video_src,
            '-i', audio_src,
            '-c', 'copy',
            video_dst,
        ]
        # Wait for ffmpeg to finish; the caller deletes the inputs right
        # after this returns.
        subprocess.run(command, check=True)

    def run(self):
        """Fetch the page, parse it, then download and merge the streams.

        Raises:
            RuntimeError: If the page could not be fetched.
        """
        response = self.getHtml()
        if response is None:
            raise RuntimeError(f'failed to fetch page: {self.url}')
        video_url, audio_url, video_name = self.parseHtml(response)
        self.downloadVideo(video_url, audio_url, video_name)
def demo():
    """Run the spider on a hard-coded sample page with './' as output root."""
    spider = BilibiliVideoSpider(
        url='https://www.bilibili.com/video/BV1Q5411p7bz?from=search&seid=14643382716113842219',
        output_root='./',
    )
    spider.run()
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    demo()