# Python 爬虫 (Python web crawler)

import re
from urllib.parse import urljoin

import requests

# Scrape a novel: fetch its index page, then download every chapter linked
# from it and save them all into one text file named after the novel.

# URL of the novel's index page (placeholder - fill in before running).
url = 'http://'

# Fetch the index page with a plain HTTP GET.
response = requests.get(url)
# Decode the body as UTF-8 instead of relying on the guessed encoding.
response.encoding = 'utf-8'
# HTML source of the novel's index page.
html = response.text

# Novel title, read from the og:novel:book_name <meta> tag.
title_matches = re.findall(
    r'<meta property="og:novel:book_name" content="(.*?)" />', html, re.S)
if not title_matches:
    # Fail with a clear message instead of an opaque IndexError on [0].
    raise ValueError('novel title not found in page: %s' % url)
title = title_matches[0]

# One (href, chapter title) tuple per chapter link on the index page.
# re.S lets '.' match newlines as well, so a match may span several lines.
chapter_info_list = re.findall(
    r'<a rel="nofollow" href="(.*?)">(.*?)</a>', html, re.S)

# Create the output file named after the novel; the with-block guarantees
# the file is closed even if a chapter download fails part-way through.
with open('%s.txt' % title, 'w', encoding='utf-8') as fb:
    for chapter_url, chapter_title in chapter_info_list:
        # The href may be relative to the index page; urljoin leaves an
        # already-absolute URL unchanged.
        chapter_url = urljoin(url, chapter_url)
        chapter_response = requests.get(chapter_url)
        chapter_response.encoding = 'utf-8'
        chapter_html = chapter_response.text
        # Extract the chapter body from the chapter page itself (not from
        # the index page). The result is the raw HTML fragment(s) between
        # the two marker <div> tags; any inner markup is kept as-is.
        chapter_content = re.findall(
            r'<div class="book_content" id="content">(.*?)<div class="con_l">',
            chapter_html, re.S)
        # Write the chapter heading followed by its content; a chapter whose
        # page did not match still gets its heading so the gap is visible.
        fb.write('%s\n%s\n\n' % (chapter_title, '\n'.join(chapter_content)))


