"""Download free PPT template archives from the chinaz material search.

The script walks a fixed range of search-result pages, opens every result's
detail page, takes the first download link found there and saves the archive
into ``SAVE_DIR`` under the result's title.
"""
import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree

SEARCH_URL = "https://aspx.sc.chinaz.com/query.aspx"
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3868.400 QQBrowser/10.8.4394.400"
}
SAVE_DIR = "./zhanzhangsucai1"
FIRST_PAGE = 11
LAST_PAGE = 25  # inclusive
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the run

# Characters that are illegal in file names on Windows and/or POSIX.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_filename(name):
    """Return *name* with characters that cannot appear in a file name replaced."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", name).strip()
    return cleaned or "untitled"


def _first(node, expression):
    """Return the first XPath match of *expression* on *node*, or None."""
    if node is None:
        return None
    matches = node.xpath(expression)
    return matches[0] if matches else None


def _get(session, url, **kwargs):
    """GET *url* through *session*; raise ``requests.HTTPError`` on 4xx/5xx."""
    response = session.get(url, timeout=REQUEST_TIMEOUT, **kwargs)
    response.raise_for_status()
    return response


def _download_item(session, item):
    """Download the archive belonging to one search-result element.

    Problems with a single item (missing nodes, network or disk errors) are
    reported and skipped so that the remaining items are still processed.
    """
    detail_href = _first(item, './a/@href')
    title = _first(item, './a/img/@alt')
    if detail_href is None or title is None:
        print('跳过: 未找到详情链接或标题')
        return

    ppt_name = _safe_filename(title) + '.rar'
    try:
        # urljoin handles protocol-relative ("//host/path") as well as
        # absolute and relative links.
        detail_url = urljoin(SEARCH_URL, detail_href)
        detail_response = _get(session, detail_url)
        # Decode the raw bytes as UTF-8 directly instead of round-tripping
        # ``.text`` through ISO-8859-1, which raises UnicodeEncodeError
        # whenever the server does declare a charset.
        detail_html = detail_response.content.decode('utf-8', errors='replace')
        detail_tree = etree.HTML(detail_html)

        first_entry = _first(
            detail_tree, '//div[@class="clearfix mt20 downlist"]/ul/li'
        )
        ppt_href = _first(first_entry, './a/@href')
        if ppt_href is None:
            print(ppt_name, '下载失败: 未找到下载链接')
            return

        file_data = _get(session, urljoin(detail_url, ppt_href)).content
        with open(os.path.join(SAVE_DIR, ppt_name), 'wb') as fp:
            fp.write(file_data)
    except (requests.RequestException, OSError) as exc:
        print(ppt_name, '下载失败:', exc)
        return

    print(ppt_name, '下载成功!!!')


def _download_page(session, page):
    """Download every result listed on search page *page* (a string)."""
    param = {
        "keyword": "免费",
        "issale": "",
        "classID": "864",
        "page": page,
    }
    print('第' + page + '页下载中')
    try:
        page_text = _get(session, SEARCH_URL, params=param).text
    except requests.RequestException as exc:
        print('第' + page + '页下载失败:', exc)
        return

    tree = etree.HTML(page_text)
    div_list = (
        tree.xpath('//div[@class="box col3 ws_block"]') if tree is not None else []
    )
    for item in div_list:
        _download_item(session, item)
    print('第' + page + '页下载完成')


def main():
    """Create the output directory and download all configured pages."""
    os.makedirs(SAVE_DIR, exist_ok=True)
    # A session reuses the underlying connection across the many requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
            _download_page(session, str(page_number))


if __name__ == "__main__":
    main()