import os
import re

import requests
from lxml import etree

# Reference notes kept from the original script:
#   Host: mp.weixin.qq.com
#   Sample article: https://mp.weixin.qq.com/s/CMn6fcupeGsXYc31OVY9Nw
#       ("A Daughter's Ordeal Fleeing Burma")
#   Referer: https://mp.weixin.qq.com/s/ZZTkvorEJ2pSb6q_N-ilVQ
#       ("Lipin Wenshi - in-depth read: the world started going bad when
#        people began mocking Xiang Yu!")
#   User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36
#       (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36

_DEFAULT_URL = "https://mp.weixin.qq.com/s/mxkwl-IGpjvTvcJcJnzMbA"
_DEFAULT_OUT_DIR = r"F:\day08\人民日报\微信文章"
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/49.0.2623.112 Safari/537.36"
)
# Characters that are not allowed in Windows file names, plus control
# whitespace that may leak in from the HTML text nodes.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')
_FALLBACK_TITLE = "untitled"


def _safe_filename(title):
    """Return *title* with path-hostile characters replaced by underscores."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", title).strip(" .")
    return cleaned or _FALLBACK_TITLE


def body(url=_DEFAULT_URL, out_dir=_DEFAULT_OUT_DIR, timeout=10):
    """Download an article page and save its title and text to a file.

    The page is fetched with a browser-like User-Agent, the title is read
    from the ``<h2>`` under ``div#img-content`` and the content from every
    text node under ``div.rich_media_content``.  Each piece of text is
    written on its own line, prefixed with two full-width spaces, to
    ``<out_dir>/<title>.text`` encoded as UTF-8.

    Args:
        url: Address of the article page to fetch.
        out_dir: Directory the output file is written to; it is created
            if it does not exist.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be written.
    """
    headers = {"User-Agent": _USER_AGENT}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of saving an error page as if it were the article.
    response.raise_for_status()
    response.encoding = "utf_8"

    tree = etree.HTML(response.text)
    body_texts = tree.xpath('//div[@class="rich_media_content "]//text() ')
    title_texts = tree.xpath('//div[@id="img-content"]/h2/text()')

    titles = [text.strip() for text in title_texts]
    pieces = titles + body_texts
    print(pieces)

    # Two ideographic spaces (U+3000) act as a paragraph indent.
    lines = ["\u3000" + "\u3000" + piece + "\n" for piece in pieces]

    # The file is named after the last non-empty title fragment; fall back
    # to a fixed name when the page has no recognisable title.
    non_empty_titles = [title for title in titles if title]
    title = non_empty_titles[-1] if non_empty_titles else _FALLBACK_TITLE
    file_name = _safe_filename(title) + ".text"

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    target = os.path.join(out_dir, file_name)

    with open(target, "w", encoding="utf-8") as f:
        f.writelines(lines)


if __name__ == "__main__":
    body()