# Before writing a crawler, first confirm that the target site's robots.txt
# permits fetching the pages of interest.
from urllib.robotparser import RobotFileParser

UrlRobots = 'https://book.douban.com/robots.txt'

# Pages checked by default (the three URLs from the original script).
_DEFAULT_PATHS = (
    'https://book.douban.com/tag/?view=type&icn=index-sorttags-all',
    'https://book.douban.com/tag/%E5%B0%8F%E8%AF%B4',
    'https://book.douban.com/',
)


def GetRobotsTxt(url, paths=_DEFAULT_PATHS, user_agent='*'):
    """Fetch the robots.txt at *url* and report crawl permissions.

    For each URL in *paths*, prints whether *user_agent* is allowed to
    fetch it (preserving the original script's printed output) and
    returns the list of booleans so callers can also use the result
    programmatically.

    Args:
        url: Absolute URL of the robots.txt file to download.
        paths: Iterable of absolute URLs to test against the rules.
        user_agent: User-agent string to check permissions for.

    Returns:
        list[bool]: one ``can_fetch`` result per entry in *paths*.

    Note: ``rp.read()`` performs a live network request.
    """
    rp = RobotFileParser()
    rp.set_url(url)
    rp.read()  # network I/O: downloads and parses the robots.txt
    results = []
    for path in paths:
        allowed = rp.can_fetch(user_agent, path)
        print(allowed)
        results.append(allowed)
    return results


if __name__ == '__main__':
    # Guarded entry point: importing this module no longer triggers
    # a network call (the original ran this unconditionally).
    GetRobotsTxt(UrlRobots)