1. 提取 URL 信息：urlparse()
# Example: split a URL into its components with urlparse().
# The function lives in urllib.parse on Python 3 and in the urlparse module
# on Python 2, so try the modern location first and fall back to the old one.
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

url = "http://scrapy-chs.readthedocs.io/zh_CN/1.0/topics/items.html"
urlparse(url)
# Result shown by an interactive session:
# ParseResult(scheme='http', netloc='scrapy-chs.readthedocs.io', path='/zh_CN/1.0/topics/items.html', params='', query='', fragment='')