# -*- coding: utf-8 -*-
"""Check whether URLs exist by issuing HTTP HEAD requests."""

try:  # Python 3 module names
    from http.client import HTTPConnection
    from urllib.parse import urlsplit
except ImportError:  # Python 2 module names
    from httplib import HTTPConnection
    from urlparse import urlsplit


class HttpChecker:
    """Check whether a URL exists.

    A URL counts as existing when a ``HEAD`` request for it is answered
    with status 200. Requests always go through ``HTTPConnection`` (plain
    HTTP), whatever scheme the URL carries.

    The checker can be used as a context manager, but ``check`` also works
    on its own: the connection is created on demand and closed after every
    request.
    """

    def __init__(self, domain):
        """Remember the host to talk to.

        :param domain: either a bare host (``"www.example.com"``) or a
            full URL, from which only the network location is kept.
        """
        # Anything containing "//" is a full URL, not just a domain name.
        if "//" in domain:
            self.netloc = urlsplit(domain).netloc
        else:
            self.netloc = domain
        # Created lazily so that check() is usable without a ``with`` block.
        self.connection = None

    def __enter__(self):
        """Create the connection object (no socket is opened yet)."""
        self.connection = HTTPConnection(self.netloc)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close and drop the connection; exceptions are not suppressed."""
        connection, self.connection = self.connection, None
        if connection is not None:
            connection.close()

    @staticmethod
    def _split_target(url):
        """Return ``(netloc, request_target)`` for *url*.

        The request target is the path plus the query string, and falls
        back to ``"/"`` when the URL has no path, because an empty target
        is not a valid HTTP request line.
        """
        parts = urlsplit(url)
        target = parts.path or "/"
        if parts.query:
            target += "?" + parts.query
        return parts.netloc, target

    def check(self, url, splited=False):
        """Return True if a HEAD request for *url* is answered with 200.

        :param url: a full URL, or -- when *splited* is true -- only the
            part after the host, which must start with ``/``.
        :param splited: set to true when *url* is already the request
            target and must be sent unchanged.
        :returns: ``True`` when the response status is exactly 200.
        :raises: whatever ``HTTPConnection`` raises on connection or
            protocol errors; the connection is closed in that case too.
        """
        if splited:
            # Already the part of the URL after the host; must start with "/".
            path = url
        else:
            netloc, path = self._split_target(url)
            if netloc and netloc != self.netloc:
                # The URL points at another host: switch over to it.
                self.netloc = netloc
                if self.connection is not None:
                    self.connection.close()
                self.connection = None

        if self.connection is None:
            self.connection = HTTPConnection(self.netloc)
        connection = self.connection

        try:
            connection.connect()
            connection.request("HEAD", path)
            status = connection.getresponse().status
        finally:
            # Release the socket even when the request fails.
            connection.close()
        return status == 200


if __name__ == "__main__":
    with HttpChecker("www.google.com.hk") as hc:
        print(hc.check("http://www.google.com.hk/intl/zh-CN/options/"))