中文输出
#-*-coding:utf8-*-
import requests
import re
# Seconds to wait on the HTTP request before giving up.
timeout = 8
# Sent with every request so it carries a desktop Chrome User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}


def banner(url):
    """Fetch *url* and return the text of its first ``<title>`` element.

    The response body is decoded as UTF-8 regardless of what the server
    declares, so that Chinese titles are not garbled.

    Args:
        url: Address of the page to fetch.

    Returns:
        The contents of the first ``<title>...</title>`` match, or the
        string ``"no"`` if the request fails or the page has no title.
        Failures are printed rather than raised (deliberate best-effort).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Force UTF-8 decoding; otherwise Chinese text is displayed garbled.
        response.encoding = 'utf-8'
        titles = re.findall(r'<title>(.*?)</title>', response.text)
        # An empty match list raises IndexError here, which is reported
        # below like any other failure.
        return titles[0]
    except Exception as e:
        # `except ... as` and the parenthesised print behave the same on
        # Python 2.6+ and Python 3.
        print(e)
        return "no"


if __name__ == "__main__":
    print(banner('http://www.baidu.com'))

将unicode写入文本
一:
>>> f = open('1.txt','w')
>>> f.write(u'叉叉')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
>>> a = unicode.encode(u'叉叉','utf-8')
>>> f.write(a)
>>> f.close()
二:
>>> import codecs
>>> f = codecs.open('1.txt','w')
>>> f.write(u'叉叉')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
>>> f = codecs.open('1.txt','w','utf-8')
>>> f.write(u'叉叉')
>>> f.close()