字符编码与转码
注: 该图转自 http://www.cnblogs.com/luotianshuai/p/5735051.html.
Python2 解码,编码
#Python2
# -*- coding:utf-8 -*-
# Python 2 demo: a plain "..." literal is a byte string encoded with the
# source file's declared encoding (UTF-8 here), so it must be decoded to
# unicode before it can be re-encoded with another code table.

s = "你好"

# bytes (UTF-8) -> unicode
s_to_unicode = s.decode("utf-8")

# Decode with the source code table first, then encode with the target one.
s_to_gbk = s.decode("utf-8").encode("gbk")
print(s_to_gbk)
print(s_to_unicode)

# GBK bytes -> unicode -> UTF-8 bytes
gbk_to_utf8 = s_to_gbk.decode("gbk").encode("utf-8")
print(gbk_to_utf8)

# The u prefix makes the literal a unicode object directly.
s1 = u"你好"
print(s1)

# Print the interpreter's default encoding.
import sys
print(sys.getdefaultencoding())
Python3 解码,编码
# -*- encoding: utf-8 -*-
# Python 3 demo: str literals are Unicode text by default. Changing the
# file's declared encoding does not change how the string data is held in
# memory; only encode()/decode() convert between str and bytes.

s = "你好"  # a Unicode str

# str -> GBK bytes
s_gbk = s.encode("gbk")
print(s_gbk)  # gbk

# encode() with no argument uses UTF-8.
print(s.encode())  # utf-8

# GBK bytes -> str -> UTF-8 bytes
gbk_to_utf8 = s_gbk.decode("gbk").encode("utf-8")
print("utf8", gbk_to_utf8)

s = "你好"
# Round trip: encode("gb2312") produces bytes, and the following
# decode("gb2312") turns those bytes back into a str.
print(s.encode("utf-8").decode("utf-8").encode("gb2312").decode("gb2312"))