"""Count Chinese characters, letters, digits and other characters in a string.

Notes:
    1. This module treats code points U+4E00 through U+9FA5 as Chinese
       characters.
    2. ``str.isalpha()`` also returns True for Chinese characters, so the
       range check has to run before ``isalpha()`` when classifying.
"""
import importlib
import sys
import re

# Matches a single character in the U+4E00..U+9FA5 range.  Compiled once at
# import time instead of on every call to check_re_chinese().
_CHINESE_CHAR_RE = re.compile('[\u4e00-\u9fa5]')


def check_contain_chinese(check_str):
    """Classify every character of *check_str*, print and return the counts.

    Each character falls into exactly one bucket, tested in this order:
    Chinese (U+4E00..U+9FA5), alphabetic, digit, other.

    Args:
        check_str: The text to inspect.

    Returns:
        A ``(chinese, letters, digits, others)`` tuple of ints.
    """
    china_num = 0
    str_num = 0
    digit_num = 0
    other_num = 0
    for ch in check_str:
        # The range test must come first: isalpha() is also True for
        # Chinese characters and would otherwise count them as letters.
        if '\u4e00' <= ch <= '\u9fa5':
            china_num += 1
        elif ch.isalpha():
            str_num += 1
        elif ch.isdigit():
            digit_num += 1
        else:
            other_num += 1
    print("汉字数量:", china_num)
    print("字母数量:", str_num)
    print("数字数量:", digit_num)
    print("其他非数字字母汉字的数量:", other_num)
    return china_num, str_num, digit_num, other_num


# Same Chinese-character test, done with a regular expression.
def check_re_chinese(check_str):
    """Count Chinese and non-Chinese characters using a regular expression.

    Args:
        check_str: The text to inspect.

    Returns:
        A ``(chinese, non_chinese)`` tuple of ints.
    """
    num = 0
    other_num = 0
    for ch in check_str:
        if _CHINESE_CHAR_RE.search(ch):
            num += 1
        else:
            other_num += 1
    print("通过re得到汉字数量:", num)
    print("通过re得到非汉字数量:", other_num)
    return num, other_num


if __name__ == '__main__':
    check_contain_chinese("中12345qw!@#$%^&*erty 的是7我udf89 j50国")
    print("********************************")
    check_re_chinese("中12345qw!@#$ %^&*erty的是7我ud f89j50国")
运行结果:
汉字数量: 5
字母数量: 10
数字数量: 10
其他非数字字母汉字的数量: 10
********************************
通过re得到汉字数量: 5
通过re得到非汉字数量: 30