判断字符串中是否包含中文、数字、字母及其他字符

'''
1、中文字符的编码范围是: u'\u4e00' -  u'\u9fa5';
2、但是与 Python 3 的 isalpha() 方法(检测字符串是否只由字母组成)一起判断时要注意:isalpha() 判断汉字时也返回 True,所以必须先判断汉字范围,再调用 isalpha();

'''
import importlib
import sys
import re

# NOTE(review): reloading ``sys`` looks like a Python 2 leftover (there it
# usually preceded ``sys.setdefaultencoding``); nothing in this script depends
# on it, so it is presumably unnecessary on Python 3 -- confirm before removing.
importlib.reload(sys)


def check_contain_chinese(check_str):
    """Count Chinese characters, letters, digits and other characters.

    Every character of ``check_str`` is put into exactly one category,
    tested in this order: Chinese (code points U+4E00..U+9FA5), alphabetic
    (``str.isalpha``), digit (``str.isdigit``), everything else.  The four
    totals are printed and also returned so callers can use them.

    Args:
        check_str: The string to analyse.

    Returns:
        A tuple ``(china_num, str_num, digit_num, other_num)``.
    """
    china_num = str_num = digit_num = other_num = 0
    for ch in check_str:
        # The range test must come first: str.isalpha() is also True for
        # Chinese characters, which would otherwise be counted as letters.
        if '\u4e00' <= ch <= '\u9fa5':
            china_num += 1
        elif ch.isalpha():
            str_num += 1
        elif ch.isdigit():
            digit_num += 1
        else:
            other_num += 1
    print("汉字数量:", china_num)
    print("字母数量:", str_num)
    print("数字数量:", digit_num)
    print("其他非数字字母汉字的数量:", other_num)
    return china_num, str_num, digit_num, other_num


# 通过正则表达式来判断;
def check_re_chinese(check_str):
    """Count Chinese and non-Chinese characters using a regular expression.

    A character counts as Chinese when it matches the character class
    ``[\\u4e00-\\u9fa5]``.  Both totals are printed and also returned so
    callers can use them.

    Args:
        check_str: The string to analyse.

    Returns:
        A tuple ``(num, other_num)``: the number of Chinese characters and
        the number of all remaining characters.
    """
    pattern = re.compile('[\u4e00-\u9fa5]')
    # Classify each character once; True entries are the Chinese ones.
    is_chinese = [pattern.search(ch) is not None for ch in check_str]
    num = sum(is_chinese)
    other_num = len(is_chinese) - num

    print("通过re得到汉字数量:", num)
    print("通过re得到非汉字数量:", other_num)
    return num, other_num


if __name__ == '__main__':
    # Demo run: classify one mixed sample per character category, then
    # count the Chinese characters of a second sample with the regex variant.
    category_sample = "中12345qw!@#$%^&*erty 的是7我udf89 j50国"
    regex_sample = "中12345qw!@#$ %^&*erty的是7我ud f89j50国"

    check_contain_chinese(category_sample)
    print("********************************")
    check_re_chinese(regex_sample)

运行结果:
汉字数量: 5
字母数量: 10
数字数量: 10
其他非数字字母汉字的数量: 10
********************************
通过re得到汉字数量: 5
通过re得到非汉字数量: 30

上一篇:linux-高可用/七层负载均衡/四层负载均衡


下一篇:【codevs1004】四子连棋