判断字符串中是否包含中文、数字、字母及其他字符

2023-10-06 12:33:04

'''
1、常用汉字的 Unicode 编码范围是：u'\u4e00' - u'\u9fa5'（不包含各扩展区中的汉字）；
2、与 Python 3 的 isalpha() 方法（检测字符串是否只由字母组成）一起使用时要注意：isalpha() 对汉字也返回 True，因此必须先判断是否为汉字，再判断是否为字母；

'''
import importlib
import sys
import re

# NOTE(review): this looks like a leftover of the Python 2 `reload(sys)` idiom;
# nothing else visible in this file uses `sys` -- confirm before removing.
importlib.reload(sys)


def check_contain_chinese(check_str):
    """Print how many items of ``check_str`` fall into each character class.

    Each item produced by iterating ``check_str`` is assigned to exactly one
    of four classes, tested in this order:

    1. Chinese character (code points U+4E00 through U+9FA5),
    2. letter (``str.isalpha()``),
    3. digit (``str.isdigit()``),
    4. anything else.

    The four totals are written to standard output, one per line. Nothing is
    returned.
    """
    tally = {"chinese": 0, "letter": 0, "digit": 0, "other": 0}

    for char in check_str:
        # The range test has to run before isalpha(): isalpha() is also True
        # for Chinese characters, which would otherwise be counted as letters.
        if u'\u4e00' <= char <= u'\u9fa5':
            bucket = "chinese"
        elif char.isalpha():
            bucket = "letter"
        elif char.isdigit():
            bucket = "digit"
        else:
            bucket = "other"
        tally[bucket] += 1

    print("汉字数量：", tally["chinese"])
    print("字母数量:", tally["letter"])
    print("数字数量：", tally["digit"])
    print("其他非数字字母汉字的数量：", tally["other"])


# 通过正则表达式来判断；
def check_re_chinese(check_str):
    """Print the Chinese / non-Chinese item counts of ``check_str`` using ``re``.

    Each item produced by iterating ``check_str`` is searched with a
    character-class pattern covering U+4E00 through U+9FA5. Items that match
    are counted as Chinese; all remaining items are counted as non-Chinese.
    Both totals are written to standard output. Nothing is returned.
    """
    chinese_pattern = re.compile(u'[\u4e00-\u9fa5]')

    # One flag per item: True when the pattern matches inside that item.
    match_flags = [chinese_pattern.search(item) is not None for item in check_str]
    chinese_total = sum(match_flags)
    non_chinese_total = len(match_flags) - chinese_total

    print("通过re得到汉字数量：", chinese_total)
    print("通过re得到非汉字数量：", non_chinese_total)


if __name__ == "__main__":
    # Demo run: classify the characters of one sample string, print a
    # separator, then count Chinese characters in a second sample via regex.
    classify_sample = "中12345qw!@#$%^&*erty 的是7我udf89 j50国"
    regex_sample = "中12345qw!@#$ %^&*erty的是7我ud f89j50国"

    check_contain_chinese(classify_sample)
    print("********************************")
    check_re_chinese(regex_sample)

运行结果：
汉字数量： 5
字母数量: 10
数字数量： 10
其他非数字字母汉字的数量： 10
********************************
通过re得到汉字数量： 5
通过re得到非汉字数量： 30

码农公寓

相关文章