# 2. Convert years written in Chinese numerals within a sentence to Arabic digits (将句子中年份的大写数字改为阿拉伯数字)
import re
# Sample sentence containing several years spelled out in Chinese numerals.
m0 = "在一九四九年新中国成立比一九九零年低百分之五点二人一九九六年击败俄军,取得实质独立"

# Runs of four or more everyday Chinese digits (no 〇, no financial forms).
# Not used below; kept because it is a module-level name.
pattrern1 = '[零一二三四五六七八九]{4,}'
# Same idea, extended with 〇, the financial digit forms and 貮/两.
# Every character in this class has an entry in CN_NUM, so a match can
# always be converted without a KeyError.
pattrern2 = '[〇一二三四五六七八九零壹贰叁肆伍陆柒捌玖貮两]{4,}'

# Chinese digit character -> Arabic digit value.
CN_NUM = {
    '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '零': 0,
    '壹': 1, '贰': 2, '叁': 3, '肆': 4, '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9, '貮': 2, '两': 2,
}

# Convert the numerals: find every run of at least four digit characters
# (shorter runs such as the lone 五 and 二 in "百分之五点二" are skipped by the
# {4,} quantifier) and map each character to its Arabic digit.
time1 = [
    ''.join(str(CN_NUM[ch]) for ch in year)
    for year in re.findall(pattrern2, m0)
]
# Bare expression: shows the result in a notebook/REPL, no effect in a script.
time1