正则 re

import re

line = 'asdf fjdk; afed, fjek,asdf,   foo'

# Splitting: break on ';', ',' or a whitespace character, also consuming any
# whitespace that follows the delimiter.
print(re.split(r'[;,\s]\s*', line))  # ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

# Substitution with group back-references: keep groups 1 and 3 and replace the
# middle four digits with asterisks.
print(re.sub(r'(1[3-9][0-9])(\d{4})(\d{4})', r'\1****\3', '13444444441'))  # 134****4441

# When the pattern contains a capturing group, the text matched by that group
# is also included in the result list.
print(re.split(r'([;,\s])\s*', line))  # ['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']
# Adding ?: makes the group non-capturing, so only the fields are returned.
print(re.split(r'(?:[;,\s])\s*', line))  # ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

fields = re.split(r'([;,\s])\s*', line)
v = fields[::2]  # even indexes: the field values
print(v)
print(len(v))
d = fields[1::2] + ['']  # odd indexes: the delimiters, padded so zip() keeps the last value
print(d)
print(len(d))  # report the delimiter count (this line used to print len(v) a second time)

# Re-attach each value to the delimiter that followed it.
print(' '.join(a + b for a, b in zip(v, d)))

# Alternatives are tried left to right, so in 'http|https' the second branch
# could never win; 'https?' matches either scheme prefix.
print(re.match(r'https?', 'http://www.baidu.com'))  # <re.Match object; span=(0, 4), match='http'>
# str.startswith accepts a tuple of candidate prefixes.
print(any(url.startswith(('http://', 'https://')) for url in ['http://www.baidu.com']))  # True

from fnmatch import fnmatch, fnmatchcase

# Shell-style wildcard matching: each pair is (name, pattern).
wildcard_cases = [
    ('a.jpg', '*.jpg'),
    ('/a/v1/1', '/a/*'),
    ('abc.txt', '??c.txt'),
    ('/a/v1/1', '/a/v1/[0-9]'),
    ('a.jpg', '*.JPG'),  # False on OS X (Mac); True on Windows
]
for filename, pattern in wildcard_cases:
    print(fnmatch(filename, pattern))
print(fnmatchcase('a.jpg', '*.JPG'))  # False: fnmatchcase compares case-sensitively

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
datepat = re.compile(r'(\d+)/(\d+)/(\d+)')

# finditer yields one Match object per date; groups() gives its three captured parts.
for m in datepat.finditer(text):
    print(m.groups())
# ('11', '27', '2012')
# ('3', '13', '2013')

# findall returns the same tuples collected into a list.
print(datepat.findall(text))  # [('11', '27', '2012'), ('3', '13', '2013')]

from calendar import month_name, month_abbr

print(month_name[1])  # January
print(month_abbr[1])  # Jan

# subn returns a tuple of (new string, number of substitutions made).
print(re.compile(r'a').subn('A', 'abcda'))  # ('AbcdA', 2)
# The module-level function gives the same result without an explicit compile step.
print(re.subn(r'a', 'A', 'abcda'))  # ('AbcdA', 2)

text = 'UPPER PYTHON, lower python, Mixed Python'
# Case-insensitive replacement: every spelling of "python" becomes "snake".
ignore_case_python = re.compile('python', re.IGNORECASE)
print(ignore_case_python.sub('snake', text))  # UPPER snake, lower snake, Mixed snake


def matchcase(word):
    """Return a ``re.sub`` callback that inserts *word* in the case of the matched text.

    The returned function takes a Match object and returns:

    * ``word.upper()`` when the matched text is upper case,
    * ``word.lower()`` when the matched text is lower case,
    * ``word.capitalize()`` when the matched text starts with an upper-case
      character,
    * ``word`` unchanged otherwise (including for a zero-length match).

    Each Match object is also printed, as a trace of what ``re.sub`` passes in.
    """

    def replace(m):
        print(m)  # e.g. <re.Match object; span=(6, 12), match='PYTHON'>
        matched = m.group()  # e.g. 'PYTHON'
        if matched.isupper():
            return word.upper()
        if matched.islower():
            return word.lower()
        # Slice rather than index: a pattern that can match the empty string
        # (such as 'x*') yields '' here, and ''[0] would raise IndexError.
        if matched[:1].isupper():
            return word.capitalize()
        return word

    return replace


# The second argument (repl) may be a callable: it is then called with each
# Match object and must return the replacement string to use.
print(re.compile('python', re.IGNORECASE).sub(matchcase('snake'), text))  # UPPER SNAKE, lower snake, Mixed Snake

# Shortest-match mode. Matching is greedy by default and finds the longest
# possible match; adding ? after * makes it lazy.
str_num = re.compile(r'\"(.*)\"')
str_num2 = re.compile(r'\"(.*?)\"')
print(str_num.findall('Computer says "no."'))  # ['no.']
two_quotes = 'Computer says "no." Phone says "yes."'
print(str_num.findall(two_quotes))  # ['no." Phone says "yes.']
print(str_num2.findall(two_quotes))  # ['no.', 'yes.']

s = 'pýtĥöñ\fis\tawesome\r\n'
print(s)

# Translation table keyed by code point: tab and form feed become a space,
# carriage return maps to None and is therefore deleted.
remap = str.maketrans({
    '\t': ' ',
    '\f': ' ',
    '\r': None,
})
a = s.translate(remap)
print(a)

import unicodedata
import sys

# Translation table whose keys are the code points of all Unicode combining
# characters; every value is None, so translate() deletes them.
cmb_chrs = {
    code_point: None
    for code_point in range(sys.maxunicode)
    if unicodedata.combining(chr(code_point))
}

# Normalize the input to its decomposed form first.
# NFC means characters should be fully composed (a single code point where
# possible); NFD means they should be decomposed into multiple combining
# characters.
b = unicodedata.normalize('NFD', a)
print(b)
# Delete all the accent marks.
print(b.translate(cmb_chrs))  # python is awesome
print(ord('
上一篇:Python Linux系统管理之查找文件


下一篇:每周一个 Python 标准库 | fnmatch