import re
line = 'asdf fjdk; afed, fjek,asdf, foo'
# Split on ';', ',' or whitespace, swallowing any whitespace that follows the delimiter.
print(re.split(r'[;,\s]\s*', line))  # ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']
# Substitution with group references: keep groups 1 and 3, mask the four digits in between.
print(re.sub(r'(1[3-9][0-9])(\d{4})(\d{4})', r'\1****\3', '13444444441'))  # 134****4441
# When the pattern contains a capturing group, the matched delimiters are returned in the result list too.
print(re.split(r'([;,\s])\s*', line))  # ['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']
# (?:...) makes the group non-capturing, so only the fields come back.
print(re.split(r'(?:[;,\s])\s*', line))  # ['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']
fields = re.split(r'([;,\s])\s*', line)
v = fields[::2]  # even positions hold the field values
print(v)
print(len(v))
d = fields[1::2] + ['']  # odd positions hold the delimiters; '' pads the list so it pairs up with v
print(d)
print(len(d))  # report the delimiter count (the original printed len(v) a second time)
# Glue every value back onto the delimiter that followed it.
print(' '.join(a + b for a, b in zip(v, d)))
# Alternation is tried left to right, so in 'http|https' the 'https' branch can
# never be reached; 'https?' states the optional trailing "s" directly.
print(re.match(r'https?', 'http://www.baidu.com'))  # <re.Match object; span=(0, 4), match='http'>
# startswith() accepts a tuple of prefixes; spelling out the full scheme keeps
# unrelated strings that merely begin with "http" from passing the check.
print(any(url.startswith(('http://', 'https://')) for url in ['http://www.baidu.com']))  # True
from fnmatch import fnmatch, fnmatchcase
# Shell-style wildcard matching. fnmatch() follows the case rules of the
# operating system it runs on, so the last pair below is platform-dependent.
_wildcard_cases = (
    ('a.jpg', '*.jpg'),
    ('/a/v1/1', '/a/*'),
    ('abc.txt', '??c.txt'),
    ('/a/v1/1', '/a/v1/[0-9]'),
    ('a.jpg', '*.JPG'),  # False on OS X (Mac); True on Windows
)
for _filename, _pattern in _wildcard_cases:
    print(fnmatch(_filename, _pattern))
# fnmatchcase() always compares case-sensitively.
print(fnmatchcase('a.jpg', '*.JPG'))  # False
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
# Three capture groups: month, day, year.
datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
# finditer() yields one match object per date; groups() returns its captures as a tuple.
for m in datepat.finditer(text):
    print(m.groups())
# ('11', '27', '2012')
# ('3', '13', '2013')
# findall() collects the same tuples into a list; reuse the compiled pattern
# rather than spelling the regex out a second time.
print(datepat.findall(text))  # [('11', '27', '2012'), ('3', '13', '2013')]
from calendar import month_name, month_abbr
# Index 1 is the first month in both calendar tables.
print(month_name[1])  # January
print(month_abbr[1])  # Jan
# subn() returns a (new_string, number_of_substitutions) tuple.
# (An identical second copy of this statement was removed.)
print(re.compile(r'a').subn('A', 'abcda'))  # ('AbcdA', 2)
text = 'UPPER PYTHON, lower python, Mixed Python'
# Case-insensitive search; the replacement text is inserted as written.
print(re.sub('python', 'snake', text, flags=re.IGNORECASE))  # UPPER snake, lower snake, Mixed snake
def matchcase(word):
    """Build a ``re.sub`` callback that copies each match's casing onto *word*.

    Args:
        word: Replacement text whose case is adjusted for every match.

    Returns:
        A function that takes a match object and returns *word* upper-cased
        when the matched text is all upper case, lower-cased when it is all
        lower case, capitalized when only its first character is upper case,
        and unchanged otherwise (including for an empty match).
    """
    def replace(m):
        print(m)  # e.g. <re.Match object; span=(6, 12), match='PYTHON'>
        text = m.group()  # e.g. PYTHON
        if text.isupper():
            return word.upper()
        if text.islower():
            return word.lower()
        # Slice instead of indexing: a pattern that can match the empty string
        # would make text[0] raise IndexError.
        if text[:1].isupper():
            return word.capitalize()
        return word
    return replace
# When repl (the second argument of sub) is callable, it is called with each
# Match object and must return the replacement string to use.
print(re.compile('python', flags=re.IGNORECASE).sub(matchcase('snake'), text))  # UPPER SNAKE, lower snake, Mixed Snake
# Shortest-match mode. Quantifiers are greedy by default and find the longest
# possible match; a trailing '?' makes them stop at the shortest one.
str_num = re.compile(r'\"(.*)\"')
str_num2 = re.compile(r'\"(.*?)\"')
_one_quote = 'Computer says "no."'
_two_quotes = 'Computer says "no." Phone says "yes."'
print(str_num.findall(_one_quote))  # ['no.']
print(str_num.findall(_two_quotes))  # ['no." Phone says "yes.']
print(str_num2.findall(_two_quotes))  # ['no.', 'yes.']
s = 'pýtĥöñ\fis\tawesome\r\n'
print(s)
# Translation table for str.translate(): maketrans() converts each
# single-character key to its code point. Tab and form feed become a space;
# carriage return maps to None, which deletes it.
remap = str.maketrans({'\t': ' ', '\f': ' ', '\r': None})
a = s.translate(remap)
print(a)
import unicodedata
import sys
# Every Unicode combining character becomes a key whose value is None, so
# str.translate() deletes it. sys.maxunicode is itself a valid code point,
# hence the "+ 1" to include it in the scan.
cmb_chrs = dict.fromkeys(c for c in range(sys.maxunicode + 1)
                         if unicodedata.combining(chr(c)))
# Normalize the input to its decomposed form first.
# NFC composes characters (a single code point where one exists), while NFD
# splits each character into a base character plus combining characters.
b = unicodedata.normalize('NFD', a)
print(b)
# Remove all the accent marks.
print(b.translate(cmb_chrs))  # python is awesome
print(ord('