"""Walk-through of the core ``re`` module operations.

Covers splitting on whitespace, finding and replacing e-mail addresses,
capture groups, back-references in ``sub`` and named groups. The script
prints each result; the expected output is shown in the comments.
"""
import re

# --- Splitting on whitespace ------------------------------------------------
# The regex describing one or more whitespace characters is \s+.
# A raw string is used so that "\s" is not parsed as a (invalid) string escape.
text = "foo bar\t baz \tqux"
regex = re.compile(r'\s+')

# Equivalent to re.split(r'\s+', text)
print(regex.split(text))
# ['foo', 'bar', 'baz', 'qux']

# findall returns the substrings that matched the pattern (the separators).
print(regex.findall(text))
# [' ', '\t ', ' \t']

# --- Matching e-mail addresses ----------------------------------------------
text = """
Dave dave@google.com
Steve steve@gmail.com
Rob rob@gmail.com
Ryan ryan@yahoo.com
"""
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'

# re.IGNORECASE makes the upper-case character classes match lower case too.
regex = re.compile(pattern, flags=re.IGNORECASE)
print(regex.findall(text))
# ['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']

# search returns only the first match; the match object carries the start
# and end positions of the match within the original string.
m = regex.search(text)
print(m)
# <re.Match object; span=(6, 21), match='dave@google.com'>
print(text[m.start():m.end()])
# dave@google.com

# sub replaces every match with the given string.
print(regex.sub("REDACTED", text))
#
# Dave REDACTED
# Steve REDACTED
# Rob REDACTED
# Ryan REDACTED
#

# --- Capture groups ---------------------------------------------------------
# Parentheses split each address into user name, domain and suffix.
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'
regex = re.compile(pattern, flags=re.IGNORECASE)

m = regex.match('wesm@bright.com')
print(m.groups())
# ('wesm', 'bright', 'com')

# With groups in the pattern, findall returns one tuple per match.
print(regex.findall(text))
# [('dave', 'google', 'com'), ('steve', 'gmail', 'com'),
#  ('rob', 'gmail', 'com'), ('ryan', 'yahoo', 'com')]

# sub can refer to the groups of each match via \1, \2, ...
print(regex.sub(r'Username:\1, Domain:\2, Suffix:\3', text))
#
# Dave Username:dave, Domain:google, Suffix:com
# Steve Username:steve, Domain:gmail, Suffix:com
# Rob Username:rob, Domain:gmail, Suffix:com
# Ryan Username:ryan, Domain:yahoo, Suffix:com
#

# --- Named groups -----------------------------------------------------------
# Naming the groups lets the match object produce a dict keyed by group name.
# Whitespace inside the pattern is ignored because re.VERBOSE is set below.
pattern = r"""
    (?P<username>[A-Z0-9._%+-]+)
    @
    (?P<Domain>[A-Z0-9.-]+)
    \.
    (?P<Suffix>[A-Z]{2,4})
"""
regex = re.compile(pattern, flags=re.IGNORECASE | re.VERBOSE)

m = regex.match('wesm@bright.com')
print(m.groupdict())
# {'username': 'wesm', 'Domain': 'bright', 'Suffix': 'com'}