常用匹配模式表
详解
import re

# Walk-through of the basic `re` building blocks. Each call prints the list
# returned by re.findall(); the expected output is shown on the next line.
# Patterns are raw strings (r'...') so that backslashes reach the regex
# engine unchanged and do not trigger invalid-escape warnings.

# ---------------------------------------------------------------------------
# Character-class shorthands
# ---------------------------------------------------------------------------

# \w matches a word character: a letter, a digit or an underscore.
print(re.findall(r'\w', 'aAbc123_*()-='))
# ['a', 'A', 'b', 'c', '1', '2', '3', '_']

# \W matches anything that is NOT a word character.
print(re.findall(r'\W', 'aAbc123_*()-= '))
# ['*', '(', ')', '-', '=', ' ']

# \s matches any whitespace character; for ASCII text this is [ \t\n\r\f\v].
print(re.findall(r'\s', 'aA\rbc\t\n12\f3_*()-= '))
# ['\r', '\t', '\n', '\x0c', ' ']

# \S matches any non-whitespace character.
print(re.findall(r'\S', 'aA\rbc\t\n12\f3_*()-= '))
# ['a', 'A', 'b', 'c', '1', '2', '3', '_', '*', '(', ')', '-', '=']

# \d matches any decimal digit; for ASCII text this is [0-9].
print(re.findall(r'\d', 'aA\rbc\t\n12\f3_*()-= '))
# ['1', '2', '3']

# \D matches any character that is not a digit.
print(re.findall(r'\D', 'aA\rbc\t\n12\f3_*()-= '))
# ['a', 'A', '\r', 'b', 'c', '\t', '\n', '\x0c', '_', '*', '(', ')', '-', '=', ' ']

# ---------------------------------------------------------------------------
# Anchors
# ---------------------------------------------------------------------------

# \A matches only at the very start of the string. The subject below starts
# with a space, so 'alex' is not at position 0 and nothing matches.
print(re.findall(r'\Aalex', ' alexis alex sb'))
# []

# \Z matches only at the very end of the string.
print(re.findall(r'sb\Z', ' alexis alexsb sb'))
# ['sb']

# Unlike $, \Z does NOT match before a trailing newline: this subject ends
# with '\n', so 'sb' is not at the very end.
print(re.findall(r'sb\Z', 'alex\nalexis\nalex\nsb\n'))
# []

# ^ matches at the start of the string.
print(re.findall(r'^alex', 'alexis alex sb'))
# ['alex']

# $ matches at the end of the string, or just before a newline that ends it.
print(re.findall(r'sb$', 'alexis alex sb'))
# ['sb']
print(re.findall(r'sb$', 'alex\nalexis\nalex\nsb\n'))
# ['sb']

# ^...$ together require the whole string to match.
print(re.findall(r'^alex$', 'alexis alex sb'))
# []
print(re.findall(r'^alex$', 'al ex'))
# []
print(re.findall(r'^alex$', 'alex'))
# ['alex']

# ---------------------------------------------------------------------------
# Wildcard and repetition: .  *  ?  .*  .*?  +  {n,m}
# ---------------------------------------------------------------------------

# 1. '.' matches any single character except a newline; with re.DOTALL it
#    matches a newline as well.
print(re.findall(r'a.b', 'a1b a2b a b abbbb a\nb a\tb a*b'))
# ['a1b', 'a2b', 'a b', 'abb', 'a\tb', 'a*b']
print(re.findall(r'a.b', 'a1b a2b a b abbbb a\nb a\tb a*b', re.DOTALL))
# ['a1b', 'a2b', 'a b', 'abb', 'a\nb', 'a\tb', 'a*b']

# 2. '*' repeats the preceding item zero or more times (greedy).
print(re.findall(r'ab*', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['a', 'ab', 'abb', 'abbbbbbbb']

# 3. '+' repeats the preceding item one or more times (greedy).
print(re.findall(r'ab+', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['ab', 'abb', 'abbbbbbbb']

# 4. '?' repeats the preceding item zero or one time (greedy).
print(re.findall(r'ab?', 'a ab abb abbbbbbbb bbbbbbbb'))
# ['a', 'ab', 'ab', 'ab']

# 5. '{n,m}' repeats the preceding item between n and m times (greedy).
#    {0,}  is equivalent to *
#    {1,}  is equivalent to +
#    {0,1} is equivalent to ?
# '{n}' on its own (no comma) means exactly n repetitions: no more, no fewer.
# '{2,5}' is greedy, so the run of eight b's below contributes 'abbbbb'.
print(re.findall(r'ab{2,5}', 'a ab abb abbb abbbb abbbbbbbb bbbbbbbb'))
# ['abb', 'abbb', 'abbbb', 'abbbbb']

# Integers and decimals: digits, an optional dot, then optional digits.
# The stricter r'\d+\.?\d+' needs at least two digits in total, so it would
# miss the single-digit matches '1' and '3'.
print(re.findall(r'\d+\.?\d*', "asdfasdf123as1111111.123dfa12adsf1asdf3"))
# ['123', '1111111.123', '12', '1', '3']

# ---------------------------------------------------------------------------
# Character sets: [...] matches exactly ONE character from the set
# ---------------------------------------------------------------------------
# NOTE: re.DOTALL is kept from the original examples; it only changes the
# meaning of '.', so it has no effect on the patterns in this section.

# Baseline with \d: exactly one digit between 'a' and 'b'.
print(re.findall(r'a\db', 'a1111111b a3b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a4b', 'a9b']

# An explicit list of allowed characters (their order inside [] is irrelevant).
print(re.findall(r'a[501234]b', 'a1111111b a3b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a4b']

# A range of characters.
print(re.findall(r'a[0-5]b', 'a1111111b a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a3b', 'a1b', 'a0b', 'a4b']

# Several ranges combined in one set.
print(re.findall(r'a[0-9a-zA-Z]b', 'a1111111b axb a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['axb', 'a3b', 'a1b', 'a0b', 'a4b', 'a9b', 'aXb']

# A leading ^ negates the set; a negated set also matches a newline.
print(re.findall(r'a[^0-9a-zA-Z]b', 'a1111111b axb a3b a1b a0b a4b a9b aXb a b a\nb', re.DOTALL))
# ['a b', 'a\nb']

# Outside of [] a hyphen is an ordinary literal character.
print(re.findall(r'a-b', 'a-b aXb a b a\nb', re.DOTALL))
# ['a-b']

# Inside [] a hyphen placed first is literal rather than a range operator.
print(re.findall(r'a[-0-9\n]b', 'a-b a0b a1b a8b aXb a b a\nb', re.DOTALL))
# ['a-b', 'a0b', 'a1b', 'a8b', 'a\nb']