# Python的正则表达式总结

# Sample text for the plain str-method examples below. They are kept
# commented out and show what can be done before reaching for the re module.
s = 'hello world'
# print(s.find('llo'))          # find() returns the index of the match
# ret = s.replace('ll', 'xx')
# print(ret)
# print(s.split('w'))

import re
# ret = re.findall(r'w\w{2}', s)
# print(ret)

# Metacharacters
# "." is the wildcard: it stands for exactly one character, but as the second
# sample shows it does not match a newline.
dot_pattern = re.compile('w..l')
for sample in ('hello world', 'hello w\nrld'):
    # 'hello world'  -> ['worl']
    # 'hello w\nrld' -> []  (the newline after "w" is not matched by ".")
    ret = dot_pattern.findall(sample)
    print(ret)
# "^" anchors the pattern to the start of the string.
starts_with_h_o = re.compile('^h...o')
ret = starts_with_h_o.findall('hsdsdsdsdshkkhjkhhello')
print(ret)  # [] -- the string begins with "hsdsd"; the later "hello" is not at the start
ret = starts_with_h_o.findall('hellosdsdsdshkkhjkhhello')
print(ret)  # ['hello']

# "$" anchors the pattern to the end of the string.
ends_with_liu_jj = re.compile('liu..jj$')
ret = ends_with_liu_jj.findall('liuddjjddddlidfjj')
print(ret)  # [] -- "liuddjj" is at the start; the string ends with "lidfjj"
# Quantifiers say how many times the item right before them may repeat.
# The same subject string is searched with each of the first four patterns.
single_b_text = 'ssdkhjdkhjdkhjhjhjhaaajhjljjhjhjhjhjhjhjfjfjaljjfvcjbnmbaaaandxjvcbjbjaaavbjbvbjdvbjvbvc'

# "*": zero or more repetitions.
ret = re.findall('ba*', single_b_text)
print(ret)  # ['b', 'baaaa', 'b', 'b', 'b', 'b', 'b', 'b', 'b']

# "+": one or more repetitions (here of "a").
ret = re.findall('ba+', single_b_text)
print(ret)  # ['baaaa']

# "+": one or more repetitions (here of "b").
ret = re.findall('b+a', single_b_text)
print(ret)  # ['ba']

# "?": zero or one repetition.
ret = re.findall('b?a', single_b_text)
print(ret)  # ['a', 'a', 'a', 'a', 'ba', 'a', 'a', 'a', 'a', 'a', 'a']

# "{m}" means exactly m repetitions and "{m,n}" means m to n;
# "{1,}" means one to unbounded.
repeated_b_text = 'ssdkhjdkhjdkhjhjhjhaaajhjljjhjhjhjhjhbbjhjfjfjaljjfvcjbbbnmbaaaanbbbbbdxjvcbjbjaaavbjbvbjdvbjvbvc'
ret = re.findall('b{2}', repeated_b_text)
print(ret)  # ['bb', 'bb', 'bb', 'bb']
ret = re.findall('b{1,3}', repeated_b_text)
print(ret)  # ['bb', 'bbb', 'b', 'bbb', 'bb', 'b', 'b', 'b', 'b', 'b', 'b', 'b']

# Takeaway: "*" equals {0,}, "+" equals {1,} and "?" equals {0,1};
# the short forms are the recommended spelling.

# findall() returns every match it finds.
# Character sets: "[...]" matches exactly ONE character out of those listed.
# A comma inside the brackets is just another member of the set, not a
# separator (see the '[x,,]' case, where the commas in the subject match).
charset_cases = (
    # "a", then one of {c , d}, then "x" -> ['acx', 'adx']
    ('a[c,d]x', 'acxaxasssdsadxacdxdffdffadsdeddff'),
    # Single characters from {c o m , n}, not the words "com" / "cn".
    ('[com,cn]', 'www.baidu.comwwww.guge.cn'),
    # "-" between two characters denotes a range; each letter is one match.
    ('[a-z]', 'aasdsdfdffvvf'),
    # Inside "[...]" metacharacters lose their special meaning; the
    # exceptions are "\", "^" and "-".
    # Literal "*" -> ['x', 'x', '*', '*']
    ('[x,*]', 'dddfdfxsddx*ddd*'),
    # Literal "." -> ['x', '.']
    ('[x,.]', 'dfghsdxefffddsaw.'),
    # The comma itself is matched -> [',', 'x', ',']
    ('[x,,]', 'dfghs,dxefffd,dsaw.'),
    # A leading "^" inside "[...]" negates the set.
    # Everything except "t" -> every character of the subject, one by one:
    # ['d', 'f', 'g', 'h', 's', ',', 'd', 'x', 'e', 'f', 'f', 'f', 'd', ',', 'd', 's', 'a', 'w', '.']
    ('[^t]', 'dfghs,dxefffd,dsaw.'),
    # Everything except "4", "," and "5"
    # -> ['s', 'd', 's', 'd', 's', 'q', 'd', 'd', '6', '7', '7']
    ('[^4,5]', 'sdsdsq,dd46,775'),
)
for charset, subject in charset_cases:
    ret = re.findall(charset, subject)
    print(ret)
# A backslash in front of a metacharacter removes its special meaning; in
# front of certain ordinary letters it forms one of the special sequences
# listed below.
# The patterns are written as raw strings (r'...') so that Python hands the
# backslash to the regex engine untouched; '\d', '\s' and '\w' in a normal
# string literal are invalid string escapes and trigger a warning.
# \d  matches any decimal digit; [0-9] for ASCII text
print(re.findall(r'\d{11}', 'sddffgg15991710523dsdf159917105261dfdfffg'))  # ['15991710523', '15991710526']
# \D  matches any non-digit character; the complement of \d
# \s  matches any whitespace character, e.g. [ \t\n\r\f\v]
# \S  matches any non-whitespace character; the complement of \s
# \w  matches any word character: letters, digits and the underscore
#     ([a-zA-Z0-9_] for ASCII text)
# \W  matches any non-word character; the complement of \w
# \b  matches the empty string at a word boundary, i.e. between a word
#     character and a non-word character or the start/end of the string
print(re.findall(r'\sasd', 'asdddf asd'))  # [' asd']
print(re.findall(r'\wasd', 'asdddf aasd'))  # ['aasd']
print(re.findall(r'I\b', 'I am a LI$st'))  # ['I', 'I']

########################
# search() scans the string and returns a match object for the first match.
ret = re.search('wc', 'dfvdwcxxf')
print(ret)  # the match object itself (match 'wc' at indexes 4-6)
print(ret.group())  # wc
# group() on the match object returns the matched text.
ret = re.search('a.', 'agj').group()
print(ret)  # ag
# An escaped dot matches only a literal "."; the pattern is a raw string so
# the backslash reaches the regex engine without an invalid-escape warning.
ret = re.search(r'a\.', 'a.gj').group()
print(ret)  # a.
# Matching a literal backslash: the regex needs "\\", and the subject string
# spells its single backslash explicitly as "\\".
ret = re.findall(r'A\\s', 'sddddA\\s')
print(ret)  # ['A\\s']
# Same regex written without a raw string: four backslashes in the literal
# become the two-character regex "\\", which matches one backslash.
ret = re.findall('\\\\', 'aa\\vvwww')
print(ret)  # ['\\']
# "(...)" groups its contents so a quantifier applies to the whole group.
print(re.search('(sa)+', 'sasaddddsdwasasa').group())  # sasa
# "|" means alternation (either side may match).
print(re.search('3|(sa)', '3sa').group())  # 3

# 1. findall(): returns all results together in one list.
# 2. search(): returns a match object for the first match found; call
#    group() on it to get the matched text.
# 3. match(): like search(), but only matches at the beginning of the string.
ret = re.match('asd', 'asdass')
print(ret.group())  # asd

# 4. split(): splits the string at every match of the pattern.  The set
#    below contains "k", "," and "a"; adjacent separators yield an empty item.
ret = re.split('[k,a]', 'djkaffagg')
print(ret)  # ['dj', '', 'ff', 'gg']

# compile() builds a reusable pattern object.  The pattern is a raw string so
# that "\." is passed to the regex engine as an escaped (literal) dot rather
# than being read as an invalid string escape.
obj = re.compile(r'\.com')
ret = obj.split('dshjhjkhd.com')
print(ret)  # ['dshjhjkhd', '']
ret = obj.findall('dshjhjkhd.com')
print(ret)  # ['.com']

 

# 上一篇:常用模块【四】正则表达式


# 下一篇:爬虫-股吧