re正则表达式

  1 import re
  2 import unicodedata
  3 
  4 s = "a00xoghasalexjkdfldhfjk"
  5 v = s.find("alex")
  6 print(v)
  7 
  8 k = "23412342353464565346"
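# Fuzzy (pattern) matching with regular expressions

# print(re.findall("alex", s))
# #  . is a wildcard: it matches any single character except a newline
# print(re.findall("a..x", s))
# #  ^ anchors the match at the start of the string, $ at the end
# print(re.findall("^a..x", s))
# print(re.findall("h..k$", s))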
# #  Quantifiers (greedy by default): * = 0 or more ; + = 1 or more ; ? = 0 or 1
# b = "ccsrwrfdddddddddddkokosadkfod"
# print(re.findall("d*", b))
# print(re.findall("d+", b))
# print(re.findall("alex*", "asdhfale"))  # x repeated 0 or more times
# print(re.findall("alex+", "asdhfale"))  # x repeated 1 or more times
# print(re.findall("alex?", "asdhfalex"))  # x repeated 0 or 1 time
# print(re.findall("alex?", "asdhfale"))  # x repeated 0 or 1 time
#
# # {m,n} gives explicit repeat counts: {0,} == * ; {1,} == + ; {6} means exactly 6
# # {m,n} is greedy as well
# print(re.findall("alex{1,4}", "asdhfalexxx"))  # x repeated 1 to 4 times
# print(re.findall("alex{6}", "asdhfalexxx"))  # x must be repeated exactly 6 times
# # A ? placed after a quantifier makes it lazy (match as few as possible)
# print(re.findall("alex*?", "asdhfalexxx"))
# print(re.findall("alex+?", "asdhfalexxx"))
#
# # [] is a character set; inside it metacharacters are literal (except \, -, ^ and ])
# print(re.findall("www[oldboy baidu]", "wwwbaidu"))
# print(re.findall("x[ys]", "xyyszz"))
# print(re.findall("s[zby]a", "xyyszasya"))
# print(re.findall("s[zb,]a", "xyyszas,a"))
#
# print(re.findall("s[zb,]a", "xyyszas,a"))
# print(re.findall("s[zb*]a", "xyyscas,a"))
#
# # - inside [] denotes a range
# print(re.findall("s[a-z]*", "sqaaaaerqwr"))
# print(re.findall("s[a-z]*", "sqaaaaerqwr9"))  # [a-z]: any character from a to z
#
# print(re.findall("s[0-9]*", "s9qaaaaerqwr9"))  # [0-9]: any digit from 0 to 9
# # ^ as the first character of a set negates it
# print(re.findall("s[^a-z]", "sqaaaaerqwr9"))  # [^a-z]: any character NOT from a to z
# # \ escapes a metacharacter so that it matches literally
# print(re.findall("\([^()]*\)", "12*(34*6+2-5*(2-1))"))
# print(re.findall("\([^()]+\)", "12*(34*6+2-5*(2-1))"))
#
# # \d matches a digit, i.e. [0-9]
# print(re.findall("\d+", "12*(34*6+2-5*(2-1))"))  # [0-9]
# print(re.findall("\D+", "12*(34*6+2-5*(2-1))"))  # [^0-9]
# print(re.findall("\s+", "hello world"), "ssss")  # any whitespace: [ \t\n\r\f\v]
# print(re.findall("\S+", "hello world"))  # [^ \t\n\r\f\v]
# print(re.findall("\w+", "hello world"))  # [0-9a-zA-Z_] (plus Unicode word characters for str patterns)
# print(re.findall("\W", "hello world"))  # [^0-9a-zA-Z_]
# print(re.findall("\b", "hello world")) # NOTE: in a non-raw string "\b" is a backspace, not a word boundary
#
# print(re.findall(r"I\b", "hello I am world"))  # raw string: \b reaches the regex engine as a word boundary
# print(re.findall("I\\b", "hello I am world"))  # same pattern with the backslash doubled
#

# print(re.findall("c\\\f", r"abcde\fgh"))
# # | means alternation (or)
# print(re.findall("gh|f", "abcde|fgh"))
# # () creates a group
# print(re.findall("(abf)*", r"abfabfabfh"))
#
# print(re.findall("(?P<name>\w+)", r"abfabfabfh"))
#
# #  search returns a match object for the first match (use .group() to get the text); findall returns every match in a list
#
# print(re.search("\d+","23414afdfasf324fa"))
# print(re.search("\d+","23414afdfasf324fa").group())
# # Named groups: (?P<name>...)
# print(re.search("(?P<name>[a-z]+)(?P<age>\d+)","23414alex324fa").group("name","age"))
#
# # match only tries at the start of the string: a match object on success, None on failure
# print(re.match("\d+", "24dsd143f"))
#
# # split: cut the string at every match of the pattern
# print(re.split(" ", "hello abc asf"))
# print(re.split("[ |]", "hello abc|asf"))
# print(re.split("[ab]", "hebllo abc|asf"))
# # ["he","llo abc|asf"] -> ["he","llo ","bc|asf"] ->
# # ["he","llo ","","c|asf"] -> ["he","llo ","","c|","sf"]
# print(re.split("[ab]", "abc"))
# # sub replaces every match; subn also returns the number of replacements made
# print(re.sub("\d+","A","dsfaf123aasf42112dfa"))
#
# print(re.subn("\d+","A","dsfaf123aasf42112dfa"))
#
# # re.compile pre-compiles a pattern into a reusable pattern object
# com = re.compile("\d+")
# str1 = "qfaqs234rer1344"
# print(com.findall(str1))
#
# com = re.compile("\d")
# print(com.findall(str1))
# # finditer returns an iterator of match objects instead of a list
# ite = com.finditer(str1)
# # next(ite)
# print(ite)
# # (?:...) is a non-capturing group, so findall returns the whole match instead of the group
# print(re.findall("www\.(baidu|163)\.com","www.163.com"))  # ['163']: only the captured group
# print(re.findall("www\.(?:baidu|163)\.com","www.163.com")) # ['www.163.com']: the whole match
# print(re.search("abc|bcd", "abc"))  # search
# print(re.search("a(bc)|bcd", "abc").group())
# "\(9[^()]+\)"

# The + applies to the whole (abc) group; because the pattern contains a
# capturing group, findall reports the group's text rather than the full match.
print(
    re.compile("(abc)+").findall("abcabcabc")
)
# Without parentheses the + binds only to the final "c".
print(
    re.compile("abc+").findall("abcccabcabcfadfabc")
)
 

 

上一篇:re模块: 正则表达式


下一篇:根据正则规则爬取一个页面视频的方法