个人想法:分析一个文件的功能,来源于需求
re.py是用来做什么的,在文件内实现了什么小功能,这些小功能怎样被引用,而构建这个文件用了什么写法,做到知其然而知所以然,我觉得很重要。
第一点先搞清楚,我来自哪里
import enum
import sre_compile
import sre_parse
import functools
try:
import _locale
except ImportError:
_locale = None
引入了enum(枚举),sre_compile(编译),sre_parse(分析),functools(函数工具),或_locale(本地)
第二点,我要实现什么功能
# public symbols
__all__ = [
"match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "template", "escape",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE",
]
第三点,我要实现的功能缺乏些那些元素,我要添加
__version__ = "2.2.1"
class RegexFlag(enum.IntFlag):
ASCII = sre_compile.SRE_FLAG_ASCII # assume ascii "locale"
IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode "locale"
MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
A = ASCII
I = IGNORECASE
L = LOCALE
U = UNICODE
M = MULTILINE
S = DOTALL
X = VERBOSE
# sre extensions (experimental, don't rely on these)
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
T = TEMPLATE
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
globals().update(RegexFlag.__members__)
第四点 接口
# public interface
def match(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).fullmatch(string)
def search(pattern, string, flags=0):
"""Scan through string looking for a match to the pattern, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0, flags=0):
"""Return the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the Match object and must return
a replacement string to be used."""
return _compile(pattern, flags).sub(repl, string, count)
def subn(pattern, repl, string, count=0, flags=0):
"""Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in the source
string by the replacement repl. number is the number of
substitutions that were made. repl can be either a string or a
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the Match object and must
return a replacement string to be used."""
return _compile(pattern, flags).subn(repl, string, count)
def split(pattern, string, maxsplit=0, flags=0):
"""Split the source string by the occurrences of the pattern,
returning a list containing the resulting substrings. If
capturing parentheses are used in pattern, then the text of all
groups in the pattern are also returned as part of the resulting
list. If maxsplit is nonzero, at most maxsplit splits occur,
and the remainder of the string is returned as the final element
of the list."""
return _compile(pattern, flags).split(string, maxsplit)
def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
If one or more capturing groups are present in the pattern, return
a list of groups; this will be a list of tuples if the pattern
has more than one group.
Empty matches are included in the result."""
return _compile(pattern, flags).findall(string)
def finditer(pattern, string, flags=0):
"""Return an iterator over all non-overlapping matches in the
string. For each match, the iterator returns a Match object.
Empty matches are included in the result."""
return _compile(pattern, flags).finditer(string)
def compile(pattern, flags=0):
"Compile a regular expression pattern, returning a Pattern object."
return _compile(pattern, flags)
def purge():
"Clear the regular expression caches"
_cache.clear()
_compile_repl.cache_clear()
def template(pattern, flags=0):
"Compile a template pattern, returning a Pattern object"
return _compile(pattern, flags|T)
# SPECIAL_CHARS
# closing ')', '}' and ']'
# '-' (a range in character set)
# '&', '~', (extended character set operations)
# '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'}
def escape(pattern):
"""
Escape special characters in a string.
"""
if isinstance(pattern, str):
return pattern.translate(_special_chars_map)
else:
pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1')
Pattern = type(sre_compile.compile('', 0))
Match = type(sre_compile.compile('', 0).match(''))
第五点 网络转换
# internals
_cache = {} # ordered!
_MAXCACHE = 512
def _compile(pattern, flags):
# internal: compile pattern
if isinstance(flags, RegexFlag):
flags = flags.value
try:
return _cache[type(pattern), pattern, flags]
except KeyError:
pass
if isinstance(pattern, Pattern):
if flags:
raise ValueError(
"cannot process flags argument with a compiled pattern")
return pattern
if not sre_compile.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
p = sre_compile.compile(pattern, flags)
if not (flags & DEBUG):
if len(_cache) >= _MAXCACHE:
# Drop the oldest item
try:
del _cache[next(iter(_cache))]
except (StopIteration, RuntimeError, KeyError):
pass
_cache[type(pattern), pattern, flags] = p
return p
@functools.lru_cache(_MAXCACHE)
def _compile_repl(repl, pattern):
# internal: compile replacement pattern
return sre_parse.parse_template(repl, pattern)
def _expand(pattern, match, template):
# internal: Match.expand implementation hook
template = sre_parse.parse_template(template, pattern)
return sre_parse.expand_template(template, match)
def _subx(pattern, template):
# internal: Pattern.sub/subn implementation helper
template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1:
# literal replacement
return template[1][0]
def filter(match, template=template):
return sre_parse.expand_template(template, match)
return filter
# register myself for pickling
import copyreg
def _pickle(p):
return _compile, (p.pattern, p.flags)
copyreg.pickle(Pattern, _pickle, _compile)
第六点 语法扫描器
# experimental stuff (see python-dev discussions for details)
class Scanner:
def __init__(self, lexicon, flags=0):
from sre_constants import BRANCH, SUBPATTERN
if isinstance(flags, RegexFlag):
flags = flags.value
self.lexicon = lexicon
# combine phrases into a compound pattern
p = []
s = sre_parse.Pattern()
s.flags = flags
for phrase, action in lexicon:
gid = s.opengroup()
p.append(sre_parse.SubPattern(s, [
(SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))),
]))
s.closegroup(gid, p[-1])
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
self.scanner = sre_compile.compile(p)
def scan(self, string):
result = []
append = result.append
match = self.scanner.scanner(string).match
i = 0
while True:
m = match()
if not m:
break
j = m.end()
if i == j:
break
action = self.lexicon[m.lastindex-1][1]
if callable(action):
self.match = m
action = action(self, m.group())
if action is not None:
append(action)
i = j
return result, string[i:]