122122"""
123123
124124import enum
125- import sre_compile
126- import sre_parse
125+ from . import _compiler , _parser
127126import functools
128- try :
129- import _locale
130- except ImportError :
131- _locale = None
127+ import _sre
132128
133129
# public symbols
# Names exported by "from <module> import *".  "NOFLAG" and "RegexFlag" are
# part of the export list; "template" is not.
__all__ = [
    "match", "fullmatch", "search", "sub", "subn", "split",
    "findall", "finditer", "compile", "purge", "escape",
    "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
    "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
    "UNICODE", "NOFLAG", "RegexFlag",
]

__version__ = "2.2.1"
144140
# Flag constants accepted by the functions in this module.
#
# * enum.global_enum exports the members into the module namespace and makes
#   their repr() module-qualified, so no explicit globals().update(...) or
#   hand-written __repr__ is needed here.
# * boundary=enum.KEEP keeps bits that do not correspond to a named member
#   instead of rejecting or dropping them.
# * NOTE(review): enum._simple_enum is a private helper of the enum module;
#   presumably it builds the IntFlag without the full metaclass machinery --
#   confirm before relying on it elsewhere.
@enum.global_enum
@enum._simple_enum(enum.IntFlag, boundary=enum.KEEP)
class RegexFlag:
    NOFLAG = 0
    ASCII = A = _compiler.SRE_FLAG_ASCII # assume ascii "locale"
    IGNORECASE = I = _compiler.SRE_FLAG_IGNORECASE # ignore case
    LOCALE = L = _compiler.SRE_FLAG_LOCALE # assume current 8-bit locale
    UNICODE = U = _compiler.SRE_FLAG_UNICODE # assume unicode "locale"
    MULTILINE = M = _compiler.SRE_FLAG_MULTILINE # make anchors look for newline
    DOTALL = S = _compiler.SRE_FLAG_DOTALL # make dot match newline
    VERBOSE = X = _compiler.SRE_FLAG_VERBOSE # ignore whitespace and comments
    # sre extensions (experimental, don't rely on these)
    DEBUG = _compiler.SRE_FLAG_DEBUG # dump pattern after compilation
    # str() falls back to the plain object form rather than the enum one.
    __str__ = object.__str__
    # Presumably used by the enum machinery to render leftover unnamed bits
    # in repr() as hexadecimal -- confirm against the enum documentation.
    _numeric_repr_ = hex
181156
# sre exception
# Re-exported under the public name "error" (listed in __all__).
error = _compiler.error
184159
185160# --------------------------------------------------------------------
186161# public interface
@@ -200,16 +175,39 @@ def search(pattern, string, flags=0):
200175 a Match object, or None if no match was found."""
201176 return _compile (pattern , flags ).search (string )
202177
# Sentinel used as the default for count/maxsplit/flags.  It is an int
# subclass instance equal to 0, so it can be forwarded unchanged to the
# compiled pattern's methods, yet an identity check ("is") still tells
# "argument not supplied" apart from an explicit 0.
class _ZeroSentinel(int):
    pass
_zero_sentinel = _ZeroSentinel()

def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.  repl can be either a string or a callable;
    if a string, backslash escapes in it are processed.  If it is
    a callable, it's passed the Match object and must return
    a replacement string to be used."""
    # count and flags are meant to be passed by keyword.  Extra positional
    # arguments are still accepted (count first, then flags) but trigger a
    # DeprecationWarning; supplying the same argument both ways, or more
    # than five positional arguments in total, raises TypeError.
    if args:
        if count is not _zero_sentinel:
            raise TypeError("sub() got multiple values for argument 'count'")
        count, *args = args
        if args:
            if flags is not _zero_sentinel:
                raise TypeError("sub() got multiple values for argument 'flags'")
            flags, *args = args
            if args:
                # Two extras were consumed above, so the caller passed
                # 3 required + 2 + len(args) positional arguments.
                raise TypeError("sub() takes from 3 to 5 positional arguments "
                                "but %d were given" % (5 + len(args)))

        # Imported lazily so the warnings module is only loaded on the
        # deprecated code path.
        import warnings
        warnings.warn(
            "'count' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )

    return _compile(pattern, flags).sub(repl, string, count)
# Advertise the plain (count=0, flags=0) signature instead of the *args form
# used internally to detect positional use.
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
211209
212- def subn (pattern , repl , string , count = 0 , flags = 0 ):
210+ def subn (pattern , repl , string , * args , count = _zero_sentinel , flags = _zero_sentinel ):
213211 """Return a 2-tuple containing (new_string, number).
214212 new_string is the string obtained by replacing the leftmost
215213 non-overlapping occurrences of the pattern in the source
@@ -218,17 +216,55 @@ def subn(pattern, repl, string, count=0, flags=0):
218216 callable; if a string, backslash escapes in it are processed.
219217 If it is a callable, it's passed the Match object and must
220218 return a replacement string to be used."""
219+ if args :
220+ if count is not _zero_sentinel :
221+ raise TypeError ("subn() got multiple values for argument 'count'" )
222+ count , * args = args
223+ if args :
224+ if flags is not _zero_sentinel :
225+ raise TypeError ("subn() got multiple values for argument 'flags'" )
226+ flags , * args = args
227+ if args :
228+ raise TypeError ("subn() takes from 3 to 5 positional arguments "
229+ "but %d were given" % (5 + len (args )))
230+
231+ import warnings
232+ warnings .warn (
233+ "'count' is passed as positional argument" ,
234+ DeprecationWarning , stacklevel = 2
235+ )
236+
221237 return _compile (pattern , flags ).subn (repl , string , count )
238+ subn .__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
222239
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.  If
    capturing parentheses are used in pattern, then the text of all
    groups in the pattern are also returned as part of the resulting
    list.  If maxsplit is nonzero, at most maxsplit splits occur,
    and the remainder of the string is returned as the final element
    of the list."""
    # maxsplit and flags are meant to be passed by keyword.  Extra
    # positional arguments are still accepted (maxsplit first, then flags)
    # but trigger a DeprecationWarning; supplying the same argument both
    # ways, or more than four positional arguments, raises TypeError.
    if args:
        if maxsplit is not _zero_sentinel:
            raise TypeError("split() got multiple values for argument 'maxsplit'")
        maxsplit, *args = args
        if args:
            if flags is not _zero_sentinel:
                raise TypeError("split() got multiple values for argument 'flags'")
            flags, *args = args
            if args:
                # Two extras were consumed above, so the caller passed
                # 2 required + 2 + len(args) positional arguments.
                raise TypeError("split() takes from 2 to 4 positional arguments "
                                "but %d were given" % (4 + len(args)))

        # Imported lazily so the warnings module is only loaded on the
        # deprecated code path.
        import warnings
        warnings.warn(
            "'maxsplit' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )

    return _compile(pattern, flags).split(string, maxsplit)
# Advertise the plain (maxsplit=0, flags=0) signature instead of the *args
# form used internally to detect positional use.
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
232268
233269def findall (pattern , string , flags = 0 ):
234270 """Return a list of all non-overlapping matches in the string.
@@ -254,11 +290,9 @@ def compile(pattern, flags=0):
def purge():
    "Clear the regular expression caches"
    # Both compiled-pattern dictionaries and the lru_cache wrapped around
    # _compile_template are emptied.
    _cache.clear()
    _cache2.clear()
    _compile_template.cache_clear()
259- def template (pattern , flags = 0 ):
260- "Compile a template pattern, returning a Pattern object"
261- return _compile (pattern , flags | T )
262296
263297# SPECIAL_CHARS
264298# closing ')', '}' and ']'
@@ -277,60 +311,69 @@ def escape(pattern):
277311 pattern = str (pattern , 'latin1' )
278312 return pattern .translate (_special_chars_map ).encode ('latin1' )
279313
# The concrete pattern and match types are obtained by compiling an empty
# pattern and matching it against the empty string.
Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))
282316
283317# --------------------------------------------------------------------
284318# internals
285319
# Use the fact that dict keeps the insertion order.
# _cache2 uses the simple FIFO policy which has better latency.
# _cache uses the LRU policy which has better hit rate.
_cache = {}  # LRU
_cache2 = {}  # FIFO
_MAXCACHE = 512
_MAXCACHE2 = 256
# The FIFO front cache must stay smaller than the LRU cache behind it.
assert _MAXCACHE2 < _MAXCACHE

def _compile(pattern, flags):
    # internal: compile pattern
    #
    # Lookup order: the small FIFO cache (_cache2) first, then the larger
    # LRU cache (_cache), and only then an actual compilation.
    # Raises ValueError if flags are given together with an already
    # compiled pattern, and TypeError if pattern is neither a string nor a
    # compiled pattern.
    if isinstance(flags, RegexFlag):
        flags = flags.value
    try:
        return _cache2[type(pattern), pattern, flags]
    except KeyError:
        pass

    key = (type(pattern), pattern, flags)
    # Item in _cache should be moved to the end if found.
    p = _cache.pop(key, None)
    if p is None:
        if isinstance(pattern, Pattern):
            if flags:
                raise ValueError(
                    "cannot process flags argument with a compiled pattern")
            # Already-compiled patterns are returned as-is and never cached.
            return pattern
        if not _compiler.isstring(pattern):
            raise TypeError("first argument must be string or compiled pattern")
        p = _compiler.compile(pattern, flags)
        if flags & DEBUG:
            # Patterns compiled with DEBUG are returned without caching.
            return p
        if len(_cache) >= _MAXCACHE:
            # Drop the least recently used item.
            # next(iter(_cache)) is known to have linear amortized time,
            # but it is used here to avoid a dependency from using OrderedDict.
            # For the small _MAXCACHE value it doesn't make much of a difference.
            try:
                del _cache[next(iter(_cache))]
            except (StopIteration, RuntimeError, KeyError):
                pass
    # Append to the end.
    _cache[key] = p

    if len(_cache2) >= _MAXCACHE2:
        # Drop the oldest item.
        try:
            del _cache2[next(iter(_cache2))]
        except (StopIteration, RuntimeError, KeyError):
            pass
    _cache2[key] = p
    return p
314372
@functools.lru_cache(_MAXCACHE)
def _compile_template(pattern, repl):
    # internal: compile replacement pattern
    # The replacement is parsed by _parser.parse_template and the result is
    # handed to _sre.template together with the pattern.  Results are
    # memoized per (pattern, repl) pair, holding at most _MAXCACHE entries;
    # purge() clears this cache.
    return _sre.template(pattern, _parser.parse_template(repl, pattern))
334377
335378# register myself for pickling
336379
@@ -346,22 +389,22 @@ def _pickle(p):
346389
347390class Scanner :
348391 def __init__ (self , lexicon , flags = 0 ):
349- from sre_constants import BRANCH , SUBPATTERN
392+ from . _constants import BRANCH , SUBPATTERN
350393 if isinstance (flags , RegexFlag ):
351394 flags = flags .value
352395 self .lexicon = lexicon
353396 # combine phrases into a compound pattern
354397 p = []
355- s = sre_parse .State ()
398+ s = _parser .State ()
356399 s .flags = flags
357400 for phrase , action in lexicon :
358401 gid = s .opengroup ()
359- p .append (sre_parse .SubPattern (s , [
360- (SUBPATTERN , (gid , 0 , 0 , sre_parse .parse (phrase , flags ))),
402+ p .append (_parser .SubPattern (s , [
403+ (SUBPATTERN , (gid , 0 , 0 , _parser .parse (phrase , flags ))),
361404 ]))
362405 s .closegroup (gid , p [- 1 ])
363- p = sre_parse .SubPattern (s , [(BRANCH , (None , p ))])
364- self .scanner = sre_compile .compile (p )
406+ p = _parser .SubPattern (s , [(BRANCH , (None , p ))])
407+ self .scanner = _compiler .compile (p )
365408 def scan (self , string ):
366409 result = []
367410 append = result .append
0 commit comments