Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
bbfd404
GH-72904: Add optional *seps* argument to `fnmatch.translate()`
barneygale Jun 18, 2023
da9948d
Simplify `_make_child_relpath()` further
barneygale Jul 13, 2023
a07118b
Fix default value in docs
barneygale Jul 13, 2023
2728dcd
Match style of surrounding `fnmatch` code a little better.
barneygale Jul 13, 2023
fbcf4e3
Merge branch 'main' into gh-72904-fnmatch-seps
barneygale Jul 19, 2023
a0ce9c4
Docs + naming improvements
barneygale Jul 19, 2023
5b620fb
Replace *seps* with *sep*
barneygale Jul 26, 2023
51f2698
Update Doc/library/fnmatch.rst
barneygale Aug 4, 2023
9c8c3f3
Move to `glob.translate()`
barneygale Aug 11, 2023
8518ea2
Whoops
barneygale Aug 12, 2023
75129c8
Deduplicate code to handle character sets
barneygale Aug 13, 2023
2505590
Add support for `include_hidden=False`
barneygale Sep 23, 2023
1754d42
Fix doctest
barneygale Sep 23, 2023
dd2d401
Merge branch 'main' into gh-72904-fnmatch-seps
barneygale Sep 23, 2023
7b1ad63
Improve implementation; minimise fnmatch and pathlib diffs.
barneygale Sep 25, 2023
1485ff3
Fix tests
barneygale Sep 26, 2023
4c6d6f0
Tiny performance tweak
barneygale Sep 26, 2023
5aae7a2
Merge branch 'main' into gh-72904-fnmatch-seps
barneygale Sep 26, 2023
afb2d43
Fix `_make_child_relpath()`
barneygale Sep 26, 2023
d73df1b
Minor code improvements
barneygale Sep 26, 2023
c70afe3
Add another test for `include_hidden=False`
barneygale Sep 26, 2023
9cb2952
Merge branch 'main' into gh-72904-fnmatch-seps
barneygale Sep 30, 2023
f178b14
Add whatsnew entry
barneygale Sep 30, 2023
4a726aa
Collapse adjacent `**` segments.
barneygale Sep 30, 2023
78292eb
Apply suggestions from code review
barneygale Sep 30, 2023
5d4062c
Add comment explaining regex that consumes "empty" paths.
barneygale Sep 30, 2023
1ad624d
Merge branch 'main' into gh-72904-fnmatch-seps
barneygale Oct 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add support for include_hidden=False
  • Loading branch information
barneygale committed Sep 23, 2023
commit 2505590e77dd7ebe56546b4fcdaafb63f1177834
12 changes: 5 additions & 7 deletions Doc/library/glob.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ default. For example, consider a directory containing :file:`card.gif` and
['.card.gif']


.. function:: translate(pathname, *, recursive=False, seps=None)
.. function:: translate(pathname, *, recursive=False, include_hidden=False, seps=None)

Convert the given path specification to a regular expression for use with
:func:`re.match`. The path specification can contain shell-style wildcards.
Expand All @@ -155,7 +155,7 @@ default. For example, consider a directory containing :file:`card.gif` and

>>> import glob, re
>>>
>>> regex = glob.translate('**/*.txt', recursive=True)
>>> regex = glob.translate('**/*.txt', recursive=True, include_hidden=True)
>>> regex
'(?s:(?:.+/)?[^/]*\\.txt)\\Z'
>>> reobj = re.compile(regex)
Expand All @@ -170,14 +170,12 @@ default. For example, consider a directory containing :file:`card.gif` and
of path segments. If "``**``" occurs in any position other than a full
pattern segment, :exc:`ValueError` is raised.

If *include_hidden* is true, wildcards can match path segments that start
with a dot (``.``).

A sequence of path separators may be supplied to the *seps* argument. If
not given, :data:`os.sep` and :data:`~os.altsep` (if available) are used.

.. note::

Filenames that begin with a dot (``.``) are matched by wildcards, unlike
:func:`glob`.

.. seealso::

:meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods,
Expand Down
28 changes: 21 additions & 7 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,19 +251,19 @@ def escape(pathname):
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)


def translate(pat, *, recursive=False, seps=None):
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
"""Translate a pathname with shell wildcards to a regular expression.

If `recursive` is true, the pattern segment '**' will match any number of
path segments; if '**' appears outside its own segment, ValueError will be
raised.

If `include_hidden` is true, wildcards can match path segments beginning
with a dot ('.').

If a sequence of separator characters is given to `seps`, they will be
used to split the pattern into segments and match path separators. If not
given, os.path.sep and os.path.altsep (where available) are used.

Filenames beginning with a dot ('.') are NOT special in this method; they
are matched by wildcards, unlike in glob().
"""
if not seps:
if os.path.altsep:
Expand All @@ -273,9 +273,12 @@ def translate(pat, *, recursive=False, seps=None):
escaped_seps = ''.join(re.escape(sep) for sep in seps)
any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
not_sep = f'[^{escaped_seps}]'
not_dot = r'(?!\.)'
res = []
add = res.append
i, n = 0, len(pat)
if pat[:1] != '.' and not include_hidden:
add(not_dot)
while i < n:
c = pat[i]
i = i+1
Expand All @@ -292,14 +295,25 @@ def translate(pat, *, recursive=False, seps=None):
add(f'{not_sep}*')
elif star_count == 2 and is_segment:
if i == n:
add('.*')
if include_hidden:
add('.*')
else:
add(fr'(?:{not_dot}{not_sep}+{any_sep})*{not_dot}{not_sep}*')
else:
add(f'(?:.*{any_sep})?')
i = i+1
if include_hidden:
add(f'(?:.+{any_sep})?')
i = i+1
else:
add(fr'(?:{not_dot}{not_sep}+{any_sep})*')
i = i+1
if i < n and pat[i] != '.':
add(not_dot)
else:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
elif c in seps:
add(any_sep)
if i < n and pat[i] != '.' and not include_hidden:
add(not_dot)
elif c == '?':
add(not_sep)
elif c == '[':
Expand Down
2 changes: 1 addition & 1 deletion Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _compile_pattern(pat, sep, case_sensitive):
"""Compile given glob pattern to a re.Pattern object (observing case
sensitivity)."""
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
regex = glob.translate(pat, recursive=True, seps=sep)
regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep)
return re.compile(regex, flags).match


Expand Down
46 changes: 41 additions & 5 deletions Lib/test/test_glob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import glob
import os
import re
import shutil
import sys
import unittest
Expand Down Expand Up @@ -349,9 +350,43 @@ def test_glob_many_open_files(self):
for it in iters:
self.assertEqual(next(it), p)


def test_translate(self):
# Exercise glob.translate() with its default include_hidden=False: the
# generated regexes must not let wildcards match a path segment that
# starts with a dot, while a literal leading dot in the pattern still
# matches. Paths are built with os.path.join() so the platform's default
# separator is used.
#
# A lone '*' matches ordinary names (dots inside the name are fine) but
# not a name that begins with a dot.
match = re.compile(glob.translate('*')).match
self.assertIsNotNone(match('foo'))
self.assertIsNotNone(match('foo.bar'))
self.assertIsNone(match('.foo'))
# A pattern that itself starts with '.' matches dot-prefixed names.
match = re.compile(glob.translate('.*')).match
self.assertIsNotNone(match('.foo'))
# A recursive '**' on its own spans several segments, but is rejected as
# soon as any segment (first or last) begins with a dot.
match = re.compile(glob.translate('**', recursive=True)).match
self.assertIsNotNone(match('foo'))
self.assertIsNone(match('.foo'))
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
# '**' as a leading segment followed by '*': neither the directories
# consumed by '**' nor the final name may be hidden.
match = re.compile(glob.translate('**/*', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
# '**' as the trailing segment after '*': same expectations.
match = re.compile(glob.translate('*/**', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
# An explicit '.bar' final segment matches the hidden file, but the
# directories matched by '**' still must not be hidden.
match = re.compile(glob.translate('**/.bar', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
# '*.*' requires a dot inside the name, yet the leading '*' must not
# match a name that starts with a dot.
match = re.compile(glob.translate('**/*.*', recursive=True)).match
self.assertIsNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNotNone(match(os.path.join('foo', 'bar.txt')))
self.assertIsNone(match(os.path.join('foo', '.bar.txt')))

def test_translate_include_hidden(self):
def fn(pat):
return glob.translate(pat, seps='/')
return glob.translate(pat, include_hidden=True, seps='/')
self.assertEqual(fn('foo'), r'(?s:foo)\Z')
self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z')
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
Expand All @@ -370,20 +405,21 @@ def fn(pat):

def test_translate_recursive(self):
def fn(pat):
return glob.translate(pat, recursive=True, seps='/')
return glob.translate(pat, recursive=True, include_hidden=True, seps='/')
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
self.assertEqual(fn('?'), r'(?s:[^/])\Z')
self.assertEqual(fn('**'), r'(?s:.*)\Z')
self.assertRaises(ValueError, fn, '***')
self.assertRaises(ValueError, fn, 'a**')
self.assertRaises(ValueError, fn, '**b')
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.*/)?[^/]+/[^/]*\.[^/]*/.*)\Z')
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\Z')

def test_translate_seps(self):
def fn(pat):
return glob.translate(pat, recursive=True, seps=['/', '\\'])
return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\'])
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
self.assertEqual(fn('**/**'), r'(?s:(?:.*[/\\])?.*)\Z')
self.assertEqual(fn('**/**'), r'(?s:(?:.+[/\\])?.*)\Z')


@skip_unless_symlink
class SymlinkLoopGlobTests(unittest.TestCase):
Expand Down