From 97c11f22aaf3eacad42d4f78309ffc1f6965e955 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Wed, 12 Aug 2020 23:07:19 -0400 Subject: [PATCH] Make --exclude only apply to recursively found files (#1591) Ever since --force-exclude was added, --exclude started to touch files that were given to Black through the CLI too. This is not documented behaviour and neither expected as --exclude and --force-exclude now behave the same! Before this commit, get_sources() when encountering a file that was passed explicitly through the CLI would pass a single Path object list to gen_python_files(). This causes bad behaviour since that function doesn't treat the exclude and force_exclude regexes differently. Which is fine for recursively found files, but *not* for files given through the CLI. Now when get_sources() iterates through srcs and encounters a file, it checks if the force_exclude regex matches, if not, then the file will be added to the computed sources set. A new function had to be created since before you can do regex matching, the path must be normalized. The full process of normalizing the path is somewhat long as there is special error handling. I didn't want to duplicate this logic in get_sources() and gen_python_files() so that's why there is a new helper function. --- src/black/__init__.py | 99 ++++++++++++++++++++++++++++--------------- tests/test_black.py | 63 ++++++++++++++++++++------- 2 files changed, 112 insertions(+), 50 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index df38fd0..803c7a1 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -583,9 +583,7 @@ def get_sources( root = find_project_root(src) sources: Set[Path] = set() path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx) - exclude_regexes = [exclude_regex] - if force_exclude_regex is not None: - exclude_regexes.append(force_exclude_regex) + gitignore = get_gitignore(root) for s in src: p = Path(s) @@ -595,19 +593,30 @@ def get_sources( p.iterdir(), root, include_regex, - exclude_regexes, + exclude_regex, + force_exclude_regex, report, - get_gitignore(root), + gitignore, ) ) elif s == "-": sources.add(p) elif p.is_file(): - sources.update( - gen_python_files( - [p], root, None, exclude_regexes, report, get_gitignore(root) - ) - ) + normalized_path = normalize_path_maybe_ignore(p, root, report) + if normalized_path is None: + continue + + normalized_path = "/" + normalized_path + # Hard-exclude any files that matches the `--force-exclude` regex. + if force_exclude_regex: + force_exclude_match = force_exclude_regex.search(normalized_path) + else: + force_exclude_match = None + if force_exclude_match and force_exclude_match.group(0): + report.path_ignored(p, "matches the --force-exclude regular expression") + continue + + sources.add(p) else: err(f"invalid path: {s}") return sources @@ -5757,16 +5766,40 @@ def get_gitignore(root: Path) -> PathSpec: return PathSpec.from_lines("gitwildmatch", lines) +def normalize_path_maybe_ignore( + path: Path, root: Path, report: "Report" +) -> Optional[str]: + """Normalize `path`. May return `None` if `path` was ignored. + + `report` is where "path ignored" output goes. + """ + try: + normalized_path = path.resolve().relative_to(root).as_posix() + except OSError as e: + report.path_ignored(path, f"cannot be read because {e}") + return None + + except ValueError: + if path.is_symlink(): + report.path_ignored(path, f"is a symbolic link that points outside {root}") + return None + + raise + + return normalized_path + + def gen_python_files( paths: Iterable[Path], root: Path, include: Optional[Pattern[str]], - exclude_regexes: Iterable[Pattern[str]], + exclude: Pattern[str], + force_exclude: Optional[Pattern[str]], report: "Report", gitignore: PathSpec, ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the - `exclude` regex, but are included by the `include` regex. + `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex. Symbolic links pointing outside of the `root` directory are ignored. @@ -5774,43 +5807,41 @@ def gen_python_files( """ assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in paths: - # Then ignore with `exclude` option. - try: - normalized_path = child.resolve().relative_to(root).as_posix() - except OSError as e: - report.path_ignored(child, f"cannot be read because {e}") + normalized_path = normalize_path_maybe_ignore(child, root, report) + if normalized_path is None: continue - except ValueError: - if child.is_symlink(): - report.path_ignored( - child, f"is a symbolic link that points outside {root}" - ) - continue - - raise # First ignore files matching .gitignore if gitignore.match_file(normalized_path): report.path_ignored(child, "matches the .gitignore file content") continue + # Then ignore with `--exclude` and `--force-exclude` options. normalized_path = "/" + normalized_path if child.is_dir(): normalized_path += "/" - is_excluded = False - for exclude in exclude_regexes: - exclude_match = exclude.search(normalized_path) if exclude else None - if exclude_match and exclude_match.group(0): - report.path_ignored(child, "matches the --exclude regular expression") - is_excluded = True - break - if is_excluded: + exclude_match = exclude.search(normalized_path) if exclude else None + if exclude_match and exclude_match.group(0): + report.path_ignored(child, "matches the --exclude regular expression") + continue + + force_exclude_match = ( + force_exclude.search(normalized_path) if force_exclude else None + ) + if force_exclude_match and force_exclude_match.group(0): + report.path_ignored(child, "matches the --force-exclude regular expression") continue if child.is_dir(): yield from gen_python_files( - child.iterdir(), root, include, exclude_regexes, report, gitignore + child.iterdir(), + root, + include, + exclude, + force_exclude, + report, + gitignore, ) elif child.is_file(): diff --git a/tests/test_black.py b/tests/test_black.py index 3ed5daa..f405558 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -110,6 +110,20 @@ def skip_if_exception(e: str) -> Iterator[None]: raise +class FakeContext(click.Context): + """A fake click Context for when calling functions that need it.""" + + def __init__(self) -> None: + self.default_map: Dict[str, Any] = {} + + +class FakeParameter(click.Parameter): + """A fake click Parameter for when calling functions that need it.""" + + def __init__(self) -> None: + pass + + class BlackRunner(CliRunner): """Modify CliRunner so that stderr is not merged with stdout. @@ -1551,7 +1565,32 @@ class BlackTestCase(unittest.TestCase): this_abs = THIS_DIR.resolve() sources.extend( black.gen_python_files( - path.iterdir(), this_abs, include, [exclude], report, gitignore + path.iterdir(), this_abs, include, exclude, None, report, gitignore + ) + ) + self.assertEqual(sorted(expected), sorted(sources)) + + @patch("black.find_project_root", lambda *args: THIS_DIR.resolve()) + def test_exclude_for_issue_1572(self) -> None: + # Exclude shouldn't touch files that were explicitly given to Black through the + # CLI. Exclude is supposed to only apply to the recursive discovery of files. + # https://github.com/psf/black/issues/1572 + path = THIS_DIR / "data" / "include_exclude_tests" + include = "" + exclude = r"/exclude/|a\.py" + src = str(path / "b/exclude/a.py") + report = black.Report() + expected = [Path(path / "b/exclude/a.py")] + sources = list( + black.get_sources( + ctx=FakeContext(), + src=(src,), + quiet=True, + verbose=False, + include=include, + exclude=exclude, + force_exclude=None, + report=report, ) ) self.assertEqual(sorted(expected), sorted(sources)) @@ -1572,7 +1611,7 @@ class BlackTestCase(unittest.TestCase): this_abs = THIS_DIR.resolve() sources.extend( black.gen_python_files( - path.iterdir(), this_abs, include, [exclude], report, gitignore + path.iterdir(), this_abs, include, exclude, None, report, gitignore ) ) self.assertEqual(sorted(expected), sorted(sources)) @@ -1600,7 +1639,8 @@ class BlackTestCase(unittest.TestCase): path.iterdir(), this_abs, empty, - [re.compile(black.DEFAULT_EXCLUDES)], + re.compile(black.DEFAULT_EXCLUDES), + None, report, gitignore, ) @@ -1627,7 +1667,8 @@ class BlackTestCase(unittest.TestCase): path.iterdir(), this_abs, re.compile(black.DEFAULT_INCLUDES), - [empty], + empty, + None, report, gitignore, ) @@ -1684,7 +1725,7 @@ class BlackTestCase(unittest.TestCase): try: list( black.gen_python_files( - path.iterdir(), root, include, exclude, report, gitignore + path.iterdir(), root, include, exclude, None, report, gitignore ) ) except ValueError as ve: @@ -1698,7 +1739,7 @@ class BlackTestCase(unittest.TestCase): with self.assertRaises(ValueError): list( black.gen_python_files( - path.iterdir(), root, include, exclude, report, gitignore + path.iterdir(), root, include, exclude, None, report, gitignore ) ) path.iterdir.assert_called() @@ -1777,16 +1818,6 @@ class BlackTestCase(unittest.TestCase): def test_read_pyproject_toml(self) -> None: test_toml_file = THIS_DIR / "test.toml" - - # Fake a click context and parameter so mypy stays happy - class FakeContext(click.Context): - def __init__(self) -> None: - self.default_map: Dict[str, Any] = {} - - class FakeParameter(click.Parameter): - def __init__(self) -> None: - pass - fake_ctx = FakeContext() black.read_pyproject_toml( fake_ctx, FakeParameter(), str(test_toml_file), -- 2.39.2