From ffaaf4838228c922b586a87f717ed402031fcc0a Mon Sep 17 00:00:00 2001 From: Antonio Ossa-Guerra Date: Tue, 8 Nov 2022 12:50:04 -0300 Subject: [PATCH] Compare each .gitignore found with an appropiate relative path (#3338) * Apply .gitignore files considering their location When a .gitignore file contains the special rule to ignore every subfolder content (`*/*`) and the file is located in a subfolder relative to where the command is executed (root), the rule is incorrectly applied and ignores every file at the same level of the .gitignore file. The reason for this is that the `gitignore` variable accumulates the rules found in each .gitignore while traversing files and directories recursively. This makes sense and, in general, works as expected. The problem is that the gitignore rules are applied using as the relative path from root to target directory as a reference. This is the cause of the bug. The implemented solution keeps track of every .gitignore file found while traversing the targets and the absolute location of each .gitignore file. Then, when matching files to the .gitignore rules, compare each set of rules with the appropiate relative path to the candidate target file. To make this possible, we changed the single `gitignore` object with a dictionary of similar objects, where the corresponding key is the absolute path to the folder that contains that .gitignore file. This required changing the signature of the `get_sources` function. Also, we introduce a `is_ignored` function that compares a file with every set of rules. Finally, some tests required an update to pass the gitignore object in the new format. Signed-off-by: Antonio Ossa Guerra * Test .gitignore with `*/*` is applied correctly The test contains three cases: 1) when the .gitignore with the special rule to ignore every subfolder and its contents (*/*) is in the root, 2) when the file is inside a subfolder relative to root (nested), and 3) when the target folder contains the .gitignore and root is a parent folder of the target. In all of these cases, we compare the files that are visible by Black with a known list of paths containing the expected values. Before the fix introduced in the previous commit, these tests failed when the .gitignore file was nested (second case). Now, the test is passed for all cases. Signed-off-by: Antonio Ossa Guerra * Update CHANGES.md Add entry about fixed bug and changes introduced: ignore files by considering the location of each .gitignore file and the relative path of each target Signed-off-by: Antonio Ossa Guerra * Small refactor to improve code readability These changes are small improvements to improve code readability: rename a variable to a more descriptive name (from `exclude_is_None` to `using_default_exclude`), use a better syntax to include the type annotation for `gitignore` variable (from typing comment to Python-style typing annotation), and replace an if-else block with a single dictionary definition (in this case, we need to compare keys instead of values, meaning that the change works) Signed-off-by: Antonio Ossa Guerra * Make nested function a top-level function The function to match a given path with every discovered .gitignore file does not need to be a nested function and can be a top-level function. The arguments did not change, but the naming of local variables was improved for readability. Signed-off-by: Antonio Ossa Guerra Signed-off-by: Antonio Ossa Guerra --- CHANGES.md | 2 ++ src/black/__init__.py | 17 ++++------ src/black/files.py | 28 +++++++++++++--- .../ignore_subfolders_gitignore_tests/a.py | 0 .../subdir/.gitignore | 1 + .../subdir/b.py | 0 .../subdir/subdir/c.py | 0 tests/test_black.py | 32 +++++++++++++++++-- 8 files changed, 63 insertions(+), 17 deletions(-) create mode 100644 tests/data/ignore_subfolders_gitignore_tests/a.py create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/b.py create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py diff --git a/CHANGES.md b/CHANGES.md index a1071a8..a9a1b27 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,6 +22,8 @@ +- Fix incorrectly applied .gitignore rules by considering the .gitignore location and + the relative path to the target file (#3338) - Fix incorrectly ignoring .gitignore presence when more than one source directory is specified (#3336) diff --git a/src/black/__init__.py b/src/black/__init__.py index 6c8d346..2786861 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -628,9 +628,9 @@ def get_sources( sources: Set[Path] = set() root = ctx.obj["root"] - exclude_is_None = exclude is None + using_default_exclude = exclude is None exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude - gitignore = None # type: Optional[PathSpec] + gitignore: Optional[PathSpec] = None root_gitignore = get_gitignore(root) for s in src: @@ -666,14 +666,11 @@ def get_sources( sources.add(p) elif p.is_dir(): - if exclude_is_None: - p_gitignore = get_gitignore(p) - # No need to use p's gitignore if it is identical to root's gitignore - # (i.e. root and p point to the same directory). - if root_gitignore == p_gitignore: - gitignore = root_gitignore - else: - gitignore = root_gitignore + p_gitignore + if using_default_exclude: + gitignore = { + root: root_gitignore, + root / p: get_gitignore(p), + } sources.update( gen_python_files( p.iterdir(), diff --git a/src/black/files.py b/src/black/files.py index ed503f5..ea517f4 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -182,6 +182,19 @@ def normalize_path_maybe_ignore( return root_relative_path +def path_is_ignored( + path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report +) -> bool: + for gitignore_path, pattern in gitignore_dict.items(): + relative_path = normalize_path_maybe_ignore(path, gitignore_path, report) + if relative_path is None: + break + if pattern.match_file(relative_path): + report.path_ignored(path, "matches a .gitignore file content") + return True + return False + + def path_is_excluded( normalized_path: str, pattern: Optional[Pattern[str]], @@ -198,7 +211,7 @@ def gen_python_files( extend_exclude: Optional[Pattern[str]], force_exclude: Optional[Pattern[str]], report: Report, - gitignore: Optional[PathSpec], + gitignore_dict: Optional[Dict[Path, PathSpec]], *, verbose: bool, quiet: bool, @@ -211,6 +224,7 @@ def gen_python_files( `report` is where output about exclusions goes. """ + assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in paths: normalized_path = normalize_path_maybe_ignore(child, root, report) @@ -218,8 +232,7 @@ def gen_python_files( continue # First ignore files matching .gitignore, if passed - if gitignore is not None and gitignore.match_file(normalized_path): - report.path_ignored(child, "matches the .gitignore file content") + if gitignore_dict and path_is_ignored(child, gitignore_dict, report): continue # Then ignore with `--exclude` `--extend-exclude` and `--force-exclude` options. @@ -244,6 +257,13 @@ def gen_python_files( if child.is_dir(): # If gitignore is None, gitignore usage is disabled, while a Falsey # gitignore is when the directory doesn't have a .gitignore file. + if gitignore_dict is not None: + new_gitignore_dict = { + **gitignore_dict, + root / child: get_gitignore(child), + } + else: + new_gitignore_dict = None yield from gen_python_files( child.iterdir(), root, @@ -252,7 +272,7 @@ def gen_python_files( extend_exclude, force_exclude, report, - gitignore + get_gitignore(child) if gitignore is not None else None, + new_gitignore_dict, verbose=verbose, quiet=quiet, ) diff --git a/tests/data/ignore_subfolders_gitignore_tests/a.py b/tests/data/ignore_subfolders_gitignore_tests/a.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore b/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore new file mode 100644 index 0000000..150f68c --- /dev/null +++ b/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore @@ -0,0 +1 @@ +*/* diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/b.py b/tests/data/ignore_subfolders_gitignore_tests/subdir/b.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py b/tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_black.py b/tests/test_black.py index 784eb0d..a43f05e 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2042,7 +2042,7 @@ class TestFileCollection: None, None, report, - gitignore, + {path: gitignore}, verbose=False, quiet=False, ) @@ -2071,7 +2071,7 @@ class TestFileCollection: None, None, report, - root_gitignore, + {path: root_gitignore}, verbose=False, quiet=False, ) @@ -2109,6 +2109,32 @@ class TestFileCollection: gitignore = path / "a" / ".gitignore" assert f"Could not parse {gitignore}" in result.stderr_bytes.decode() + def test_gitignore_that_ignores_subfolders(self) -> None: + # If gitignore with */* is in root + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests" / "subdir") + expected = [root / "b.py"] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([root], expected, ctx=ctx) + + # If .gitignore with */* is nested + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests") + expected = [ + root / "a.py", + root / "subdir" / "b.py", + ] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([root], expected, ctx=ctx) + + # If command is executed from outer dir + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests") + target = root / "subdir" + expected = [target / "b.py"] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([target], expected, ctx=ctx) + def test_empty_include(self) -> None: path = DATA_DIR / "include_exclude_tests" src = [path] @@ -2163,7 +2189,7 @@ class TestFileCollection: None, None, report, - gitignore, + {path: gitignore}, verbose=False, quiet=False, ) -- 2.39.5