]> git.madduck.net Git - etc/vim.git/blobdiff - src/black/__init__.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Property-based fuzz test
[etc/vim.git] / src / black / __init__.py
index 886d9c291952ba2a7080940344eadc2218dd0803..faa88b38af7119fc408bf16e4237f12aa4319e5f 100644 (file)
@@ -65,7 +65,7 @@ if TYPE_CHECKING:
     import colorama  # noqa: F401
 
 DEFAULT_LINE_LENGTH = 88
-DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
+DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
 DEFAULT_INCLUDES = r"\.pyi?$"
 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
 
@@ -240,6 +240,7 @@ class Mode:
     target_versions: Set[TargetVersion] = field(default_factory=set)
     line_length: int = DEFAULT_LINE_LENGTH
     string_normalization: bool = True
+    experimental_string_processing: bool = False
     is_pyi: bool = False
 
     def get_cache_key(self) -> str:
@@ -267,7 +268,7 @@ def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> b
     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 
 
-def find_pyproject_toml(path_search_start: str) -> Optional[str]:
+def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
     """Find the absolute filepath to a pyproject.toml if it exists"""
     path_project_root = find_project_root(path_search_start)
     path_pyproject_toml = path_project_root / "pyproject.toml"
@@ -281,12 +282,7 @@ def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
     """
     pyproject_toml = toml.load(path_config)
     config = pyproject_toml.get("tool", {}).get("black", {})
-    return {
-        k.replace("--", "").replace("-", "_"): str(v)
-        if not isinstance(v, (list, dict))
-        else v
-        for k, v in config.items()
-    }
+    return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 
 
 def read_pyproject_toml(
@@ -311,6 +307,14 @@ def read_pyproject_toml(
 
     if not config:
         return None
+    else:
+        # Sanitize the values to be Click friendly. For more information please see:
+        # https://github.com/psf/black/issues/1458
+        # https://github.com/pallets/click/issues/1567
+        config = {
+            k: str(v) if not isinstance(v, (list, dict)) else v
+            for k, v in config.items()
+        }
 
     target_version = config.get("target_version")
     if target_version is not None and not isinstance(target_version, list):
@@ -373,6 +377,15 @@ def target_version_option_callback(
     is_flag=True,
     help="Don't normalize string quotes or prefixes.",
 )
+@click.option(
+    "--experimental-string-processing",
+    is_flag=True,
+    hidden=True,
+    help=(
+        "Experimental option that performs more normalization on string literals."
+        " Currently disabled because it leads to some crashes."
+    ),
+)
 @click.option(
     "--check",
     is_flag=True,
@@ -468,7 +481,7 @@ def target_version_option_callback(
     ),
     is_eager=True,
     callback=read_pyproject_toml,
-    help="Read configuration from PATH.",
+    help="Read configuration from FILE path.",
 )
 @click.pass_context
 def main(
@@ -482,6 +495,7 @@ def main(
     fast: bool,
     pyi: bool,
     skip_string_normalization: bool,
+    experimental_string_processing: bool,
     quiet: bool,
     verbose: bool,
     include: str,
@@ -502,6 +516,7 @@ def main(
         line_length=line_length,
         is_pyi=pyi,
         string_normalization=not skip_string_normalization,
+        experimental_string_processing=experimental_string_processing,
     )
     if config and verbose:
         out(f"Using configuration from {config}.", bold=False, fg="blue")
@@ -580,9 +595,7 @@ def get_sources(
     root = find_project_root(src)
     sources: Set[Path] = set()
     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
-    exclude_regexes = [exclude_regex]
-    if force_exclude_regex is not None:
-        exclude_regexes.append(force_exclude_regex)
+    gitignore = get_gitignore(root)
 
     for s in src:
         p = Path(s)
@@ -592,19 +605,30 @@ def get_sources(
                     p.iterdir(),
                     root,
                     include_regex,
-                    exclude_regexes,
+                    exclude_regex,
+                    force_exclude_regex,
                     report,
-                    get_gitignore(root),
+                    gitignore,
                 )
             )
         elif s == "-":
             sources.add(p)
         elif p.is_file():
-            sources.update(
-                gen_python_files(
-                    [p], root, None, exclude_regexes, report, get_gitignore(root)
-                )
-            )
+            normalized_path = normalize_path_maybe_ignore(p, root, report)
+            if normalized_path is None:
+                continue
+
+            normalized_path = "/" + normalized_path
+            # Hard-exclude any files that matches the `--force-exclude` regex.
+            if force_exclude_regex:
+                force_exclude_match = force_exclude_regex.search(normalized_path)
+            else:
+                force_exclude_match = None
+            if force_exclude_match and force_exclude_match.group(0):
+                report.path_ignored(p, "matches the --force-exclude regular expression")
+                continue
+
+            sources.add(p)
         else:
             err(f"invalid path: {s}")
     return sources
@@ -652,6 +676,8 @@ def reformat_one(
                 write_cache(cache, [src], mode)
         report.done(src, changed)
     except Exception as exc:
+        if report.verbose:
+            traceback.print_exc()
         report.failed(src, str(exc))
 
 
@@ -926,6 +952,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
         ...
 
     A more complex example:
+
     >>> print(
     ...   black.format_str(
     ...     "def f(arg:str='')->None: hey",
@@ -970,10 +997,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
         before, after = elt.maybe_empty_lines(current_line)
         dst_contents.append(str(empty_line) * before)
         for line in transform_line(
-            current_line,
-            line_length=mode.line_length,
-            normalize_strings=mode.string_normalization,
-            features=split_line_features,
+            current_line, mode=mode, features=split_line_features
         ):
             dst_contents.append(str(line))
     return "".join(dst_contents)
@@ -1418,7 +1442,8 @@ class Line:
             )
         if self.inside_brackets or not preformatted:
             self.bracket_tracker.mark(leaf)
-            self.maybe_remove_trailing_comma(leaf)
+            if self.maybe_should_explode(leaf):
+                self.should_explode = True
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
@@ -1470,69 +1495,6 @@ class Line:
             Leaf(token.DOT, ".") for _ in range(3)
         ]
 
-    @property
-    def is_collection_with_optional_trailing_comma(self) -> bool:
-        """Is this line a collection literal with a trailing comma that's optional?
-
-        Note that the trailing comma in a 1-tuple is not optional.
-        """
-        if not self.leaves or len(self.leaves) < 4:
-            return False
-
-        # Look for and address a trailing colon.
-        if self.leaves[-1].type == token.COLON:
-            closer = self.leaves[-2]
-            close_index = -2
-        else:
-            closer = self.leaves[-1]
-            close_index = -1
-        if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
-            return False
-
-        if closer.type == token.RPAR:
-            # Tuples require an extra check, because if there's only
-            # one element in the tuple removing the comma unmakes the
-            # tuple.
-            #
-            # We also check for parens before looking for the trailing
-            # comma because in some cases (eg assigning a dict
-            # literal) the literal gets wrapped in temporary parens
-            # during parsing. This case is covered by the
-            # collections.py test data.
-            opener = closer.opening_bracket
-            for _open_index, leaf in enumerate(self.leaves):
-                if leaf is opener:
-                    break
-
-            else:
-                # Couldn't find the matching opening paren, play it safe.
-                return False
-
-            commas = 0
-            comma_depth = self.leaves[close_index - 1].bracket_depth
-            for leaf in self.leaves[_open_index + 1 : close_index]:
-                if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
-                    commas += 1
-            if commas > 1:
-                # We haven't looked yet for the trailing comma because
-                # we might also have caught noop parens.
-                return self.leaves[close_index - 1].type == token.COMMA
-
-            elif commas == 1:
-                return False  # it's either a one-tuple or didn't have a trailing comma
-
-            if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
-                close_index -= 1
-                closer = self.leaves[close_index]
-                if closer.type == token.RPAR:
-                    # TODO: this is a gut feeling. Will we ever see this?
-                    return False
-
-        if self.leaves[close_index - 1].type != token.COMMA:
-            return False
-
-        return True
-
     @property
     def is_def(self) -> bool:
         """Is this a function definition? (Also returns True for async defs.)"""
@@ -1657,42 +1619,29 @@ class Line:
     def contains_multiline_strings(self) -> bool:
         return any(is_multiline_string(leaf) for leaf in self.leaves)
 
-    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
-        """Remove trailing comma if there is one and it's safe."""
-        if not (self.leaves and self.leaves[-1].type == token.COMMA):
-            return False
-
-        # We remove trailing commas only in the case of importing a
-        # single name from a module.
+    def maybe_should_explode(self, closing: Leaf) -> bool:
+        """Return True if this line should explode (always be split), that is when:
+        - there's a pre-existing trailing comma here; and
+        - it's not a one-tuple.
+        """
         if not (
-            self.leaves
-            and self.is_import
-            and len(self.leaves) > 4
+            closing.type in CLOSING_BRACKETS
+            and self.leaves
             and self.leaves[-1].type == token.COMMA
-            and closing.type in CLOSING_BRACKETS
-            and self.leaves[-4].type == token.NAME
-            and (
-                # regular `from foo import bar,`
-                self.leaves[-4].value == "import"
-                # `from foo import (bar as baz,)
-                or (
-                    len(self.leaves) > 6
-                    and self.leaves[-6].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-                # `from foo import bar as baz,`
-                or (
-                    len(self.leaves) > 5
-                    and self.leaves[-5].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-            )
-            and closing.type == token.RPAR
+            and not self.leaves[-1].was_checked  # pre-existing
         ):
             return False
 
-        self.remove_trailing_comma()
-        return True
+        if closing.type in {token.RBRACE, token.RSQB}:
+            return True
+
+        if self.is_import:
+            return True
+
+        if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
+            return True
+
+        return False
 
     def append_comment(self, comment: Leaf) -> bool:
         """Add an inline or standalone comment to the line."""
@@ -2635,10 +2584,7 @@ def make_comment(content: str) -> str:
 
 
 def transform_line(
-    line: Line,
-    line_length: int,
-    normalize_strings: bool,
-    features: Collection[Feature] = (),
+    line: Line, mode: Mode, features: Collection[Feature] = ()
 ) -> Iterator[Line]:
     """Transform a `line`, potentially splitting it into many lines.
 
@@ -2654,7 +2600,7 @@ def transform_line(
 
     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
         """Initialize StringTransformer"""
-        return ST(line_length, normalize_strings)
+        return ST(mode.line_length, mode.string_normalization)
 
     string_merge = init_st(StringMerger)
     string_paren_strip = init_st(StringParenStripper)
@@ -2665,23 +2611,27 @@ def transform_line(
     if (
         not line.contains_uncollapsable_type_comments()
         and not line.should_explode
-        and not line.is_collection_with_optional_trailing_comma
         and (
-            is_line_short_enough(line, line_length=line_length, line_str=line_str)
+            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
-        and not (line.contains_standalone_comments() and line.inside_brackets)
+        and not (line.inside_brackets and line.contains_standalone_comments())
     ):
         # Only apply basic string preprocessing, since lines shouldn't be split here.
-        transformers = [string_merge, string_paren_strip]
+        if mode.experimental_string_processing:
+            transformers = [string_merge, string_paren_strip]
+        else:
+            transformers = []
     elif line.is_def:
         transformers = [left_hand_split]
     else:
 
         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
-            for omit in generate_trailers_to_omit(line, line_length):
-                lines = list(right_hand_split(line, line_length, features, omit=omit))
-                if is_line_short_enough(lines[0], line_length=line_length):
+            for omit in generate_trailers_to_omit(line, mode.line_length):
+                lines = list(
+                    right_hand_split(line, mode.line_length, features, omit=omit)
+                )
+                if is_line_short_enough(lines[0], line_length=mode.line_length):
                     yield from lines
                     return
 
@@ -2692,24 +2642,30 @@ def transform_line(
             # See #762 and #781 for the full story.
             yield from right_hand_split(line, line_length=1, features=features)
 
-        if line.inside_brackets:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                delimiter_split,
-                standalone_comment_split,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+        if mode.experimental_string_processing:
+            if line.inside_brackets:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    delimiter_split,
+                    standalone_comment_split,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
+            else:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
         else:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+            if line.inside_brackets:
+                transformers = [delimiter_split, standalone_comment_split, rhs]
+            else:
+                transformers = [rhs]
 
     for transform in transformers:
         # We are accumulating lines in `result` because we might want to abort
@@ -2724,12 +2680,7 @@ def transform_line(
                     )
 
                 result.extend(
-                    transform_line(
-                        transformed_line,
-                        line_length=line_length,
-                        normalize_strings=normalize_strings,
-                        features=features,
-                    )
+                    transform_line(transformed_line, mode=mode, features=features)
                 )
         except CannotTransform:
             continue
@@ -3235,7 +3186,9 @@ class StringParenStripper(StringTransformer):
     Requirements:
         The line contains a string which is surrounded by parentheses and:
             - The target string is NOT the only argument to a function call).
-            - The RPAR is NOT followed by an attribute access (i.e. a dot).
+            - If the target string contains a PERCENT, the brackets are not
+              preceeded or followed by an operator with higher precedence than
+              PERCENT.
 
     Transformations:
         The parentheses mentioned in the 'Requirements' section are stripped.
@@ -3278,14 +3231,51 @@ class StringParenStripper(StringTransformer):
             string_parser = StringParser()
             next_idx = string_parser.parse(LL, string_idx)
 
+            # if the leaves in the parsed string include a PERCENT, we need to
+            # make sure the initial LPAR is NOT preceded by an operator with
+            # higher or equal precedence to PERCENT
+            if is_valid_index(idx - 2):
+                # mypy can't quite follow unless we name this
+                before_lpar = LL[idx - 2]
+                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
+                    (
+                        before_lpar.type
+                        in {
+                            token.STAR,
+                            token.AT,
+                            token.SLASH,
+                            token.DOUBLESLASH,
+                            token.PERCENT,
+                            token.TILDE,
+                            token.DOUBLESTAR,
+                            token.AWAIT,
+                            token.LSQB,
+                            token.LPAR,
+                        }
+                    )
+                    or (
+                        # only unary PLUS/MINUS
+                        before_lpar.parent
+                        and before_lpar.parent.type == syms.factor
+                        and (before_lpar.type in {token.PLUS, token.MINUS})
+                    )
+                ):
+                    continue
+
             # Should be followed by a non-empty RPAR...
             if (
                 is_valid_index(next_idx)
                 and LL[next_idx].type == token.RPAR
                 and not is_empty_rpar(LL[next_idx])
             ):
-                # That RPAR should NOT be followed by a '.' symbol.
-                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
+                # That RPAR should NOT be followed by anything with higher
+                # precedence than PERCENT
+                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
+                    token.DOUBLESTAR,
+                    token.LSQB,
+                    token.LPAR,
+                    token.DOT,
+                }:
                     continue
 
                 return Ok(string_idx)
@@ -3315,7 +3305,7 @@ class StringParenStripper(StringTransformer):
         new_line.append(string_leaf)
 
         append_leaves(
-            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
+            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
         )
 
         LL[rpar_idx].remove()
@@ -4591,8 +4581,6 @@ def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
         set(@leaves) is a subset of set(@old_line.leaves).
     """
     for old_leaf in leaves:
-        assert old_leaf in old_line.leaves
-
         new_leaf = Leaf(old_leaf.type, old_leaf.value)
         replace_child(old_leaf, new_leaf)
         new_line.append(new_leaf)
@@ -4752,10 +4740,8 @@ def right_hand_split(
     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
     bracket_split_succeeded_or_raise(head, body, tail)
     if (
-        # the body shouldn't be exploded
-        not body.should_explode
         # the opening bracket is an optional paren
-        and opening_bracket.type == token.LPAR
+        opening_bracket.type == token.LPAR
         and not opening_bracket.value
         # the closing bracket is an optional paren
         and closing_bracket.type == token.RPAR
@@ -4852,7 +4838,9 @@ def bracket_split_build_line(
                         continue
 
                     if leaves[i].type != token.COMMA:
-                        leaves.insert(i + 1, Leaf(token.COMMA, ","))
+                        new_comma = Leaf(token.COMMA, ",")
+                        new_comma.was_checked = True
+                        leaves.insert(i + 1, new_comma)
                     break
 
     # Populate the line
@@ -4860,8 +4848,8 @@ def bracket_split_build_line(
         result.append(leaf, preformatted=True)
         for comment_after in original.comments_after(leaf):
             result.append(comment_after, preformatted=True)
-    if is_body:
-        result.should_explode = should_explode(result, opening_bracket)
+    if is_body and should_split_body_explode(result, opening_bracket):
+        result.should_explode = True
     return result
 
 
@@ -4946,7 +4934,9 @@ def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[
             and current_line.leaves[-1].type != token.COMMA
             and current_line.leaves[-1].type != STANDALONE_COMMENT
         ):
-            current_line.append(Leaf(token.COMMA, ","))
+            new_comma = Leaf(token.COMMA, ",")
+            new_comma.was_checked = True
+            current_line.append(new_comma)
         yield current_line
 
 
@@ -5568,24 +5558,60 @@ def ensure_visible(leaf: Leaf) -> None:
         leaf.value = ")"
 
 
-def should_explode(line: Line, opening_bracket: Leaf) -> bool:
-    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
+def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
+    """Should `line` be immediately split with `delimiter_split()` after RHS?"""
 
-    if not (
-        opening_bracket.parent
-        and opening_bracket.parent.type in {syms.atom, syms.import_from}
-        and opening_bracket.value in "[{("
-    ):
+    if not (opening_bracket.parent and opening_bracket.value in "[{("):
         return False
 
+    # We're essentially checking if the body is delimited by commas and there's more
+    # than one of them (we're excluding the trailing comma and if the delimiter priority
+    # is still commas, that means there's more).
+    exclude = set()
+    pre_existing_trailing_comma = False
     try:
         last_leaf = line.leaves[-1]
-        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
+        if last_leaf.type == token.COMMA:
+            pre_existing_trailing_comma = not last_leaf.was_checked
+            exclude.add(id(last_leaf))
         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
     except (IndexError, ValueError):
         return False
 
-    return max_priority == COMMA_PRIORITY
+    return max_priority == COMMA_PRIORITY and (
+        # always explode imports
+        opening_bracket.parent.type in {syms.atom, syms.import_from}
+        or pre_existing_trailing_comma
+    )
+
+
+def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
+    """Return True if content between `opening` and `closing` looks like a one-tuple."""
+    depth = closing.bracket_depth + 1
+    for _opening_index, leaf in enumerate(leaves):
+        if leaf is opening:
+            break
+
+    else:
+        raise LookupError("Opening paren not found in `leaves`")
+
+    commas = 0
+    _opening_index += 1
+    for leaf in leaves[_opening_index:]:
+        if leaf is closing:
+            break
+
+        bracket_depth = leaf.bracket_depth
+        if bracket_depth == depth and leaf.type == token.COMMA:
+            commas += 1
+            if leaf.parent and leaf.parent.type in {
+                syms.arglist,
+                syms.typedargslist,
+            }:
+                commas += 1
+                break
+
+    return commas < 2
 
 
 def get_features_used(node: Node) -> Set[Feature]:
@@ -5756,16 +5782,40 @@ def get_gitignore(root: Path) -> PathSpec:
     return PathSpec.from_lines("gitwildmatch", lines)
 
 
+def normalize_path_maybe_ignore(
+    path: Path, root: Path, report: "Report"
+) -> Optional[str]:
+    """Normalize `path`. May return `None` if `path` was ignored.
+
+    `report` is where "path ignored" output goes.
+    """
+    try:
+        normalized_path = path.resolve().relative_to(root).as_posix()
+    except OSError as e:
+        report.path_ignored(path, f"cannot be read because {e}")
+        return None
+
+    except ValueError:
+        if path.is_symlink():
+            report.path_ignored(path, f"is a symbolic link that points outside {root}")
+            return None
+
+        raise
+
+    return normalized_path
+
+
 def gen_python_files(
     paths: Iterable[Path],
     root: Path,
     include: Optional[Pattern[str]],
-    exclude_regexes: Iterable[Pattern[str]],
+    exclude: Pattern[str],
+    force_exclude: Optional[Pattern[str]],
     report: "Report",
     gitignore: PathSpec,
 ) -> Iterator[Path]:
     """Generate all files under `path` whose paths are not excluded by the
-    `exclude` regex, but are included by the `include` regex.
+    `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
 
     Symbolic links pointing outside of the `root` directory are ignored.
 
@@ -5773,43 +5823,41 @@ def gen_python_files(
     """
     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
     for child in paths:
-        # Then ignore with `exclude` option.
-        try:
-            normalized_path = child.resolve().relative_to(root).as_posix()
-        except OSError as e:
-            report.path_ignored(child, f"cannot be read because {e}")
+        normalized_path = normalize_path_maybe_ignore(child, root, report)
+        if normalized_path is None:
             continue
-        except ValueError:
-            if child.is_symlink():
-                report.path_ignored(
-                    child, f"is a symbolic link that points outside {root}"
-                )
-                continue
-
-            raise
 
         # First ignore files matching .gitignore
         if gitignore.match_file(normalized_path):
             report.path_ignored(child, "matches the .gitignore file content")
             continue
 
+        # Then ignore with `--exclude` and `--force-exclude` options.
         normalized_path = "/" + normalized_path
         if child.is_dir():
             normalized_path += "/"
 
-        is_excluded = False
-        for exclude in exclude_regexes:
-            exclude_match = exclude.search(normalized_path) if exclude else None
-            if exclude_match and exclude_match.group(0):
-                report.path_ignored(child, "matches the --exclude regular expression")
-                is_excluded = True
-                break
-        if is_excluded:
+        exclude_match = exclude.search(normalized_path) if exclude else None
+        if exclude_match and exclude_match.group(0):
+            report.path_ignored(child, "matches the --exclude regular expression")
+            continue
+
+        force_exclude_match = (
+            force_exclude.search(normalized_path) if force_exclude else None
+        )
+        if force_exclude_match and force_exclude_match.group(0):
+            report.path_ignored(child, "matches the --force-exclude regular expression")
             continue
 
         if child.is_dir():
             yield from gen_python_files(
-                child.iterdir(), root, include, exclude_regexes, report, gitignore
+                child.iterdir(),
+                root,
+                include,
+                exclude,
+                force_exclude,
+                report,
+                gitignore,
             )
 
         elif child.is_file():
@@ -5822,8 +5870,8 @@ def gen_python_files(
 def find_project_root(srcs: Iterable[str]) -> Path:
     """Return a directory containing .git, .hg, or pyproject.toml.
 
-    That directory can be one of the directories passed in `srcs` or their
-    common parent.
+    That directory will be a common parent of all files and directories
+    passed in `srcs`.
 
     If no directory in the tree contains a marker that would specify it's the
     project root, the root of the file system is returned.
@@ -5831,11 +5879,20 @@ def find_project_root(srcs: Iterable[str]) -> Path:
     if not srcs:
         return Path("/").resolve()
 
-    common_base = min(Path(src).resolve() for src in srcs)
-    if common_base.is_dir():
-        # Append a fake file so `parents` below returns `common_base_dir`, too.
-        common_base /= "fake-file"
-    for directory in common_base.parents:
+    path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
+
+    # A list of lists of parents for each 'src'. 'src' is included as a
+    # "parent" of itself if it is a directory
+    src_parents = [
+        list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs
+    ]
+
+    common_base = max(
+        set.intersection(*(set(parents) for parents in src_parents)),
+        key=lambda path: path.parts,
+    )
+
+    for directory in (common_base, *common_base.parents):
         if (directory / ".git").exists():
             return directory
 
@@ -6020,7 +6077,7 @@ def _stringify_ast(
                 and field == "value"
                 and isinstance(value, str)
             ):
-                normalized = re.sub(r" *\n[ \t]+", "\n ", value).strip()
+                normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
             else:
                 normalized = value
             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"