]> git.madduck.net Git - etc/vim.git/blobdiff - src/black/__init__.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Reformat docs/conf.py according to the new style
[etc/vim.git] / src / black / __init__.py
index df38fd01a9bda205653f757ffabc676faeebe7e8..8d0c70f06c7d9561dbc50d6fd48bbbe05d29f4c1 100644 (file)
@@ -240,6 +240,7 @@ class Mode:
     target_versions: Set[TargetVersion] = field(default_factory=set)
     line_length: int = DEFAULT_LINE_LENGTH
     string_normalization: bool = True
+    experimental_string_processing: bool = False
     is_pyi: bool = False
 
     def get_cache_key(self) -> str:
@@ -376,6 +377,15 @@ def target_version_option_callback(
     is_flag=True,
     help="Don't normalize string quotes or prefixes.",
 )
+@click.option(
+    "--experimental-string-processing",
+    is_flag=True,
+    hidden=True,
+    help=(
+        "Experimental option that performs more normalization on string literals."
+        " Currently disabled because it leads to some crashes."
+    ),
+)
 @click.option(
     "--check",
     is_flag=True,
@@ -485,6 +495,7 @@ def main(
     fast: bool,
     pyi: bool,
     skip_string_normalization: bool,
+    experimental_string_processing: bool,
     quiet: bool,
     verbose: bool,
     include: str,
@@ -505,6 +516,7 @@ def main(
         line_length=line_length,
         is_pyi=pyi,
         string_normalization=not skip_string_normalization,
+        experimental_string_processing=experimental_string_processing,
     )
     if config and verbose:
         out(f"Using configuration from {config}.", bold=False, fg="blue")
@@ -583,9 +595,7 @@ def get_sources(
     root = find_project_root(src)
     sources: Set[Path] = set()
     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
-    exclude_regexes = [exclude_regex]
-    if force_exclude_regex is not None:
-        exclude_regexes.append(force_exclude_regex)
+    gitignore = get_gitignore(root)
 
     for s in src:
         p = Path(s)
@@ -595,19 +605,30 @@ def get_sources(
                     p.iterdir(),
                     root,
                     include_regex,
-                    exclude_regexes,
+                    exclude_regex,
+                    force_exclude_regex,
                     report,
-                    get_gitignore(root),
+                    gitignore,
                 )
             )
         elif s == "-":
             sources.add(p)
         elif p.is_file():
-            sources.update(
-                gen_python_files(
-                    [p], root, None, exclude_regexes, report, get_gitignore(root)
-                )
-            )
+            normalized_path = normalize_path_maybe_ignore(p, root, report)
+            if normalized_path is None:
+                continue
+
+            normalized_path = "/" + normalized_path
+            # Hard-exclude any files that matches the `--force-exclude` regex.
+            if force_exclude_regex:
+                force_exclude_match = force_exclude_regex.search(normalized_path)
+            else:
+                force_exclude_match = None
+            if force_exclude_match and force_exclude_match.group(0):
+                report.path_ignored(p, "matches the --force-exclude regular expression")
+                continue
+
+            sources.add(p)
         else:
             err(f"invalid path: {s}")
     return sources
@@ -655,6 +676,8 @@ def reformat_one(
                 write_cache(cache, [src], mode)
         report.done(src, changed)
     except Exception as exc:
+        if report.verbose:
+            traceback.print_exc()
         report.failed(src, str(exc))
 
 
@@ -929,6 +952,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
         ...
 
     A more complex example:
+
     >>> print(
     ...   black.format_str(
     ...     "def f(arg:str='')->None: hey",
@@ -973,10 +997,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
         before, after = elt.maybe_empty_lines(current_line)
         dst_contents.append(str(empty_line) * before)
         for line in transform_line(
-            current_line,
-            line_length=mode.line_length,
-            normalize_strings=mode.string_normalization,
-            features=split_line_features,
+            current_line, mode=mode, features=split_line_features
         ):
             dst_contents.append(str(line))
     return "".join(dst_contents)
@@ -1421,7 +1442,8 @@ class Line:
             )
         if self.inside_brackets or not preformatted:
             self.bracket_tracker.mark(leaf)
-            self.maybe_remove_trailing_comma(leaf)
+            if self.maybe_should_explode(leaf):
+                self.should_explode = True
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
@@ -1473,69 +1495,6 @@ class Line:
             Leaf(token.DOT, ".") for _ in range(3)
         ]
 
-    @property
-    def is_collection_with_optional_trailing_comma(self) -> bool:
-        """Is this line a collection literal with a trailing comma that's optional?
-
-        Note that the trailing comma in a 1-tuple is not optional.
-        """
-        if not self.leaves or len(self.leaves) < 4:
-            return False
-
-        # Look for and address a trailing colon.
-        if self.leaves[-1].type == token.COLON:
-            closer = self.leaves[-2]
-            close_index = -2
-        else:
-            closer = self.leaves[-1]
-            close_index = -1
-        if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
-            return False
-
-        if closer.type == token.RPAR:
-            # Tuples require an extra check, because if there's only
-            # one element in the tuple removing the comma unmakes the
-            # tuple.
-            #
-            # We also check for parens before looking for the trailing
-            # comma because in some cases (eg assigning a dict
-            # literal) the literal gets wrapped in temporary parens
-            # during parsing. This case is covered by the
-            # collections.py test data.
-            opener = closer.opening_bracket
-            for _open_index, leaf in enumerate(self.leaves):
-                if leaf is opener:
-                    break
-
-            else:
-                # Couldn't find the matching opening paren, play it safe.
-                return False
-
-            commas = 0
-            comma_depth = self.leaves[close_index - 1].bracket_depth
-            for leaf in self.leaves[_open_index + 1 : close_index]:
-                if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
-                    commas += 1
-            if commas > 1:
-                # We haven't looked yet for the trailing comma because
-                # we might also have caught noop parens.
-                return self.leaves[close_index - 1].type == token.COMMA
-
-            elif commas == 1:
-                return False  # it's either a one-tuple or didn't have a trailing comma
-
-            if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
-                close_index -= 1
-                closer = self.leaves[close_index]
-                if closer.type == token.RPAR:
-                    # TODO: this is a gut feeling. Will we ever see this?
-                    return False
-
-        if self.leaves[close_index - 1].type != token.COMMA:
-            return False
-
-        return True
-
     @property
     def is_def(self) -> bool:
         """Is this a function definition? (Also returns True for async defs.)"""
@@ -1660,42 +1619,29 @@ class Line:
     def contains_multiline_strings(self) -> bool:
         return any(is_multiline_string(leaf) for leaf in self.leaves)
 
-    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
-        """Remove trailing comma if there is one and it's safe."""
-        if not (self.leaves and self.leaves[-1].type == token.COMMA):
-            return False
-
-        # We remove trailing commas only in the case of importing a
-        # single name from a module.
+    def maybe_should_explode(self, closing: Leaf) -> bool:
+        """Return True if this line should explode (always be split), that is when:
+        - there's a pre-existing trailing comma here; and
+        - it's not a one-tuple.
+        """
         if not (
-            self.leaves
-            and self.is_import
-            and len(self.leaves) > 4
+            closing.type in CLOSING_BRACKETS
+            and self.leaves
             and self.leaves[-1].type == token.COMMA
-            and closing.type in CLOSING_BRACKETS
-            and self.leaves[-4].type == token.NAME
-            and (
-                # regular `from foo import bar,`
-                self.leaves[-4].value == "import"
-                # `from foo import (bar as baz,)
-                or (
-                    len(self.leaves) > 6
-                    and self.leaves[-6].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-                # `from foo import bar as baz,`
-                or (
-                    len(self.leaves) > 5
-                    and self.leaves[-5].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-            )
-            and closing.type == token.RPAR
+            and not self.leaves[-1].was_checked  # pre-existing
         ):
             return False
 
-        self.remove_trailing_comma()
-        return True
+        if closing.type in {token.RBRACE, token.RSQB}:
+            return True
+
+        if self.is_import:
+            return True
+
+        if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
+            return True
+
+        return False
 
     def append_comment(self, comment: Leaf) -> bool:
         """Add an inline or standalone comment to the line."""
@@ -2638,10 +2584,7 @@ def make_comment(content: str) -> str:
 
 
 def transform_line(
-    line: Line,
-    line_length: int,
-    normalize_strings: bool,
-    features: Collection[Feature] = (),
+    line: Line, mode: Mode, features: Collection[Feature] = ()
 ) -> Iterator[Line]:
     """Transform a `line`, potentially splitting it into many lines.
 
@@ -2657,7 +2600,7 @@ def transform_line(
 
     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
         """Initialize StringTransformer"""
-        return ST(line_length, normalize_strings)
+        return ST(mode.line_length, mode.string_normalization)
 
     string_merge = init_st(StringMerger)
     string_paren_strip = init_st(StringParenStripper)
@@ -2668,23 +2611,27 @@ def transform_line(
     if (
         not line.contains_uncollapsable_type_comments()
         and not line.should_explode
-        and not line.is_collection_with_optional_trailing_comma
         and (
-            is_line_short_enough(line, line_length=line_length, line_str=line_str)
+            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
-        and not (line.contains_standalone_comments() and line.inside_brackets)
+        and not (line.inside_brackets and line.contains_standalone_comments())
     ):
         # Only apply basic string preprocessing, since lines shouldn't be split here.
-        transformers = [string_merge, string_paren_strip]
+        if mode.experimental_string_processing:
+            transformers = [string_merge, string_paren_strip]
+        else:
+            transformers = []
     elif line.is_def:
         transformers = [left_hand_split]
     else:
 
         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
-            for omit in generate_trailers_to_omit(line, line_length):
-                lines = list(right_hand_split(line, line_length, features, omit=omit))
-                if is_line_short_enough(lines[0], line_length=line_length):
+            for omit in generate_trailers_to_omit(line, mode.line_length):
+                lines = list(
+                    right_hand_split(line, mode.line_length, features, omit=omit)
+                )
+                if is_line_short_enough(lines[0], line_length=mode.line_length):
                     yield from lines
                     return
 
@@ -2695,24 +2642,30 @@ def transform_line(
             # See #762 and #781 for the full story.
             yield from right_hand_split(line, line_length=1, features=features)
 
-        if line.inside_brackets:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                delimiter_split,
-                standalone_comment_split,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+        if mode.experimental_string_processing:
+            if line.inside_brackets:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    delimiter_split,
+                    standalone_comment_split,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
+            else:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
         else:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+            if line.inside_brackets:
+                transformers = [delimiter_split, standalone_comment_split, rhs]
+            else:
+                transformers = [rhs]
 
     for transform in transformers:
         # We are accumulating lines in `result` because we might want to abort
@@ -2727,12 +2680,7 @@ def transform_line(
                     )
 
                 result.extend(
-                    transform_line(
-                        transformed_line,
-                        line_length=line_length,
-                        normalize_strings=normalize_strings,
-                        features=features,
-                    )
+                    transform_line(transformed_line, mode=mode, features=features)
                 )
         except CannotTransform:
             continue
@@ -3238,7 +3186,9 @@ class StringParenStripper(StringTransformer):
     Requirements:
         The line contains a string which is surrounded by parentheses and:
             - The target string is NOT the only argument to a function call).
-            - The RPAR is NOT followed by an attribute access (i.e. a dot).
+            - If the target string contains a PERCENT, the brackets are not
+              preceeded or followed by an operator with higher precedence than
+              PERCENT.
 
     Transformations:
         The parentheses mentioned in the 'Requirements' section are stripped.
@@ -3281,14 +3231,51 @@ class StringParenStripper(StringTransformer):
             string_parser = StringParser()
             next_idx = string_parser.parse(LL, string_idx)
 
+            # if the leaves in the parsed string include a PERCENT, we need to
+            # make sure the initial LPAR is NOT preceded by an operator with
+            # higher or equal precedence to PERCENT
+            if is_valid_index(idx - 2):
+                # mypy can't quite follow unless we name this
+                before_lpar = LL[idx - 2]
+                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
+                    (
+                        before_lpar.type
+                        in {
+                            token.STAR,
+                            token.AT,
+                            token.SLASH,
+                            token.DOUBLESLASH,
+                            token.PERCENT,
+                            token.TILDE,
+                            token.DOUBLESTAR,
+                            token.AWAIT,
+                            token.LSQB,
+                            token.LPAR,
+                        }
+                    )
+                    or (
+                        # only unary PLUS/MINUS
+                        before_lpar.parent
+                        and before_lpar.parent.type == syms.factor
+                        and (before_lpar.type in {token.PLUS, token.MINUS})
+                    )
+                ):
+                    continue
+
             # Should be followed by a non-empty RPAR...
             if (
                 is_valid_index(next_idx)
                 and LL[next_idx].type == token.RPAR
                 and not is_empty_rpar(LL[next_idx])
             ):
-                # That RPAR should NOT be followed by a '.' symbol.
-                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
+                # That RPAR should NOT be followed by anything with higher
+                # precedence than PERCENT
+                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
+                    token.DOUBLESTAR,
+                    token.LSQB,
+                    token.LPAR,
+                    token.DOT,
+                }:
                     continue
 
                 return Ok(string_idx)
@@ -3318,7 +3305,7 @@ class StringParenStripper(StringTransformer):
         new_line.append(string_leaf)
 
         append_leaves(
-            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
+            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
         )
 
         LL[rpar_idx].remove()
@@ -4753,10 +4740,8 @@ def right_hand_split(
     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
     bracket_split_succeeded_or_raise(head, body, tail)
     if (
-        # the body shouldn't be exploded
-        not body.should_explode
         # the opening bracket is an optional paren
-        and opening_bracket.type == token.LPAR
+        opening_bracket.type == token.LPAR
         and not opening_bracket.value
         # the closing bracket is an optional paren
         and closing_bracket.type == token.RPAR
@@ -4853,7 +4838,9 @@ def bracket_split_build_line(
                         continue
 
                     if leaves[i].type != token.COMMA:
-                        leaves.insert(i + 1, Leaf(token.COMMA, ","))
+                        new_comma = Leaf(token.COMMA, ",")
+                        new_comma.was_checked = True
+                        leaves.insert(i + 1, new_comma)
                     break
 
     # Populate the line
@@ -4861,8 +4848,8 @@ def bracket_split_build_line(
         result.append(leaf, preformatted=True)
         for comment_after in original.comments_after(leaf):
             result.append(comment_after, preformatted=True)
-    if is_body:
-        result.should_explode = should_explode(result, opening_bracket)
+    if is_body and should_split_body_explode(result, opening_bracket):
+        result.should_explode = True
     return result
 
 
@@ -4947,7 +4934,9 @@ def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[
             and current_line.leaves[-1].type != token.COMMA
             and current_line.leaves[-1].type != STANDALONE_COMMENT
         ):
-            current_line.append(Leaf(token.COMMA, ","))
+            new_comma = Leaf(token.COMMA, ",")
+            new_comma.was_checked = True
+            current_line.append(new_comma)
         yield current_line
 
 
@@ -5569,24 +5558,60 @@ def ensure_visible(leaf: Leaf) -> None:
         leaf.value = ")"
 
 
-def should_explode(line: Line, opening_bracket: Leaf) -> bool:
+def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
     """Should `line` immediately be split with `delimiter_split()` after RHS?"""
 
-    if not (
-        opening_bracket.parent
-        and opening_bracket.parent.type in {syms.atom, syms.import_from}
-        and opening_bracket.value in "[{("
-    ):
+    if not (opening_bracket.parent and opening_bracket.value in "[{("):
         return False
 
+    # We're essentially checking if the body is delimited by commas and there's more
+    # than one of them (we're excluding the trailing comma and if the delimiter priority
+    # is still commas, that means there's more).
+    exclude = set()
+    pre_existing_trailing_comma = False
     try:
         last_leaf = line.leaves[-1]
-        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
+        if last_leaf.type == token.COMMA:
+            pre_existing_trailing_comma = not last_leaf.was_checked
+            exclude.add(id(last_leaf))
         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
     except (IndexError, ValueError):
         return False
 
-    return max_priority == COMMA_PRIORITY
+    return max_priority == COMMA_PRIORITY and (
+        # always explode imports
+        opening_bracket.parent.type in {syms.atom, syms.import_from}
+        or pre_existing_trailing_comma
+    )
+
+
+def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
+    """Return True if content between `opening` and `closing` looks like a one-tuple."""
+    depth = closing.bracket_depth + 1
+    for _opening_index, leaf in enumerate(leaves):
+        if leaf is opening:
+            break
+
+    else:
+        raise LookupError("Opening paren not found in `leaves`")
+
+    commas = 0
+    _opening_index += 1
+    for leaf in leaves[_opening_index:]:
+        if leaf is closing:
+            break
+
+        bracket_depth = leaf.bracket_depth
+        if bracket_depth == depth and leaf.type == token.COMMA:
+            commas += 1
+            if leaf.parent and leaf.parent.type in {
+                syms.arglist,
+                syms.typedargslist,
+            }:
+                commas += 1
+                break
+
+    return commas < 2
 
 
 def get_features_used(node: Node) -> Set[Feature]:
@@ -5757,16 +5782,40 @@ def get_gitignore(root: Path) -> PathSpec:
     return PathSpec.from_lines("gitwildmatch", lines)
 
 
+def normalize_path_maybe_ignore(
+    path: Path, root: Path, report: "Report"
+) -> Optional[str]:
+    """Normalize `path`. May return `None` if `path` was ignored.
+
+    `report` is where "path ignored" output goes.
+    """
+    try:
+        normalized_path = path.resolve().relative_to(root).as_posix()
+    except OSError as e:
+        report.path_ignored(path, f"cannot be read because {e}")
+        return None
+
+    except ValueError:
+        if path.is_symlink():
+            report.path_ignored(path, f"is a symbolic link that points outside {root}")
+            return None
+
+        raise
+
+    return normalized_path
+
+
 def gen_python_files(
     paths: Iterable[Path],
     root: Path,
     include: Optional[Pattern[str]],
-    exclude_regexes: Iterable[Pattern[str]],
+    exclude: Pattern[str],
+    force_exclude: Optional[Pattern[str]],
     report: "Report",
     gitignore: PathSpec,
 ) -> Iterator[Path]:
     """Generate all files under `path` whose paths are not excluded by the
-    `exclude` regex, but are included by the `include` regex.
+    `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
 
     Symbolic links pointing outside of the `root` directory are ignored.
 
@@ -5774,43 +5823,41 @@ def gen_python_files(
     """
     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
     for child in paths:
-        # Then ignore with `exclude` option.
-        try:
-            normalized_path = child.resolve().relative_to(root).as_posix()
-        except OSError as e:
-            report.path_ignored(child, f"cannot be read because {e}")
+        normalized_path = normalize_path_maybe_ignore(child, root, report)
+        if normalized_path is None:
             continue
-        except ValueError:
-            if child.is_symlink():
-                report.path_ignored(
-                    child, f"is a symbolic link that points outside {root}"
-                )
-                continue
-
-            raise
 
         # First ignore files matching .gitignore
         if gitignore.match_file(normalized_path):
             report.path_ignored(child, "matches the .gitignore file content")
             continue
 
+        # Then ignore with `--exclude` and `--force-exclude` options.
         normalized_path = "/" + normalized_path
         if child.is_dir():
             normalized_path += "/"
 
-        is_excluded = False
-        for exclude in exclude_regexes:
-            exclude_match = exclude.search(normalized_path) if exclude else None
-            if exclude_match and exclude_match.group(0):
-                report.path_ignored(child, "matches the --exclude regular expression")
-                is_excluded = True
-                break
-        if is_excluded:
+        exclude_match = exclude.search(normalized_path) if exclude else None
+        if exclude_match and exclude_match.group(0):
+            report.path_ignored(child, "matches the --exclude regular expression")
+            continue
+
+        force_exclude_match = (
+            force_exclude.search(normalized_path) if force_exclude else None
+        )
+        if force_exclude_match and force_exclude_match.group(0):
+            report.path_ignored(child, "matches the --force-exclude regular expression")
             continue
 
         if child.is_dir():
             yield from gen_python_files(
-                child.iterdir(), root, include, exclude_regexes, report, gitignore
+                child.iterdir(),
+                root,
+                include,
+                exclude,
+                force_exclude,
+                report,
+                gitignore,
             )
 
         elif child.is_file():
@@ -6030,7 +6077,7 @@ def _stringify_ast(
                 and field == "value"
                 and isinstance(value, str)
             ):
-                normalized = re.sub(r" *\n[ \t]+", "\n ", value).strip()
+                normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
             else:
                 normalized = value
             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"