Update FAQ: Mention formatting of custom jupyter cell magic (#2982)

[etc/vim.git] / src / black / linegen.py
diff --git a/src/black/linegen.py b/src/black/linegen.py

index 7949654b40e5a4dee405c1182816a5f4363f0f87..2cf9cf3130a998273c0de6055775a3f5e13dc627 100644 (file)
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@@ -5,12 +5,16 @@ from functools import partial, wraps
  import sys
  from typing import Collection, Iterator, List, Optional, Set, Union
  
-from dataclasses import dataclass, field
-
-from black.nodes import WHITESPACE, STATEMENT, STANDALONE_COMMENT
+from black.nodes import WHITESPACE, RARROW, STATEMENT, STANDALONE_COMMENT
  from black.nodes import ASSIGNMENTS, OPENING_BRACKETS, CLOSING_BRACKETS
-from black.nodes import Visitor, syms, first_child_is_arith, ensure_visible
-from black.nodes import is_docstring, is_empty_tuple, is_one_tuple, is_one_tuple_between
+from black.nodes import Visitor, syms, is_arith_like, ensure_visible
+from black.nodes import (
+    is_docstring,
+    is_empty_tuple,
+    is_one_tuple,
+    is_one_sequence_between,
+)
+from black.nodes import is_name_token, is_lpar_token, is_rpar_token
  from black.nodes import is_walrus_assignment, is_yield, is_vararg, is_multiline_string
  from black.nodes import is_stub_suite, is_stub_body, is_atom_with_invisible_parens
  from black.nodes import wrap_in_parentheses
@@ -22,10 +26,9 @@ from black.comments import generate_comments, list_comments, FMT_OFF
  from black.numerics import normalize_numeric_literal
  from black.strings import get_string_prefix, fix_docstring
  from black.strings import normalize_string_prefix, normalize_string_quotes
-from black.trans import Transformer, CannotTransform, StringMerger
-from black.trans import StringSplitter, StringParenWrapper, StringParenStripper
-from black.mode import Mode
-from black.mode import Feature
+from black.trans import Transformer, CannotTransform, StringMerger, StringSplitter
+from black.trans import StringParenWrapper, StringParenStripper, hug_power_op
+from black.mode import Mode, Feature, Preview
  
  from blib2to3.pytree import Node, Leaf
  from blib2to3.pgen2 import token
@@ -40,7 +43,8 @@ class CannotSplit(CannotTransform):
      """A readable split that fits the allotted line length is impossible."""
  
  
-@dataclass
+# This isn't a dataclass because @dataclass + Generic breaks mypyc.
+# See also https://github.com/mypyc/mypyc/issues/827.
  class LineGenerator(Visitor[Line]):
      """Generates reformatted Line objects.  Empty lines are not emitted.
  
@@ -48,9 +52,10 @@ class LineGenerator(Visitor[Line]):
      in ways that will no longer stringify to valid Python code on the tree.
      """
  
-    mode: Mode
-    remove_u_prefix: bool = False
-    current_line: Line = field(init=False)
+    def __init__(self, mode: Mode) -> None:
+        self.mode = mode
+        self.current_line: Line
+        self.__post_init__()
  
      def line(self, indent: int = 0) -> Iterator[Line]:
          """Generate a line.
@@ -72,7 +77,7 @@ class LineGenerator(Visitor[Line]):
          """Default `visit_*()` implementation. Recurses to children of `node`."""
          if isinstance(node, Leaf):
              any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
-            for comment in generate_comments(node):
+            for comment in generate_comments(node, preview=self.mode.preview):
                  if any_open_brackets:
                      # any comment within brackets is subject to splitting
                      self.current_line.append(comment)
@@ -90,9 +95,7 @@ class LineGenerator(Visitor[Line]):
  
              normalize_prefix(node, inside_brackets=any_open_brackets)
              if self.mode.string_normalization and node.type == token.STRING:
-                node.value = normalize_string_prefix(
-                    node.value, remove_u_prefix=self.remove_u_prefix
-                )
+                node.value = normalize_string_prefix(node.value)
                  node.value = normalize_string_quotes(node.value)
              if node.type == token.NUMBER:
                  normalize_numeric_literal(node)
@@ -126,7 +129,7 @@ class LineGenerator(Visitor[Line]):
          """Visit a statement.
  
          This implementation is shared for `if`, `while`, `for`, `try`, `except`,
-        `def`, `with`, `class`, `assert` and assignments.
+        `def`, `with`, `class`, `assert`, and assignments.
  
          The relevant Python language `keywords` for a given statement will be
          NAME leaves within it. This methods puts those on a separate line.
@@ -134,13 +137,21 @@ class LineGenerator(Visitor[Line]):
          `parens` holds a set of string leaf values immediately after which
          invisible parens should be put.
          """
-        normalize_invisible_parens(node, parens_after=parens)
+        normalize_invisible_parens(node, parens_after=parens, preview=self.mode.preview)
          for child in node.children:
-            if child.type == token.NAME and child.value in keywords:  # type: ignore
+            if is_name_token(child) and child.value in keywords:
                  yield from self.line()
  
              yield from self.visit(child)
  
+    def visit_match_case(self, node: Node) -> Iterator[Line]:
+        """Visit either a match or case statement."""
+        normalize_invisible_parens(node, parens_after=set(), preview=self.mode.preview)
+
+        yield from self.line()
+        for child in node.children:
+            yield from self.visit(child)
+
      def visit_suite(self, node: Node) -> Iterator[Line]:
          """Visit a suite."""
          if self.mode.is_pyi and is_stub_suite(node):
@@ -150,8 +161,12 @@ class LineGenerator(Visitor[Line]):
  
      def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
          """Visit a statement without nested statements."""
-        if first_child_is_arith(node):
-            wrap_in_parentheses(node, node.children[0], visible=False)
+        prev_type: Optional[int] = None
+        for child in node.children:
+            if (prev_type is None or prev_type == token.SEMI) and is_arith_like(child):
+                wrap_in_parentheses(node, child, visible=False)
+            prev_type = child.type
+
          is_suite_like = node.parent and node.parent.type in STATEMENT
          if is_suite_like:
              if self.mode.is_pyi and is_stub_body(node):
@@ -191,6 +206,28 @@ class LineGenerator(Visitor[Line]):
              yield from self.line()
              yield from self.visit(child)
  
+    def visit_power(self, node: Node) -> Iterator[Line]:
+        for idx, leaf in enumerate(node.children[:-1]):
+            next_leaf = node.children[idx + 1]
+
+            if not isinstance(leaf, Leaf):
+                continue
+
+            value = leaf.value.lower()
+            if (
+                leaf.type == token.NUMBER
+                and next_leaf.type == syms.trailer
+                # Ensure that we are in an attribute trailer
+                and next_leaf.children[0].type == token.DOT
+                # It shouldn't wrap hexadecimal, binary and octal literals
+                and not value.startswith(("0x", "0b", "0o"))
+                # It shouldn't wrap complex literals
+                and "j" not in value
+            ):
+                wrap_in_parentheses(node, leaf)
+
+        yield from self.visit_default(node)
+
      def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
          """Remove a semicolon and put the other statement on a separate line."""
          yield from self.line()
@@ -226,8 +263,9 @@ class LineGenerator(Visitor[Line]):
          if is_docstring(leaf) and "\\\n" not in leaf.value:
              # We're ignoring docstrings with backslash newline escapes because changing
              # indentation of those changes the AST representation of the code.
-            prefix = get_string_prefix(leaf.value)
-            docstring = leaf.value[len(prefix) :]  # Remove the prefix
+            docstring = normalize_string_prefix(leaf.value)
+            prefix = get_string_prefix(docstring)
+            docstring = docstring[len(prefix) :]  # Remove the prefix
              quote_char = docstring[0]
              # A natural way to remove the outer quotes is to do:
              #   docstring = docstring.strip(quote_char)
@@ -280,8 +318,14 @@ class LineGenerator(Visitor[Line]):
          self.visit_try_stmt = partial(
              v, keywords={"try", "except", "else", "finally"}, parens=Ø
          )
-        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
-        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
+        if self.mode.preview:
+            self.visit_except_clause = partial(
+                v, keywords={"except"}, parens={"except"}
+            )
+            self.visit_with_stmt = partial(v, keywords={"with"}, parens={"with"})
+        else:
+            self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
+            self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
          self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
          self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
          self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
@@ -291,6 +335,10 @@ class LineGenerator(Visitor[Line]):
          self.visit_async_funcdef = self.visit_async_stmt
          self.visit_decorated = self.visit_decorators
  
+        # PEP 634
+        self.visit_match_stmt = self.visit_match_case
+        self.visit_case_block = self.visit_match_case
+
  
  def transform_line(
      line: Line, mode: Mode, features: Collection[Feature] = ()
@@ -326,7 +374,7 @@ def transform_line(
          and not (line.inside_brackets and line.contains_standalone_comments())
      ):
          # Only apply basic string preprocessing, since lines shouldn't be split here.
-        if mode.experimental_string_processing:
+        if Preview.string_processing in mode:
              transformers = [string_merge, string_paren_strip]
          else:
              transformers = []
@@ -334,7 +382,9 @@ def transform_line(
          transformers = [left_hand_split]
      else:
  
-        def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+        def _rhs(
+            self: object, line: Line, features: Collection[Feature]
+        ) -> Iterator[Line]:
              """Wraps calls to `right_hand_split`.
  
              The calls increasingly `omit` right-hand trailers (bracket pairs with
@@ -361,7 +411,13 @@ def transform_line(
                  line, line_length=mode.line_length, features=features
              )
  
-        if mode.experimental_string_processing:
+        # HACK: nested functions (like _rhs) compiled by mypyc don't retain their
+        # __name__ attribute which is needed in `run_transformer` further down.
+        # Unfortunately a nested class breaks mypyc too. So a class must be created
+        # via type ... https://github.com/mypyc/mypyc/issues/884
+        rhs = type("rhs", (), {"__call__": _rhs})()
+
+        if Preview.string_processing in mode:
              if line.inside_brackets:
                  transformers = [
                      string_merge,
@@ -385,6 +441,9 @@ def transform_line(
                  transformers = [delimiter_split, standalone_comment_split, rhs]
              else:
                  transformers = [rhs]
+    # It's always safe to attempt hugging of power operations and pretty much every line
+    # could match.
+    transformers.append(hug_power_op)
  
      for transform in transformers:
          # We are accumulating lines in `result` because we might want to abort
@@ -495,21 +554,21 @@ def right_hand_split(
          # there are no standalone comments in the body
          and not body.contains_standalone_comments(0)
          # and we can actually remove the parens
-        and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
+        and can_omit_invisible_parens(body, line_length)
      ):
          omit = {id(closing_bracket), *omit}
          try:
              yield from right_hand_split(line, line_length, features=features, omit=omit)
              return
  
-        except CannotSplit:
+        except CannotSplit as e:
              if not (
                  can_be_split(body)
                  or is_line_short_enough(body, line_length=line_length)
              ):
                  raise CannotSplit(
                      "Splitting failed, body is still too long and can't be split."
-                )
+                ) from e
  
              elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                  raise CannotSplit(
@@ -517,7 +576,7 @@ def right_hand_split(
                      " satisfy the splitting algorithm because the head or the tail"
                      " contains multiline strings which by definition never fit one"
                      " line."
-                )
+                ) from e
  
      ensure_visible(opening_bracket)
      ensure_visible(closing_bracket)
@@ -573,6 +632,20 @@ def bracket_split_build_line(
                  original.is_def
                  and opening_bracket.value == "("
                  and not any(leaf.type == token.COMMA for leaf in leaves)
+                # In particular, don't add one within a parenthesized return annotation.
+                # Unfortunately the indicator we're in a return annotation (RARROW) may
+                # be defined directly in the parent node, the parent of the parent ...
+                # and so on depending on how complex the return annotation is.
+                # This isn't perfect and there are some false negatives but they are in
+                # contexts where a comma is actually fine.
+                and not any(
+                    node.prev_sibling.type == RARROW
+                    for node in (
+                        leaves[0].parent,
+                        getattr(leaves[0].parent, "parent", None),
+                    )
+                    if isinstance(node, Node) and isinstance(node.prev_sibling, Leaf)
+                )
              )
  
              if original.is_import or no_commas:
@@ -603,9 +676,9 @@ def dont_increase_indentation(split_func: Transformer) -> Transformer:
  
      @wraps(split_func)
      def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
-        for line in split_func(line, features):
-            normalize_prefix(line.leaves[0], inside_brackets=True)
-            yield line
+        for split_line in split_func(line, features):
+            normalize_prefix(split_line.leaves[0], inside_brackets=True)
+            yield split_line
  
      return split_wrapper
  
@@ -620,13 +693,13 @@ def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[
      try:
          last_leaf = line.leaves[-1]
      except IndexError:
-        raise CannotSplit("Line empty")
+        raise CannotSplit("Line empty") from None
  
      bt = line.bracket_tracker
      try:
          delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
      except ValueError:
-        raise CannotSplit("No delimiters found")
+        raise CannotSplit("No delimiters found") from None
  
      if delimiter_priority == DOT_PRIORITY:
          if bt.delimiter_count_with_priority(delimiter_priority) == 1:
@@ -740,7 +813,9 @@ def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
      leaf.prefix = ""
  
  
-def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
+def normalize_invisible_parens(
+    node: Node, parens_after: Set[str], *, preview: bool
+) -> None:
      """Make existing optional parentheses invisible or create new ones.
  
      `parens_after` is a set of string leaf values immediately after which parens
@@ -749,7 +824,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
      Standardizes on visible parentheses for single-element tuples, and keeps
      existing visible parentheses for other tuples and generator expressions.
      """
-    for pc in list_comments(node.prefix, is_endmarker=False):
+    for pc in list_comments(node.prefix, is_endmarker=False, preview=preview):
          if pc.value in FMT_OFF:
              # This `node` has a prefix with `# fmt: off`, don't mess with parens.
              return
@@ -758,7 +833,9 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
          # Fixes a bug where invisible parens are not properly stripped from
          # assignment statements that contain type annotations.
          if isinstance(child, Node) and child.type == syms.annassign:
-            normalize_invisible_parens(child, parens_after=parens_after)
+            normalize_invisible_parens(
+                child, parens_after=parens_after, preview=preview
+            )
  
          # Add parentheses around long tuple unpacking in assignments.
          if (
@@ -769,18 +846,38 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
              check_lpar = True
  
          if check_lpar:
-            if child.type == syms.atom:
-                if maybe_make_parens_invisible_in_atom(child, parent=node):
+            if (
+                preview
+                and child.type == syms.atom
+                and node.type == syms.for_stmt
+                and isinstance(child.prev_sibling, Leaf)
+                and child.prev_sibling.type == token.NAME
+                and child.prev_sibling.value == "for"
+            ):
+                if maybe_make_parens_invisible_in_atom(
+                    child,
+                    parent=node,
+                    remove_brackets_around_comma=True,
+                ):
+                    wrap_in_parentheses(node, child, visible=False)
+            elif preview and isinstance(child, Node) and node.type == syms.with_stmt:
+                remove_with_parens(child, node)
+            elif child.type == syms.atom:
+                if maybe_make_parens_invisible_in_atom(
+                    child,
+                    parent=node,
+                ):
                      wrap_in_parentheses(node, child, visible=False)
              elif is_one_tuple(child):
                  wrap_in_parentheses(node, child, visible=True)
              elif node.type == syms.import_from:
                  # "import from" nodes store parentheses directly as part of
                  # the statement
-                if child.type == token.LPAR:
+                if is_lpar_token(child):
+                    assert is_rpar_token(node.children[-1])
                      # make parentheses invisible
-                    child.value = ""  # type: ignore
-                    node.children[-1].value = ""  # type: ignore
+                    child.value = ""
+                    node.children[-1].value = ""
                  elif child.type != token.STAR:
                      # insert invisible parentheses
                      node.insert_child(index, Leaf(token.LPAR, ""))
@@ -790,24 +887,78 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
              elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                  wrap_in_parentheses(node, child, visible=False)
  
-        check_lpar = isinstance(child, Leaf) and child.value in parens_after
+        comma_check = child.type == token.COMMA if preview else False
+
+        check_lpar = isinstance(child, Leaf) and (
+            child.value in parens_after or comma_check
+        )
  
  
-def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
+def remove_with_parens(node: Node, parent: Node) -> None:
+    """Recursively hide optional parens in `with` statements."""
+    # Removing all unnecessary parentheses in with statements in one pass is a tad
+    # complex as different variations of bracketed statements result in pretty
+    # different parse trees:
+    #
+    # with (open("file")) as f:                       # this is an asexpr_test
+    #     ...
+    #
+    # with (open("file") as f):                       # this is an atom containing an
+    #     ...                                         # asexpr_test
+    #
+    # with (open("file")) as f, (open("file")) as f:  # this is asexpr_test, COMMA,
+    #     ...                                         # asexpr_test
+    #
+    # with (open("file") as f, open("file") as f):    # an atom containing a
+    #     ...                                         # testlist_gexp which then
+    #                                                 # contains multiple asexpr_test(s)
+    if node.type == syms.atom:
+        if maybe_make_parens_invisible_in_atom(
+            node,
+            parent=parent,
+            remove_brackets_around_comma=True,
+        ):
+            wrap_in_parentheses(parent, node, visible=False)
+        if isinstance(node.children[1], Node):
+            remove_with_parens(node.children[1], node)
+    elif node.type == syms.testlist_gexp:
+        for child in node.children:
+            if isinstance(child, Node):
+                remove_with_parens(child, node)
+    elif node.type == syms.asexpr_test and not any(
+        leaf.type == token.COLONEQUAL for leaf in node.leaves()
+    ):
+        if maybe_make_parens_invisible_in_atom(
+            node.children[0],
+            parent=node,
+            remove_brackets_around_comma=True,
+        ):
+            wrap_in_parentheses(node, node.children[0], visible=False)
+
+
+def maybe_make_parens_invisible_in_atom(
+    node: LN,
+    parent: LN,
+    remove_brackets_around_comma: bool = False,
+) -> bool:
      """If it's safe, make the parens in the atom `node` invisible, recursively.
      Additionally, remove repeated, adjacent invisible parens from the atom `node`
      as they are redundant.
  
      Returns whether the node should itself be wrapped in invisible parentheses.
-
      """
-
      if (
          node.type != syms.atom
          or is_empty_tuple(node)
          or is_one_tuple(node)
          or (is_yield(node) and parent.type != syms.expr_stmt)
-        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
+        or (
+            # This condition tries to prevent removing non-optional brackets
+            # around a tuple; however, it can be a bit overzealous, so we provide
+            # an option to skip this check for `for` and `with` statements.
+            not remove_brackets_around_comma
+            and max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
+        )
      ):
          return False
  
@@ -825,12 +976,16 @@ def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
  
      first = node.children[0]
      last = node.children[-1]
-    if first.type == token.LPAR and last.type == token.RPAR:
+    if is_lpar_token(first) and is_rpar_token(last):
          middle = node.children[1]
          # make parentheses invisible
-        first.value = ""  # type: ignore
-        last.value = ""  # type: ignore
-        maybe_make_parens_invisible_in_atom(middle, parent=parent)
+        first.value = ""
+        last.value = ""
+        maybe_make_parens_invisible_in_atom(
+            middle,
+            parent=parent,
+            remove_brackets_around_comma=remove_brackets_around_comma,
+        )
  
          if is_atom_with_invisible_parens(middle):
              # Strip the invisible parens from `middle` by replacing
@@ -905,7 +1060,8 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
                  if (
                      prev
                      and prev.type == token.COMMA
-                    and not is_one_tuple_between(
+                    and leaf.opening_bracket is not None
+                    and not is_one_sequence_between(
                          leaf.opening_bracket, leaf, line.leaves
                      )
                  ):
@@ -932,7 +1088,8 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
              if (
                  prev
                  and prev.type == token.COMMA
-                and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
+                and leaf.opening_bracket is not None
+                and not is_one_sequence_between(leaf.opening_bracket, leaf, line.leaves)
              ):
                  # Never omit bracket pairs with trailing commas.
                  # We need to explode on those.
@@ -960,14 +1117,19 @@ def run_transformer(
  
          result.extend(transform_line(transformed_line, mode=mode, features=features))
  
-    if not (
-        transform.__name__ == "rhs"
-        and line.bracket_tracker.invisible
-        and not any(bracket.value for bracket in line.bracket_tracker.invisible)
-        and not line.contains_multiline_strings()
-        and not result[0].contains_uncollapsable_type_comments()
-        and not result[0].contains_unsplittable_type_ignore()
-        and not is_line_short_enough(result[0], line_length=mode.line_length)
+    if (
+        transform.__class__.__name__ != "rhs"
+        or not line.bracket_tracker.invisible
+        or any(bracket.value for bracket in line.bracket_tracker.invisible)
+        or line.contains_multiline_strings()
+        or result[0].contains_uncollapsable_type_comments()
+        or result[0].contains_unsplittable_type_ignore()
+        or is_line_short_enough(result[0], line_length=mode.line_length)
+        # If any leaves have no parents (which _can_ occur since
+        # `transform(line)` potentially destroys the line's underlying node
+        # structure), then we can't proceed. Doing so would cause the below
+        # call to `append_leaves()` to fail.
+        or any(leaf.parent is None for leaf in line.leaves)
      ):
          return result