As long as it's black (#1893)

[etc/vim.git] / src / black / __init__.py
diff --git a/src/black/__init__.py b/src/black/__init__.py

index eed059e28379aa56dc6717bdbca75468443b9ad7..c7c5d724a9fd0d9744c7de45f25a0b33acbd250a 100644 (file)
--- a/src/black/__init__.py
+++ b/src/black/__init__.py
@@ -68,6 +68,7 @@ DEFAULT_LINE_LENGTH = 88
  DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
  DEFAULT_INCLUDES = r"\.pyi?$"
  CACHE_DIR = Path(user_cache_dir("black", version=__version__))
+STDIN_PLACEHOLDER = "__BLACK_STDIN_FILENAME__"
  
  STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
  
@@ -112,6 +113,10 @@ class InvalidInput(ValueError):
      """Raised when input source code fails all parse attempts."""
  
  
+class BracketMatchError(KeyError):
+    """Raised when a closing bracket cannot be matched to an opening bracket."""
+
+
  T = TypeVar("T")
  E = TypeVar("E", bound=Exception)
  
@@ -174,14 +179,12 @@ class TargetVersion(Enum):
      PY36 = 6
      PY37 = 7
      PY38 = 8
+    PY39 = 9
  
      def is_python2(self) -> bool:
          return self is TargetVersion.PY27
  
  
-PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
-
-
  class Feature(Enum):
      # All string literals are unicode
      UNICODE_LITERALS = 1
@@ -195,6 +198,7 @@ class Feature(Enum):
      ASYNC_KEYWORDS = 7
      ASSIGNMENT_EXPRESSIONS = 8
      POS_ONLY_ARGUMENTS = 9
+    RELAXED_DECORATORS = 10
      FORCE_OPTIONAL_PARENTHESES = 50
  
  
@@ -233,6 +237,17 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
          Feature.ASSIGNMENT_EXPRESSIONS,
          Feature.POS_ONLY_ARGUMENTS,
      },
+    TargetVersion.PY39: {
+        Feature.UNICODE_LITERALS,
+        Feature.F_STRINGS,
+        Feature.NUMERIC_UNDERSCORES,
+        Feature.TRAILING_COMMA_IN_CALL,
+        Feature.TRAILING_COMMA_IN_DEF,
+        Feature.ASYNC_KEYWORDS,
+        Feature.ASSIGNMENT_EXPRESSIONS,
+        Feature.RELAXED_DECORATORS,
+        Feature.POS_ONLY_ARGUMENTS,
+    },
  }
  
  
@@ -440,7 +455,16 @@ def target_version_option_callback(
      type=str,
      help=(
          "Like --exclude, but files and directories matching this regex will be "
-        "excluded even when they are passed explicitly as arguments"
+        "excluded even when they are passed explicitly as arguments."
+    ),
+)
+@click.option(
+    "--stdin-filename",
+    type=str,
+    help=(
+        "The name of the file when passing it through stdin. Useful to make "
+        "sure Black will respect --force-exclude option on some "
+        "editors that rely on using stdin."
      ),
  )
  @click.option(
@@ -502,6 +526,7 @@ def main(
      include: str,
      exclude: str,
      force_exclude: Optional[str],
+    stdin_filename: Optional[str],
      src: Tuple[str, ...],
      config: Optional[str],
  ) -> None:
@@ -534,6 +559,7 @@ def main(
          exclude=exclude,
          force_exclude=force_exclude,
          report=report,
+        stdin_filename=stdin_filename,
      )
  
      path_empty(
@@ -573,6 +599,7 @@ def get_sources(
      exclude: str,
      force_exclude: Optional[str],
      report: "Report",
+    stdin_filename: Optional[str],
  ) -> Set[Path]:
      """Compute the set of files to be formatted."""
      try:
@@ -599,22 +626,14 @@ def get_sources(
      gitignore = get_gitignore(root)
  
      for s in src:
-        p = Path(s)
-        if p.is_dir():
-            sources.update(
-                gen_python_files(
-                    p.iterdir(),
-                    root,
-                    include_regex,
-                    exclude_regex,
-                    force_exclude_regex,
-                    report,
-                    gitignore,
-                )
-            )
-        elif s == "-":
-            sources.add(p)
-        elif p.is_file():
+        if s == "-" and stdin_filename:
+            p = Path(stdin_filename)
+            is_stdin = True
+        else:
+            p = Path(s)
+            is_stdin = False
+
+        if is_stdin or p.is_file():
              normalized_path = normalize_path_maybe_ignore(p, root, report)
              if normalized_path is None:
                  continue
@@ -629,6 +648,23 @@ def get_sources(
                  report.path_ignored(p, "matches the --force-exclude regular expression")
                  continue
  
+            if is_stdin:
+                p = Path(f"{STDIN_PLACEHOLDER}{str(p)}")
+
+            sources.add(p)
+        elif p.is_dir():
+            sources.update(
+                gen_python_files(
+                    p.iterdir(),
+                    root,
+                    include_regex,
+                    exclude_regex,
+                    force_exclude_regex,
+                    report,
+                    gitignore,
+                )
+            )
+        elif s == "-":
              sources.add(p)
          else:
              err(f"invalid path: {s}")
@@ -656,7 +692,18 @@ def reformat_one(
      """
      try:
          changed = Changed.NO
-        if not src.is_file() and str(src) == "-":
+
+        if str(src) == "-":
+            is_stdin = True
+        elif str(src).startswith(STDIN_PLACEHOLDER):
+            is_stdin = True
+            # Use the original name again in case we want to print something
+            # to the user
+            src = Path(str(src)[len(STDIN_PLACEHOLDER) :])
+        else:
+            is_stdin = False
+
+        if is_stdin:
              if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
                  changed = Changed.YES
          else:
@@ -845,9 +892,9 @@ def color_diff(contents: str) -> str:
      for i, line in enumerate(lines):
          if line.startswith("+++") or line.startswith("---"):
              line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
-        if line.startswith("@@"):
+        elif line.startswith("@@"):
              line = "\033[36m" + line + "\033[0m"  # cyan, reset
-        if line.startswith("+"):
+        elif line.startswith("+"):
              line = "\033[32m" + line + "\033[0m"  # green, reset
          elif line.startswith("-"):
              line = "\033[31m" + line + "\033[0m"  # red, reset
@@ -857,30 +904,22 @@ def color_diff(contents: str) -> str:
  
  def wrap_stream_for_windows(
      f: io.TextIOWrapper,
-) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]:
+) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32"]:
      """
-    Wrap the stream in colorama's wrap_stream so colors are shown on Windows.
+    Wrap stream with colorama's wrap_stream so colors are shown on Windows.
  
-    If `colorama` is not found, then no change is made. If `colorama` does
-    exist, then it handles the logic to determine whether or not to change
-    things.
+    If `colorama` is unavailable, the original stream is returned unmodified.
+    Otherwise, the `wrap_stream()` function determines whether the stream needs
+    to be wrapped for a Windows environment and will accordingly either return
+    an `AnsiToWin32` wrapper or the original stream.
      """
      try:
-        from colorama import initialise
-
-        # We set `strip=False` so that we can don't have to modify
-        # test_express_diff_with_color.
-        f = initialise.wrap_stream(
-            f, convert=None, strip=False, autoreset=False, wrap=True
-        )
-
-        # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object
-        # which does not have a `detach()` method. So we fake one.
-        f.detach = lambda *args, **kwargs: None  # type: ignore
+        from colorama.initialise import wrap_stream
      except ImportError:
-        pass
-
-    return f
+        return f
+    else:
+        # Set `strip=False` to avoid needing to modify test_express_diff_with_color.
+        return wrap_stream(f, convert=None, strip=False, autoreset=False, wrap=True)
  
  
  def format_stdin_to_stdout(
@@ -921,7 +960,7 @@ def format_stdin_to_stdout(
  
  
  def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
-    """Reformat contents a file and return new contents.
+    """Reformat contents of a file and return new contents.
  
      If `fast` is False, additionally confirm that the reformatted code is
      valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
@@ -947,7 +986,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
      allowed.  Example:
  
      >>> import black
-    >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
+    >>> print(black.format_str("def f(arg:str='')->None:...", mode=black.Mode()))
      def f(arg: str = "") -> None:
          ...
  
@@ -1308,7 +1347,13 @@ class BracketTracker:
          self.maybe_decrement_after_lambda_arguments(leaf)
          if leaf.type in CLOSING_BRACKETS:
              self.depth -= 1
-            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
+            try:
+                opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
+            except KeyError as e:
+                raise BracketMatchError(
+                    "Unable to match a closing bracket to the following opening"
+                    f" bracket: {leaf}"
+                ) from e
              leaf.opening_bracket = opening_bracket
              if not leaf.value:
                  self.invisible.append(leaf)
@@ -1824,6 +1869,10 @@ class EmptyLineTracker:
              return 0, 0
  
          if self.previous_line.is_decorator:
+            if self.is_pyi and current_line.is_stub_class:
+                # Insert an empty line after a decorated stub class
+                return 0, 1
+
              return 0, 0
  
          if self.previous_line.depth < current_line.depth and (
@@ -1847,8 +1896,11 @@ class EmptyLineTracker:
                      newlines = 0
                  else:
                      newlines = 1
-            elif current_line.is_def and not self.previous_line.is_def:
-                # Blank line between a block of functions and a block of non-functions
+            elif (
+                current_line.is_def or current_line.is_decorator
+            ) and not self.previous_line.is_def:
+                # Blank line between a block of functions (maybe with preceding
+                # decorators) and a block of non-functions
                  newlines = 1
              else:
                  newlines = 0
@@ -2167,6 +2219,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
          ):
              # Python 2 print chevron
              return NO
+        elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
+            # no space in decorators
+            return NO
  
      elif prev.type in OPENING_BRACKETS:
          return NO
@@ -2889,11 +2944,8 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
      """StringTransformer that merges strings together.
  
      Requirements:
-        (A) The line contains adjacent strings such that at most one substring
-        has inline comments AND none of those inline comments are pragmas AND
-        the set of all substring prefixes is either of length 1 or equal to
-        {"", "f"} AND none of the substrings are raw strings (i.e. are prefixed
-        with 'r').
+        (A) The line contains adjacent strings such that ALL of the validation checks
+        listed in StringMerger.__validate_msg(...)'s docstring pass.
              OR
          (B) The line contains a string which uses line continuation backslashes.
  
@@ -3142,6 +3194,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
              * Ok(None), if ALL validation checks (listed below) pass.
                  OR
              * Err(CannotTransform), if any of the following are true:
+                - The target string group contains stand-alone comments.
                  - The target string is not in a string group (i.e. it has no
                    adjacent strings).
                  - The string group has more than one inline comment.
@@ -3150,6 +3203,26 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
                    length greater than one and is not equal to {"", "f"}.
                  - The string group consists of raw strings.
          """
+        # We first check for "inner" stand-alone comments (i.e. stand-alone
+        # comments that have a string leaf before them AND after them).
+        for inc in [1, -1]:
+            i = string_idx
+            found_sa_comment = False
+            is_valid_index = is_valid_index_factory(line.leaves)
+            while is_valid_index(i) and line.leaves[i].type in [
+                token.STRING,
+                STANDALONE_COMMENT,
+            ]:
+                if line.leaves[i].type == STANDALONE_COMMENT:
+                    found_sa_comment = True
+                elif found_sa_comment:
+                    return TErr(
+                        "StringMerger does NOT merge string groups which contain "
+                        "stand-alone comments."
+                    )
+
+                i += inc
+
          num_of_inline_string_comments = 0
          set_of_prefixes = set()
          num_of_strings = 0
@@ -3198,7 +3271,8 @@ class StringParenStripper(StringTransformer):
  
      Requirements:
          The line contains a string which is surrounded by parentheses and:
-            - The target string is NOT the only argument to a function call).
+            - The target string is NOT the only argument to a function call.
+            - The target string is NOT a "pointless" string.
              - If the target string contains a PERCENT, the brackets are not
                preceeded or followed by an operator with higher precedence than
                PERCENT.
@@ -3222,6 +3296,14 @@ class StringParenStripper(StringTransformer):
              if leaf.type != token.STRING:
                  continue
  
+            # If this is a "pointless" string...
+            if (
+                leaf.parent
+                and leaf.parent.parent
+                and leaf.parent.parent.type == syms.simple_stmt
+            ):
+                continue
+
              # Should be preceded by a non-empty LPAR...
              if (
                  not is_valid_index(idx - 1)
@@ -3306,10 +3388,17 @@ class StringParenStripper(StringTransformer):
                  yield TErr(
                      "Will not strip parentheses which have comments attached to them."
                  )
+                return
  
          new_line = line.clone()
          new_line.comments = line.comments.copy()
-        append_leaves(new_line, line, LL[: string_idx - 1])
+        try:
+            append_leaves(new_line, line, LL[: string_idx - 1])
+        except BracketMatchError:
+            # HACK: I believe there is currently a bug somewhere in
+            # right_hand_split() that is causing brackets to not be tracked
+            # properly by a shared BracketTracker.
+            append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
  
          string_leaf = Leaf(token.STRING, LL[string_idx].value)
          LL[string_idx - 1].remove()
@@ -3472,9 +3561,12 @@ class BaseStringSplitter(StringTransformer):
                  # WMA4 a single space.
                  offset += 1
  
-                # WMA4 the lengths of any leaves that came before that space.
-                for leaf in LL[: p_idx + 1]:
+                # WMA4 the lengths of any leaves that came before that space,
+                # but after any closing bracket before that space.
+                for leaf in reversed(LL[: p_idx + 1]):
                      offset += len(str(leaf))
+                    if leaf.type in CLOSING_BRACKETS:
+                        break
  
          if is_valid_index(string_idx + 1):
              N = LL[string_idx + 1]
@@ -3540,7 +3632,8 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
          MIN_SUBSTR_SIZE characters.
  
          The string will ONLY be split on spaces (i.e. each new substring should
-        start with a space).
+        start with a space). Note that the string will NOT be split on a space
+        which is escaped with a backslash.
  
          If the string is an f-string, it will NOT be split in the middle of an
          f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
@@ -3560,13 +3653,14 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
      MIN_SUBSTR_SIZE = 6
      # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
      RE_FEXPR = r"""
-    (?<!\{)\{
+    (?<!\{) (?:\{\{)* \{ (?!\{)
          (?:
              [^\{\}]
              | \{\{
              | \}\}
+            | (?R)
          )+?
-    (?<!\})(?:\}\})*\}(?!\})
+    (?<!\}) \} (?:\}\})* (?!\})
      """
  
      def do_splitter_match(self, line: Line) -> TMatchResult:
@@ -3880,11 +3974,23 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
                  section of this classes' docstring would be be met by returning @i.
              """
              is_space = string[i] == " "
+
+            is_not_escaped = True
+            j = i - 1
+            while is_valid_index(j) and string[j] == "\\":
+                is_not_escaped = not is_not_escaped
+                j -= 1
+
              is_big_enough = (
                  len(string[i:]) >= self.MIN_SUBSTR_SIZE
                  and len(string[:i]) >= self.MIN_SUBSTR_SIZE
              )
-            return is_space and is_big_enough and not breaks_fstring_expression(i)
+            return (
+                is_space
+                and is_not_escaped
+                and is_big_enough
+                and not breaks_fstring_expression(i)
+            )
  
          # First, we check all indices BELOW @max_break_idx.
          break_idx = max_break_idx
@@ -4580,7 +4686,9 @@ def line_to_string(line: Line) -> str:
      return str(line).strip("\n")
  
  
-def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
+def append_leaves(
+    new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False
+) -> None:
      """
      Append leaves (taken from @old_line) to @new_line, making sure to fix the
      underlying Node structure where appropriate.
@@ -4596,7 +4704,7 @@ def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
      for old_leaf in leaves:
          new_leaf = Leaf(old_leaf.type, old_leaf.value)
          replace_child(old_leaf, new_leaf)
-        new_line.append(new_leaf)
+        new_line.append(new_leaf, preformatted=preformatted)
  
          for comment_leaf in old_line.comments_after(old_leaf):
              new_line.append(comment_leaf, preformatted=True)
@@ -5126,31 +5234,52 @@ def normalize_numeric_literal(leaf: Leaf) -> None:
          # Leave octal and binary literals alone.
          pass
      elif text.startswith("0x"):
-        # Change hex literals to upper case.
-        before, after = text[:2], text[2:]
-        text = f"{before}{after.upper()}"
+        text = format_hex(text)
      elif "e" in text:
-        before, after = text.split("e")
-        sign = ""
-        if after.startswith("-"):
-            after = after[1:]
-            sign = "-"
-        elif after.startswith("+"):
-            after = after[1:]
-        before = format_float_or_int_string(before)
-        text = f"{before}e{sign}{after}"
+        text = format_scientific_notation(text)
      elif text.endswith(("j", "l")):
-        number = text[:-1]
-        suffix = text[-1]
-        # Capitalize in "2L" because "l" looks too similar to "1".
-        if suffix == "l":
-            suffix = "L"
-        text = f"{format_float_or_int_string(number)}{suffix}"
+        text = format_long_or_complex_number(text)
      else:
          text = format_float_or_int_string(text)
      leaf.value = text
  
  
+def format_hex(text: str) -> str:
+    """
+    Lowercases the digits of a hexadecimal string like "0x12B3"
+
+    The first two characters (the "0x" prefix) are returned untouched. Lowercase
+    is used because of the similarity between "B" and "8", which can cause
+    security issues.
+    see: https://github.com/psf/black/issues/1692
+    """
+    prefix_len = 2
+    return text[:prefix_len] + text[prefix_len:].lower()
+
+
+def format_scientific_notation(text: str) -> str:
+    """Formats a numeric string written in scientific notation, like "1.5e+3".
+
+    The part before the "e" goes through format_float_or_int_string(); a "+"
+    sign on the exponent is dropped while a "-" sign is kept.
+    """
+    mantissa, exponent = text.split("e")
+    sign = "-" if exponent.startswith("-") else ""
+    if exponent.startswith(("-", "+")):
+        exponent = exponent[1:]
+    return f"{format_float_or_int_string(mantissa)}e{sign}{exponent}"
+
+
+def format_long_or_complex_number(text: str) -> str:
+    """Format a literal carrying a one-character suffix, like `10L` or `10j`."""
+    digits, marker = text[:-1], text[-1]
+    # A lowercase "l" is too easy to misread as the digit "1", so the long
+    # suffix is always emitted as "L"; any other suffix is kept unchanged.
+    if marker == "l":
+        marker = "L"
+    return format_float_or_int_string(digits) + marker
+
+
  def format_float_or_int_string(text: str) -> str:
      """Formats a float string like "1.0"."""
      if "." not in text:
@@ -5452,6 +5581,49 @@ def is_walrus_assignment(node: LN) -> bool:
      return inner is not None and inner.type == syms.namedexpr_test
  
  
+def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
+    """Return True iff `node` is a trailer valid in a simple decorator.
+
+    `.NAME` always qualifies; `()` and `(...)` qualify only when `last` is set.
+    """
+    if node.type != syms.trailer:
+        return False
+
+    kids = node.children
+    if len(kids) == 2 and kids[0].type == token.DOT and kids[1].type == token.NAME:
+        return True
+
+    # last trailer can be a call: `()` has two children, `(args)` has three;
+    # what sits between the parentheses is deliberately not inspected
+    if not last or len(kids) not in (2, 3):
+        return False
+    return kids[0].type == token.LPAR and kids[-1].type == token.RPAR
+
+
+def is_simple_decorator_expression(node: LN) -> bool:
+    """Tell whether the decorator expression `node` fits the old grammar.
+
+    `node` is the 'namedexpr_test' child of a decorator under the new grammar;
+    the result is True iff it is a plain name, or a name followed only by
+    trailers that is_simple_decorator_trailer() accepts.
+
+    The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
+    The new grammar is : decorator: @ namedexpr_test NEWLINE
+    """
+    if node.type == token.NAME:
+        return True
+    if node.type != syms.power or not node.children:
+        return False
+
+    head, *trailers = node.children
+    if head.type != token.NAME:
+        return False
+    # Only the final trailer is checked with last=True; the others are not.
+    if not all(is_simple_decorator_trailer(t) for t in trailers[:-1]):
+        return False
+    return not trailers or is_simple_decorator_trailer(trailers[-1], last=True)
+
+
  def is_yield(node: LN) -> bool:
      """Return True if `node` holds a `yield` or `yield from` expression."""
      if node.type == syms.yield_expr:
@@ -5637,6 +5809,8 @@ def get_features_used(node: Node) -> Set[Feature]:
      - underscores in numeric literals;
      - trailing commas after * or ** in function signatures and calls;
      - positional only arguments in function signatures and lambdas;
+    - assignment expression;
+    - relaxed decorator syntax;
      """
      features: Set[Feature] = set()
      for n in node.pre_order():
@@ -5656,6 +5830,12 @@ def get_features_used(node: Node) -> Set[Feature]:
          elif n.type == token.COLONEQUAL:
              features.add(Feature.ASSIGNMENT_EXPRESSIONS)
  
+        elif n.type == syms.decorator:
+            if len(n.children) > 1 and not is_simple_decorator_expression(
+                n.children[1]
+            ):
+                features.add(Feature.RELAXED_DECORATORS)
+
          elif (
              n.type in {syms.typedargslist, syms.arglist}
              and n.children
@@ -6627,13 +6807,33 @@ def is_docstring(leaf: Leaf) -> bool:
      return False
  
  
+def lines_with_leading_tabs_expanded(s: str) -> List[str]:
+    """
+    Splits string into lines and expands only leading tabs (following the normal
+    Python rules)
+
+    Tabs after the first non-whitespace character, and whitespace-only lines,
+    are returned unchanged. The indentation is located with str.lstrip() rather
+    than a backtracking regex, so long whitespace runs are scanned in linear time.
+    """
+    lines = []
+    for line in s.splitlines():
+        stripped_line = line.lstrip()
+        if not stripped_line or stripped_line == line:
+            # Whitespace-only line or no indentation at all: nothing to expand
+            lines.append(line)
+        else:
+            prefix_length = len(line) - len(stripped_line)
+            prefix = line[:prefix_length].expandtabs()
+            lines.append(f"{prefix}{stripped_line}")
+    return lines
+
+
  def fix_docstring(docstring: str, prefix: str) -> str:
      # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
      if not docstring:
          return ""
-    # Convert tabs to spaces (following the normal Python rules)
-    # and split into a list of lines:
-    lines = docstring.expandtabs().splitlines()
+    lines = lines_with_leading_tabs_expanded(docstring)
      # Determine minimum indentation (first line doesn't count):
      indent = sys.maxsize
      for line in lines[1:]: