X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/1b51e2c07887bdd0aa12f8e79215ea72cbbc2ab4..b4cee97c99d5513ef81fdf2bff1809721662f87d:/black.py

diff --git a/black.py b/black.py
index 0fa9df0..e7a7aa8 100644
--- a/black.py
+++ b/black.py
@@ -24,6 +24,7 @@ from typing import (
     List,
     Optional,
     Pattern,
+    Sequence,
     Set,
     Tuple,
     Type,
@@ -41,6 +42,7 @@ from blib2to3 import pygram, pytree
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.parse import ParseError
 
+
 __version__ = "18.4a6"
 DEFAULT_LINE_LENGTH = 88
 
@@ -237,11 +239,8 @@ def reformat_one(
                 src = src.resolve()
                 if src in cache and cache[src] == get_cache_info(src):
                     changed = Changed.CACHED
-            if (
-                changed is not Changed.CACHED
-                and format_file_in_place(
-                    src, line_length=line_length, fast=fast, write_back=write_back
-                )
+            if changed is not Changed.CACHED and format_file_in_place(
+                src, line_length=line_length, fast=fast, write_back=write_back
             ):
                 changed = Changed.YES
             if write_back == WriteBack.YES and changed is not Changed.NO:
@@ -327,12 +326,13 @@ def format_file_in_place(
     If `write_back` is True, write reformatted code back to stdout.
     `line_length` and `fast` options are passed to :func:`format_file_contents`.
     """
+    is_pyi = src.suffix == ".pyi"
 
     with tokenize.open(src) as src_buffer:
         src_contents = src_buffer.read()
     try:
         dst_contents = format_file_contents(
-            src_contents, line_length=line_length, fast=fast
+            src_contents, line_length=line_length, fast=fast, is_pyi=is_pyi
         )
     except NothingChanged:
         return False
@@ -381,7 +381,7 @@ def format_stdin_to_stdout(
 
 
 def format_file_contents(
-    src_contents: str, line_length: int, fast: bool
+    src_contents: str, *, line_length: int, fast: bool, is_pyi: bool = False
 ) -> FileContent:
     """Reformat contents a file and return new contents.
 
@@ -392,26 +392,33 @@ def format_file_contents(
     if src_contents.strip() == "":
         raise NothingChanged
 
-    dst_contents = format_str(src_contents, line_length=line_length)
+    dst_contents = format_str(src_contents, line_length=line_length, is_pyi=is_pyi)
     if src_contents == dst_contents:
         raise NothingChanged
 
     if not fast:
         assert_equivalent(src_contents, dst_contents)
-        assert_stable(src_contents, dst_contents, line_length=line_length)
+        assert_stable(
+            src_contents, dst_contents, line_length=line_length, is_pyi=is_pyi
+        )
     return dst_contents
 
 
-def format_str(src_contents: str, line_length: int) -> FileContent:
+def format_str(
+    src_contents: str, line_length: int, *, is_pyi: bool = False
+) -> FileContent:
     """Reformat a string and return new contents.
 
     `line_length` determines how many characters per line are allowed.
     """
     src_node = lib2to3_parse(src_contents)
     dst_contents = ""
-    lines = LineGenerator()
-    elt = EmptyLineTracker()
+    future_imports = get_future_imports(src_node)
+    elt = EmptyLineTracker(is_pyi=is_pyi)
     py36 = is_python36(src_node)
+    lines = LineGenerator(
+        remove_u_prefix=py36 or "unicode_literals" in future_imports, is_pyi=is_pyi
+    )
     empty_line = Line()
     after = 0
     for current_line in lines.visit(src_node):
@@ -830,6 +837,14 @@ class Line:
             and self.leaves[0].value == "class"
         )
 
+    @property
+    def is_stub_class(self) -> bool:
+        """Is this line a class definition whose last three leaves spell "..."?"""
+        if not self.is_class:
+            return False
+
+        return self.leaves[-3:] == [Leaf(token.DOT, ".")] * 3
+
     @property
     def is_def(self) -> bool:
         """Is this a function definition? (Also returns True for async defs.)"""
@@ -842,14 +857,11 @@ class Line:
             second_leaf: Optional[Leaf] = self.leaves[1]
         except IndexError:
             second_leaf = None
-        return (
-            (first_leaf.type == token.NAME and first_leaf.value == "def")
-            or (
-                first_leaf.type == token.ASYNC
-                and second_leaf is not None
-                and second_leaf.type == token.NAME
-                and second_leaf.value == "def"
-            )
+        return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
+            first_leaf.type == token.ASYNC
+            and second_leaf is not None
+            and second_leaf.type == token.NAME
+            and second_leaf.value == "def"
         )
 
     @property
@@ -974,17 +986,21 @@ class Line:
             self.comments.append((after, comment))
             return True
 
-    def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
-        """Generate comments that should appear directly after `leaf`."""
-        for _leaf_index, _leaf in enumerate(self.leaves):
-            if leaf is _leaf:
-                break
+    def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
+        """Generate comments that should appear directly after `leaf`.
 
-        else:
-            return
+        Provide a non-negative leaf `_index` to speed up the function.
+        """
+        if _index == -1:
+            for _index, _leaf in enumerate(self.leaves):
+                if leaf is _leaf:
+                    break
+
+            else:
+                return
 
         for index, comment_after in self.comments:
-            if _leaf_index == index:
+            if _index == index:
                 yield comment_after
 
     def remove_trailing_comma(self) -> None:
@@ -1010,9 +1026,8 @@ class Line:
             and subscript_start.type == syms.subscriptlist
         ):
             subscript_start = child_towards(subscript_start, leaf)
-        return (
-            subscript_start is not None
-            and any(n.type in TEST_DESCENDANTS for n in subscript_start.pre_order())
+        return subscript_start is not None and any(
+            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
         )
 
     def __str__(self) -> str:
@@ -1093,6 +1108,7 @@ class EmptyLineTracker:
     the prefix of the first leaf consists of optional newlines.  Those newlines
     are consumed by `maybe_empty_lines()` and included in the computation.
     """
+    is_pyi: bool = False
     previous_line: Optional[Line] = None
     previous_after: int = 0
     previous_defs: List[int] = Factory(list)
@@ -1116,7 +1132,7 @@ class EmptyLineTracker:
     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
         max_allowed = 1
         if current_line.depth == 0:
-            max_allowed = 2
+            max_allowed = 1 if self.is_pyi else 2
         if current_line.leaves:
             # Consume the first leaf's extra newlines.
             first_leaf = current_line.leaves[0]
@@ -1128,7 +1144,10 @@ class EmptyLineTracker:
         depth = current_line.depth
         while self.previous_defs and self.previous_defs[-1] >= depth:
             self.previous_defs.pop()
-            before = 1 if depth else 2
+            if self.is_pyi:
+                before = 0 if depth else 1
+            else:
+                before = 1 if depth else 2
         is_decorator = current_line.is_decorator
         if is_decorator or current_line.is_def or current_line.is_class:
             if not is_decorator:
@@ -1147,8 +1166,19 @@ class EmptyLineTracker:
             ):
                 return 0, 0
 
-            newlines = 2
-            if current_line.depth:
+            if self.is_pyi:
+                if self.previous_line.depth > current_line.depth:
+                    newlines = 1
+                elif current_line.is_class or self.previous_line.is_class:
+                    if current_line.is_stub_class and self.previous_line.is_stub_class:
+                        newlines = 0
+                    else:
+                        newlines = 1
+                else:
+                    newlines = 0
+            else:
+                newlines = 2
+            if current_line.depth and newlines:
                 newlines -= 1
             return newlines, 0
 
@@ -1170,7 +1200,9 @@ class LineGenerator(Visitor[Line]):
     Note: destroys the tree it's visiting by mutating prefixes of its leaves
     in ways that will no longer stringify to valid Python code on the tree.
     """
+    is_pyi: bool = False
     current_line: Line = Factory(Line)
+    remove_u_prefix: bool = False
 
     def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
         """Generate a line.
@@ -1238,6 +1270,7 @@ class LineGenerator(Visitor[Line]):
             else:
                 normalize_prefix(node, inside_brackets=any_open_brackets)
                 if node.type == token.STRING:
+                    normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                     normalize_string_quotes(node)
                 if node.type not in WHITESPACE:
                     self.current_line.append(node)
@@ -1284,16 +1317,27 @@ class LineGenerator(Visitor[Line]):
 
             yield from self.visit(child)
 
+    def visit_suite(self, node: Node) -> Iterator[Line]:
+        """Visit a suite; for a stub suite in a stub file, visit only its body."""
+        if not self.is_pyi or not is_stub_suite(node):
+            yield from self.visit_default(node)
+        else:
+            yield from self.visit(node.children[2])
+
     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
         """Visit a statement without nested statements."""
         is_suite_like = node.parent and node.parent.type in STATEMENT
         if is_suite_like:
-            yield from self.line(+1)
-            yield from self.visit_default(node)
-            yield from self.line(-1)
+            if self.is_pyi and is_stub_body(node):
+                yield from self.visit_default(node)
+            else:
+                yield from self.line(+1)
+                yield from self.visit_default(node)
+                yield from self.line(-1)
 
         else:
-            yield from self.line()
+            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
+                yield from self.line()
             yield from self.visit_default(node)
 
     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
@@ -1821,11 +1865,7 @@ def split_line(
         return
 
     line_str = str(line).strip("\n")
-    if (
-        len(line_str) <= line_length
-        and "\n" not in line_str  # multiline strings
-        and not line.contains_standalone_comments()
-    ):
+    if is_line_short_enough(line, line_length=line_length, line_str=line_str):
         yield line
         return
 
@@ -1834,10 +1874,22 @@ def split_line(
         split_funcs = [left_hand_split]
     elif line.is_import:
         split_funcs = [explode_split]
-    elif line.inside_brackets:
-        split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
     else:
-        split_funcs = [right_hand_split]
+
+        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
+            for omit in generate_trailers_to_omit(line, line_length):
+                lines = list(right_hand_split(line, py36, omit=omit))
+                if is_line_short_enough(lines[0], line_length=line_length):
+                    yield from lines
+                    return
+
+            # All splits failed, best effort split with no omits.
+            yield from right_hand_split(line, py36)
+
+        if line.inside_brackets:
+            split_funcs = [delimiter_split, standalone_comment_split, rhs]
+        else:
+            split_funcs = [rhs]
     for split_func in split_funcs:
         # We are accumulating lines in `result` because we might want to abort
         # mission and return the original line in the end, or attempt a different
@@ -1910,6 +1962,10 @@ def right_hand_split(
     """Split line into many lines, starting with the last matching bracket pair.
 
     If the split was by optional parentheses, attempt splitting without them, too.
+    `omit` is a collection of closing bracket IDs that shouldn't be considered for
+    this split.
+
+    Note: running this function modifies `bracket_depth` on the leaves of `line`.
     """
     head = Line(depth=line.depth)
     body = Line(depth=line.depth + 1, inside_brackets=True)
@@ -1936,8 +1992,9 @@ def right_hand_split(
     # Since body is a new indent level, remove spurious leading whitespace.
     if body_leaves:
         normalize_prefix(body_leaves[0], inside_brackets=True)
-    elif not head_leaves:
-        # No `head` and no `body` means the split failed. `tail` has all content.
+    if not head_leaves:
+        # No `head` means the split failed. Either `tail` has all content or
+        # the matching `opening_bracket` wasn't available on `line` anymore.
         raise CannotSplit("No brackets found")
 
     # Build the new lines.
@@ -1955,19 +2012,27 @@ def right_hand_split(
         # the closing bracket is an optional paren
         and closing_bracket.type == token.RPAR
         and not closing_bracket.value
-        # there are no delimiters or standalone comments in the body
-        and not body.bracket_tracker.delimiters
+        # there are no standalone comments in the body
         and not line.contains_standalone_comments(0)
         # and it's not an import (optional parens are the only thing we can split
         # on in this case; attempting a split without them is a waste of time)
         and not line.is_import
     ):
         omit = {id(closing_bracket), *omit}
-        try:
-            yield from right_hand_split(line, py36=py36, omit=omit)
-            return
-        except CannotSplit:
-            pass
+        delimiter_count = len(body.bracket_tracker.delimiters)
+        if (
+            delimiter_count == 0
+            or delimiter_count == 1
+            and (
+                body.leaves[0].type in OPENING_BRACKETS
+                or body.leaves[-1].type in CLOSING_BRACKETS
+            )
+        ):
+            try:
+                yield from right_hand_split(line, py36=py36, omit=omit)
+                return
+            except CannotSplit:
+                pass
 
     ensure_visible(opening_bracket)
     ensure_visible(closing_bracket)
@@ -2052,10 +2117,10 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
             current_line.append(leaf)
 
-    for leaf in line.leaves:
+    for index, leaf in enumerate(line.leaves):
         yield from append_to_line(leaf)
 
-        for comment_after in line.comments_after(leaf):
+        for comment_after in line.comments_after(leaf, index):
             yield from append_to_line(comment_after)
 
         lowest_depth = min(lowest_depth, leaf.bracket_depth)
@@ -2099,10 +2164,10 @@ def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
             current_line.append(leaf)
 
-    for leaf in line.leaves:
+    for index, leaf in enumerate(line.leaves):
         yield from append_to_line(leaf)
 
-        for comment_after in line.comments_after(leaf):
+        for comment_after in line.comments_after(leaf, index):
             yield from append_to_line(comment_after)
 
     if current_line:
@@ -2161,6 +2226,22 @@ def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
     leaf.prefix = ""
 
 
+def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
+    """Lowercase the prefix letters of the string literal held by `leaf`.
+
+    With `remove_u_prefix` set, every "u" is dropped from the prefix as well.
+
+    Note: `leaf.value` is rewritten in place.
+    """
+    found = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
+    assert found is not None, f"failed to match string {leaf.value!r}"
+    prefix, rest = found.groups()
+    prefix = prefix.lower()
+    if remove_u_prefix:
+        prefix = "".join(char for char in prefix if char != "u")
+    leaf.value = prefix + rest
+
+
 def normalize_string_quotes(leaf: Leaf) -> None:
     """Prefer double quotes but only if it doesn't cause more escaping.
 
@@ -2258,6 +2339,7 @@ def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
         node.type != syms.atom
         or is_empty_tuple(node)
         or is_one_tuple(node)
+        or is_yield(node)
         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
     ):
         return False
@@ -2308,12 +2390,33 @@ def is_one_tuple(node: LN) -> bool:
     )
 
 
+def is_yield(node: LN) -> bool:
+    """Tell whether `node` is a `yield` expression, possibly inside parentheses."""
+    current = node
+    while True:
+        if current.type == syms.yield_expr:
+            return True
+
+        if current.type == token.NAME and current.value == "yield":  # type: ignore
+            return True
+
+        if current.type != syms.atom or len(current.children) != 3:
+            return False
+
+        first, inner, last = current.children
+        if first.type != token.LPAR or last.type != token.RPAR:
+            return False
+
+        # Peel one layer of parentheses and examine what they wrap.
+        current = inner
+
+
 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
     """Return True if `leaf` is a star or double star in a vararg or kwarg.
 
     If `within` includes VARARGS_PARENTS, this applies to function signatures.
-    If `within` includes COLLECTION_LIBERALS_PARENTS, it applies to right
-    hand-side extended iterable unpacking (PEP 3132) and additional unpacking
+    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
+    extended iterable unpacking (PEP 3132) and additional unpacking
     generalizations (PEP 448).
     """
     if leaf.type not in STARS or not leaf.parent:
@@ -2331,6 +2434,35 @@ def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
     return p.type in within
 
 
+def is_stub_suite(node: Node) -> bool:
+    """Tell whether `node` is exactly NEWLINE, INDENT, a stub body, and DEDENT."""
+    if len(node.children) != 4:
+        return False
+
+    newline, indent, body, dedent = node.children
+    if newline.type != token.NEWLINE or indent.type != token.INDENT:
+        return False
+
+    # Check the cheap token type first; `is_stub_body()` inspects the statement.
+    return dedent.type == token.DEDENT and is_stub_body(body)
+
+
+def is_stub_body(node: LN) -> bool:
+    """Tell whether `node` is a simple statement whose expression is just `...`."""
+    if not isinstance(node, Node):
+        return False
+
+    if node.type != syms.simple_stmt or len(node.children) != 2:
+        return False
+
+    expr = node.children[0]
+    if expr.type != syms.atom:
+        return False
+
+    # The accepted shape is an atom made of exactly three "." leaves.
+    return expr.children == [Leaf(token.DOT, ".")] * 3
+
+
 def max_delimiter_priority_in_atom(node: LN) -> int:
     """Return maximum delimiter priority inside `node`.
 
@@ -2401,7 +2533,103 @@ def is_python36(node: Node) -> bool:
     return False
 
 
-PYTHON_EXTENSIONS = {".py"}
+def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
+    """Generate sets of closing bracket IDs that should be omitted in a RHS.
+
+    Brackets can be omitted if the entire trailer up to and including
+    a preceding closing bracket fits in one line.
+
+    Yielded sets are cumulative: one set object is grown and re-yielded, so copy
+    it to keep a snapshot.  First set is empty.
+    """
+
+    omit: Set[LeafID] = set()
+    yield omit
+
+    length = 4 * line.depth
+    opening_bracket = None
+    closing_bracket = None
+    # Nested or empty closing brackets, added to `omit` on the next yield.
+    inner_brackets: Set[LeafID] = set()
+    for index, leaf in enumerate_reversed(line.leaves):
+        length += len(leaf.prefix) + len(leaf.value)
+        if length > line_length:
+            break
+
+        comment: Optional[Leaf]
+        for comment in line.comments_after(leaf, index):
+            if "\n" in comment.prefix:
+                break  # Oops, standalone comment!
+
+            length += len(comment.value)
+        else:
+            comment = None
+        if comment is not None:
+            break  # There was a standalone comment, we can't continue.
+
+        # Scanning right to left: track whether we are inside a bracket pair.
+        if opening_bracket:
+            if leaf is opening_bracket:
+                opening_bracket = None
+            elif leaf.type in CLOSING_BRACKETS:
+                inner_brackets.add(id(leaf))
+        elif leaf.type in CLOSING_BRACKETS:
+            if not leaf.value:
+                # Invisible (optional) parentheses are never candidates.
+                continue
+
+            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
+                # Empty brackets would fail a split so treat them as "inner"
+                # brackets (i.e. only add them to the `omit` set if another
+                # pair of brackets was good enough).
+                inner_brackets.add(id(leaf))
+                continue
+
+            opening_bracket = leaf.opening_bracket
+            if closing_bracket:
+                omit.add(id(closing_bracket))
+                omit.update(inner_brackets)
+                inner_brackets.clear()
+                yield omit
+            closing_bracket = leaf
+
+
+def get_future_imports(node: Node) -> Set[str]:
+    """Return the set of `__future__` feature names imported at the top of the file."""
+    imports: Set[str] = set()
+    for child in node.children:
+        if child.type != syms.simple_stmt:
+            break
+        first_child = child.children[0]
+        if isinstance(first_child, Leaf):
+            # Continue looking if we see a docstring; otherwise stop.
+            if (
+                len(child.children) == 2
+                and first_child.type == token.STRING
+                and child.children[1].type == token.NEWLINE
+            ):
+                continue
+            break
+        if first_child.type != syms.import_from:
+            break
+        module_name = first_child.children[1]
+        if not isinstance(module_name, Leaf) or module_name.value != "__future__":
+            break
+        pending = list(first_child.children[3:])
+        while pending:
+            item = pending.pop()
+            if isinstance(item, Leaf):
+                if item.type == token.NAME:
+                    imports.add(item.value)
+            elif item.type == syms.import_as_name:
+                # `feature as alias`: the feature is enabled under its original name.
+                pending.append(item.children[0])
+            else:
+                pending.extend(item.children)
+    return imports
+
+
+PYTHON_EXTENSIONS = {".py", ".pyi"}
 BLACKLISTED_DIRECTORIES = {
     "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
 }
@@ -2418,7 +2646,7 @@ def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
 
             yield from gen_python_files_in_dir(child)
 
-        elif child.suffix in PYTHON_EXTENSIONS:
+        elif child.is_file() and child.suffix in PYTHON_EXTENSIONS:
             yield child
 
 
@@ -2564,9 +2792,9 @@ def assert_equivalent(src: str, dst: str) -> None:
         ) from None
 
 
-def assert_stable(src: str, dst: str, line_length: int) -> None:
+def assert_stable(src: str, dst: str, line_length: int, is_pyi: bool = False) -> None:
     """Raise AssertionError if `dst` reformats differently the second time."""
-    newdst = format_str(dst, line_length=line_length)
+    newdst = format_str(dst, line_length=line_length, is_pyi=is_pyi)
     if dst != newdst:
         log = dump_to_file(
             diff(src, dst, "source", "first pass"),
@@ -2643,6 +2871,28 @@ def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
     return regex.sub(replacement, regex.sub(replacement, original))
 
 
+def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
+    """Yield `(index, element)` pairs of `sequence`, last element first."""
+    # `enumerate()` results cannot be reversed, so pair a descending counter
+    # with the reversed sequence instead.
+    last = len(sequence) - 1
+    yield from zip(range(last, -1, -1), reversed(sequence))
+
+
+def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
+    """Tell whether `line` fits in `line_length` characters on a single row.
+
+    `line_str` is an optional pre-rendered form of `line`; when empty, the line is
+    rendered here.  A rendering containing a newline (multiline strings) or a line
+    with standalone comments never counts as short enough.
+    """
+    rendered = line_str or str(line).strip("\n")
+    if len(rendered) > line_length or "\n" in rendered:  # "\n": multiline strings
+        return False
+
+    return not line.contains_standalone_comments()
+
+
 CACHE_DIR = Path(user_cache_dir("black", version=__version__))