from dataclasses import dataclass, field import itertools import sys from typing import ( Callable, Collection, Dict, Iterator, List, Optional, Sequence, Tuple, TypeVar, cast, ) from blib2to3.pytree import Node, Leaf from blib2to3.pgen2 import token from black.brackets import BracketTracker, DOT_PRIORITY from black.mode import Mode from black.nodes import STANDALONE_COMMENT, TEST_DESCENDANTS from black.nodes import BRACKETS, OPENING_BRACKETS, CLOSING_BRACKETS from black.nodes import syms, whitespace, replace_child, child_towards from black.nodes import is_multiline_string, is_import, is_type_comment, last_two_except from black.nodes import is_one_tuple_between # types T = TypeVar("T") Index = int LeafID = int @dataclass class Line: """Holds leaves and comments. Can be printed with `str(line)`.""" mode: Mode depth: int = 0 leaves: List[Leaf] = field(default_factory=list) # keys ordered like `leaves` comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict) bracket_tracker: BracketTracker = field(default_factory=BracketTracker) inside_brackets: bool = False should_split_rhs: bool = False magic_trailing_comma: Optional[Leaf] = None def append(self, leaf: Leaf, preformatted: bool = False) -> None: """Add a new `leaf` to the end of the line. Unless `preformatted` is True, the `leaf` will receive a new consistent whitespace prefix and metadata applied by :class:`BracketTracker`. Trailing commas are maybe removed, unpacked for loop variables are demoted from being delimiters. Inline comments are put aside. """ has_value = leaf.type in BRACKETS or bool(leaf.value.strip()) if not has_value: return if token.COLON == leaf.type and self.is_class_paren_empty: del self.leaves[-2:] if self.leaves and not preformatted: # Note: at this point leaf.prefix should be empty except for # imports, for which we only preserve newlines. leaf.prefix += whitespace( leaf, complex_subscript=self.is_complex_subscript(leaf) ) if self.inside_brackets or not preformatted: self.bracket_tracker.mark(leaf) if self.mode.magic_trailing_comma: if self.has_magic_trailing_comma(leaf): self.magic_trailing_comma = leaf elif self.has_magic_trailing_comma(leaf, ensure_removable=True): self.remove_trailing_comma() if not self.append_comment(leaf): self.leaves.append(leaf) def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None: """Like :func:`append()` but disallow invalid standalone comment structure. Raises ValueError when any `leaf` is appended after a standalone comment or when a standalone comment is not the first leaf on the line. """ if self.bracket_tracker.depth == 0: if self.is_comment: raise ValueError("cannot append to standalone comments") if self.leaves and leaf.type == STANDALONE_COMMENT: raise ValueError( "cannot append standalone comments to a populated line" ) self.append(leaf, preformatted=preformatted) @property def is_comment(self) -> bool: """Is this line a standalone comment?""" return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT @property def is_decorator(self) -> bool: """Is this line a decorator?""" return bool(self) and self.leaves[0].type == token.AT @property def is_import(self) -> bool: """Is this an import line?""" return bool(self) and is_import(self.leaves[0]) @property def is_class(self) -> bool: """Is this line a class definition?""" return ( bool(self) and self.leaves[0].type == token.NAME and self.leaves[0].value == "class" ) @property def is_stub_class(self) -> bool: """Is this line a class definition with a body consisting only of "..."?""" return self.is_class and self.leaves[-3:] == [ Leaf(token.DOT, ".") for _ in range(3) ] @property def is_def(self) -> bool: """Is this a function definition? (Also returns True for async defs.)""" try: first_leaf = self.leaves[0] except IndexError: return False try: second_leaf: Optional[Leaf] = self.leaves[1] except IndexError: second_leaf = None return (first_leaf.type == token.NAME and first_leaf.value == "def") or ( first_leaf.type == token.ASYNC and second_leaf is not None and second_leaf.type == token.NAME and second_leaf.value == "def" ) @property def is_class_paren_empty(self) -> bool: """Is this a class with no base classes but using parentheses? Those are unnecessary and should be removed. """ return ( bool(self) and len(self.leaves) == 4 and self.is_class and self.leaves[2].type == token.LPAR and self.leaves[2].value == "(" and self.leaves[3].type == token.RPAR and self.leaves[3].value == ")" ) @property def is_triple_quoted_string(self) -> bool: """Is the line a triple quoted string?""" return ( bool(self) and self.leaves[0].type == token.STRING and self.leaves[0].value.startswith(('"""', "'''")) ) def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit: return True return False def contains_uncollapsable_type_comments(self) -> bool: ignored_ids = set() try: last_leaf = self.leaves[-1] ignored_ids.add(id(last_leaf)) if last_leaf.type == token.COMMA or ( last_leaf.type == token.RPAR and not last_leaf.value ): # When trailing commas or optional parens are inserted by Black for # consistency, comments after the previous last element are not moved # (they don't have to, rendering will still be correct). So we ignore # trailing commas and invisible. last_leaf = self.leaves[-2] ignored_ids.add(id(last_leaf)) except IndexError: return False # A type comment is uncollapsable if it is attached to a leaf # that isn't at the end of the line (since that could cause it # to get associated to a different argument) or if there are # comments before it (since that could cause it to get hidden # behind a comment. comment_seen = False for leaf_id, comments in self.comments.items(): for comment in comments: if is_type_comment(comment): if comment_seen or ( not is_type_comment(comment, " ignore") and leaf_id not in ignored_ids ): return True comment_seen = True return False def contains_unsplittable_type_ignore(self) -> bool: if not self.leaves: return False # If a 'type: ignore' is attached to the end of a line, we # can't split the line, because we can't know which of the # subexpressions the ignore was meant to apply to. # # We only want this to apply to actual physical lines from the # original source, though: we don't want the presence of a # 'type: ignore' at the end of a multiline expression to # justify pushing it all onto one line. Thus we # (unfortunately) need to check the actual source lines and # only report an unsplittable 'type: ignore' if this line was # one line in the original code. # Grab the first and last line numbers, skipping generated leaves first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0) last_line = next( (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0 ) if first_line == last_line: # We look at the last two leaves since a comma or an # invisible paren could have been added at the end of the # line. for node in self.leaves[-2:]: for comment in self.comments.get(id(node), []): if is_type_comment(comment, " ignore"): return True return False def contains_multiline_strings(self) -> bool: return any(is_multiline_string(leaf) for leaf in self.leaves) def has_magic_trailing_comma( self, closing: Leaf, ensure_removable: bool = False ) -> bool: """Return True if we have a magic trailing comma, that is when: - there's a trailing comma here - it's not a one-tuple Additionally, if ensure_removable: - it's not from square bracket indexing """ if not ( closing.type in CLOSING_BRACKETS and self.leaves and self.leaves[-1].type == token.COMMA ): return False if closing.type == token.RBRACE: return True if closing.type == token.RSQB: if not ensure_removable: return True comma = self.leaves[-1] return bool(comma.parent and comma.parent.type == syms.listmaker) if self.is_import: return True if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves): return True return False def append_comment(self, comment: Leaf) -> bool: """Add an inline or standalone comment to the line.""" if ( comment.type == STANDALONE_COMMENT and self.bracket_tracker.any_open_brackets() ): comment.prefix = "" return False if comment.type != token.COMMENT: return False if not self.leaves: comment.type = STANDALONE_COMMENT comment.prefix = "" return False last_leaf = self.leaves[-1] if ( last_leaf.type == token.RPAR and not last_leaf.value and last_leaf.parent and len(list(last_leaf.parent.leaves())) <= 3 and not is_type_comment(comment) ): # Comments on an optional parens wrapping a single leaf should belong to # the wrapped node except if it's a type comment. Pinning the comment like # this avoids unstable formatting caused by comment migration. if len(self.leaves) < 2: comment.type = STANDALONE_COMMENT comment.prefix = "" return False last_leaf = self.leaves[-2] self.comments.setdefault(id(last_leaf), []).append(comment) return True def comments_after(self, leaf: Leaf) -> List[Leaf]: """Generate comments that should appear directly after `leaf`.""" return self.comments.get(id(leaf), []) def remove_trailing_comma(self) -> None: """Remove the trailing comma and moves the comments attached to it.""" trailing_comma = self.leaves.pop() trailing_comma_comments = self.comments.pop(id(trailing_comma), []) self.comments.setdefault(id(self.leaves[-1]), []).extend( trailing_comma_comments ) def is_complex_subscript(self, leaf: Leaf) -> bool: """Return True iff `leaf` is part of a slice with non-trivial exprs.""" open_lsqb = self.bracket_tracker.get_open_lsqb() if open_lsqb is None: return False subscript_start = open_lsqb.next_sibling if isinstance(subscript_start, Node): if subscript_start.type == syms.listmaker: return False if subscript_start.type == syms.subscriptlist: subscript_start = child_towards(subscript_start, leaf) return subscript_start is not None and any( n.type in TEST_DESCENDANTS for n in subscript_start.pre_order() ) def enumerate_with_length( self, reversed: bool = False ) -> Iterator[Tuple[Index, Leaf, int]]: """Return an enumeration of leaves with their length. Stops prematurely on multiline strings and standalone comments. """ op = cast( Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]], enumerate_reversed if reversed else enumerate, ) for index, leaf in op(self.leaves): length = len(leaf.prefix) + len(leaf.value) if "\n" in leaf.value: return # Multiline strings, we can't continue. for comment in self.comments_after(leaf): length += len(comment.value) yield index, leaf, length def clone(self) -> "Line": return Line( mode=self.mode, depth=self.depth, inside_brackets=self.inside_brackets, should_split_rhs=self.should_split_rhs, magic_trailing_comma=self.magic_trailing_comma, ) def __str__(self) -> str: """Render the line.""" if not self: return "\n" indent = " " * self.depth leaves = iter(self.leaves) first = next(leaves) res = f"{first.prefix}{indent}{first.value}" for leaf in leaves: res += str(leaf) for comment in itertools.chain.from_iterable(self.comments.values()): res += str(comment) return res + "\n" def __bool__(self) -> bool: """Return True if the line has leaves or comments.""" return bool(self.leaves or self.comments) @dataclass class EmptyLineTracker: """Provides a stateful method that returns the number of potential extra empty lines needed before and after the currently processed line. Note: this tracker works on lines that haven't been split yet. It assumes the prefix of the first leaf consists of optional newlines. Those newlines are consumed by `maybe_empty_lines()` and included in the computation. """ is_pyi: bool = False previous_line: Optional[Line] = None previous_after: int = 0 previous_defs: List[int] = field(default_factory=list) def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: """Return the number of extra empty lines before and after the `current_line`. This is for separating `def`, `async def` and `class` with extra empty lines (two on module-level). """ before, after = self._maybe_empty_lines(current_line) before = ( # Black should not insert empty lines at the beginning # of the file 0 if self.previous_line is None else before - self.previous_after ) self.previous_after = after self.previous_line = current_line return before, after def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: max_allowed = 1 if current_line.depth == 0: max_allowed = 1 if self.is_pyi else 2 if current_line.leaves: # Consume the first leaf's extra newlines. first_leaf = current_line.leaves[0] before = first_leaf.prefix.count("\n") before = min(before, max_allowed) first_leaf.prefix = "" else: before = 0 depth = current_line.depth while self.previous_defs and self.previous_defs[-1] >= depth: self.previous_defs.pop() if self.is_pyi: before = 0 if depth else 1 else: before = 1 if depth else 2 if current_line.is_decorator or current_line.is_def or current_line.is_class: return self._maybe_empty_lines_for_class_or_def(current_line, before) if ( self.previous_line and self.previous_line.is_import and not current_line.is_import and depth == self.previous_line.depth ): return (before or 1), 0 if ( self.previous_line and self.previous_line.is_class and current_line.is_triple_quoted_string ): return before, 1 return before, 0 def _maybe_empty_lines_for_class_or_def( self, current_line: Line, before: int ) -> Tuple[int, int]: if not current_line.is_decorator: self.previous_defs.append(current_line.depth) if self.previous_line is None: # Don't insert empty lines before the first line in the file. return 0, 0 if self.previous_line.is_decorator: if self.is_pyi and current_line.is_stub_class: # Insert an empty line after a decorated stub class return 0, 1 return 0, 0 if self.previous_line.depth < current_line.depth and ( self.previous_line.is_class or self.previous_line.is_def ): return 0, 0 if ( self.previous_line.is_comment and self.previous_line.depth == current_line.depth and before == 0 ): return 0, 0 if self.is_pyi: if self.previous_line.depth > current_line.depth: newlines = 1 elif current_line.is_class or self.previous_line.is_class: if current_line.is_stub_class and self.previous_line.is_stub_class: # No blank line between classes with an empty body newlines = 0 else: newlines = 1 elif ( current_line.is_def or current_line.is_decorator ) and not self.previous_line.is_def: # Blank line between a block of functions (maybe with preceding # decorators) and a block of non-functions newlines = 1 else: newlines = 0 else: newlines = 2 if current_line.depth and newlines: newlines -= 1 return newlines, 0 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]: """Like `reversed(enumerate(sequence))` if that were possible.""" index = len(sequence) - 1 for element in reversed(sequence): yield (index, element) index -= 1 def append_leaves( new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False ) -> None: """ Append leaves (taken from @old_line) to @new_line, making sure to fix the underlying Node structure where appropriate. All of the leaves in @leaves are duplicated. The duplicates are then appended to @new_line and used to replace their originals in the underlying Node structure. Any comments attached to the old leaves are reattached to the new leaves. Pre-conditions: set(@leaves) is a subset of set(@old_line.leaves). """ for old_leaf in leaves: new_leaf = Leaf(old_leaf.type, old_leaf.value) replace_child(old_leaf, new_leaf) new_line.append(new_leaf, preformatted=preformatted) for comment_leaf in old_line.comments_after(old_leaf): new_line.append(comment_leaf, preformatted=True) def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool: """Return True if `line` is no longer than `line_length`. Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ if not line_str: line_str = line_to_string(line) return ( len(line_str) <= line_length and "\n" not in line_str # multiline strings and not line.contains_standalone_comments() ) def can_be_split(line: Line) -> bool: """Return False if the line cannot be split *for sure*. This is not an exhaustive search but a cheap heuristic that we can use to avoid some unfortunate formattings (mostly around wrapping unsplittable code in unnecessary parentheses). """ leaves = line.leaves if len(leaves) < 2: return False if leaves[0].type == token.STRING and leaves[1].type == token.DOT: call_count = 0 dot_count = 0 next = leaves[-1] for leaf in leaves[-2::-1]: if leaf.type in OPENING_BRACKETS: if next.type not in CLOSING_BRACKETS: return False call_count += 1 elif leaf.type == token.DOT: dot_count += 1 elif leaf.type == token.NAME: if not (next.type == token.DOT or next.type in OPENING_BRACKETS): return False elif leaf.type not in CLOSING_BRACKETS: return False if dot_count > 1 and call_count > 1: return False return True def can_omit_invisible_parens( line: Line, line_length: int, omit_on_explode: Collection[LeafID] = (), ) -> bool: """Does `line` have a shape safe to reformat without optional parens around it? Returns True for only a subset of potentially nice looking formattings but the point is to not return false positives that end up producing lines that are too long. """ bt = line.bracket_tracker if not bt.delimiters: # Without delimiters the optional parentheses are useless. return True max_priority = bt.max_delimiter_priority() if bt.delimiter_count_with_priority(max_priority) > 1: # With more than one delimiter of a kind the optional parentheses read better. return False if max_priority == DOT_PRIORITY: # A single stranded method call doesn't require optional parentheses. return True assert len(line.leaves) >= 2, "Stranded delimiter" # With a single delimiter, omit if the expression starts or ends with # a bracket. first = line.leaves[0] second = line.leaves[1] if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS: if _can_omit_opening_paren(line, first=first, line_length=line_length): return True # Note: we are not returning False here because a line might have *both* # a leading opening bracket and a trailing closing bracket. If the # opening bracket doesn't match our rule, maybe the closing will. penultimate = line.leaves[-2] last = line.leaves[-1] if line.magic_trailing_comma: try: penultimate, last = last_two_except(line.leaves, omit=omit_on_explode) except LookupError: # Turns out we'd omit everything. We cannot skip the optional parentheses. return False if ( last.type == token.RPAR or last.type == token.RBRACE or ( # don't use indexing for omitting optional parentheses; # it looks weird last.type == token.RSQB and last.parent and last.parent.type != syms.trailer ) ): if penultimate.type in OPENING_BRACKETS: # Empty brackets don't help. return False if is_multiline_string(first): # Additional wrapping of a multiline string in this situation is # unnecessary. return True if line.magic_trailing_comma and penultimate.type == token.COMMA: # The rightmost non-omitted bracket pair is the one we want to explode on. return True if _can_omit_closing_paren(line, last=last, line_length=line_length): return True return False def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool: """See `can_omit_invisible_parens`.""" remainder = False length = 4 * line.depth _index = -1 for _index, leaf, leaf_length in line.enumerate_with_length(): if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first: remainder = True if remainder: length += leaf_length if length > line_length: break if leaf.type in OPENING_BRACKETS: # There are brackets we can further split on. remainder = False else: # checked the entire string and line length wasn't exceeded if len(line.leaves) == _index + 1: return True return False def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool: """See `can_omit_invisible_parens`.""" length = 4 * line.depth seen_other_brackets = False for _index, leaf, leaf_length in line.enumerate_with_length(): length += leaf_length if leaf is last.opening_bracket: if seen_other_brackets or length <= line_length: return True elif leaf.type in OPENING_BRACKETS: # There are brackets we can further split on. seen_other_brackets = True return False def line_to_string(line: Line) -> str: """Returns the string representation of @line. WARNING: This is known to be computationally expensive. """ return str(line).strip("\n")