# src/black/linegen.py
1 """
2 Generating lines of code.
3 """
4 from functools import partial, wraps
5 import sys
6 from typing import Collection, Iterator, List, Optional, Set, Union, cast
7
8 from black.nodes import WHITESPACE, RARROW, STATEMENT, STANDALONE_COMMENT
9 from black.nodes import ASSIGNMENTS, OPENING_BRACKETS, CLOSING_BRACKETS
10 from black.nodes import Visitor, syms, is_arith_like, ensure_visible
11 from black.nodes import (
12     is_docstring,
13     is_empty_tuple,
14     is_one_tuple,
15     is_one_sequence_between,
16 )
17 from black.nodes import is_name_token, is_lpar_token, is_rpar_token
18 from black.nodes import is_walrus_assignment, is_yield, is_vararg, is_multiline_string
19 from black.nodes import is_stub_suite, is_stub_body, is_atom_with_invisible_parens
20 from black.nodes import wrap_in_parentheses
21 from black.brackets import max_delimiter_priority_in_atom
22 from black.brackets import DOT_PRIORITY, COMMA_PRIORITY
23 from black.lines import Line, line_to_string, is_line_short_enough
24 from black.lines import can_omit_invisible_parens, can_be_split, append_leaves
25 from black.comments import generate_comments, list_comments, FMT_OFF
26 from black.numerics import normalize_numeric_literal
27 from black.strings import get_string_prefix, fix_docstring
28 from black.strings import normalize_string_prefix, normalize_string_quotes
29 from black.trans import Transformer, CannotTransform, StringMerger, StringSplitter
30 from black.trans import StringParenWrapper, StringParenStripper, hug_power_op
31 from black.mode import Mode, Feature, Preview
32
33 from blib2to3.pytree import Node, Leaf
34 from blib2to3.pgen2 import token
35
36
37 # types
38 LeafID = int
39 LN = Union[Leaf, Node]
40
41
42 class CannotSplit(CannotTransform):
43     """A readable split that fits the allotted line length is impossible."""
44
45
46 # This isn't a dataclass because @dataclass + Generic breaks mypyc.
47 # See also https://github.com/mypyc/mypyc/issues/827.
48 class LineGenerator(Visitor[Line]):
49     """Generates reformatted Line objects.  Empty lines are not emitted.
50
51     Note: destroys the tree it's visiting by mutating prefixes of its leaves
52     in ways that will no longer stringify to valid Python code on the tree.
53     """
54
55     def __init__(self, mode: Mode) -> None:
56         self.mode = mode
57         self.current_line: Line
58         self.__post_init__()
59
60     def line(self, indent: int = 0) -> Iterator[Line]:
61         """Generate a line.
62
63         If the line is empty, only emit if it makes sense.
64         If the line is too long, split it first and then generate.
65
66         If any lines were generated, set up a new current_line.
67         """
68         if not self.current_line:
69             self.current_line.depth += indent
70             return  # Line is empty, don't emit. Creating a new one unnecessary.
71
72         complete_line = self.current_line
73         self.current_line = Line(mode=self.mode, depth=complete_line.depth + indent)
74         yield complete_line
75
76     def visit_default(self, node: LN) -> Iterator[Line]:
77         """Default `visit_*()` implementation. Recurses to children of `node`."""
78         if isinstance(node, Leaf):
79             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
80             for comment in generate_comments(node, preview=self.mode.preview):
81                 if any_open_brackets:
82                     # any comment within brackets is subject to splitting
83                     self.current_line.append(comment)
84                 elif comment.type == token.COMMENT:
85                     # regular trailing comment
86                     self.current_line.append(comment)
87                     yield from self.line()
88
89                 else:
90                     # regular standalone comment
91                     yield from self.line()
92
93                     self.current_line.append(comment)
94                     yield from self.line()
95
96             normalize_prefix(node, inside_brackets=any_open_brackets)
97             if self.mode.string_normalization and node.type == token.STRING:
98                 node.value = normalize_string_prefix(node.value)
99                 node.value = normalize_string_quotes(node.value)
100             if node.type == token.NUMBER:
101                 normalize_numeric_literal(node)
102             if node.type not in WHITESPACE:
103                 self.current_line.append(node)
104         yield from super().visit_default(node)
105
106     def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
107         """Increase indentation level, maybe yield a line."""
108         # In blib2to3 INDENT never holds comments.
109         yield from self.line(+1)
110         yield from self.visit_default(node)
111
112     def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
113         """Decrease indentation level, maybe yield a line."""
114         # The current line might still wait for trailing comments.  At DEDENT time
115         # there won't be any (they would be prefixes on the preceding NEWLINE).
116         # Emit the line then.
117         yield from self.line()
118
119         # While DEDENT has no value, its prefix may contain standalone comments
120         # that belong to the current indentation level.  Get 'em.
121         yield from self.visit_default(node)
122
123         # Finally, emit the dedent.
124         yield from self.line(-1)
125
126     def visit_stmt(
127         self, node: Node, keywords: Set[str], parens: Set[str]
128     ) -> Iterator[Line]:
129         """Visit a statement.
130
131         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
132         `def`, `with`, `class`, `assert`, and assignments.
133
134         The relevant Python language `keywords` for a given statement will be
135         NAME leaves within it. This methods puts those on a separate line.
136
137         `parens` holds a set of string leaf values immediately after which
138         invisible parens should be put.
139         """
140         normalize_invisible_parens(node, parens_after=parens, preview=self.mode.preview)
141         for child in node.children:
142             if is_name_token(child) and child.value in keywords:
143                 yield from self.line()
144
145             yield from self.visit(child)
146
147     def visit_funcdef(self, node: Node) -> Iterator[Line]:
148         """Visit function definition."""
149         if Preview.annotation_parens not in self.mode:
150             yield from self.visit_stmt(node, keywords={"def"}, parens=set())
151         else:
152             yield from self.line()
153
154             # Remove redundant brackets around return type annotation.
155             is_return_annotation = False
156             for child in node.children:
157                 if child.type == token.RARROW:
158                     is_return_annotation = True
159                 elif is_return_annotation:
160                     if child.type == syms.atom and child.children[0].type == token.LPAR:
161                         if maybe_make_parens_invisible_in_atom(
162                             child,
163                             parent=node,
164                             remove_brackets_around_comma=False,
165                         ):
166                             wrap_in_parentheses(node, child, visible=False)
167                     else:
168                         wrap_in_parentheses(node, child, visible=False)
169                     is_return_annotation = False
170
171             for child in node.children:
172                 yield from self.visit(child)
173
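    # Illustration (my annotation, not part of upstream Black): with the
    # `annotation_parens` preview feature enabled, `visit_funcdef` above drops
    # redundant brackets around a return annotation, so a definition such as
    #
    #     def f() -> (int):
    #         ...
    #
    # would be reformatted to
    #
    #     def f() -> int:
    #         ...
    #
    # while annotations whose parentheses cannot safely be removed keep them.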
    def visit_match_case(self, node: Node) -> Iterator[Line]:
        """Visit either a match or case statement."""
        normalize_invisible_parens(node, parens_after=set(), preview=self.mode.preview)

        yield from self.line()
        for child in node.children:
            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.mode.is_pyi and is_stub_suite(node):
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        prev_type: Optional[int] = None
        for child in node.children:
            if (prev_type is None or prev_type == token.SEMI) and is_arith_like(child):
                wrap_in_parentheses(node, child, visible=False)
            prev_type = child.type

        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.mode.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if (
                not self.mode.is_pyi
                or not node.parent
                or not is_stub_suite(node.parent)
            ):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_power(self, node: Node) -> Iterator[Line]:
        for idx, leaf in enumerate(node.children[:-1]):
            next_leaf = node.children[idx + 1]

            if not isinstance(leaf, Leaf):
                continue

            value = leaf.value.lower()
            if (
                leaf.type == token.NUMBER
                and next_leaf.type == syms.trailer
                # Ensure that we are in an attribute trailer
                and next_leaf.children[0].type == token.DOT
                # It shouldn't wrap hexadecimal, binary and octal literals
                and not value.startswith(("0x", "0b", "0o"))
                # It shouldn't wrap complex literals
                and "j" not in value
            ):
                wrap_in_parentheses(node, leaf)

        if Preview.remove_redundant_parens in self.mode:
            remove_await_parens(node)

        yield from self.visit_default(node)

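    # Illustration (my annotation, not part of upstream Black): `visit_power`
    # above wraps a decimal number literal in parentheses when it is directly
    # followed by an attribute access, so
    #
    #     x = 10 .bit_length()
    #
    # would come out as
    #
    #     x = (10).bit_length()
    #
    # Hex/binary/octal and complex literals are left alone, since the dot
    # cannot be confused with a decimal point there.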
    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        if is_docstring(leaf) and "\\\n" not in leaf.value:
            # We're ignoring docstrings with backslash newline escapes because changing
            # indentation of those changes the AST representation of the code.
            if Preview.normalize_docstring_quotes_and_prefixes_properly in self.mode:
                # There was a bug where --skip-string-normalization wouldn't stop us
                # from normalizing docstring prefixes. To maintain stability, we can
                # only address this buggy behaviour while the preview style is enabled.
                if self.mode.string_normalization:
                    docstring = normalize_string_prefix(leaf.value)
                    # visit_default() does handle string normalization for us, but
                    # since this method acts differently depending on quote style (ex.
                    # see padding logic below), there's a possibility for unstable
                    # formatting as visit_default() is called *after*. To avoid a
                    # situation where this function formats a docstring differently on
                    # the second pass, normalize it early.
                    docstring = normalize_string_quotes(docstring)
                else:
                    docstring = leaf.value
            else:
                # ... otherwise, we'll keep the buggy behaviour >.<
                docstring = normalize_string_prefix(leaf.value)
            prefix = get_string_prefix(docstring)
            docstring = docstring[len(prefix) :]  # Remove the prefix
            quote_char = docstring[0]
            # A natural way to remove the outer quotes is to do:
            #   docstring = docstring.strip(quote_char)
            # but that breaks on """""x""" (which is '""x').
            # So we actually need to remove the first character and the next two
            # characters but only if they are the same as the first.
            quote_len = 1 if docstring[1] != quote_char else 3
            docstring = docstring[quote_len:-quote_len]
            docstring_started_empty = not docstring
            indent = " " * 4 * self.current_line.depth

            if is_multiline_string(leaf):
                docstring = fix_docstring(docstring, indent)
            else:
                docstring = docstring.strip()

            if docstring:
                # Add some padding if the docstring starts / ends with a quote mark.
                if docstring[0] == quote_char:
                    docstring = " " + docstring
                if docstring[-1] == quote_char:
                    docstring += " "
                if docstring[-1] == "\\":
                    backslash_count = len(docstring) - len(docstring.rstrip("\\"))
                    if backslash_count % 2:
                        # Odd number of trailing backslashes, add some padding to
                        # avoid escaping the closing string quote.
                        docstring += " "
            elif not docstring_started_empty:
                docstring = " "

            # We could enforce triple quotes at this point.
            quote = quote_char * quote_len

            # It's invalid to put closing single-character quotes on a new line.
            if Preview.long_docstring_quotes_on_newline in self.mode and quote_len == 3:
                # We need to find the length of the last line of the docstring
                # to find if we can add the closing quotes to the line without
                # exceeding the maximum line length.
                # If docstring is one line, then we need to add the length
                # of the indent, prefix, and starting quotes. Ending quotes are
                # handled later.
                lines = docstring.splitlines()
                last_line_length = len(lines[-1]) if docstring else 0

                if len(lines) == 1:
                    last_line_length += len(indent) + len(prefix) + quote_len

                # If adding closing quotes would cause the last line to exceed
                # the maximum line length then put a line break before the
                # closing quotes
                if last_line_length + quote_len > self.mode.line_length:
                    leaf.value = prefix + quote + docstring + "\n" + indent + quote
                else:
                    leaf.value = prefix + quote + docstring + quote
            else:
                leaf.value = prefix + quote + docstring + quote

        yield from self.visit_default(leaf)

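    # Illustration (my annotation, not part of upstream Black): `visit_STRING`
    # above re-indents docstrings and pads their edges so the quotes stay
    # unambiguous.  For example, a docstring whose content starts with the same
    # quote character as its delimiters, such as
    #
    #     def f():
    #         """"Quoted" word."""
    #
    # gets a space of padding after the opening quotes (""" "Quoted" word."""),
    # so the leading quote cannot merge with the opening triple quote; quote
    # style itself is normalized by normalize_string_quotes().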
    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        self.current_line = Line(mode=self.mode)

        v = self.visit_stmt
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        if self.mode.preview:
            self.visit_except_clause = partial(
                v, keywords={"except"}, parens={"except"}
            )
            self.visit_with_stmt = partial(v, keywords={"with"}, parens={"with"})
        else:
            self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
            self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators

        # PEP 634
        self.visit_match_stmt = self.visit_match_case
        self.visit_case_block = self.visit_match_case


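# Illustration (my annotation, not part of upstream Black): the base Visitor
# dispatches on a node's type name, so the partials bound in __post_init__
# above are what route grammar nodes to visit_stmt with the right keyword and
# paren sets.  Conceptually, visiting an `if` statement ends up calling
#
#     visit_stmt(node, keywords={"if", "else", "elif"}, parens={"if", "elif"})
#
# which emits a fresh Line before each of those keywords and asks
# normalize_invisible_parens() to manage the parentheses after "if"/"elif".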
def transform_line(
    line: Line, mode: Mode, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Transform a `line`, potentially splitting it into many lines.

    They should fit in the allotted `line_length` but might not be able to.

    `features` are syntactical features that may be used in the output.
    """
    if line.is_comment:
        yield line
        return

    line_str = line_to_string(line)

    ll = mode.line_length
    sn = mode.string_normalization
    string_merge = StringMerger(ll, sn)
    string_paren_strip = StringParenStripper(ll, sn)
    string_split = StringSplitter(ll, sn)
    string_paren_wrap = StringParenWrapper(ll, sn)

    transformers: List[Transformer]
    if (
        not line.contains_uncollapsable_type_comments()
        and not line.should_split_rhs
        and not line.magic_trailing_comma
        and (
            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
            or line.contains_unsplittable_type_ignore()
        )
        and not (line.inside_brackets and line.contains_standalone_comments())
    ):
        # Only apply basic string preprocessing, since lines shouldn't be split here.
        if Preview.string_processing in mode:
            transformers = [string_merge, string_paren_strip]
        else:
            transformers = []
    elif line.is_def:
        transformers = [left_hand_split]
    else:

        def _rhs(
            self: object, line: Line, features: Collection[Feature]
        ) -> Iterator[Line]:
            """Wraps calls to `right_hand_split`.

            The calls increasingly `omit` right-hand trailers (bracket pairs with
            content), meaning the trailers get glued together to split on another
            bracket pair instead.
            """
            for omit in generate_trailers_to_omit(line, mode.line_length):
                lines = list(
                    right_hand_split(line, mode.line_length, features, omit=omit)
                )
                # Note: this check is only able to figure out if the first line of the
                # *current* transformation fits in the line length.  This is true only
                # for simple cases.  All others require running more transforms via
                # `transform_line()`.  This check doesn't know if those would succeed.
                if is_line_short_enough(lines[0], line_length=mode.line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line, as well as lines that contain
            # trailing commas (those have to be exploded).
            yield from right_hand_split(
                line, line_length=mode.line_length, features=features
            )

        # HACK: nested functions (like _rhs) compiled by mypyc don't retain their
        # __name__ attribute which is needed in `run_transformer` further down.
        # Unfortunately a nested class breaks mypyc too. So a class must be created
        # via type ... https://github.com/mypyc/mypyc/issues/884
        rhs = type("rhs", (), {"__call__": _rhs})()

        if Preview.string_processing in mode:
            if line.inside_brackets:
                transformers = [
                    string_merge,
                    string_paren_strip,
                    string_split,
                    delimiter_split,
                    standalone_comment_split,
                    string_paren_wrap,
                    rhs,
                ]
            else:
                transformers = [
                    string_merge,
                    string_paren_strip,
                    string_split,
                    string_paren_wrap,
                    rhs,
                ]
        else:
            if line.inside_brackets:
                transformers = [delimiter_split, standalone_comment_split, rhs]
            else:
                transformers = [rhs]
    # It's always safe to attempt hugging of power operations and pretty much every line
    # could match.
    transformers.append(hug_power_op)

    for transform in transformers:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        try:
            result = run_transformer(line, transform, mode, features, line_str=line_str)
        except CannotTransform:
            continue
        else:
            yield from result
            break

    else:
        yield line


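# Illustration (my annotation, not part of upstream Black): transform_line()
# above tries each transformer in order and commits to the first one that
# succeeds, so for a too-long call at module level (not inside brackets and
# not a `def`) the chain is essentially the right-hand-split wrapper plus
# hug_power_op.  A line like
#
#     result = some_function(argument_one, argument_two, argument_three_long)
#
# would therefore be re-emitted by the rhs transformer roughly as
#
#     result = some_function(
#         argument_one, argument_two, argument_three_long
#     )
#
# (or exploded further by a later recursive transform_line() pass if that
# body is still too long).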
def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket: Optional[Leaf] = None
    for leaf in line.leaves:
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
            and isinstance(matching_bracket, Leaf)
        ):
            ensure_visible(leaf)
            ensure_visible(matching_bracket)
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        if current_leaves is head_leaves:
            if leaf.type in OPENING_BRACKETS:
                matching_bracket = leaf
                current_leaves = body_leaves
    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result


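# Illustration (my annotation, not part of upstream Black): for a long
# signature, left_hand_split() splits at the *first* bracket pair, producing a
# head up to and including "(", an indented body with the parameters, and a
# tail starting at ")".  Roughly:
#
#     def frobnicate(first_argument, second_argument, third_argument) -> None:
#
# becomes the three lines
#
#     def frobnicate(
#         first_argument, second_argument, third_argument
#     ) -> None:
#
# which is why it is only used for `def` lines in transform_line() above.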
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        Feature.FORCE_OPTIONAL_PARENTHESES not in features
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit as e:
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                ) from e

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to"
                    " satisfy the splitting algorithm because the head or the tail"
                    " contains multiline strings which by definition never fit one"
                    " line."
                ) from e

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result


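# Illustration (my annotation, not part of upstream Black): right_hand_split()
# works from the *last* bracket pair, so a statement wrapped in invisible
# optional parentheses, e.g. a long return of the form
#
#     return first_part_of_expression + second_part_of_expression + third_part
#
# is first split on the invisible parens into "return (", the expression body,
# and ")"; the recursive call with `omit` then checks whether those optional
# parens can be dropped again, which is how Black decides between emitting
#
#     return (
#         first_part_of_expression + second_part_of_expression + third_part
#     )
#
# and keeping the split on some real bracket further to the right.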
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        elif tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save {tail_len} characters is"
                " not worth it"
            )


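# Illustration (my annotation, not part of upstream Black): this guard is what
# keeps splits like
#
#     frobnicate(
#     )
#
# from being emitted for `frobnicate()`: the body is empty and the tail is a
# single ")", so the candidate split is rejected with CannotSplit and the call
# stays on one line.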
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Return a new line with given `leaves` and respective comments from `original`.

    If `is_body` is True, the result line is one-indented inside brackets and as such
    has its first leaf's prefix normalized and a trailing comma added when expected.
    """
    result = Line(mode=original.mode, depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1
        if leaves:
            # Since body is a new indent level, remove spurious leading whitespace.
            normalize_prefix(leaves[0], inside_brackets=True)
            # Ensure a trailing comma for imports and standalone function arguments, but
            # be careful not to add one after any comments or within type annotations.
            no_commas = (
                original.is_def
                and opening_bracket.value == "("
                and not any(leaf.type == token.COMMA for leaf in leaves)
                # In particular, don't add one within a parenthesized return annotation.
                # Unfortunately the indicator we're in a return annotation (RARROW) may
                # be defined directly in the parent node, the parent of the parent ...
                # and so on depending on how complex the return annotation is.
                # This isn't perfect and there are some false negatives, but they are
                # in contexts where a comma is actually fine.
                and not any(
                    node.prev_sibling.type == RARROW
                    for node in (
                        leaves[0].parent,
                        getattr(leaves[0].parent, "parent", None),
                    )
                    if isinstance(node, Node) and isinstance(node.prev_sibling, Leaf)
                )
            )

            if original.is_import or no_commas:
                for i in range(len(leaves) - 1, -1, -1):
                    if leaves[i].type == STANDALONE_COMMENT:
                        continue

                    if leaves[i].type != token.COMMA:
                        new_comma = Leaf(token.COMMA, ",")
                        leaves.insert(i + 1, new_comma)
                    break

    # Populate the line
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for comment_after in original.comments_after(leaf):
            result.append(comment_after, preformatted=True)
    if is_body and should_split_line(result, opening_bracket):
        result.should_split_rhs = True
    return result


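# Illustration (my annotation, not part of upstream Black): the trailing-comma
# logic above is what turns a split import such as
#
#     from package import name_one, name_two, name_three
#
# into
#
#     from package import (
#         name_one,
#         name_two,
#         name_three,
#     )
#
# The comma is appended to the body here, and should_split_line() then flags
# the body for further explosion (imports are always exploded once split).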
def dont_increase_indentation(split_func: Transformer) -> Transformer:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for split_line in split_func(line, features):
            normalize_prefix(split_line.leaves[0], inside_brackets=True)
            yield split_line

    return split_wrapper


@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty") from None

    bt = line.bracket_tracker
    try:
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found") from None

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(
        mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
    )
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(
                mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
            )
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth:
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(
                mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
            )
    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            new_comma = Leaf(token.COMMA, ",")
            current_line.append(new_comma)
        yield current_line


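# Illustration (my annotation, not part of upstream Black): when a bracketed
# body still doesn't fit, delimiter_split() breaks it on its highest-priority
# delimiter (commas, here) and appends a trailing comma when that is safe for
# the target Python versions, so the body of
#
#     some_function(
#         first_argument, second_argument, third_argument, keyword=value
#     )
#
# is exploded into
#
#     some_function(
#         first_argument,
#         second_argument,
#         third_argument,
#         keyword=value,
#     )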
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Split standalone comments from the rest of the line."""
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    current_line = Line(
        mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
    )

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(
                line.mode, depth=line.depth, inside_brackets=line.inside_brackets
            )
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

    if current_line:
        yield current_line


def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if not inside_brackets:
        spl = leaf.prefix.split("#")
        if "\\" not in spl[0]:
            nl_count = spl[-1].count("\n")
            if len(spl) > 1:
                nl_count -= 1
            leaf.prefix = "\n" * nl_count
            return

    leaf.prefix = ""


def normalize_invisible_parens(
    node: Node, parens_after: Set[str], *, preview: bool
) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """
    for pc in list_comments(node.prefix, is_endmarker=False, preview=preview):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return
    check_lpar = False
    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(
                child, parens_after=parens_after, preview=preview
            )

        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if (
                preview
                and child.type == syms.atom
                and node.type == syms.for_stmt
                and isinstance(child.prev_sibling, Leaf)
                and child.prev_sibling.type == token.NAME
                and child.prev_sibling.value == "for"
            ):
                if maybe_make_parens_invisible_in_atom(
                    child,
                    parent=node,
                    remove_brackets_around_comma=True,
                ):
                    wrap_in_parentheses(node, child, visible=False)
            elif preview and isinstance(child, Node) and node.type == syms.with_stmt:
                remove_with_parens(child, node)
            elif child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(
                    child,
                    parent=node,
                ):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if is_lpar_token(child):
                    assert is_rpar_token(node.children[-1])
                    # make parentheses invisible
                    child.value = ""
                    node.children[-1].value = ""
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break
            elif (
                index == 1
                and child.type == token.STAR
                and node.type == syms.except_clause
            ):
                # In except* (PEP 654), the star is actually part of
                # the keyword. So we need to skip the insertion of
                # invisible parentheses to work more precisely.
                continue

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                wrap_in_parentheses(node, child, visible=False)

        comma_check = child.type == token.COMMA if preview else False

        check_lpar = isinstance(child, Leaf) and (
            child.value in parens_after or comma_check
        )


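# Illustration (my annotation, not part of upstream Black): invisible parens
# are real LPAR/RPAR leaves whose value is the empty string, so they stringify
# to nothing but still give the splitters a bracket pair to work with.  After
# normalize_invisible_parens() runs on a return statement, roughly:
#
#     return (x)      ->  return x        (optional parens made invisible)
#     return 1,       ->  return (1,)     (one-tuples get visible parens)
#     return a, b, c  ->  return a, b, c  (wrapped in invisible parens so the
#                                          tuple can still be split later)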
def remove_await_parens(node: Node) -> None:
    if node.children[0].type == token.AWAIT and len(node.children) > 1:
        if (
            node.children[1].type == syms.atom
            and node.children[1].children[0].type == token.LPAR
        ):
            if maybe_make_parens_invisible_in_atom(
                node.children[1],
                parent=node,
                remove_brackets_around_comma=True,
            ):
                wrap_in_parentheses(node, node.children[1], visible=False)

            # Since await is an expression we shouldn't remove
            # brackets in cases where this would change
            # the AST due to operator precedence.
            # Therefore we only aim to remove brackets around
            # power nodes that aren't also await expressions themselves.
            # https://peps.python.org/pep-0492/#updated-operator-precedence-table
            # N.B. We've still removed any redundant nested brackets though :)
            opening_bracket = cast(Leaf, node.children[1].children[0])
            closing_bracket = cast(Leaf, node.children[1].children[-1])
            bracket_contents = cast(Node, node.children[1].children[1])
            if bracket_contents.type != syms.power:
                ensure_visible(opening_bracket)
                ensure_visible(closing_bracket)
            elif (
                bracket_contents.type == syms.power
                and bracket_contents.children[0].type == token.AWAIT
            ):
                ensure_visible(opening_bracket)
                ensure_visible(closing_bracket)
                # If we are in a nested await then recurse down.
                remove_await_parens(bracket_contents)


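# Illustration (my annotation, not part of upstream Black): under the
# remove_redundant_parens preview feature, parentheses after `await` are only
# dropped when their contents are a power node (a call, subscript, or
# attribute chain), because anything else could change evaluation order:
#
#     await (fetch(url))     ->  await fetch(url)
#     await (value + other)  stays as written (not a power node)
#     await (await inner())  keeps its parens; the inner await is handled
#                            by the recursive call above.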
def remove_with_parens(node: Node, parent: Node) -> None:
    """Recursively hide optional parens in `with` statements."""
    # Removing all unnecessary parentheses in with statements in one pass is a tad
    # complex as different variations of bracketed statements result in pretty
    # different parse trees:
    #
    # with (open("file")) as f:                       # this is an asexpr_test
    #     ...
    #
    # with (open("file") as f):                       # this is an atom containing an
    #     ...                                         # asexpr_test
    #
    # with (open("file")) as f, (open("file")) as f:  # this is asexpr_test, COMMA,
    #     ...                                         # asexpr_test
    #
    # with (open("file") as f, open("file") as f):    # an atom containing a
    #     ...                                         # testlist_gexp which then
    #                                                 # contains multiple asexpr_test(s)
    if node.type == syms.atom:
        if maybe_make_parens_invisible_in_atom(
            node,
            parent=parent,
            remove_brackets_around_comma=True,
        ):
            wrap_in_parentheses(parent, node, visible=False)
        if isinstance(node.children[1], Node):
            remove_with_parens(node.children[1], node)
    elif node.type == syms.testlist_gexp:
        for child in node.children:
            if isinstance(child, Node):
                remove_with_parens(child, node)
    elif node.type == syms.asexpr_test and not any(
        leaf.type == token.COLONEQUAL for leaf in node.leaves()
    ):
        if maybe_make_parens_invisible_in_atom(
            node.children[0],
            parent=node,
            remove_brackets_around_comma=True,
        ):
            wrap_in_parentheses(node, node.children[0], visible=False)


def maybe_make_parens_invisible_in_atom(
    node: LN,
    parent: LN,
    remove_brackets_around_comma: bool = False,
) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.
    Additionally, remove repeated, adjacent invisible parens from the atom `node`
    as they are redundant.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    if (
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or (is_yield(node) and parent.type != syms.expr_stmt)
        or (
            # This condition tries to prevent removing non-optional brackets
            # around a tuple; however, it can be a bit overzealous, so we provide
            # an option to skip this check for `for` and `with` statements.
            not remove_brackets_around_comma
            and max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
        )
    ):
        return False

    if is_walrus_assignment(node):
        if parent.type in [
            syms.annassign,
            syms.expr_stmt,
            syms.assert_stmt,
            syms.return_stmt,
            # these ones aren't useful to end users, but they do please fuzzers
            syms.for_stmt,
            syms.del_stmt,
        ]:
            return False

    first = node.children[0]
    last = node.children[-1]
    if is_lpar_token(first) and is_rpar_token(last):
        middle = node.children[1]
        # make parentheses invisible
        first.value = ""
        last.value = ""
        maybe_make_parens_invisible_in_atom(
            middle,
            parent=parent,
            remove_brackets_around_comma=remove_brackets_around_comma,
        )

        if is_atom_with_invisible_parens(middle):
            # Strip the invisible parens from `middle` by replacing
            # it with the child in-between the invisible parens
            middle.replace(middle.children[1])

        return False

    return True


def should_split_line(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` be immediately split with `delimiter_split()` after RHS?"""

    if not (opening_bracket.parent and opening_bracket.value in "[{("):
        return False

    # We're essentially checking if the body is delimited by commas and there's more
    # than one of them (we're excluding the trailing comma and if the delimiter priority
    # is still commas, that means there's more).
    exclude = set()
    trailing_comma = False
    try:
        last_leaf = line.leaves[-1]
        if last_leaf.type == token.COMMA:
            trailing_comma = True
            exclude.add(id(last_leaf))
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        return False

    return max_priority == COMMA_PRIORITY and (
        (line.mode.magic_trailing_comma and trailing_comma)
        # always explode imports
        or opening_bracket.parent.type in {syms.atom, syms.import_from}
    )


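# Illustration (my annotation, not part of upstream Black): the magic trailing
# comma check above is part of why a collection that would otherwise fit on
# one line, e.g.
#
#     names = ["alice", "bob",]
#
# is still exploded to
#
#     names = [
#         "alice",
#         "bob",
#     ]
#
# once the user has left a trailing comma and more than one comma-delimited
# element is present (unless the magic trailing comma is disabled in Mode).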
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty, unless the line should explode, in which case bracket pairs until
    the one that needs to explode are omitted.
    """

    omit: Set[LeafID] = set()
    if not line.magic_trailing_comma:
        yield omit

    length = 4 * line.depth
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    inner_brackets: Set[LeafID] = set()
    for index, leaf, leaf_length in line.enumerate_with_length(reversed=True):
        length += leaf_length
        if length > line_length:
            break

        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                prev = line.leaves[index - 1] if index > 0 else None
                if (
                    prev
                    and prev.type == token.COMMA
                    and leaf.opening_bracket is not None
                    and not is_one_sequence_between(
                        leaf.opening_bracket, leaf, line.leaves
                    )
                ):
                    # Never omit bracket pairs with trailing commas.
                    # We need to explode on those.
                    break

                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            prev = line.leaves[index - 1] if index > 0 else None
            if prev and prev.type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough).
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            if (
                prev
                and prev.type == token.COMMA
                and leaf.opening_bracket is not None
                and not is_one_sequence_between(leaf.opening_bracket, leaf, line.leaves)
            ):
                # Never omit bracket pairs with trailing commas.
                # We need to explode on those.
                break

            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf


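# Illustration (my annotation, not part of upstream Black): for a hypothetical
# chained call such as
#
#     value = config.get_section("server").get_option("port").as_int()
#
# the generator above first yields an empty omit set (so right_hand_split()
# tries the very last bracket pair), then progressively adds trailing bracket
# pairs that fit on one line, letting the rhs wrapper in transform_line()
# retry the split on an earlier bracket pair instead.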
def run_transformer(
    line: Line,
    transform: Transformer,
    mode: Mode,
    features: Collection[Feature],
    *,
    line_str: str = "",
) -> List[Line]:
    if not line_str:
        line_str = line_to_string(line)
    result: List[Line] = []
    for transformed_line in transform(line, features):
        if str(transformed_line).strip("\n") == line_str:
            raise CannotTransform("Line transformer returned an unchanged result")

        result.extend(transform_line(transformed_line, mode=mode, features=features))

    if (
        transform.__class__.__name__ != "rhs"
        or not line.bracket_tracker.invisible
        or any(bracket.value for bracket in line.bracket_tracker.invisible)
        or line.contains_multiline_strings()
        or result[0].contains_uncollapsable_type_comments()
        or result[0].contains_unsplittable_type_ignore()
        or is_line_short_enough(result[0], line_length=mode.line_length)
        # If any leaves have no parents (which _can_ occur since
        # `transform(line)` potentially destroys the line's underlying node
        # structure), then we can't proceed. Doing so would cause the below
        # call to `append_leaves()` to fail.
        or any(leaf.parent is None for leaf in line.leaves)
    ):
        return result

    line_copy = line.clone()
    append_leaves(line_copy, line, line.leaves)
    features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
    second_opinion = run_transformer(
        line_copy, transform, mode, features_fop, line_str=line_str
    )
    if all(
        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
    ):
        result = second_opinion
    return result