From aafc21aa77f5c4d2ebcb833aa60faba6c2138b94 Mon Sep 17 00:00:00 2001 From: "Yilei \"Dolee\" Yang" Date: Thu, 15 Dec 2022 15:58:51 -0800 Subject: [PATCH] Prefer splitting right hand side of assignment statements. (#3368) --- CHANGES.md | 2 + src/black/linegen.py | 122 +++++++++++++++--- src/black/mode.py | 1 + .../data/preview/long_strings__regression.py | 6 +- tests/data/preview/prefer_rhs_split.py | 85 ++++++++++++ .../preview/prefer_rhs_split_reformatted.py | 38 ++++++ 6 files changed, 236 insertions(+), 18 deletions(-) create mode 100644 tests/data/preview/prefer_rhs_split.py create mode 100644 tests/data/preview/prefer_rhs_split_reformatted.py diff --git a/CHANGES.md b/CHANGES.md index 03c7a28..e1ad5e1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -73,6 +73,8 @@ present) or as a single newline character (if a newline is present) (#3348) - Implicitly concatenated strings used as function args are now wrapped inside parentheses (#3307) +- For assignment statements, prefer splitting the right hand side if the left hand side + fits on a single line (#3368) - Correctly handle trailing commas that are inside a line's leading non-nested parens (#3370) diff --git a/src/black/linegen.py b/src/black/linegen.py index 244dbe7..9122374 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -2,6 +2,7 @@ Generating lines of code. """ import sys +from dataclasses import dataclass from enum import Enum, auto from functools import partial, wraps from typing import Collection, Iterator, List, Optional, Set, Union, cast @@ -24,6 +25,7 @@ from black.lines import ( from black.mode import Feature, Mode, Preview from black.nodes import ( ASSIGNMENTS, + BRACKETS, CLOSING_BRACKETS, OPENING_BRACKETS, RARROW, @@ -634,6 +636,17 @@ def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator yield result +@dataclass +class _RHSResult: + """Intermediate split result from a right hand split.""" + + head: Line + body: Line + tail: Line + opening_bracket: Leaf + closing_bracket: Leaf + + def right_hand_split( line: Line, line_length: int, @@ -648,6 +661,22 @@ def right_hand_split( Note: running this function modifies `bracket_depth` on the leaves of `line`. """ + rhs_result = _first_right_hand_split(line, omit=omit) + yield from _maybe_split_omitting_optional_parens( + rhs_result, line, line_length, features=features, omit=omit + ) + + +def _first_right_hand_split( + line: Line, + omit: Collection[LeafID] = (), +) -> _RHSResult: + """Split the line into head, body, tail starting with the last bracket pair. + + Note: this function should not have side effects. It's relied upon by + _maybe_split_omitting_optional_parens to get an opinion whether to prefer + splitting on the right side of an assignment statement. + """ tail_leaves: List[Leaf] = [] body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] @@ -683,37 +712,71 @@ def right_hand_split( tail_leaves, line, opening_bracket, component=_BracketSplitComponent.tail ) bracket_split_succeeded_or_raise(head, body, tail) + return _RHSResult(head, body, tail, opening_bracket, closing_bracket) + + +def _maybe_split_omitting_optional_parens( + rhs: _RHSResult, + line: Line, + line_length: int, + features: Collection[Feature] = (), + omit: Collection[LeafID] = (), +) -> Iterator[Line]: if ( Feature.FORCE_OPTIONAL_PARENTHESES not in features # the opening bracket is an optional paren - and opening_bracket.type == token.LPAR - and not opening_bracket.value + and rhs.opening_bracket.type == token.LPAR + and not rhs.opening_bracket.value # the closing bracket is an optional paren - and closing_bracket.type == token.RPAR - and not closing_bracket.value + and rhs.closing_bracket.type == token.RPAR + and not rhs.closing_bracket.value # it's not an import (optional parens are the only thing we can split on # in this case; attempting a split without them is a waste of time) and not line.is_import # there are no standalone comments in the body - and not body.contains_standalone_comments(0) + and not rhs.body.contains_standalone_comments(0) # and we can actually remove the parens - and can_omit_invisible_parens(body, line_length) + and can_omit_invisible_parens(rhs.body, line_length) ): - omit = {id(closing_bracket), *omit} + omit = {id(rhs.closing_bracket), *omit} try: - yield from right_hand_split(line, line_length, features=features, omit=omit) - return + # The _RHSResult Omitting Optional Parens. + rhs_oop = _first_right_hand_split(line, omit=omit) + if not ( + Preview.prefer_splitting_right_hand_side_of_assignments in line.mode + # the split is right after `=` + and len(rhs.head.leaves) >= 2 + and rhs.head.leaves[-2].type == token.EQUAL + # the left side of assignement contains brackets + and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]) + # the left side of assignment is short enough (the -1 is for the ending + # optional paren) + and is_line_short_enough(rhs.head, line_length=line_length - 1) + # the left side of assignment won't explode further because of magic + # trailing comma + and rhs.head.magic_trailing_comma is None + # the split by omitting optional parens isn't preferred by some other + # reason + and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length) + ): + yield from _maybe_split_omitting_optional_parens( + rhs_oop, line, line_length, features=features, omit=omit + ) + return except CannotSplit as e: if not ( - can_be_split(body) - or is_line_short_enough(body, line_length=line_length) + can_be_split(rhs.body) + or is_line_short_enough(rhs.body, line_length=line_length) ): raise CannotSplit( "Splitting failed, body is still too long and can't be split." ) from e - elif head.contains_multiline_strings() or tail.contains_multiline_strings(): + elif ( + rhs.head.contains_multiline_strings() + or rhs.tail.contains_multiline_strings() + ): raise CannotSplit( "The current optional pair of parentheses is bound to fail to" " satisfy the splitting algorithm because the head or the tail" @@ -721,13 +784,42 @@ def right_hand_split( " line." ) from e - ensure_visible(opening_bracket) - ensure_visible(closing_bracket) - for result in (head, body, tail): + ensure_visible(rhs.opening_bracket) + ensure_visible(rhs.closing_bracket) + for result in (rhs.head, rhs.body, rhs.tail): if result: yield result +def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool: + """ + Returns whether we should prefer the result from a split omitting optional parens. + """ + has_closing_bracket_after_assign = False + for leaf in reversed(rhs_oop.head.leaves): + if leaf.type == token.EQUAL: + break + if leaf.type in CLOSING_BRACKETS: + has_closing_bracket_after_assign = True + break + return ( + # contains matching brackets after the `=` (done by checking there is a + # closing bracket) + has_closing_bracket_after_assign + or ( + # the split is actually from inside the optional parens (done by checking + # the first line still contains the `=`) + any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves) + # the first line is short enough + and is_line_short_enough(rhs_oop.head, line_length=line_length) + ) + # contains unsplittable type ignore + or rhs_oop.head.contains_unsplittable_type_ignore() + or rhs_oop.body.contains_unsplittable_type_ignore() + or rhs_oop.tail.contains_unsplittable_type_ignore() + ) + + def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None: """Raise :exc:`CannotSplit` if the last left- or right-hand split failed. diff --git a/src/black/mode.py b/src/black/mode.py index bcd35b4..a104d1b 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -155,6 +155,7 @@ class Preview(Enum): long_docstring_quotes_on_newline = auto() normalize_docstring_quotes_and_prefixes_properly = auto() one_element_subscript = auto() + prefer_splitting_right_hand_side_of_assignments = auto() remove_block_trailing_newline = auto() remove_redundant_parens = auto() # NOTE: string_processing requires wrap_long_dict_values_in_parens diff --git a/tests/data/preview/long_strings__regression.py b/tests/data/preview/long_strings__regression.py index 6d56dcc..8b8fc17 100644 --- a/tests/data/preview/long_strings__regression.py +++ b/tests/data/preview/long_strings__regression.py @@ -983,9 +983,9 @@ class xxxxxxxxxxxxxxxxxxxxx(xxxx.xxxxxxxxxxxxx): ) -value.__dict__[ - key -] = "test" # set some Thrift field to non-None in the struct aa bb cc dd ee +value.__dict__[key] = ( + "test" # set some Thrift field to non-None in the struct aa bb cc dd ee +) RE_ONE_BACKSLASH = { "asdf_hjkl_jkl": re.compile( diff --git a/tests/data/preview/prefer_rhs_split.py b/tests/data/preview/prefer_rhs_split.py new file mode 100644 index 0000000..5b89113 --- /dev/null +++ b/tests/data/preview/prefer_rhs_split.py @@ -0,0 +1,85 @@ +first_item, second_item = ( + some_looooooooong_module.some_looooooooooooooong_function_name( + first_argument, second_argument, third_argument + ) +) + +some_dict["with_a_long_key"] = ( + some_looooooooong_module.some_looooooooooooooong_function_name( + first_argument, second_argument, third_argument + ) +) + +# Make sure it works when the RHS only has one pair of (optional) parens. +first_item, second_item = ( + some_looooooooong_module.SomeClass.some_looooooooooooooong_variable_name +) + +some_dict["with_a_long_key"] = ( + some_looooooooong_module.SomeClass.some_looooooooooooooong_variable_name +) + +# Make sure chaining assignments work. +first_item, second_item, third_item, forth_item = m["everything"] = ( + some_looooooooong_module.some_looooooooooooooong_function_name( + first_argument, second_argument, third_argument + ) +) + +# Make sure when the RHS's first split at the non-optional paren fits, +# we split there instead of the outer RHS optional paren. +first_item, second_item = some_looooooooong_module.some_loooooog_function_name( + first_argument, second_argument, third_argument +) + +( + first_item, + second_item, + third_item, + forth_item, + fifth_item, + last_item_very_loooooong, +) = some_looooooooong_module.some_looooooooooooooong_function_name( + first_argument, second_argument, third_argument +) + +( + first_item, + second_item, + third_item, + forth_item, + fifth_item, + last_item_very_loooooong, +) = everyting = some_loooooog_function_name( + first_argument, second_argument, third_argument +) + + +# Make sure unsplittable type ignore won't be moved. +some_kind_of_table[some_key] = util.some_function( # type: ignore # noqa: E501 + some_arg +).intersection(pk_cols) + +some_kind_of_table[ + some_key +] = lambda obj: obj.some_long_named_method() # type: ignore # noqa: E501 + +some_kind_of_table[ + some_key # type: ignore # noqa: E501 +] = lambda obj: obj.some_long_named_method() + + +# Make when when the left side of assignement plus the opening paren "... = (" is +# exactly line length limit + 1, it won't be split like that. +xxxxxxxxx_yyy_zzzzzzzz[ + xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxxx=1) +] = 1 + + +# Right side of assignment contains un-nested pairs of inner parens. +some_kind_of_instance.some_kind_of_map[a_key] = ( + isinstance(some_var, SomeClass) + and table.something_and_something != table.something_else +) or ( + isinstance(some_other_var, BaseClass) and table.something != table.some_other_thing +) diff --git a/tests/data/preview/prefer_rhs_split_reformatted.py b/tests/data/preview/prefer_rhs_split_reformatted.py new file mode 100644 index 0000000..781e75b --- /dev/null +++ b/tests/data/preview/prefer_rhs_split_reformatted.py @@ -0,0 +1,38 @@ +# Test cases separate from `prefer_rhs_split.py` that contains unformatted source. + +# Left hand side fits in a single line but will still be exploded by the +# magic trailing comma. +first_value, (m1, m2,), third_value = xxxxxx_yyyyyy_zzzzzz_wwwwww_uuuuuuu_vvvvvvvvvvv( + arg1, + arg2, +) + +# Make when when the left side of assignement plus the opening paren "... = (" is +# exactly line length limit + 1, it won't be split like that. +xxxxxxxxx_yyy_zzzzzzzz[xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxxx=1)] = 1 + + +# output + + +# Test cases separate from `prefer_rhs_split.py` that contains unformatted source. + +# Left hand side fits in a single line but will still be exploded by the +# magic trailing comma. +( + first_value, + ( + m1, + m2, + ), + third_value, +) = xxxxxx_yyyyyy_zzzzzz_wwwwww_uuuuuuu_vvvvvvvvvvv( + arg1, + arg2, +) + +# Make when when the left side of assignement plus the opening paren "... = (" is +# exactly line length limit + 1, it won't be split like that. +xxxxxxxxx_yyy_zzzzzzzz[ + xx.xxxxxx(x_yyy_zzzzzz.xxxxx[0]), x_yyy_zzzzzz.xxxxxx(xxxx=1) +] = 1 -- 2.39.5