From 4ca4407b4adc49b96c9536b16ed7d0a1e0b2deca Mon Sep 17 00:00:00 2001 From: Bryan Bugyi Date: Sun, 30 May 2021 17:41:03 -0400 Subject: [PATCH] Make sure to split lines that start with a string operator (#2286) Fixes #2284 --- CHANGES.md | 1 + src/black/trans.py | 78 +++++++++++++++++------- tests/data/long_strings__regression.py | 84 ++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 63c7a2c..c761d14 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ - Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227) - Add extra uvloop install + import support if in python env (#2258) - Fix --experimental-string-processing crash when matching parens are not found (#2283) +- Make sure to split lines that start with a string operator (#2286) ### _Blackd_ diff --git a/src/black/trans.py b/src/black/trans.py index 169b675..80e88a2 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): lines by themselves). Requirements: - * The line consists ONLY of a single string (with the exception of a - '+' symbol which MAY exist at the start of the line), MAYBE a string - trailer, and MAYBE a trailing comma. + * The line consists ONLY of a single string (possibly prefixed by a + string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE + a trailing comma. AND * All of the requirements listed in BaseStringSplitter's docstring. @@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): CustomSplit objects and add them to the custom split map. """ + STRING_OPERATORS = [ + token.PLUS, + token.STAR, + token.EQEQUAL, + token.NOTEQUAL, + token.LESS, + token.LESSEQUAL, + token.GREATER, + token.GREATEREQUAL, + ] MIN_SUBSTR_SIZE = 6 # Matches an "f-expression" (e.g. {var}) that might be found in an f-string. RE_FEXPR = r""" @@ -972,8 +982,20 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): idx = 0 - # The first leaf MAY be a '+' symbol... - if is_valid_index(idx) and LL[idx].type == token.PLUS: + # The first two leaves MAY be the 'not in' keywords... + if ( + is_valid_index(idx) + and is_valid_index(idx + 1) + and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME] + and str(LL[idx]) + str(LL[idx + 1]) == "not in" + ): + idx += 2 + # Else the first leaf MAY be a string operator symbol or the 'in' keyword... + elif is_valid_index(idx) and ( + LL[idx].type in self.STRING_OPERATORS + or LL[idx].type == token.NAME + and str(LL[idx]) == "in" + ): idx += 1 # The next/first leaf MAY be an empty LPAR... @@ -1023,23 +1045,26 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): ) first_string_line = True - starts_with_plus = LL[0].type == token.PLUS - def line_needs_plus() -> bool: - return first_string_line and starts_with_plus + string_op_leaves = self._get_string_operator_leaves(LL) + string_op_leaves_length = ( + sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1 + if string_op_leaves + else 0 + ) - def maybe_append_plus(new_line: Line) -> None: + def maybe_append_string_operators(new_line: Line) -> None: """ Side Effects: - If @line starts with a plus and this is the first line we are - constructing, this function appends a PLUS leaf to @new_line - and replaces the old PLUS leaf in the node structure. Otherwise - this function does nothing. + If @line starts with a string operator and this is the first + line we are constructing, this function appends the string + operator to @new_line and replaces the old string operator leaf + in the node structure. Otherwise this function does nothing. """ - if line_needs_plus(): - plus_leaf = Leaf(token.PLUS, "+") - replace_child(LL[0], plus_leaf) - new_line.append(plus_leaf) + maybe_prefix_leaves = string_op_leaves if first_string_line else [] + for i, prefix_leaf in enumerate(maybe_prefix_leaves): + replace_child(LL[i], prefix_leaf) + new_line.append(prefix_leaf) ends_with_comma = ( is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA @@ -1054,7 +1079,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): result = self.line_length result -= line.depth * 4 result -= 1 if ends_with_comma else 0 - result -= 2 if line_needs_plus() else 0 + result -= string_op_leaves_length return result # --- Calculate Max Break Index (for string value) @@ -1103,7 +1128,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): break_idx = csplit.break_idx else: # Algorithmic Split (automatic) - max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx + max_bidx = max_break_idx - string_op_leaves_length maybe_break_idx = self._get_break_idx(rest_value, max_bidx) if maybe_break_idx is None: # If we are unable to algorithmically determine a good split @@ -1148,7 +1173,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): # --- Construct `next_line` next_line = line.clone() - maybe_append_plus(next_line) + maybe_append_string_operators(next_line) next_line.append(next_leaf) string_line_results.append(Ok(next_line)) @@ -1169,7 +1194,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): self._maybe_normalize_string_quotes(rest_leaf) last_line = line.clone() - maybe_append_plus(last_line) + maybe_append_string_operators(last_line) # If there are any leaves to the right of the target string... if is_valid_index(string_idx + 1): @@ -1345,6 +1370,17 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): else: return string + def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]: + LL = list(leaves) + + string_op_leaves = [] + i = 0 + while LL[i].type in self.STRING_OPERATORS + [token.NAME]: + prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip()) + string_op_leaves.append(prefix_leaf) + i += 1 + return string_op_leaves + class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter): """ diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py index 231d886..bd7b635 100644 --- a/tests/data/long_strings__regression.py +++ b/tests/data/long_strings__regression.py @@ -406,6 +406,40 @@ def _legacy_listen_examples(): } ) + +assert str(suffix_arr) == ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) != ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) <= ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) >= ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) < ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) > ( + "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" + # output @@ -906,3 +940,53 @@ def _legacy_listen_examples(): "since": since, } ) + + +assert ( + str(suffix_arr) + == "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + != "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + <= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + >= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + < "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + > "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', " + "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', " + "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']" +) +assert ( + str(suffix_arr) + in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$'," + " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$'," + " 'ykangaroo$']" +) +assert ( + str(suffix_arr) + not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$'," + " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$'," + " 'rykangaroo$', 'ykangaroo$']" +) -- 2.39.5