Make sure to split lines that start with a string operator (#2286)

author Bryan Bugyi <bryanbugyi34@gmail.com>

Sun, 30 May 2021 21:41:03 +0000 (17:41 -0400)

committer GitHub <noreply@github.com>

Sun, 30 May 2021 21:41:03 +0000 (23:41 +0200)
author Bryan Bugyi <bryanbugyi34@gmail.com>
Sun, 30 May 2021 21:41:03 +0000 (17:41 -0400)
committer GitHub <noreply@github.com>
Sun, 30 May 2021 21:41:03 +0000 (23:41 +0200)
diff --git a/CHANGES.md b/CHANGES.md

index 63c7a2cf30deb81b9f3207a939b5218244d0cbcc..c761d14624acf1f8c68dae73d14408b4b71012c2 100644 (file)
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -11,6 +11,7 @@
  - Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
  - Add extra uvloop install + import support if in python env (#2258)
  - Fix --experimental-string-processing crash when matching parens are not found (#2283)
+- Make sure to split lines that start with a string operator (#2286)
  
  ### _Blackd_
  
diff --git a/src/black/trans.py b/src/black/trans.py

index 169b675be3d1a84dee8920ded3745556ca1061c9..80e88a2d2fb78d834ed281256b8b3dd97511a16a 100644 (file)
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
      lines by themselves).
  
      Requirements:
-        * The line consists ONLY of a single string (with the exception of a
-        '+' symbol which MAY exist at the start of the line), MAYBE a string
-        trailer, and MAYBE a trailing comma.
+        * The line consists ONLY of a single string (possibly prefixed by a
+        string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
+        a trailing comma.
              AND
          * All of the requirements listed in BaseStringSplitter's docstring.
  
@@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
          CustomSplit objects and add them to the custom split map.
      """
  
+    STRING_OPERATORS = [
+        token.PLUS,
+        token.STAR,
+        token.EQEQUAL,
+        token.NOTEQUAL,
+        token.LESS,
+        token.LESSEQUAL,
+        token.GREATER,
+        token.GREATEREQUAL,
+    ]
      MIN_SUBSTR_SIZE = 6
      # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
      RE_FEXPR = r"""
@@ -972,8 +982,20 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
  
          idx = 0
  
-        # The first leaf MAY be a '+' symbol...
-        if is_valid_index(idx) and LL[idx].type == token.PLUS:
+        # The first two leaves MAY be the 'not in' keywords...
+        if (
+            is_valid_index(idx)
+            and is_valid_index(idx + 1)
+            and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
+            and str(LL[idx]) + str(LL[idx + 1]) == "not in"
+        ):
+            idx += 2
+        # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
+        elif is_valid_index(idx) and (
+            LL[idx].type in self.STRING_OPERATORS
+            or LL[idx].type == token.NAME
+            and str(LL[idx]) == "in"
+        ):
              idx += 1
  
          # The next/first leaf MAY be an empty LPAR...
@@ -1023,23 +1045,26 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
          )
  
          first_string_line = True
-        starts_with_plus = LL[0].type == token.PLUS
  
-        def line_needs_plus() -> bool:
-            return first_string_line and starts_with_plus
+        string_op_leaves = self._get_string_operator_leaves(LL)
+        string_op_leaves_length = (
+            sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
+            if string_op_leaves
+            else 0
+        )
  
-        def maybe_append_plus(new_line: Line) -> None:
+        def maybe_append_string_operators(new_line: Line) -> None:
              """
              Side Effects:
-                If @line starts with a plus and this is the first line we are
-                constructing, this function appends a PLUS leaf to @new_line
-                and replaces the old PLUS leaf in the node structure. Otherwise
-                this function does nothing.
+                If @line starts with a string operator and this is the first
+                line we are constructing, this function appends the string
+                operator to @new_line and replaces the old string operator leaf
+                in the node structure. Otherwise this function does nothing.
              """
-            if line_needs_plus():
-                plus_leaf = Leaf(token.PLUS, "+")
-                replace_child(LL[0], plus_leaf)
-                new_line.append(plus_leaf)
+            maybe_prefix_leaves = string_op_leaves if first_string_line else []
+            for i, prefix_leaf in enumerate(maybe_prefix_leaves):
+                replace_child(LL[i], prefix_leaf)
+                new_line.append(prefix_leaf)
  
          ends_with_comma = (
              is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
@@ -1054,7 +1079,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
              result = self.line_length
              result -= line.depth * 4
              result -= 1 if ends_with_comma else 0
-            result -= 2 if line_needs_plus() else 0
+            result -= string_op_leaves_length
              return result
  
          # --- Calculate Max Break Index (for string value)
@@ -1103,7 +1128,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
                  break_idx = csplit.break_idx
              else:
                  # Algorithmic Split (automatic)
-                max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
+                max_bidx = max_break_idx - string_op_leaves_length
                  maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                  if maybe_break_idx is None:
                      # If we are unable to algorithmically determine a good split
@@ -1148,7 +1173,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
  
              # --- Construct `next_line`
              next_line = line.clone()
-            maybe_append_plus(next_line)
+            maybe_append_string_operators(next_line)
              next_line.append(next_leaf)
              string_line_results.append(Ok(next_line))
  
@@ -1169,7 +1194,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
          self._maybe_normalize_string_quotes(rest_leaf)
  
          last_line = line.clone()
-        maybe_append_plus(last_line)
+        maybe_append_string_operators(last_line)
  
          # If there are any leaves to the right of the target string...
          if is_valid_index(string_idx + 1):
@@ -1345,6 +1370,17 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
          else:
              return string
  
+    def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
+        LL = list(leaves)
+
+        string_op_leaves = []
+        i = 0
+        while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
+            prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
+            string_op_leaves.append(prefix_leaf)
+            i += 1
+        return string_op_leaves
+
  
  class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
      """
diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py

index 231d88651b656e6e5345537a9993c864facb27aa..bd7b635132175710c390db23f611214155d108d4 100644 (file)
--- a/tests/data/long_strings__regression.py
+++ b/tests/data/long_strings__regression.py
@@ -406,6 +406,40 @@ def _legacy_listen_examples():
          }
      )
  
+
+assert str(suffix_arr) == (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) != (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) <= (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) >= (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) < (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) > (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+
  # output
  
  
@@ -906,3 +940,53 @@ def _legacy_listen_examples():
              "since": since,
          }
      )
+
+
+assert (
+    str(suffix_arr)
+    == "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    != "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    <= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    >= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    < "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    > "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
+    " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
+    " 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
+    " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
+    " 'rykangaroo$', 'ykangaroo$']"
+)
author	Bryan Bugyi <bryanbugyi34@gmail.com>
	Sun, 30 May 2021 21:41:03 +0000 (17:41 -0400)
committer	GitHub <noreply@github.com>
	Sun, 30 May 2021 21:41:03 +0000 (23:41 +0200)
CHANGES.md		patch | blob | history
src/black/trans.py		patch | blob | history
tests/data/long_strings__regression.py		patch | blob | history