X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/3feff21eca0eee4b9fe72d12b506ac273cb5bcd0..a07871b9cd1b5a1469271be6aaac5766f5a0e0fc:/src/black/trans.py?ds=inline

diff --git a/src/black/trans.py b/src/black/trans.py
index ec07f5e..1e28ed0 100644
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -32,7 +32,7 @@ from mypy_extensions import trait
 
 from black.comments import contains_pragma_comment
 from black.lines import Line, append_leaves
-from black.mode import Feature
+from black.mode import Feature, Mode
 from black.nodes import (
     CLOSING_BRACKETS,
     OPENING_BRACKETS,
@@ -48,9 +48,11 @@ from black.nodes import (
 from black.rusty import Err, Ok, Result
 from black.strings import (
     assert_is_leaf_string,
+    count_chars_in_width,
     get_string_prefix,
     has_triple_quotes,
     normalize_string_quotes,
+    str_width,
 )
 from blib2to3.pgen2 import token
 from blib2to3.pytree import Leaf, Node
@@ -63,7 +65,7 @@ class CannotTransform(Exception):
 # types
 T = TypeVar("T")
 LN = Union[Leaf, Node]
-Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]]
+Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
 Index = int
 NodeType = int
 ParserState = int
@@ -71,6 +73,8 @@ StringID = int
 TResult = Result[T, CannotTransform]  # (T)ransform Result
 TMatchResult = TResult[List[Index]]
 
+SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
+
 
 def TErr(err_msg: str) -> Err[CannotTransform]:
     """(T)ransform Err
@@ -81,7 +85,9 @@ def TErr(err_msg: str) -> Err[CannotTransform]:
     return Err(cant_transform)
 
 
-def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+def hug_power_op(
+    line: Line, features: Collection[Feature], mode: Mode
+) -> Iterator[Line]:
     """A transformer which normalizes spacing around power operators."""
 
     # Performance optimization to avoid unnecessary Leaf clones and other ops.
@@ -228,7 +234,9 @@ class StringTransformer(ABC):
             yield an CannotTransform after that point.)
         """
 
-    def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
+    def __call__(
+        self, line: Line, _features: Collection[Feature], _mode: Mode
+    ) -> Iterator[Line]:
         """
         StringTransformer instances have a call signature that mirrors that of
         the Transformer type.
@@ -572,6 +580,12 @@ class StringMerger(StringTransformer, CustomSplitMapMixin):
                 characters have been escaped.
             """
             assert_is_leaf_string(string)
+            if "f" in string_prefix:
+                string = _toggle_fexpr_quotes(string, QUOTE)
+                # After quotes toggling, quotes in expressions won't be escaped
+                # because quotes can't be reused in f-strings. So we can simply
+                # let the escaping logic below run without knowing f-string
+                # expressions.
 
             RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
             naked_string = string[len(string_prefix) + 1 : -1]
@@ -1154,7 +1168,7 @@ class BaseStringSplitter(StringTransformer):
             # WMA4 the length of the inline comment.
             offset += len(comment_leaf.value)
 
-        max_string_length = self.line_length - offset
+        max_string_length = count_chars_in_width(str(line), self.line_length - offset)
         return max_string_length
 
     @staticmethod
@@ -1172,19 +1186,33 @@ class BaseStringSplitter(StringTransformer):
         if LL[0].type != token.STRING:
             return None
 
-        # If the string is surrounded by commas (or is the first/last child)...
-        prev_sibling = LL[0].prev_sibling
-        next_sibling = LL[0].next_sibling
-        if not prev_sibling and not next_sibling and parent_type(LL[0]) == syms.atom:
-            # If it's an atom string, we need to check the parent atom's siblings.
-            parent = LL[0].parent
-            assert parent is not None  # For type checkers.
-            prev_sibling = parent.prev_sibling
-            next_sibling = parent.next_sibling
-        if (not prev_sibling or prev_sibling.type == token.COMMA) and (
-            not next_sibling or next_sibling.type == token.COMMA
+        matching_nodes = [
+            syms.listmaker,
+            syms.dictsetmaker,
+            syms.testlist_gexp,
+        ]
+        # If the string is an immediate child of a list/set/tuple literal...
+        if (
+            parent_type(LL[0]) in matching_nodes
+            or parent_type(LL[0].parent) in matching_nodes
         ):
-            return 0
+            # And the string is surrounded by commas (or is the first/last child)...
+            prev_sibling = LL[0].prev_sibling
+            next_sibling = LL[0].next_sibling
+            if (
+                not prev_sibling
+                and not next_sibling
+                and parent_type(LL[0]) == syms.atom
+            ):
+                # If it's an atom string, we need to check the parent atom's siblings.
+                parent = LL[0].parent
+                assert parent is not None  # For type checkers.
+                prev_sibling = parent.prev_sibling
+                next_sibling = parent.next_sibling
+            if (not prev_sibling or prev_sibling.type == token.COMMA) and (
+                not next_sibling or next_sibling.type == token.COMMA
+            ):
+                return 0
 
         return None
 
@@ -1240,6 +1268,30 @@ def fstring_contains_expr(s: str) -> bool:
     return any(iter_fexpr_spans(s))
 
 
+def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
+    """
+    Toggles quotes used in f-string expressions that are `old_quote`.
+
+    f-string expressions can't contain backslashes, so we need to toggle the
+    quotes if the f-string itself will end up using the same quote. We can
+    simply toggle without escaping because, quotes can't be reused in f-string
+    expressions. They will fail to parse.
+
+    NOTE: If PEP 701 is accepted, above statement will no longer be true.
+    Though if quotes can be reused, we can simply reuse them without updates or
+    escaping, once Black figures out how to parse the new grammar.
+    """
+    new_quote = "'" if old_quote == '"' else '"'
+    parts = []
+    previous_index = 0
+    for start, end in iter_fexpr_spans(fstring):
+        parts.append(fstring[previous_index:start])
+        parts.append(fstring[start:end].replace(old_quote, new_quote))
+        previous_index = end
+    parts.append(fstring[previous_index:])
+    return "".join(parts)
+
+
 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
     """
     StringTransformer that splits "atom" strings (i.e. strings which exist on
@@ -1385,11 +1437,13 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
         )
 
-        def max_last_string() -> int:
+        def max_last_string_column() -> int:
             """
             Returns:
-                The max allowed length of the string value used for the last
-                line we will construct.
+                The max allowed width of the string value used for the last
+                line we will construct.  Note that this value means the width
+                rather than the number of characters (e.g., many East Asian
+                characters expand to two columns).
             """
             result = self.line_length
             result -= line.depth * 4
@@ -1397,14 +1451,14 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
             result -= string_op_leaves_length
             return result
 
-        # --- Calculate Max Break Index (for string value)
+        # --- Calculate Max Break Width (for string value)
         # We start with the line length limit
-        max_break_idx = self.line_length
+        max_break_width = self.line_length
         # The last index of a string of length N is N-1.
-        max_break_idx -= 1
+        max_break_width -= 1
         # Leading whitespace is not present in the string value (e.g. Leaf.value).
-        max_break_idx -= line.depth * 4
-        if max_break_idx < 0:
+        max_break_width -= line.depth * 4
+        if max_break_width < 0:
             yield TErr(
                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                 f" {line.depth}"
@@ -1417,7 +1471,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         # line limit.
         use_custom_breakpoints = bool(
             custom_splits
-            and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
+            and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
         )
 
         # Temporary storage for the remaining chunk of the string line that
@@ -1433,7 +1487,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
             if use_custom_breakpoints:
                 return len(custom_splits) > 1
             else:
-                return len(rest_value) > max_last_string()
+                return str_width(rest_value) > max_last_string_column()
 
         string_line_results: List[Ok[Line]] = []
         while more_splits_should_be_made():
@@ -1443,7 +1497,10 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
                 break_idx = csplit.break_idx
             else:
                 # Algorithmic Split (automatic)
-                max_bidx = max_break_idx - string_op_leaves_length
+                max_bidx = (
+                    count_chars_in_width(rest_value, max_break_width)
+                    - string_op_leaves_length
+                )
                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                 if maybe_break_idx is None:
                     # If we are unable to algorithmically determine a good split
@@ -1540,7 +1597,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
 
             # Try to fit them all on the same line with the last substring...
             if (
-                len(temp_value) <= max_last_string()
+                str_width(temp_value) <= max_last_string_column()
                 or LL[string_idx + 1].type == token.COMMA
             ):
                 last_line.append(rest_leaf)
@@ -1660,6 +1717,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
                 section of this classes' docstring would be be met by returning @i.
             """
             is_space = string[i] == " "
+            is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
 
             is_not_escaped = True
             j = i - 1
@@ -1672,7 +1730,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
             )
             return (
-                is_space
+                (is_space or is_split_safe)
                 and is_not_escaped
                 and is_big_enough
                 and not breaks_unsplittable_expression(i)
@@ -1767,8 +1825,9 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
         * The line is an lambda expression and the value is a string.
             OR
         * The line starts with an "atom" string that prefers to be wrapped in
-        parens. It's preferred to be wrapped when the string is surrounded by
-        commas (or is the first/last child).
+        parens. It's preferred to be wrapped when it's is an immediate child of
+        a list/set/tuple literal, AND the string is surrounded by commas (or is
+        the first/last child).
 
     Transformations:
         The chosen string is wrapped in parentheses and then split at the LPAR.
@@ -1817,11 +1876,13 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
 
         if string_idx is not None:
             string_value = line.leaves[string_idx].value
-            # If the string has no spaces...
-            if " " not in string_value:
+            # If the string has neither spaces nor East Asian stops...
+            if not any(
+                char == " " or char in SPLIT_SAFE_CHARS for char in string_value
+            ):
                 # And will still violate the line length limit when split...
-                max_string_length = self.line_length - ((line.depth + 1) * 4)
-                if len(string_value) > max_string_length:
+                max_string_width = self.line_length - ((line.depth + 1) * 4)
+                if str_width(string_value) > max_string_width:
                     # And has no associated custom splits...
                     if not self.has_custom_splits(string_value):
                         # Then we should NOT put this string on its own line.