git.madduck.net Git - etc/vim.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Support named escapes (`\N{...}`) in string processing (#2319)
author jack1142 <6032823+jack1142@users.noreply.github.com>
Wed, 9 Jun 2021 19:29:32 +0000 (21:29 +0200)
committer GitHub <noreply@github.com>
Wed, 9 Jun 2021 19:29:32 +0000 (12:29 -0700)
Co-authored-by: Felix Hildén <felix.hilden@gmail.com>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
CHANGES.md
src/black/trans.py
tests/data/long_strings.py
tests/data/long_strings__regression.py

index 9c2939e1b1bd6e3aaa5edbaa86726aa44a71c605..01c02fe2b70a30cc8b59466ef60b064bbe7fbb19 100644 (file)
@@ -13,6 +13,8 @@
 - Fix incorrect custom breakpoint indices when string group contains fake f-strings
   (#2311)
 - Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
+- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is
+  used (#2319)
 
 ## 21.5b2
 
index ca620f6b2a5dd4236d5cc45e380e03604380b8e1..023dcd3618a5c0edb6b6f5e7e91e09bb6d3add30 100644 (file)
@@ -15,6 +15,7 @@ from typing import (
     List,
     Optional,
     Sequence,
+    Set,
     Tuple,
     TypeVar,
     Union,
@@ -1243,6 +1244,61 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
             last_line.comments = line.comments.copy()
             yield Ok(last_line)
 
+    def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+        """
+        Yields:
+            All ranges of @string which, if @string were to be split there,
+            would result in the splitting of an \\N{...} expression (which is NOT
+            allowed).
+        """
+        # True - the previous backslash was unescaped
+        # False - the previous backslash was escaped *or* there was no backslash
+        previous_was_unescaped_backslash = False
+        it = iter(enumerate(string))
+        for idx, c in it:
+            if c == "\\":
+                previous_was_unescaped_backslash = not previous_was_unescaped_backslash
+                continue
+            if not previous_was_unescaped_backslash or c != "N":
+                previous_was_unescaped_backslash = False
+                continue
+            previous_was_unescaped_backslash = False
+
+            begin = idx - 1  # the position of backslash before \N{...}
+            for idx, c in it:
+                if c == "}":
+                    end = idx
+                    break
+            else:
+                # malformed nameescape expression?
+                # should have been detected by AST parsing earlier...
+                raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
+            yield begin, end
+
+    def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+        """
+        Yields:
+            All ranges of @string which, if @string were to be split there,
+            would result in the splitting of an f-expression (which is NOT
+            allowed).
+        """
+        if "f" not in get_string_prefix(string).lower():
+            return
+
+        for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
+            yield match.span()
+
+    def _get_illegal_split_indices(self, string: str) -> Set[Index]:
+        illegal_indices: Set[Index] = set()
+        iterators = [
+            self._iter_fexpr_slices(string),
+            self._iter_nameescape_slices(string),
+        ]
+        for it in iterators:
+            for begin, end in it:
+                illegal_indices.update(range(begin, end + 1))
+        return illegal_indices
+
     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
         """
         This method contains the algorithm that StringSplitter uses to
@@ -1272,40 +1328,15 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         assert is_valid_index(max_break_idx)
         assert_is_leaf_string(string)
 
-        _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
-
-        def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
-            """
-            Yields:
-                All ranges of @string which, if @string were to be split there,
-                would result in the splitting of an f-expression (which is NOT
-                allowed).
-            """
-            nonlocal _fexpr_slices
-
-            if _fexpr_slices is None:
-                _fexpr_slices = []
-                for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
-                    _fexpr_slices.append(match.span())
-
-            yield from _fexpr_slices
-
-        is_fstring = "f" in get_string_prefix(string).lower()
+        _illegal_split_indices = self._get_illegal_split_indices(string)
 
-        def breaks_fstring_expression(i: Index) -> bool:
+        def breaks_unsplittable_expression(i: Index) -> bool:
             """
             Returns:
                 True iff returning @i would result in the splitting of an
-                f-expression (which is NOT allowed).
+                unsplittable expression (which is NOT allowed).
             """
-            if not is_fstring:
-                return False
-
-            for (start, end) in fexpr_slices():
-                if start <= i < end:
-                    return True
-
-            return False
+            return i in _illegal_split_indices
 
         def passes_all_checks(i: Index) -> bool:
             """
@@ -1329,7 +1360,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
                 is_space
                 and is_not_escaped
                 and is_big_enough
-                and not breaks_fstring_expression(i)
+                and not breaks_unsplittable_expression(i)
             )
 
         # First, we check all indices BELOW @max_break_idx.
index 151396b5239ee2a99921f7d5460c6bf2dfeeaf7a..430f760cf0b9dc9a8490974198904b4ff01195ba 100644 (file)
@@ -207,6 +207,38 @@ long_unmergable_string_with_pragma = (
     " of it."
 )
 
+string_with_nameescape = (
+    "........................................................................ \N{LAO KO LA}"
+)
+
+string_with_nameescape = (
+    "........................................................................... \N{LAO KO LA}"
+)
+
+string_with_nameescape = (
+    "............................................................................ \N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    "...................................................................... \\\N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    "......................................................................... \\\N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    ".......................................................................... \\\N{LAO KO LA}"
+)
+
+string_with_escaped_nameescape = (
+    "........................................................................ \\N{LAO KO LA}"
+)
+
+string_with_escaped_nameescape = (
+    "........................................................................... \\N{LAO KO LA}"
+)
+
 
 # output
 
@@ -587,3 +619,43 @@ long_unmergable_string_with_pragma = (
     "This is a really long string that can't be merged because it has a likely pragma at the end"  # pylint: disable=some-pylint-check
     " of it."
 )
+
+string_with_nameescape = (
+    "........................................................................"
+    " \N{LAO KO LA}"
+)
+
+string_with_nameescape = (
+    "..........................................................................."
+    " \N{LAO KO LA}"
+)
+
+string_with_nameescape = (
+    "............................................................................"
+    " \N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    "......................................................................"
+    " \\\N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    "........................................................................."
+    " \\\N{LAO KO LA}"
+)
+
+string_with_nameescape_and_escaped_backslash = (
+    ".........................................................................."
+    " \\\N{LAO KO LA}"
+)
+
+string_with_escaped_nameescape = (
+    "........................................................................ \\N{LAO"
+    " KO LA}"
+)
+
+string_with_escaped_nameescape = (
+    "..........................................................................."
+    " \\N{LAO KO LA}"
+)
index e4234b2f97c160d98e11e8e2127d94c37d9e2d75..61c28d376ef8519fba80d286f711d735fb827aff 100644 (file)
@@ -514,6 +514,10 @@ fstring = F"f-strings definitely make things more {difficult} than they need to
 
 x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."
 
+x = (
+    "\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
+)
+
 
 # output
 
@@ -1142,3 +1146,7 @@ x = (
     "This is a long string which contains an f-expr that should not split"
     f" {{{[i for i in range(5)]}}}."
 )
+
+x = (
+    "\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
+)