From: jack1142 <6032823+jack1142@users.noreply.github.com> Date: Wed, 9 Jun 2021 19:29:32 +0000 (+0200) Subject: Support named escapes (`\N{...}`) in string processing (#2319) X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/62402a32618bc62ae90cfcdc3d47c7ad20e60e10?ds=inline Support named escapes (`\N{...}`) in string processing (#2319) Co-authored-by: Felix Hildén Co-authored-by: Jelle Zijlstra --- diff --git a/CHANGES.md b/CHANGES.md index 9c2939e..01c02fe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,8 @@ - Fix incorrect custom breakpoint indices when string group contains fake f-strings (#2311) - Fix regression where `R` prefixes would be lowercased for docstrings (#2285) +- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is + used (#2319) ## 21.5b2 diff --git a/src/black/trans.py b/src/black/trans.py index ca620f6..023dcd3 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -15,6 +15,7 @@ from typing import ( List, Optional, Sequence, + Set, Tuple, TypeVar, Union, @@ -1243,6 +1244,61 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): last_line.comments = line.comments.copy() yield Ok(last_line) + def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]: + """ + Yields: + All ranges of @string which, if @string were to be split there, + would result in the splitting of an \\N{...} expression (which is NOT + allowed). + """ + # True - the previous backslash was unescaped + # False - the previous backslash was escaped *or* there was no backslash + previous_was_unescaped_backslash = False + it = iter(enumerate(string)) + for idx, c in it: + if c == "\\": + previous_was_unescaped_backslash = not previous_was_unescaped_backslash + continue + if not previous_was_unescaped_backslash or c != "N": + previous_was_unescaped_backslash = False + continue + previous_was_unescaped_backslash = False + + begin = idx - 1 # the position of backslash before \N{...} + for idx, c in it: + if c == "}": + end = idx + break + else: + # malformed nameescape expression? + # should have been detected by AST parsing earlier... + raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!") + yield begin, end + + def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]: + """ + Yields: + All ranges of @string which, if @string were to be split there, + would result in the splitting of an f-expression (which is NOT + allowed). + """ + if "f" not in get_string_prefix(string).lower(): + return + + for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE): + yield match.span() + + def _get_illegal_split_indices(self, string: str) -> Set[Index]: + illegal_indices: Set[Index] = set() + iterators = [ + self._iter_fexpr_slices(string), + self._iter_nameescape_slices(string), + ] + for it in iterators: + for begin, end in it: + illegal_indices.update(range(begin, end + 1)) + return illegal_indices + def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]: """ This method contains the algorithm that StringSplitter uses to @@ -1272,40 +1328,15 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): assert is_valid_index(max_break_idx) assert_is_leaf_string(string) - _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None - - def fexpr_slices() -> Iterator[Tuple[Index, Index]]: - """ - Yields: - All ranges of @string which, if @string were to be split there, - would result in the splitting of an f-expression (which is NOT - allowed). - """ - nonlocal _fexpr_slices - - if _fexpr_slices is None: - _fexpr_slices = [] - for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE): - _fexpr_slices.append(match.span()) - - yield from _fexpr_slices - - is_fstring = "f" in get_string_prefix(string).lower() + _illegal_split_indices = self._get_illegal_split_indices(string) - def breaks_fstring_expression(i: Index) -> bool: + def breaks_unsplittable_expression(i: Index) -> bool: """ Returns: True iff returning @i would result in the splitting of an - f-expression (which is NOT allowed). + unsplittable expression (which is NOT allowed). """ - if not is_fstring: - return False - - for (start, end) in fexpr_slices(): - if start <= i < end: - return True - - return False + return i in _illegal_split_indices def passes_all_checks(i: Index) -> bool: """ @@ -1329,7 +1360,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): is_space and is_not_escaped and is_big_enough - and not breaks_fstring_expression(i) + and not breaks_unsplittable_expression(i) ) # First, we check all indices BELOW @max_break_idx. diff --git a/tests/data/long_strings.py b/tests/data/long_strings.py index 151396b..430f760 100644 --- a/tests/data/long_strings.py +++ b/tests/data/long_strings.py @@ -207,6 +207,38 @@ long_unmergable_string_with_pragma = ( " of it." ) +string_with_nameescape = ( + "........................................................................ \N{LAO KO LA}" +) + +string_with_nameescape = ( + "........................................................................... \N{LAO KO LA}" +) + +string_with_nameescape = ( + "............................................................................ \N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + "...................................................................... \\\N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + "......................................................................... \\\N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + ".......................................................................... \\\N{LAO KO LA}" +) + +string_with_escaped_nameescape = ( + "........................................................................ \\N{LAO KO LA}" +) + +string_with_escaped_nameescape = ( + "........................................................................... \\N{LAO KO LA}" +) + # output @@ -587,3 +619,43 @@ long_unmergable_string_with_pragma = ( "This is a really long string that can't be merged because it has a likely pragma at the end" # pylint: disable=some-pylint-check " of it." ) + +string_with_nameescape = ( + "........................................................................" + " \N{LAO KO LA}" +) + +string_with_nameescape = ( + "..........................................................................." + " \N{LAO KO LA}" +) + +string_with_nameescape = ( + "............................................................................" + " \N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + "......................................................................" + " \\\N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + "........................................................................." + " \\\N{LAO KO LA}" +) + +string_with_nameescape_and_escaped_backslash = ( + ".........................................................................." + " \\\N{LAO KO LA}" +) + +string_with_escaped_nameescape = ( + "........................................................................ \\N{LAO" + " KO LA}" +) + +string_with_escaped_nameescape = ( + "..........................................................................." + " \\N{LAO KO LA}" +) diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py index e4234b2..61c28d3 100644 --- a/tests/data/long_strings__regression.py +++ b/tests/data/long_strings__regression.py @@ -514,6 +514,10 @@ fstring = F"f-strings definitely make things more {difficult} than they need to x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}." +x = ( + "\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}" +) + # output @@ -1142,3 +1146,7 @@ x = ( "This is a long string which contains an f-expr that should not split" f" {{{[i for i in range(5)]}}}." ) + +x = ( + "\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}" +)