X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/e401b6bb1e1c0ed534bba59d9dc908caf7ba898c..a36878eb2f375e2ac1e13052f663909f3835ec46:/src/black/strings.py diff --git a/src/black/strings.py b/src/black/strings.py index 262c2ba..3e3bc12 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -5,7 +5,9 @@ Simple formatting on strings. Further string formatting code is in trans.py. import re import sys from functools import lru_cache -from typing import List, Pattern +from typing import List, Match, Pattern + +from blib2to3.pytree import Leaf if sys.version_info < (3, 8): from typing_extensions import Final @@ -18,6 +20,15 @@ STRING_PREFIX_RE: Final = re.compile( r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL ) FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)") +UNICODE_ESCAPE_RE: Final = re.compile( + r"(?P\\+)(?P" + r"(u(?P[a-fA-F0-9]{4}))" # Character with 16-bit hex value xxxx + r"|(U(?P[a-fA-F0-9]{8}))" # Character with 32-bit hex value xxxxxxxx + r"|(x(?P[a-fA-F0-9]{2}))" # Character with hex value hh + r"|(N\{(?P[a-zA-Z0-9 \-]{2,})\})" # Character named name in the Unicode database + r")", + re.VERBOSE, +) def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: @@ -149,6 +160,10 @@ def normalize_string_prefix(s: str) -> str: .replace("U", "") .replace("u", "") ) + + # Python syntax guarantees max 2 prefixes and that one of them is "r" + if len(new_prefix) == 2 and "r" != new_prefix[0].lower(): + new_prefix = new_prefix[::-1] return f"{new_prefix}{match.group(2)}" @@ -232,3 +247,34 @@ def normalize_string_quotes(s: str) -> str: return s # Prefer double quotes return f"{prefix}{new_quote}{new_body}{new_quote}" + + +def normalize_unicode_escape_sequences(leaf: Leaf) -> None: + """Replace hex codes in Unicode escape sequences with lowercase representation.""" + text = leaf.value + prefix = get_string_prefix(text) + if "r" in prefix.lower(): + return + + def replace(m: Match[str]) -> str: + groups = m.groupdict() + back_slashes = groups["backslashes"] + + if len(back_slashes) % 2 == 0: + return back_slashes + groups["body"] + + if groups["u"]: + # \u + return back_slashes + "u" + groups["u"].lower() + elif groups["U"]: + # \U + return back_slashes + "U" + groups["U"].lower() + elif groups["x"]: + # \x + return back_slashes + "x" + groups["x"].lower() + else: + assert groups["N"], f"Unexpected match: {m}" + # \N{} + return back_slashes + "N{" + groups["N"].upper() + "}" + + leaf.value = re.sub(UNICODE_ESCAPE_RE, replace, text)