]>
git.madduck.net Git - etc/vim.git/blobdiff - src/black/strings.py
madduck's git repository
Every one of the projects in this repository is available at the canonical
URL git://git.madduck.net/madduck/pub/<projectpath> — see
each project's metadata for the exact URL.
All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
SSH access, as well as push access, can be individually
arranged.
If you use my repositories frequently, consider adding the following
snippet to ~/.gitconfig and using the third clone URL listed for each
project:
[url "git://git.madduck.net/madduck/"]
insteadOf = madduck:
Simple formatting on strings. Further string formatting code is in trans.py.
"""
Simple formatting on strings. Further string formatting code is in trans.py.
"""
+from functools import lru_cache
from typing import List, Pattern
from typing import List, Pattern
+if sys.version_info < (3, 8):
+ from typing_extensions import Final
+else:
+ from typing import Final
-STRING_PREFIX_CHARS = "furbFURB" # All possible string prefix characters.
+
+STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters.
+STRING_PREFIX_RE: Final = re.compile(
+ r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL
+)
+FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
for line in s.splitlines():
# Find the index of the first non-whitespace character after a string of
# whitespace that includes at least one tab
for line in s.splitlines():
# Find the index of the first non-whitespace character after a string of
# whitespace that includes at least one tab
- match = re.match(r"\s*\t+\s*(\S)", line)
+ match = FIRST_NON_WHITESPACE_RE.match( line)
if match:
first_non_whitespace_idx = match.start(1)
if match:
first_non_whitespace_idx = match.start(1)
), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
-def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str:
- """Make all string prefixes lowercase.
-
- If remove_u_prefix is given, also removes any u prefix from the string.
- """
- match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", s, re.DOTALL)
+def normalize_string_prefix(s: str) -> str:
+ """Make all string prefixes lowercase."""
+ match = STRING_PREFIX_RE.match(s)
assert match is not None, f"failed to match string {s!r}"
orig_prefix = match.group(1)
assert match is not None, f"failed to match string {s!r}"
orig_prefix = match.group(1)
- new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
- if remove_u_prefix:
- new_prefix = new_prefix.replace("u", "")
+ new_prefix = (
+ orig_prefix.replace("F", "f")
+ .replace("B", "b")
+ .replace("U", "")
+ .replace("u", "")
+ )
return f"{new_prefix}{match.group(2)}"
return f"{new_prefix}{match.group(2)}"
+# Re(gex) does actually cache patterns internally but this still improves
+# performance on a long list literal of strings by 5-9% since lru_cache's
+# caching overhead is much lower.
+@lru_cache(maxsize=64)
+def _cached_compile(pattern: str) -> Pattern[str]:
+ return re.compile(pattern)
+
+
def normalize_string_quotes(s: str) -> str:
"""Prefer double quotes but only if it doesn't cause more escaping.
def normalize_string_quotes(s: str) -> str:
"""Prefer double quotes but only if it doesn't cause more escaping.
return s # There's an internal error
prefix = s[:first_quote_pos]
return s # There's an internal error
prefix = s[:first_quote_pos]
- unescaped_new_quote = re. compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
- escaped_new_quote = re. compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
- escaped_orig_quote = re. compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
+ unescaped_new_quote = _cached_ compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
+ escaped_new_quote = _cached_ compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
+ escaped_orig_quote = _cached_ compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
if "r" in prefix.casefold():
if unescaped_new_quote.search(body):
body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
if "r" in prefix.casefold():
if unescaped_new_quote.search(body):
if "f" in prefix.casefold():
matches = re.findall(
r"""
if "f" in prefix.casefold():
matches = re.findall(
r"""
- (?:[^{] |^)\{ # start of the string or a non-{ followed by a single {
+ (?:(?<!\{) |^)\{ # start of the string or a non-{ followed by a single {
([^{].*?) # contents of the brackets except if begins with {{
([^{].*?) # contents of the brackets except if begins with {{
- \}(?:[^}] |$) # A } followed by end of the string or a non-}
+ \}(?:(?!\}) |$) # A } followed by end of the string or a non-}
""",
new_body,
re.VERBOSE,
""",
new_body,
re.VERBOSE,