From: Felix Hildén Date: Thu, 13 Jan 2022 17:59:43 +0000 (+0200) Subject: Normalise string prefix order (#2297) X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/799f76f537f72ade97b8e6637c59fee49e05a4ab?ds=sidebyside Normalise string prefix order (#2297) Closes #2171 --- diff --git a/CHANGES.md b/CHANGES.md index 565c36f..5a8a0ef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -28,6 +28,7 @@ `--target-version` is set to 3.10 and higher). (#2728) - Fix handling of standalone `match()` or `case()` when there is a trailing newline or a comment inside of the parentheses. (#2760) +- Black now normalizes string prefix order (#2297) ### Packaging diff --git a/docs/the_black_code_style/current_style.md b/docs/the_black_code_style/current_style.md index 68dff3e..11fe2c8 100644 --- a/docs/the_black_code_style/current_style.md +++ b/docs/the_black_code_style/current_style.md @@ -233,10 +233,10 @@ _Black_ prefers double quotes (`"` and `"""`) over single quotes (`'` and `'''`) will replace the latter with the former as long as it does not result in more backslash escapes than before. -_Black_ also standardizes string prefixes, making them always lowercase. On top of that, -if your code is already Python 3.6+ only or it's using the `unicode_literals` future -import, _Black_ will remove `u` from the string prefix as it is meaningless in those -scenarios. +_Black_ also standardizes string prefixes. Prefix characters are made lowercase with the +exception of [capital "R" prefixes](#rstrings-and-rstrings), unicode literal markers +(`u`) are removed because they are meaningless in Python 3, and in the case of multiple +characters "r" is put first as in spoken language: "raw f-string". The main reason to standardize on a single form of quotes is aesthetics. Having one kind of quotes everywhere reduces reader distraction. It will also enable a future version of diff --git a/src/black/strings.py b/src/black/strings.py index 262c2ba..9d0e2eb 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -149,6 +149,10 @@ def normalize_string_prefix(s: str) -> str: .replace("U", "") .replace("u", "") ) + + # Python syntax guarantees max 2 prefixes and that one of them is "r" + if len(new_prefix) == 2 and "r" != new_prefix[0].lower(): + new_prefix = new_prefix[::-1] return f"{new_prefix}{match.group(2)}" diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index a7e17df..257dbef 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -293,7 +293,7 @@ class Untokenizer: cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII) -blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII) +blank_re = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII) def _get_normal_name(orig_enc: str) -> str: diff --git a/tests/data/string_prefixes.py b/tests/data/string_prefixes.py index 9ddc2b5..f86da69 100644 --- a/tests/data/string_prefixes.py +++ b/tests/data/string_prefixes.py @@ -1,10 +1,13 @@ -#!/usr/bin/env python3.6 +#!/usr/bin/env python3 -name = R"Łukasz" -F"hello {name}" -B"hello" -r"hello" -fR"hello" +name = "Łukasz" +(f"hello {name}", F"hello {name}") +(b"", B"") +(u"", U"") +(r"", R"") + +(rf"", fr"", Rf"", fR"", rF"", Fr"", RF"", FR"") +(rb"", br"", Rb"", bR"", rB"", Br"", RB"", BR"") def docstring_singleline(): @@ -20,13 +23,16 @@ def docstring_multiline(): # output -#!/usr/bin/env python3.6 +#!/usr/bin/env python3 + +name = "Łukasz" +(f"hello {name}", f"hello {name}") +(b"", b"") +("", "") +(r"", R"") -name = R"Łukasz" -f"hello {name}" -b"hello" -r"hello" -fR"hello" +(rf"", rf"", Rf"", Rf"", rf"", rf"", Rf"", Rf"") +(rb"", rb"", Rb"", Rb"", rb"", rb"", Rb"", Rb"") def docstring_singleline():