From: Felix Hildén <felix.hilden@gmail.com>
Date: Thu, 13 Jan 2022 17:59:43 +0000 (+0200)
Subject: Normalise string prefix order (#2297)
X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/799f76f537f72ade97b8e6637c59fee49e05a4ab?ds=sidebyside

Normalise string prefix order (#2297)

Closes #2171
---

diff --git a/CHANGES.md b/CHANGES.md
index 565c36f8..5a8a0ef9 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -28,6 +28,7 @@
   `--target-version` is set to 3.10 and higher). (#2728)
 - Fix handling of standalone `match()` or `case()` when there is a trailing newline or a
   comment inside of the parentheses. (#2760)
+- Black now normalizes string prefix order (#2297)
 
 ### Packaging
 
diff --git a/docs/the_black_code_style/current_style.md b/docs/the_black_code_style/current_style.md
index 68dff3ee..11fe2c8c 100644
--- a/docs/the_black_code_style/current_style.md
+++ b/docs/the_black_code_style/current_style.md
@@ -233,10 +233,10 @@ _Black_ prefers double quotes (`"` and `"""`) over single quotes (`'` and `'''`)
 will replace the latter with the former as long as it does not result in more backslash
 escapes than before.
 
-_Black_ also standardizes string prefixes, making them always lowercase. On top of that,
-if your code is already Python 3.6+ only or it's using the `unicode_literals` future
-import, _Black_ will remove `u` from the string prefix as it is meaningless in those
-scenarios.
+_Black_ also standardizes string prefixes. Prefix characters are made lowercase with the
+exception of [capital "R" prefixes](#rstrings-and-rstrings), unicode literal markers
+(`u`) are removed because they are meaningless in Python 3, and in the case of multiple
+characters "r" is put first as in spoken language: "raw f-string".
 
 The main reason to standardize on a single form of quotes is aesthetics. Having one kind
 of quotes everywhere reduces reader distraction. It will also enable a future version of
diff --git a/src/black/strings.py b/src/black/strings.py
index 262c2ba4..9d0e2eb8 100644
--- a/src/black/strings.py
+++ b/src/black/strings.py
@@ -149,6 +149,10 @@ def normalize_string_prefix(s: str) -> str:
         .replace("U", "")
         .replace("u", "")
     )
+
+    # Python syntax guarantees max 2 prefixes and that one of them is "r"
+    if len(new_prefix) == 2 and "r" != new_prefix[0].lower():
+        new_prefix = new_prefix[::-1]
     return f"{new_prefix}{match.group(2)}"
 
 
diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py
index a7e17df1..257dbef4 100644
--- a/src/blib2to3/pgen2/tokenize.py
+++ b/src/blib2to3/pgen2/tokenize.py
@@ -293,7 +293,7 @@ class Untokenizer:
 
 
 cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
-blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
+blank_re = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
 
 
 def _get_normal_name(orig_enc: str) -> str:
diff --git a/tests/data/string_prefixes.py b/tests/data/string_prefixes.py
index 9ddc2b54..f86da696 100644
--- a/tests/data/string_prefixes.py
+++ b/tests/data/string_prefixes.py
@@ -1,10 +1,13 @@
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3
 
-name = R"Łukasz"
-F"hello {name}"
-B"hello"
-r"hello"
-fR"hello"
+name = "Łukasz"
+(f"hello {name}", F"hello {name}")
+(b"", B"")
+(u"", U"")
+(r"", R"")
+
+(rf"", fr"", Rf"", fR"", rF"", Fr"", RF"", FR"")
+(rb"", br"", Rb"", bR"", rB"", Br"", RB"", BR"")
 
 
 def docstring_singleline():
@@ -20,13 +23,16 @@ def docstring_multiline():
 # output
 
 
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3
+
+name = "Łukasz"
+(f"hello {name}", f"hello {name}")
+(b"", b"")
+("", "")
+(r"", R"")
 
-name = R"Łukasz"
-f"hello {name}"
-b"hello"
-r"hello"
-fR"hello"
+(rf"", rf"", Rf"", Rf"", rf"", rf"", Rf"", Rf"")
+(rb"", rb"", Rb"", Rb"", rb"", rb"", Rb"", Rb"")
 
 
 def docstring_singleline():