From 00e7e12a3a412ea386806d5d4eeaed345e912940 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Tue, 8 Jun 2021 20:46:09 -0400
Subject: [PATCH] Regression fix: leave R prefixes capitalization alone (#2285)

`black.strings.get_string_prefix` used to lowercase the extracted
prefix before returning it. This is wrong because 1) it ignores the
fact we should leave R prefixes alone because of MagicPython, and 2)
there is dedicated prefix casing handling code that fixes issue 1.
`.lower` is too naive.

This was originally fixed in 20.8b0, but was reintroduced since 21.4b0.

I also added proper prefix normalization for docstrings by using the
`black.strings.normalize_string_prefix` helper.

Some more test strings were added to make sure strings with capitalized
prefixes aren't treated differently (actually happened with my original
patch, Jelle had to point it out to me).
---
 CHANGES.md                             |  1 +
 src/black/linegen.py                   |  5 +--
 src/black/strings.py                   |  2 +-
 src/black/trans.py                     | 10 +++---
 tests/data/long_strings__regression.py | 42 ++++++++++++++++++++++++++
 tests/data/string_prefixes.py          | 21 +++++++++++++
 6 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 2d2b3b4c..9c2939e1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -12,6 +12,7 @@
 - Added `--required-version` option to require a specific version to be running (#2300)
 - Fix incorrect custom breakpoint indices when string group contains fake f-strings
   (#2311)
+- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
 
 ## 21.5b2
 
diff --git a/src/black/linegen.py b/src/black/linegen.py
index 7949654b..3b811f02 100644
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@@ -226,8 +226,9 @@ class LineGenerator(Visitor[Line]):
         if is_docstring(leaf) and "\\\n" not in leaf.value:
             # We're ignoring docstrings with backslash newline escapes because changing
             # indentation of those changes the AST representation of the code.
-            prefix = get_string_prefix(leaf.value)
-            docstring = leaf.value[len(prefix) :]  # Remove the prefix
+            docstring = normalize_string_prefix(leaf.value, self.remove_u_prefix)
+            prefix = get_string_prefix(docstring)
+            docstring = docstring[len(prefix) :]  # Remove the prefix
             quote_char = docstring[0]
             # A natural way to remove the outer quotes is to do:
             #   docstring = docstring.strip(quote_char)
diff --git a/src/black/strings.py b/src/black/strings.py
index 5b443dde..80f588f5 100644
--- a/src/black/strings.py
+++ b/src/black/strings.py
@@ -87,7 +87,7 @@ def get_string_prefix(string: str) -> str:
     prefix = ""
     prefix_idx = 0
     while string[prefix_idx] in STRING_PREFIX_CHARS:
-        prefix += string[prefix_idx].lower()
+        prefix += string[prefix_idx]
         prefix_idx += 1
 
     return prefix
diff --git a/src/black/trans.py b/src/black/trans.py
index 4fb2c4d0..ca620f6b 100644
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -411,7 +411,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
             and is_valid_index(next_str_idx)
             and LL[next_str_idx].type == token.STRING
         ):
-            prefix = get_string_prefix(LL[next_str_idx].value)
+            prefix = get_string_prefix(LL[next_str_idx].value).lower()
             next_str_idx += 1
 
         # The next loop merges the string group. The final string will be
@@ -431,7 +431,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
             num_of_strings += 1
 
             SS = LL[next_str_idx].value
-            next_prefix = get_string_prefix(SS)
+            next_prefix = get_string_prefix(SS).lower()
 
             # If this is an f-string group but this substring is not prefixed
             # with 'f'...
@@ -541,7 +541,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
                 return TErr("StringMerger does NOT merge multiline strings.")
 
             num_of_strings += 1
-            prefix = get_string_prefix(leaf.value)
+            prefix = get_string_prefix(leaf.value).lower()
             if "r" in prefix:
                 return TErr("StringMerger does NOT merge raw strings.")
 
@@ -1036,7 +1036,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         is_valid_index = is_valid_index_factory(LL)
         insert_str_child = insert_str_child_factory(LL[string_idx])
 
-        prefix = get_string_prefix(LL[string_idx].value)
+        prefix = get_string_prefix(LL[string_idx].value).lower()
 
         # We MAY choose to drop the 'f' prefix from substrings that don't
         # contain any f-expressions, but ONLY if the original f-string
@@ -1290,7 +1290,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
 
             yield from _fexpr_slices
 
-        is_fstring = "f" in get_string_prefix(string)
+        is_fstring = "f" in get_string_prefix(string).lower()
 
         def breaks_fstring_expression(i: Index) -> bool:
             """
diff --git a/tests/data/long_strings__regression.py b/tests/data/long_strings__regression.py
index 83cae614..e4234b2f 100644
--- a/tests/data/long_strings__regression.py
+++ b/tests/data/long_strings__regression.py
@@ -495,6 +495,26 @@ message = (
     f"9. You now have a key to add to `{prefix}set api youtube api_key`"
 )
 
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    F"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    F"{balance: <{bal_len + 5}} "
+    F"<<{author.display_name}>>\n"
+)
+
+fstring = (
+    F"We have to remember to escape {braces}."
+    " Like {these}."
+    F" But not {this}."
+)
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = F"f-strings definitely make things more {difficult} than they need to be for {{black}}. But boy they sure are handy. The problem is that some lines will need to have the 'f' whereas others do not. This {line}, for example, needs one."
+
+x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."
+
+
 # output
 
 
@@ -1100,3 +1120,25 @@ message = (
     "8. No application restrictions are needed. Click Create at the bottom."
     f"9. You now have a key to add to `{prefix}set api youtube api_key`"
 )
+
+# It shouldn't matter if the string prefixes are capitalized.
+temp_msg = (
+    f"{F'{humanize_number(pos)}.': <{pound_len+2}} "
+    f"{balance: <{bal_len + 5}} "
+    f"<<{author.display_name}>>\n"
+)
+
+fstring = f"We have to remember to escape {braces}. Like {{these}}. But not {this}."
+
+welcome_to_programming = R"hello," R" world!"
+
+fstring = (
+    f"f-strings definitely make things more {difficult} than they need to be for"
+    " {black}. But boy they sure are handy. The problem is that some lines will need"
+    f" to have the 'f' whereas others do not. This {line}, for example, needs one."
+)
+
+x = (
+    "This is a long string which contains an f-expr that should not split"
+    f" {{{[i for i in range(5)]}}}."
+)
diff --git a/tests/data/string_prefixes.py b/tests/data/string_prefixes.py
index 0ca3686a..9ddc2b54 100644
--- a/tests/data/string_prefixes.py
+++ b/tests/data/string_prefixes.py
@@ -6,6 +6,17 @@ B"hello"
 r"hello"
 fR"hello"
 
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """
+
+
 # output
 
 
@@ -16,3 +27,13 @@ f"hello {name}"
 b"hello"
 r"hello"
 fR"hello"
+
+
+def docstring_singleline():
+    R"""2020 was one hell of a year. The good news is that we were able to"""
+
+
+def docstring_multiline():
+    R"""
+    clear out all of the issues opened in that time :p
+    """
-- 
2.39.5