From ecdbf085a772e8d737b8a8735d39a7af413cecfb Mon Sep 17 00:00:00 2001 From: Zsolt Dollenstein Date: Mon, 9 Apr 2018 22:36:40 +0100 Subject: [PATCH] Add support for all valid string literals (#115) --- black.py | 8 ++-- blib2to3/README | 1 + blib2to3/pgen2/tokenize.py | 78 +++++++++++++------------------------- tests/python2.py | 4 +- tests/string_quotes.py | 2 + 5 files changed, 35 insertions(+), 58 deletions(-) diff --git a/black.py b/black.py index 9b144ed..19871aa 100644 --- a/black.py +++ b/black.py @@ -1923,8 +1923,8 @@ def normalize_string_quotes(leaf: Leaf) -> None: prefix = leaf.value[:first_quote_pos] body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] - unescaped_new_quote = re.compile(r"(([^\\]|^)(\\\\)*)" + new_quote) - escaped_orig_quote = re.compile(r"\\(\\\\)*" + orig_quote) + unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") + escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}") if "r" in prefix.casefold(): if unescaped_new_quote.search(body): # There's at least one unescaped new_quote in this raw string @@ -1934,8 +1934,8 @@ def normalize_string_quotes(leaf: Leaf) -> None: # Do not introduce or remove backslashes in raw strings new_body = body else: - new_body = escaped_orig_quote.sub(f"\\1{orig_quote}", body) - new_body = unescaped_new_quote.sub(f"\\1\\\\{new_quote}", new_body) + new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body) + new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) if new_quote == '"""' and new_body[-1] == '"': # edge case: new_body = new_body[:-1] + '\\"' diff --git a/blib2to3/README b/blib2to3/README index 6f9da92..ad9f1c2 100644 --- a/blib2to3/README +++ b/blib2to3/README @@ -7,6 +7,7 @@ Reasons for forking: *args and **kwargs - backport of GH-6143 that restores the ability to reformat legacy usage of `async` +- support all types of string literals - better ability to debug (better reprs) - INDENT and DEDENT don't hold whitespace and comment prefixes - ability to Cythonize diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py index 6b8a5cb..4f03130 100644 --- a/blib2to3/pgen2/tokenize.py +++ b/blib2to3/pgen2/tokenize.py @@ -48,6 +48,10 @@ except NameError: def group(*choices): return '(' + '|'.join(choices) + ')' def any(*choices): return group(*choices) + '*' def maybe(*choices): return group(*choices) + '?' +def _combinations(*l): + return set( + x + y for x in l for y in l + ("",) if x.casefold() != y.casefold() + ) Whitespace = r'[ \f\t]*' Comment = r'#[^\r\n]*' @@ -74,7 +78,7 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"' Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" # Tail end of """ string. Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?" +_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?" Triple = group(_litprefix + "'''", _litprefix + '"""') # Single-line ' or " string. String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", @@ -107,59 +111,29 @@ tokenprog = re.compile(Token, re.UNICODE) pseudoprog = re.compile(PseudoToken, re.UNICODE) single3prog = re.compile(Single3) double3prog = re.compile(Double3) + +_strprefixes = ( + _combinations('r', 'R', 'f', 'F') | + _combinations('r', 'R', 'b', 'B') | + {'u', 'U', 'ur', 'uR', 'Ur', 'UR'} +) + endprogs = {"'": re.compile(Single), '"': re.compile(Double), "'''": single3prog, '"""': double3prog, - "r'''": single3prog, 'r"""': double3prog, - "u'''": single3prog, 'u"""': double3prog, - "b'''": single3prog, 'b"""': double3prog, - "f'''": single3prog, 'f"""': double3prog, - "ur'''": single3prog, 'ur"""': double3prog, - "br'''": single3prog, 'br"""': double3prog, - "rb'''": single3prog, 'rb"""': double3prog, - "R'''": single3prog, 'R"""': double3prog, - "U'''": single3prog, 'U"""': double3prog, - "B'''": single3prog, 'B"""': double3prog, - "F'''": single3prog, 'F"""': double3prog, - "uR'''": single3prog, 'uR"""': double3prog, - "Ur'''": single3prog, 'Ur"""': double3prog, - "UR'''": single3prog, 'UR"""': double3prog, - "bR'''": single3prog, 'bR"""': double3prog, - "Br'''": single3prog, 'Br"""': double3prog, - "BR'''": single3prog, 'BR"""': double3prog, - "rB'''": single3prog, 'rB"""': double3prog, - "Rb'''": single3prog, 'Rb"""': double3prog, - "RB'''": single3prog, 'RB"""': double3prog, - 'r': None, 'R': None, - 'u': None, 'U': None, - 'f': None, 'F': None, - 'b': None, 'B': None} - -triple_quoted = {} -for t in ("'''", '"""', - "r'''", 'r"""', "R'''", 'R"""', - "u'''", 'u"""', "U'''", 'U"""', - "b'''", 'b"""', "B'''", 'B"""', - "f'''", 'f"""', "F'''", 'F"""', - "ur'''", 'ur"""', "Ur'''", 'Ur"""', - "uR'''", 'uR"""', "UR'''", 'UR"""', - "br'''", 'br"""', "Br'''", 'Br"""', - "bR'''", 'bR"""', "BR'''", 'BR"""', - "rb'''", 'rb"""', "Rb'''", 'Rb"""', - "rB'''", 'rB"""', "RB'''", 'RB"""',): - triple_quoted[t] = t -single_quoted = {} -for t in ("'", '"', - "r'", 'r"', "R'", 'R"', - "u'", 'u"', "U'", 'U"', - "b'", 'b"', "B'", 'B"', - "f'", 'f"', "F'", 'F"', - "ur'", 'ur"', "Ur'", 'Ur"', - "uR'", 'uR"', "UR'", 'UR"', - "br'", 'br"', "Br'", 'Br"', - "bR'", 'bR"', "BR'", 'BR"', - "rb'", 'rb"', "Rb'", 'Rb"', - "rB'", 'rB"', "RB'", 'RB"',): - single_quoted[t] = t + **{f"{prefix}'''": single3prog for prefix in _strprefixes}, + **{f'{prefix}"""': double3prog for prefix in _strprefixes}, + **{prefix: None for prefix in _strprefixes}} + +triple_quoted = ( + {"'''", '"""'} | + {f"{prefix}'''" for prefix in _strprefixes} | + {f'{prefix}"""' for prefix in _strprefixes} +) +single_quoted = ( + {"'", '"'} | + {f"{prefix}'" for prefix in _strprefixes} | + {f'{prefix}"' for prefix in _strprefixes} +) tabsize = 8 diff --git a/tests/python2.py b/tests/python2.py index 5214add..4a22f46 100644 --- a/tests/python2.py +++ b/tests/python2.py @@ -8,7 +8,7 @@ print >> sys.stderr , "Look, a repr:", `sys` def function((_globals, _locals)): - exec "print 'hi from exec!'" in _globals, _locals + exec ur"print 'hi from exec!'" in _globals, _locals function((globals(), locals())) @@ -27,7 +27,7 @@ print >>sys.stderr, "Look, a repr:", ` sys ` def function((_globals, _locals)): - exec "print 'hi from exec!'" in _globals, _locals + exec ur"print 'hi from exec!'" in _globals, _locals function((globals(), locals())) diff --git a/tests/string_quotes.py b/tests/string_quotes.py index c66b6ee..1532a7a 100644 --- a/tests/string_quotes.py +++ b/tests/string_quotes.py @@ -22,6 +22,7 @@ r"raw string ftw" r'Date d\'expiration:(.*)' r'Tricky "quote' r'Not-so-tricky \"quote' +rf'{yay}' '\n\ The \"quick\"\n\ brown fox\n\ @@ -56,6 +57,7 @@ r"raw string ftw" r"Date d\'expiration:(.*)" r'Tricky "quote' r"Not-so-tricky \"quote" +rf"{yay}" "\n\ The \"quick\"\n\ brown fox\n\ -- 2.39.5