Add support for all valid string literals (#115)

author Zsolt Dollenstein <zsol.zsol@gmail.com>

Mon, 9 Apr 2018 21:36:40 +0000 (22:36 +0100)

committer Łukasz Langa <lukasz@langa.pl>

Mon, 9 Apr 2018 21:36:40 +0000 (14:36 -0700)
author Zsolt Dollenstein <zsol.zsol@gmail.com>
Mon, 9 Apr 2018 21:36:40 +0000 (22:36 +0100)
committer Łukasz Langa <lukasz@langa.pl>
Mon, 9 Apr 2018 21:36:40 +0000 (14:36 -0700)
diff --git a/black.py b/black.py

index 9b144edb579d923d7479bdae8565f5df7507fd81..19871aa7e7e5576cebd917310bba117bc447f5b4 100644 (file)
--- a/black.py
+++ b/black.py
@@ -1923,8 +1923,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
  
      prefix = leaf.value[:first_quote_pos]
      body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
  
      prefix = leaf.value[:first_quote_pos]
      body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
-    unescaped_new_quote = re.compile(r"(([^\\]|^)(\\\\)*)" + new_quote)
-    escaped_orig_quote = re.compile(r"\\(\\\\)*" + orig_quote)
+    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
+    escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}")
      if "r" in prefix.casefold():
          if unescaped_new_quote.search(body):
              # There's at least one unescaped new_quote in this raw string
      if "r" in prefix.casefold():
          if unescaped_new_quote.search(body):
              # There's at least one unescaped new_quote in this raw string
@@ -1934,8 +1934,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
          # Do not introduce or remove backslashes in raw strings
          new_body = body
      else:
          # Do not introduce or remove backslashes in raw strings
          new_body = body
      else:
-        new_body = escaped_orig_quote.sub(f"\\1{orig_quote}", body)
-        new_body = unescaped_new_quote.sub(f"\\1\\\\{new_quote}", new_body)
+        new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body)
+        new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body)
      if new_quote == '"""' and new_body[-1] == '"':
          # edge case:
          new_body = new_body[:-1] + '\\"'
      if new_quote == '"""' and new_body[-1] == '"':
          # edge case:
          new_body = new_body[:-1] + '\\"'
diff --git a/blib2to3/README b/blib2to3/README

index 6f9da929cdb3794e5f08dea56ef39cd727726526..ad9f1c23258aaab7ba88343af3ddc42d6c9c69d8 100644 (file)
--- a/blib2to3/README
+++ b/blib2to3/README
@@ -7,6 +7,7 @@ Reasons for forking:
    *args and **kwargs
  - backport of GH-6143 that restores the ability to reformat legacy usage of
    `async`
    *args and **kwargs
  - backport of GH-6143 that restores the ability to reformat legacy usage of
    `async`
+- support all types of string literals
  - better ability to debug (better reprs)
  - INDENT and DEDENT don't hold whitespace and comment prefixes
  - ability to Cythonize
  - better ability to debug (better reprs)
  - INDENT and DEDENT don't hold whitespace and comment prefixes
  - ability to Cythonize
diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py

index 6b8a5cb2ef54fb0bdbd98f2d2e20ac73f7ae3c3c..4f031306378ab20d13aec985e3c8715f59c8606a 100644 (file)
--- a/blib2to3/pgen2/tokenize.py
+++ b/blib2to3/pgen2/tokenize.py
@@ -48,6 +48,10 @@ except NameError:
  def group(*choices): return '(' + '|'.join(choices) + ')'
  def any(*choices): return group(*choices) + '*'
  def maybe(*choices): return group(*choices) + '?'
  def group(*choices): return '(' + '|'.join(choices) + ')'
  def any(*choices): return group(*choices) + '*'
  def maybe(*choices): return group(*choices) + '?'
+def _combinations(*l):
+    return set(
+        x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
+    )
  
  Whitespace = r'[ \f\t]*'
  Comment = r'#[^\r\n]*'
  
  Whitespace = r'[ \f\t]*'
  Comment = r'#[^\r\n]*'
@@ -74,7 +78,7 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
  Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
  # Tail end of """ string.
  Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
  Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
  # Tail end of """ string.
  Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?"
+_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
  Triple = group(_litprefix + "'''", _litprefix + '"""')
  # Single-line ' or " string.
  String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
  Triple = group(_litprefix + "'''", _litprefix + '"""')
  # Single-line ' or " string.
  String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
@@ -107,59 +111,29 @@ tokenprog = re.compile(Token, re.UNICODE)
  pseudoprog = re.compile(PseudoToken, re.UNICODE)
  single3prog = re.compile(Single3)
  double3prog = re.compile(Double3)
  pseudoprog = re.compile(PseudoToken, re.UNICODE)
  single3prog = re.compile(Single3)
  double3prog = re.compile(Double3)
+
+_strprefixes = (
+    _combinations('r', 'R', 'f', 'F') |
+    _combinations('r', 'R', 'b', 'B') |
+    {'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
+)
+
  endprogs = {"'": re.compile(Single), '"': re.compile(Double),
              "'''": single3prog, '"""': double3prog,
  endprogs = {"'": re.compile(Single), '"': re.compile(Double),
              "'''": single3prog, '"""': double3prog,
-            "r'''": single3prog, 'r"""': double3prog,
-            "u'''": single3prog, 'u"""': double3prog,
-            "b'''": single3prog, 'b"""': double3prog,
-            "f'''": single3prog, 'f"""': double3prog,
-            "ur'''": single3prog, 'ur"""': double3prog,
-            "br'''": single3prog, 'br"""': double3prog,
-            "rb'''": single3prog, 'rb"""': double3prog,
-            "R'''": single3prog, 'R"""': double3prog,
-            "U'''": single3prog, 'U"""': double3prog,
-            "B'''": single3prog, 'B"""': double3prog,
-            "F'''": single3prog, 'F"""': double3prog,
-            "uR'''": single3prog, 'uR"""': double3prog,
-            "Ur'''": single3prog, 'Ur"""': double3prog,
-            "UR'''": single3prog, 'UR"""': double3prog,
-            "bR'''": single3prog, 'bR"""': double3prog,
-            "Br'''": single3prog, 'Br"""': double3prog,
-            "BR'''": single3prog, 'BR"""': double3prog,
-            "rB'''": single3prog, 'rB"""': double3prog,
-            "Rb'''": single3prog, 'Rb"""': double3prog,
-            "RB'''": single3prog, 'RB"""': double3prog,
-            'r': None, 'R': None,
-            'u': None, 'U': None,
-            'f': None, 'F': None,
-            'b': None, 'B': None}
-
-triple_quoted = {}
-for t in ("'''", '"""',
-          "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "b'''", 'b"""', "B'''", 'B"""',
-          "f'''", 'f"""', "F'''", 'F"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""',
-          "br'''", 'br"""', "Br'''", 'Br"""',
-          "bR'''", 'bR"""', "BR'''", 'BR"""',
-          "rb'''", 'rb"""', "Rb'''", 'Rb"""',
-          "rB'''", 'rB"""', "RB'''", 'RB"""',):
-    triple_quoted[t] = t
-single_quoted = {}
-for t in ("'", '"',
-          "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "b'", 'b"', "B'", 'B"',
-          "f'", 'f"', "F'", 'F"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"',
-          "br'", 'br"', "Br'", 'Br"',
-          "bR'", 'bR"', "BR'", 'BR"',
-          "rb'", 'rb"', "Rb'", 'Rb"',
-          "rB'", 'rB"', "RB'", 'RB"',):
-    single_quoted[t] = t
+            **{f"{prefix}'''": single3prog for prefix in _strprefixes},
+            **{f'{prefix}"""': double3prog for prefix in _strprefixes},
+            **{prefix: None for prefix in _strprefixes}}
+
+triple_quoted = (
+    {"'''", '"""'} |
+    {f"{prefix}'''" for prefix in _strprefixes} |
+    {f'{prefix}"""' for prefix in _strprefixes}
+)
+single_quoted = (
+    {"'", '"'} |
+    {f"{prefix}'" for prefix in _strprefixes} |
+    {f'{prefix}"' for prefix in _strprefixes}
+)
  
  tabsize = 8
  
  
  tabsize = 8
  
diff --git a/tests/python2.py b/tests/python2.py

index 5214add9971e1d7038b19c9813c83486a1361043..4a22f46de4268f659661cfe1031cdad6caf3ee63 100644 (file)
--- a/tests/python2.py
+++ b/tests/python2.py
@@ -8,7 +8,7 @@ print >> sys.stderr , "Look, a repr:", `sys`
  
  
  def function((_globals, _locals)):
  
  
  def function((_globals, _locals)):
-    exec "print 'hi from exec!'" in _globals, _locals
+    exec ur"print 'hi from exec!'" in _globals, _locals
  
  
  function((globals(), locals()))
  
  
  function((globals(), locals()))
@@ -27,7 +27,7 @@ print >>sys.stderr, "Look, a repr:", ` sys `
  
  
  def function((_globals, _locals)):
  
  
  def function((_globals, _locals)):
-    exec "print 'hi from exec!'" in _globals, _locals
+    exec ur"print 'hi from exec!'" in _globals, _locals
  
  
  function((globals(), locals()))
  
  
  function((globals(), locals()))
diff --git a/tests/string_quotes.py b/tests/string_quotes.py

index c66b6ee8fbd17bdf08b608f21b1d6718832abfd9..1532a7ab74c5ce8b3fb2e9081b1a89f880308aea 100644 (file)
--- a/tests/string_quotes.py
+++ b/tests/string_quotes.py
@@ -22,6 +22,7 @@ r"raw string ftw"
  r'Date d\'expiration:(.*)'
  r'Tricky "quote'
  r'Not-so-tricky \"quote'
  r'Date d\'expiration:(.*)'
  r'Tricky "quote'
  r'Not-so-tricky \"quote'
+rf'{yay}'
  '\n\
  The \"quick\"\n\
  brown fox\n\
  '\n\
  The \"quick\"\n\
  brown fox\n\
@@ -56,6 +57,7 @@ r"raw string ftw"
  r"Date d\'expiration:(.*)"
  r'Tricky "quote'
  r"Not-so-tricky \"quote"
  r"Date d\'expiration:(.*)"
  r'Tricky "quote'
  r"Not-so-tricky \"quote"
+rf"{yay}"
  "\n\
  The \"quick\"\n\
  brown fox\n\
  "\n\
  The \"quick\"\n\
  brown fox\n\
author	Zsolt Dollenstein <zsol.zsol@gmail.com>
	Mon, 9 Apr 2018 21:36:40 +0000 (22:36 +0100)
committer	Łukasz Langa <lukasz@langa.pl>
	Mon, 9 Apr 2018 21:36:40 +0000 (14:36 -0700)
black.py		patch | blob | history
blib2to3/README		patch | blob | history
blib2to3/pgen2/tokenize.py		patch | blob | history
tests/python2.py		patch | blob | history
tests/string_quotes.py		patch | blob | history