From 2104b7cdcb529056521a37f50c1e494f0b869d21 Mon Sep 17 00:00:00 2001 From: Zsolt Dollenstein Date: Fri, 13 Apr 2018 19:31:23 +0100 Subject: [PATCH 1/1] Handle unnecessarily escaped strings (#128) --- README.md | 3 +++ black.py | 28 +++++++++++++++++++------- docs/reference/reference_functions.rst | 2 ++ tests/string_quotes.py | 8 ++++++++ 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 65eda2a..cff4bdf 100644 --- a/README.md +++ b/README.md @@ -499,6 +499,9 @@ More details can be found in [CONTRIBUTING](CONTRIBUTING.md). * Vim plugin now works on Windows, too +* fixed unstable formatting when encountering unnecessarily escaped quotes + in a string (#120) + ### 18.4a1 diff --git a/black.py b/black.py index 587d9b3..ccc1e94 100644 --- a/black.py +++ b/black.py @@ -24,6 +24,7 @@ from typing import ( Iterator, List, Optional, + Pattern, Set, Tuple, Type, @@ -1984,9 +1985,10 @@ def normalize_string_quotes(leaf: Leaf) -> None: return # There's an internal error prefix = leaf.value[:first_quote_pos] - body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") - escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}") + escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}") + escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}") + body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] if "r" in prefix.casefold(): if unescaped_new_quote.search(body): # There's at least one unescaped new_quote in this raw string @@ -1996,11 +1998,14 @@ def normalize_string_quotes(leaf: Leaf) -> None: # Do not introduce or remove backslashes in raw strings new_body = body else: - new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body) - new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) - # Add escapes again for consecutive occurences of new_quote (sub - # doesn't match overlapping substrings). - new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) + # remove unnecessary quotes + new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body) + if body != new_body: + # Consider the string without unnecessary quotes as the original + body = new_body + leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}" + new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body) + new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body) if new_quote == '"""' and new_body[-1] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -2374,5 +2379,14 @@ def shutdown(loop: BaseEventLoop) -> None: loop.close() +def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: + """Replace `regex` with `replacement` twice on `original`. + + This is used by string normalization to perform replaces on + overlapping matches. + """ + return regex.sub(replacement, regex.sub(replacement, original)) + + if __name__ == "__main__": main() diff --git a/docs/reference/reference_functions.rst b/docs/reference/reference_functions.rst index 2b8c08e..19128ba 100644 --- a/docs/reference/reference_functions.rst +++ b/docs/reference/reference_functions.rst @@ -91,4 +91,6 @@ Utilities .. autofunction:: black.preceding_leaf +.. autofunction:: black.sub_twice + .. autofunction:: black.whitespace diff --git a/tests/string_quotes.py b/tests/string_quotes.py index 8ccd041..1ac6b06 100644 --- a/tests/string_quotes.py +++ b/tests/string_quotes.py @@ -38,6 +38,10 @@ re.compile(r'[\\"]') "x = '''; y = \"\"\"\"" "x = ''''; y = \"\"\"\"\"" "x = '' ''; y = \"\"\"\"\"" +'unnecessary \"\"escaping' +"unnecessary \'\'escaping" +'\\""' +"\\''" # output @@ -81,3 +85,7 @@ re.compile(r'[\\"]') 'x = \'\'\'; y = """"' 'x = \'\'\'\'; y = """""' 'x = \'\' \'\'; y = """""' +'unnecessary ""escaping' +"unnecessary ''escaping" +'\\""' +"\\''" -- 2.39.5