From 4a063a9f8d7069ea82186ac9aff5a2cd1c2618d7 Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Tue, 7 Mar 2023 14:52:19 -0500 Subject: [PATCH] Improve multiline string handling (#1879) Co-authored-by: Olivia Hong Co-authored-by: Olivia Hong <24500729+olivia-hong@users.noreply.github.com> --- CHANGES.md | 1 + docs/the_black_code_style/future_style.md | 48 +++ src/black/linegen.py | 43 ++- src/black/lines.py | 96 +++++- src/black/mode.py | 1 + tests/data/preview/multiline_strings.py | 358 ++++++++++++++++++++++ 6 files changed, 514 insertions(+), 33 deletions(-) create mode 100644 tests/data/preview/multiline_strings.py diff --git a/CHANGES.md b/CHANGES.md index a8b556c..53682df 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -133,6 +133,7 @@ versions separately. code. Implicitly concatenated f-strings with different quotes can now be merged or quote-normalized by changing the quotes used in expressions. (#3509) - Fix crash on `await (yield)` when Black is compiled with mypyc (#3533) +- Improve handling of multiline strings by changing line split behavior (#1879) ### Configuration diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md index b2cdca2..96abc99 100644 --- a/docs/the_black_code_style/future_style.md +++ b/docs/the_black_code_style/future_style.md @@ -111,3 +111,51 @@ my_dict = { "another key": short_value, } ``` + +### Improved multiline string handling + +_Black_ is smarter when formatting multiline strings, especially in function arguments, +to avoid introducing extra line breaks. Previously, it would always consider multiline +strings as not fitting on a single line. With this new feature, _Black_ looks at the +context around the multiline string to decide if it should be inlined or split to a +separate line. For example, when a multiline string is passed to a function, _Black_ +will only split the multiline string if a line is too long or if multiple arguments are +being passed. + +For example, _Black_ will reformat + +```python +textwrap.dedent( + """\ + This is a + multiline string +""" +) +``` + +to: + +```python +textwrap.dedent("""\ + This is a + multiline string +""") +``` + +And: + +```python +MULTILINE = """ +foobar +""".replace( + "\n", "" +) +``` + +to: + +```python +MULTILINE = """ +foobar +""".replace("\n", "") +``` diff --git a/src/black/linegen.py b/src/black/linegen.py index f7d3655..95d5583 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -2,7 +2,7 @@ Generating lines of code. """ import sys -from dataclasses import dataclass +from dataclasses import dataclass, replace from enum import Enum, auto from functools import partial, wraps from typing import Collection, Iterator, List, Optional, Set, Union, cast @@ -505,7 +505,7 @@ def transform_line( and not line.should_split_rhs and not line.magic_trailing_comma and ( - is_line_short_enough(line, line_length=mode.line_length, line_str=line_str) + is_line_short_enough(line, mode=mode, line_str=line_str) or line.contains_unsplittable_type_ignore() ) and not (line.inside_brackets and line.contains_standalone_comments()) @@ -529,14 +529,12 @@ def transform_line( bracket pair instead. """ for omit in generate_trailers_to_omit(line, mode.line_length): - lines = list( - right_hand_split(line, mode.line_length, features, omit=omit) - ) + lines = list(right_hand_split(line, mode, features, omit=omit)) # Note: this check is only able to figure out if the first line of the # *current* transformation fits in the line length. This is true only # for simple cases. All others require running more transforms via # `transform_line()`. This check doesn't know if those would succeed. - if is_line_short_enough(lines[0], line_length=mode.line_length): + if is_line_short_enough(lines[0], mode=mode): yield from lines return @@ -544,9 +542,7 @@ def transform_line( # This mostly happens to multiline strings that are by definition # reported as not fitting a single line, as well as lines that contain # trailing commas (those have to be exploded). - yield from right_hand_split( - line, line_length=mode.line_length, features=features - ) + yield from right_hand_split(line, mode, features=features) # HACK: nested functions (like _rhs) compiled by mypyc don't retain their # __name__ attribute which is needed in `run_transformer` further down. @@ -664,7 +660,7 @@ class _RHSResult: def right_hand_split( line: Line, - line_length: int, + mode: Mode, features: Collection[Feature] = (), omit: Collection[LeafID] = (), ) -> Iterator[Line]: @@ -678,7 +674,7 @@ def right_hand_split( """ rhs_result = _first_right_hand_split(line, omit=omit) yield from _maybe_split_omitting_optional_parens( - rhs_result, line, line_length, features=features, omit=omit + rhs_result, line, mode, features=features, omit=omit ) @@ -733,7 +729,7 @@ def _first_right_hand_split( def _maybe_split_omitting_optional_parens( rhs: _RHSResult, line: Line, - line_length: int, + mode: Mode, features: Collection[Feature] = (), omit: Collection[LeafID] = (), ) -> Iterator[Line]: @@ -751,7 +747,7 @@ def _maybe_split_omitting_optional_parens( # there are no standalone comments in the body and not rhs.body.contains_standalone_comments(0) # and we can actually remove the parens - and can_omit_invisible_parens(rhs.body, line_length) + and can_omit_invisible_parens(rhs.body, mode.line_length) ): omit = {id(rhs.closing_bracket), *omit} try: @@ -766,23 +762,24 @@ def _maybe_split_omitting_optional_parens( and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]) # the left side of assignment is short enough (the -1 is for the ending # optional paren) - and is_line_short_enough(rhs.head, line_length=line_length - 1) + and is_line_short_enough( + rhs.head, mode=replace(mode, line_length=mode.line_length - 1) + ) # the left side of assignment won't explode further because of magic # trailing comma and rhs.head.magic_trailing_comma is None # the split by omitting optional parens isn't preferred by some other # reason - and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length) + and not _prefer_split_rhs_oop(rhs_oop, mode) ): yield from _maybe_split_omitting_optional_parens( - rhs_oop, line, line_length, features=features, omit=omit + rhs_oop, line, mode, features=features, omit=omit ) return except CannotSplit as e: if not ( - can_be_split(rhs.body) - or is_line_short_enough(rhs.body, line_length=line_length) + can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode) ): raise CannotSplit( "Splitting failed, body is still too long and can't be split." @@ -806,7 +803,7 @@ def _maybe_split_omitting_optional_parens( yield result -def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool: +def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool: """ Returns whether we should prefer the result from a split omitting optional parens. """ @@ -826,7 +823,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool: # the first line still contains the `=`) any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves) # the first line is short enough - and is_line_short_enough(rhs_oop.head, line_length=line_length) + and is_line_short_enough(rhs_oop.head, mode=mode) ) # contains unsplittable type ignore or rhs_oop.head.contains_unsplittable_type_ignore() @@ -1525,7 +1522,7 @@ def run_transformer( or line.contains_multiline_strings() or result[0].contains_uncollapsable_type_comments() or result[0].contains_unsplittable_type_ignore() - or is_line_short_enough(result[0], line_length=mode.line_length) + or is_line_short_enough(result[0], mode=mode) # If any leaves have no parents (which _can_ occur since # `transform(line)` potentially destroys the line's underlying node # structure), then we can't proceed. Doing so would cause the below @@ -1540,8 +1537,6 @@ def run_transformer( second_opinion = run_transformer( line_copy, transform, mode, features_fop, line_str=line_str ) - if all( - is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion - ): + if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion): result = second_opinion return result diff --git a/src/black/lines.py b/src/black/lines.py index ec6ef5d..b656048 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -1,4 +1,5 @@ import itertools +import math import sys from dataclasses import dataclass, field from typing import ( @@ -10,11 +11,12 @@ from typing import ( Sequence, Tuple, TypeVar, + Union, cast, ) from black.brackets import DOT_PRIORITY, BracketTracker -from black.mode import Mode +from black.mode import Mode, Preview from black.nodes import ( BRACKETS, CLOSING_BRACKETS, @@ -37,6 +39,7 @@ from blib2to3.pytree import Leaf, Node T = TypeVar("T") Index = int LeafID = int +LN = Union[Leaf, Node] @dataclass @@ -701,18 +704,93 @@ def append_leaves( new_line.append(comment_leaf, preformatted=True) -def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool: - """Return True if `line` is no longer than `line_length`. - +def is_line_short_enough( # noqa: C901 + line: Line, *, mode: Mode, line_str: str = "" +) -> bool: + """For non-multiline strings, return True if `line` is no longer than `line_length`. + For multiline strings, looks at the context around `line` to determine + if it should be inlined or split up. Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ if not line_str: line_str = line_to_string(line) - return ( - len(line_str) <= line_length - and "\n" not in line_str # multiline strings - and not line.contains_standalone_comments() - ) + + if Preview.multiline_string_handling not in mode: + return ( + len(line_str) <= mode.line_length + and "\n" not in line_str # multiline strings + and not line.contains_standalone_comments() + ) + + if line.contains_standalone_comments(): + return False + if "\n" not in line_str: + # No multiline strings (MLS) present + return len(line_str) <= mode.line_length + + first, *_, last = line_str.split("\n") + if len(first) > mode.line_length or len(last) > mode.line_length: + return False + + # Traverse the AST to examine the context of the multiline string (MLS), + # tracking aspects such as depth and comma existence, + # to determine whether to split the MLS or keep it together. + # Depth (which is based on the existing bracket_depth concept) + # is needed to determine nesting level of the MLS. + # Includes special case for trailing commas. + commas: List[int] = [] # tracks number of commas per depth level + multiline_string: Optional[Leaf] = None + # store the leaves that contain parts of the MLS + multiline_string_contexts: List[LN] = [] + + max_level_to_update = math.inf # track the depth of the MLS + for i, leaf in enumerate(line.leaves): + if max_level_to_update == math.inf: + had_comma: Optional[int] = None + if leaf.bracket_depth + 1 > len(commas): + commas.append(0) + elif leaf.bracket_depth + 1 < len(commas): + had_comma = commas.pop() + if ( + had_comma is not None + and multiline_string is not None + and multiline_string.bracket_depth == leaf.bracket_depth + 1 + ): + # Have left the level with the MLS, stop tracking commas + max_level_to_update = leaf.bracket_depth + if had_comma > 0: + # MLS was in parens with at least one comma - force split + return False + + if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA: + # Ignore non-nested trailing comma + # directly after MLS/MLS-containing expression + ignore_ctxs: List[Optional[LN]] = [None] + ignore_ctxs += multiline_string_contexts + if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1): + commas[leaf.bracket_depth] += 1 + if max_level_to_update != math.inf: + max_level_to_update = min(max_level_to_update, leaf.bracket_depth) + + if is_multiline_string(leaf): + if len(multiline_string_contexts) > 0: + # >1 multiline string cannot fit on a single line - force split + return False + multiline_string = leaf + ctx: LN = leaf + # fetch the leaf components of the MLS in the AST + while str(ctx) in line_str: + multiline_string_contexts.append(ctx) + if ctx.parent is None: + break + ctx = ctx.parent + + # May not have a triple-quoted multiline string at all, + # in case of a regular string with embedded newlines and line continuations + if len(multiline_string_contexts) == 0: + return True + + return all(val == 0 for val in commas) def can_be_split(line: Line) -> bool: diff --git a/src/black/mode.py b/src/black/mode.py index c9a4c2b..d703889 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -155,6 +155,7 @@ class Preview(Enum): add_trailing_comma_consistently = auto() hex_codes_in_unicode_sequences = auto() + multiline_string_handling = auto() prefer_splitting_right_hand_side_of_assignments = auto() # NOTE: string_processing requires wrap_long_dict_values_in_parens # for https://github.com/psf/black/issues/3117 to be fixed. diff --git a/tests/data/preview/multiline_strings.py b/tests/data/preview/multiline_strings.py new file mode 100644 index 0000000..bb517d1 --- /dev/null +++ b/tests/data/preview/multiline_strings.py @@ -0,0 +1,358 @@ +"""cow +say""", +call(3, "dogsay", textwrap.dedent("""dove + coo""" % "cowabunga")) +call(3, "dogsay", textwrap.dedent("""dove +coo""" % "cowabunga")) +call(3, textwrap.dedent("""cow + moo""" % "cowabunga"), "dogsay") +call(3, "dogsay", textwrap.dedent("""crow + caw""" % "cowabunga"),) +call(3, textwrap.dedent("""cat + meow""" % "cowabunga"), {"dog", "say"}) +call(3, {"dog", "say"}, textwrap.dedent("""horse + neigh""" % "cowabunga")) +call(3, {"dog", "say"}, textwrap.dedent("""pig + oink""" % "cowabunga"),) +textwrap.dedent("""A one-line triple-quoted string.""") +textwrap.dedent("""A two-line triple-quoted string +since it goes to the next line.""") +textwrap.dedent("""A three-line triple-quoted string +that not only goes to the next line +but also goes one line beyond.""") +textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""") +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""")) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {config_filename} file contents. +""".format("config_filename", config_filename))) +# Another use case +data = yaml.load("""\ +a: 1 +b: 2 +""") +data = yaml.load("""\ +a: 1 +b: 2 +""",) +data = yaml.load( + """\ + a: 1 + b: 2 +""" +) + +MULTILINE = """ +foo +""".replace("\n", "") +generated_readme = lambda project_name: """ +{} + + +""".strip().format(project_name) +parser.usage += """ +Custom extra help summary. + +Extra test: +- with +- bullets +""" + + +def get_stuff(cr, value): + # original + cr.execute(""" + SELECT whatever + FROM some_table t + WHERE id = %s + """, [value]) + return cr.fetchone() + + +def get_stuff(cr, value): + # preferred + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +call(arg1, arg2, """ +short +""", arg3=True) +test_vectors = [ + "one-liner\n", + "two\nliner\n", + """expressed +as a three line +mulitline string""", +] + +_wat = re.compile( + r""" + regex + """, + re.MULTILINE | re.VERBOSE, +) +dis_c_instance_method = """\ +%3d 0 LOAD_FAST 1 (x) + 2 LOAD_CONST 1 (1) + 4 COMPARE_OP 2 (==) + 6 LOAD_FAST 0 (self) + 8 STORE_ATTR 0 (x) + 10 LOAD_CONST 0 (None) + 12 RETURN_VALUE +""" % (_C.__init__.__code__.co_firstlineno + 1,) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually {verb} the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {file_type} file contents. +""".format(verb="using", file_type="test"))) +{"""cow +moos"""} +["""cow +moos"""] +["""cow +moos""", """dog +woofs +and +barks"""] +def dastardly_default_value( + cow: String = json.loads("""this +is +quite +the +dastadardly +value!"""), + **kwargs, +): + pass + +print(f""" + This {animal} + moos and barks +{animal} say +""") +msg = f"""The arguments {bad_arguments} were passed in. +Please use `--build-option` instead, +`--global-option` is reserved to flags like `--verbose` or `--quiet`. +""" + +# output +"""cow +say""", +call( + 3, + "dogsay", + textwrap.dedent("""dove + coo""" % "cowabunga"), +) +call( + 3, + "dogsay", + textwrap.dedent("""dove +coo""" % "cowabunga"), +) +call( + 3, + textwrap.dedent("""cow + moo""" % "cowabunga"), + "dogsay", +) +call( + 3, + "dogsay", + textwrap.dedent("""crow + caw""" % "cowabunga"), +) +call( + 3, + textwrap.dedent("""cat + meow""" % "cowabunga"), + {"dog", "say"}, +) +call( + 3, + {"dog", "say"}, + textwrap.dedent("""horse + neigh""" % "cowabunga"), +) +call( + 3, + {"dog", "say"}, + textwrap.dedent("""pig + oink""" % "cowabunga"), +) +textwrap.dedent("""A one-line triple-quoted string.""") +textwrap.dedent("""A two-line triple-quoted string +since it goes to the next line.""") +textwrap.dedent("""A three-line triple-quoted string +that not only goes to the next line +but also goes one line beyond.""") +textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""") +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""")) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {config_filename} file contents. +""".format("config_filename", config_filename))) +# Another use case +data = yaml.load("""\ +a: 1 +b: 2 +""") +data = yaml.load( + """\ +a: 1 +b: 2 +""", +) +data = yaml.load("""\ + a: 1 + b: 2 +""") + +MULTILINE = """ +foo +""".replace("\n", "") +generated_readme = lambda project_name: """ +{} + + +""".strip().format(project_name) +parser.usage += """ +Custom extra help summary. + +Extra test: +- with +- bullets +""" + + +def get_stuff(cr, value): + # original + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +def get_stuff(cr, value): + # preferred + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +call( + arg1, + arg2, + """ +short +""", + arg3=True, +) +test_vectors = [ + "one-liner\n", + "two\nliner\n", + """expressed +as a three line +mulitline string""", +] + +_wat = re.compile( + r""" + regex + """, + re.MULTILINE | re.VERBOSE, +) +dis_c_instance_method = """\ +%3d 0 LOAD_FAST 1 (x) + 2 LOAD_CONST 1 (1) + 4 COMPARE_OP 2 (==) + 6 LOAD_FAST 0 (self) + 8 STORE_ATTR 0 (x) + 10 LOAD_CONST 0 (None) + 12 RETURN_VALUE +""" % (_C.__init__.__code__.co_firstlineno + 1,) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually {verb} the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {file_type} file contents. +""".format(verb="using", file_type="test"))) +{"""cow +moos"""} +["""cow +moos"""] +[ + """cow +moos""", + """dog +woofs +and +barks""", +] + + +def dastardly_default_value( + cow: String = json.loads("""this +is +quite +the +dastadardly +value!"""), + **kwargs, +): + pass + + +print(f""" + This {animal} + moos and barks +{animal} say +""") +msg = f"""The arguments {bad_arguments} were passed in. +Please use `--build-option` instead, +`--global-option` is reserved to flags like `--verbose` or `--quiet`. +""" -- 2.39.2