]> git.madduck.net Git - etc/vim.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Improve multiline string handling (#1879)
authorAneesh Agrawal <aneeshusa@gmail.com>
Tue, 7 Mar 2023 19:52:19 +0000 (14:52 -0500)
committerGitHub <noreply@github.com>
Tue, 7 Mar 2023 19:52:19 +0000 (11:52 -0800)
Co-authored-by: Olivia Hong <ohong@lyft.com>
Co-authored-by: Olivia Hong <24500729+olivia-hong@users.noreply.github.com>
CHANGES.md
docs/the_black_code_style/future_style.md
src/black/linegen.py
src/black/lines.py
src/black/mode.py
tests/data/preview/multiline_strings.py [new file with mode: 0644]

index a8b556cb7e8692c241f4566e7e5c4bf08d5cafd5..53682df2b398f5db1cc5f32ec027be4ff8e9734c 100644 (file)
@@ -133,6 +133,7 @@ versions separately.
   code. Implicitly concatenated f-strings with different quotes can now be merged or
   quote-normalized by changing the quotes used in expressions. (#3509)
 - Fix crash on `await (yield)` when Black is compiled with mypyc (#3533)
+- Improve handling of multiline strings by changing line split behavior (#1879)
 
 ### Configuration
 
index b2cdca2590dab86ca015c2ad01a5324f29a8033c..96abc99ef41335cefd7d88fab8245156f0d2ad64 100644 (file)
@@ -111,3 +111,51 @@ my_dict = {
     "another key": short_value,
 }
 ```
+
+### Improved multiline string handling
+
+_Black_ is smarter when formatting multiline strings, especially in function arguments,
+to avoid introducing extra line breaks. Previously, it would always consider multiline
+strings as not fitting on a single line. With this new feature, _Black_ looks at the
+context around the multiline string to decide if it should be inlined or split to a
+separate line. For example, when a multiline string is passed to a function, _Black_
+will only split the multiline string if a line is too long or if multiple arguments are
+being passed.
+
+For example, _Black_ will reformat
+
+```python
+textwrap.dedent(
+    """\
+    This is a
+    multiline string
+"""
+)
+```
+
+to:
+
+```python
+textwrap.dedent("""\
+    This is a
+    multiline string
+""")
+```
+
+And:
+
+```python
+MULTILINE = """
+foobar
+""".replace(
+    "\n", ""
+)
+```
+
+to:
+
+```python
+MULTILINE = """
+foobar
+""".replace("\n", "")
+```
index f7d3655e962bbb5cbdf2d27f894216aafd351dac..95d5583c5f53fd1ab97287c78a41cc90d3d8c33d 100644 (file)
@@ -2,7 +2,7 @@
 Generating lines of code.
 """
 import sys
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from enum import Enum, auto
 from functools import partial, wraps
 from typing import Collection, Iterator, List, Optional, Set, Union, cast
@@ -505,7 +505,7 @@ def transform_line(
         and not line.should_split_rhs
         and not line.magic_trailing_comma
         and (
-            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
+            is_line_short_enough(line, mode=mode, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
         and not (line.inside_brackets and line.contains_standalone_comments())
@@ -529,14 +529,12 @@ def transform_line(
             bracket pair instead.
             """
             for omit in generate_trailers_to_omit(line, mode.line_length):
-                lines = list(
-                    right_hand_split(line, mode.line_length, features, omit=omit)
-                )
+                lines = list(right_hand_split(line, mode, features, omit=omit))
                 # Note: this check is only able to figure out if the first line of the
                 # *current* transformation fits in the line length.  This is true only
                 # for simple cases.  All others require running more transforms via
                 # `transform_line()`.  This check doesn't know if those would succeed.
-                if is_line_short_enough(lines[0], line_length=mode.line_length):
+                if is_line_short_enough(lines[0], mode=mode):
                     yield from lines
                     return
 
@@ -544,9 +542,7 @@ def transform_line(
             # This mostly happens to multiline strings that are by definition
             # reported as not fitting a single line, as well as lines that contain
             # trailing commas (those have to be exploded).
-            yield from right_hand_split(
-                line, line_length=mode.line_length, features=features
-            )
+            yield from right_hand_split(line, mode, features=features)
 
         # HACK: nested functions (like _rhs) compiled by mypyc don't retain their
         # __name__ attribute which is needed in `run_transformer` further down.
@@ -664,7 +660,7 @@ class _RHSResult:
 
 def right_hand_split(
     line: Line,
-    line_length: int,
+    mode: Mode,
     features: Collection[Feature] = (),
     omit: Collection[LeafID] = (),
 ) -> Iterator[Line]:
@@ -678,7 +674,7 @@ def right_hand_split(
     """
     rhs_result = _first_right_hand_split(line, omit=omit)
     yield from _maybe_split_omitting_optional_parens(
-        rhs_result, line, line_length, features=features, omit=omit
+        rhs_result, line, mode, features=features, omit=omit
     )
 
 
@@ -733,7 +729,7 @@ def _first_right_hand_split(
 def _maybe_split_omitting_optional_parens(
     rhs: _RHSResult,
     line: Line,
-    line_length: int,
+    mode: Mode,
     features: Collection[Feature] = (),
     omit: Collection[LeafID] = (),
 ) -> Iterator[Line]:
@@ -751,7 +747,7 @@ def _maybe_split_omitting_optional_parens(
         # there are no standalone comments in the body
         and not rhs.body.contains_standalone_comments(0)
         # and we can actually remove the parens
-        and can_omit_invisible_parens(rhs.body, line_length)
+        and can_omit_invisible_parens(rhs.body, mode.line_length)
     ):
         omit = {id(rhs.closing_bracket), *omit}
         try:
@@ -766,23 +762,24 @@ def _maybe_split_omitting_optional_parens(
                 and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
                 # the left side of assignment is short enough (the -1 is for the ending
                 # optional paren)
-                and is_line_short_enough(rhs.head, line_length=line_length - 1)
+                and is_line_short_enough(
+                    rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
+                )
                 # the left side of assignment won't explode further because of magic
                 # trailing comma
                 and rhs.head.magic_trailing_comma is None
                 # the split by omitting optional parens isn't preferred by some other
                 # reason
-                and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
+                and not _prefer_split_rhs_oop(rhs_oop, mode)
             ):
                 yield from _maybe_split_omitting_optional_parens(
-                    rhs_oop, line, line_length, features=features, omit=omit
+                    rhs_oop, line, mode, features=features, omit=omit
                 )
                 return
 
         except CannotSplit as e:
             if not (
-                can_be_split(rhs.body)
-                or is_line_short_enough(rhs.body, line_length=line_length)
+                can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
             ):
                 raise CannotSplit(
                     "Splitting failed, body is still too long and can't be split."
@@ -806,7 +803,7 @@ def _maybe_split_omitting_optional_parens(
             yield result
 
 
-def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
+def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool:
     """
     Returns whether we should prefer the result from a split omitting optional parens.
     """
@@ -826,7 +823,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
             # the first line still contains the `=`)
             any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
             # the first line is short enough
-            and is_line_short_enough(rhs_oop.head, line_length=line_length)
+            and is_line_short_enough(rhs_oop.head, mode=mode)
         )
         # contains unsplittable type ignore
         or rhs_oop.head.contains_unsplittable_type_ignore()
@@ -1525,7 +1522,7 @@ def run_transformer(
         or line.contains_multiline_strings()
         or result[0].contains_uncollapsable_type_comments()
         or result[0].contains_unsplittable_type_ignore()
-        or is_line_short_enough(result[0], line_length=mode.line_length)
+        or is_line_short_enough(result[0], mode=mode)
         # If any leaves have no parents (which _can_ occur since
         # `transform(line)` potentially destroys the line's underlying node
         # structure), then we can't proceed. Doing so would cause the below
@@ -1540,8 +1537,6 @@ def run_transformer(
     second_opinion = run_transformer(
         line_copy, transform, mode, features_fop, line_str=line_str
     )
-    if all(
-        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
-    ):
+    if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
         result = second_opinion
     return result
index ec6ef5d952290ad153d4db27b6a99ff0c709ebe8..b65604864a4b73fabb69f893e13b99c39806ea93 100644 (file)
@@ -1,4 +1,5 @@
 import itertools
+import math
 import sys
 from dataclasses import dataclass, field
 from typing import (
@@ -10,11 +11,12 @@ from typing import (
     Sequence,
     Tuple,
     TypeVar,
+    Union,
     cast,
 )
 
 from black.brackets import DOT_PRIORITY, BracketTracker
-from black.mode import Mode
+from black.mode import Mode, Preview
 from black.nodes import (
     BRACKETS,
     CLOSING_BRACKETS,
@@ -37,6 +39,7 @@ from blib2to3.pytree import Leaf, Node
 T = TypeVar("T")
 Index = int
 LeafID = int
+LN = Union[Leaf, Node]
 
 
 @dataclass
@@ -701,18 +704,93 @@ def append_leaves(
             new_line.append(comment_leaf, preformatted=True)
 
 
-def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
-    """Return True if `line` is no longer than `line_length`.
-
+def is_line_short_enough(  # noqa: C901
+    line: Line, *, mode: Mode, line_str: str = ""
+) -> bool:
+    """For non-multiline strings, return True if `line` is no longer than `line_length`.
+    For multiline strings, looks at the context around `line` to determine
+    if it should be inlined or split up.
     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
     """
     if not line_str:
         line_str = line_to_string(line)
-    return (
-        len(line_str) <= line_length
-        and "\n" not in line_str  # multiline strings
-        and not line.contains_standalone_comments()
-    )
+
+    if Preview.multiline_string_handling not in mode:
+        return (
+            len(line_str) <= mode.line_length
+            and "\n" not in line_str  # multiline strings
+            and not line.contains_standalone_comments()
+        )
+
+    if line.contains_standalone_comments():
+        return False
+    if "\n" not in line_str:
+        # No multiline strings (MLS) present
+        return len(line_str) <= mode.line_length
+
+    first, *_, last = line_str.split("\n")
+    if len(first) > mode.line_length or len(last) > mode.line_length:
+        return False
+
+    # Traverse the AST to examine the context of the multiline string (MLS),
+    # tracking aspects such as depth and comma existence,
+    # to determine whether to split the MLS or keep it together.
+    # Depth (which is based on the existing bracket_depth concept)
+    # is needed to determine nesting level of the MLS.
+    # Includes special case for trailing commas.
+    commas: List[int] = []  # tracks number of commas per depth level
+    multiline_string: Optional[Leaf] = None
+    # store the leaves that contain parts of the MLS
+    multiline_string_contexts: List[LN] = []
+
+    max_level_to_update = math.inf  # track the depth of the MLS
+    for i, leaf in enumerate(line.leaves):
+        if max_level_to_update == math.inf:
+            had_comma: Optional[int] = None
+            if leaf.bracket_depth + 1 > len(commas):
+                commas.append(0)
+            elif leaf.bracket_depth + 1 < len(commas):
+                had_comma = commas.pop()
+            if (
+                had_comma is not None
+                and multiline_string is not None
+                and multiline_string.bracket_depth == leaf.bracket_depth + 1
+            ):
+                # Have left the level with the MLS, stop tracking commas
+                max_level_to_update = leaf.bracket_depth
+                if had_comma > 0:
+                    # MLS was in parens with at least one comma - force split
+                    return False
+
+        if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
+            # Ignore non-nested trailing comma
+            # directly after MLS/MLS-containing expression
+            ignore_ctxs: List[Optional[LN]] = [None]
+            ignore_ctxs += multiline_string_contexts
+            if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
+                commas[leaf.bracket_depth] += 1
+        if max_level_to_update != math.inf:
+            max_level_to_update = min(max_level_to_update, leaf.bracket_depth)
+
+        if is_multiline_string(leaf):
+            if len(multiline_string_contexts) > 0:
+                # >1 multiline string cannot fit on a single line - force split
+                return False
+            multiline_string = leaf
+            ctx: LN = leaf
+            # fetch the leaf components of the MLS in the AST
+            while str(ctx) in line_str:
+                multiline_string_contexts.append(ctx)
+                if ctx.parent is None:
+                    break
+                ctx = ctx.parent
+
+    # May not have a triple-quoted multiline string at all,
+    # in case of a regular string with embedded newlines and line continuations
+    if len(multiline_string_contexts) == 0:
+        return True
+
+    return all(val == 0 for val in commas)
 
 
 def can_be_split(line: Line) -> bool:
index c9a4c2b080c7c8d2b7159ecbde403fc188e1abdc..d70388916dabf1053d62f20921cb83f64abd9922 100644 (file)
@@ -155,6 +155,7 @@ class Preview(Enum):
 
     add_trailing_comma_consistently = auto()
     hex_codes_in_unicode_sequences = auto()
+    multiline_string_handling = auto()
     prefer_splitting_right_hand_side_of_assignments = auto()
     # NOTE: string_processing requires wrap_long_dict_values_in_parens
     # for https://github.com/psf/black/issues/3117 to be fixed.
diff --git a/tests/data/preview/multiline_strings.py b/tests/data/preview/multiline_strings.py
new file mode 100644 (file)
index 0000000..bb517d1
--- /dev/null
@@ -0,0 +1,358 @@
+"""cow
+say""",
+call(3, "dogsay", textwrap.dedent("""dove
+    coo""" % "cowabunga"))
+call(3, "dogsay", textwrap.dedent("""dove
+coo""" % "cowabunga"))
+call(3, textwrap.dedent("""cow
+    moo""" % "cowabunga"), "dogsay")
+call(3, "dogsay", textwrap.dedent("""crow
+    caw""" % "cowabunga"),)
+call(3, textwrap.dedent("""cat
+    meow""" % "cowabunga"), {"dog", "say"})
+call(3, {"dog", "say"}, textwrap.dedent("""horse
+    neigh""" % "cowabunga"))
+call(3, {"dog", "say"}, textwrap.dedent("""pig
+    oink""" % "cowabunga"),)
+textwrap.dedent("""A one-line triple-quoted string.""")
+textwrap.dedent("""A two-line triple-quoted string
+since it goes to the next line.""")
+textwrap.dedent("""A three-line triple-quoted string
+that not only goes to the next line
+but also goes one line beyond.""")
+textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. file contents.
+""")
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. file contents.
+"""))
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. {config_filename} file contents.
+""".format("config_filename", config_filename)))
+# Another use case
+data = yaml.load("""\
+a: 1
+b: 2
+""")
+data = yaml.load("""\
+a: 1
+b: 2
+""",)
+data = yaml.load(
+    """\
+    a: 1
+    b: 2
+"""
+)
+
+MULTILINE = """
+foo
+""".replace("\n", "")
+generated_readme = lambda project_name: """
+{}
+
+<Add content here!>
+""".strip().format(project_name)
+parser.usage += """
+Custom extra help summary.
+
+Extra test:
+- with
+- bullets
+"""
+
+
+def get_stuff(cr, value):
+    # original
+    cr.execute("""
+        SELECT whatever
+          FROM some_table t
+         WHERE id = %s
+    """, [value])
+    return cr.fetchone()
+
+
+def get_stuff(cr, value):
+    # preferred
+    cr.execute(
+        """
+        SELECT whatever
+          FROM some_table t
+         WHERE id = %s
+        """,
+        [value],
+    )
+    return cr.fetchone()
+
+
+call(arg1, arg2, """
+short
+""", arg3=True)
+test_vectors = [
+    "one-liner\n",
+    "two\nliner\n",
+    """expressed
+as a three line
+mulitline string""",
+]
+
+_wat = re.compile(
+    r"""
+    regex
+    """,
+    re.MULTILINE | re.VERBOSE,
+)
+dis_c_instance_method = """\
+%3d           0 LOAD_FAST                1 (x)
+              2 LOAD_CONST               1 (1)
+              4 COMPARE_OP               2 (==)
+              6 LOAD_FAST                0 (self)
+              8 STORE_ATTR               0 (x)
+             10 LOAD_CONST               0 (None)
+             12 RETURN_VALUE
+""" % (_C.__init__.__code__.co_firstlineno + 1,)
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually {verb} the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. {file_type} file contents.
+""".format(verb="using", file_type="test")))
+{"""cow
+moos"""}
+["""cow
+moos"""]
+["""cow
+moos""", """dog
+woofs
+and
+barks"""]
+def dastardly_default_value(
+    cow: String = json.loads("""this
+is
+quite
+the
+dastadardly
+value!"""),
+    **kwargs,
+):
+    pass
+
+print(f"""
+    This {animal}
+    moos and barks
+{animal} say
+""")
+msg = f"""The arguments {bad_arguments} were passed in.
+Please use `--build-option` instead,
+`--global-option` is reserved to flags like `--verbose` or `--quiet`.
+"""
+
+# output
+"""cow
+say""",
+call(
+    3,
+    "dogsay",
+    textwrap.dedent("""dove
+    coo""" % "cowabunga"),
+)
+call(
+    3,
+    "dogsay",
+    textwrap.dedent("""dove
+coo""" % "cowabunga"),
+)
+call(
+    3,
+    textwrap.dedent("""cow
+    moo""" % "cowabunga"),
+    "dogsay",
+)
+call(
+    3,
+    "dogsay",
+    textwrap.dedent("""crow
+    caw""" % "cowabunga"),
+)
+call(
+    3,
+    textwrap.dedent("""cat
+    meow""" % "cowabunga"),
+    {"dog", "say"},
+)
+call(
+    3,
+    {"dog", "say"},
+    textwrap.dedent("""horse
+    neigh""" % "cowabunga"),
+)
+call(
+    3,
+    {"dog", "say"},
+    textwrap.dedent("""pig
+    oink""" % "cowabunga"),
+)
+textwrap.dedent("""A one-line triple-quoted string.""")
+textwrap.dedent("""A two-line triple-quoted string
+since it goes to the next line.""")
+textwrap.dedent("""A three-line triple-quoted string
+that not only goes to the next line
+but also goes one line beyond.""")
+textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. file contents.
+""")
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. file contents.
+"""))
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually leveraging the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. {config_filename} file contents.
+""".format("config_filename", config_filename)))
+# Another use case
+data = yaml.load("""\
+a: 1
+b: 2
+""")
+data = yaml.load(
+    """\
+a: 1
+b: 2
+""",
+)
+data = yaml.load("""\
+    a: 1
+    b: 2
+""")
+
+MULTILINE = """
+foo
+""".replace("\n", "")
+generated_readme = lambda project_name: """
+{}
+
+<Add content here!>
+""".strip().format(project_name)
+parser.usage += """
+Custom extra help summary.
+
+Extra test:
+- with
+- bullets
+"""
+
+
+def get_stuff(cr, value):
+    # original
+    cr.execute(
+        """
+        SELECT whatever
+          FROM some_table t
+         WHERE id = %s
+    """,
+        [value],
+    )
+    return cr.fetchone()
+
+
+def get_stuff(cr, value):
+    # preferred
+    cr.execute(
+        """
+        SELECT whatever
+          FROM some_table t
+         WHERE id = %s
+        """,
+        [value],
+    )
+    return cr.fetchone()
+
+
+call(
+    arg1,
+    arg2,
+    """
+short
+""",
+    arg3=True,
+)
+test_vectors = [
+    "one-liner\n",
+    "two\nliner\n",
+    """expressed
+as a three line
+mulitline string""",
+]
+
+_wat = re.compile(
+    r"""
+    regex
+    """,
+    re.MULTILINE | re.VERBOSE,
+)
+dis_c_instance_method = """\
+%3d           0 LOAD_FAST                1 (x)
+              2 LOAD_CONST               1 (1)
+              4 COMPARE_OP               2 (==)
+              6 LOAD_FAST                0 (self)
+              8 STORE_ATTR               0 (x)
+             10 LOAD_CONST               0 (None)
+             12 RETURN_VALUE
+""" % (_C.__init__.__code__.co_firstlineno + 1,)
+path.write_text(textwrap.dedent("""\
+    A triple-quoted string
+    actually {verb} the textwrap.dedent functionality
+    that ends in a trailing newline,
+    representing e.g. {file_type} file contents.
+""".format(verb="using", file_type="test")))
+{"""cow
+moos"""}
+["""cow
+moos"""]
+[
+    """cow
+moos""",
+    """dog
+woofs
+and
+barks""",
+]
+
+
+def dastardly_default_value(
+    cow: String = json.loads("""this
+is
+quite
+the
+dastadardly
+value!"""),
+    **kwargs,
+):
+    pass
+
+
+print(f"""
+    This {animal}
+    moos and barks
+{animal} say
+""")
+msg = f"""The arguments {bad_arguments} were passed in.
+Please use `--build-option` instead,
+`--global-option` is reserved to flags like `--verbose` or `--quiet`.
+"""