All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you would read over the Git project's submission guidelines and adhere to
them, I would be especially grateful.
3 from abc import ABC, abstractmethod
4 from collections import defaultdict
5 from concurrent.futures import Executor, ProcessPoolExecutor
6 from contextlib import contextmanager
7 from datetime import datetime
9 from functools import lru_cache, partial, wraps
13 from multiprocessing import Manager, freeze_support
15 from pathlib import Path
44 from typing_extensions import Final
45 from mypy_extensions import mypyc_attr
47 from appdirs import user_cache_dir
48 from dataclasses import dataclass, field, replace
51 from typed_ast import ast3, ast27
52 from pathspec import PathSpec
55 from blib2to3.pytree import Node, Leaf, type_repr
56 from blib2to3 import pygram, pytree
57 from blib2to3.pgen2 import driver, token
58 from blib2to3.pgen2.grammar import Grammar
59 from blib2to3.pgen2.parse import ParseError
61 from _black_version import version as __version__
64 import colorama # noqa: F401
# NOTE(review): this file is a line-numbered scrape; the leading integers are
# extraction artifacts and interior lines are missing throughout this chunk.
# Formatting defaults and file-discovery regexes used by the CLI options below.
66 DEFAULT_LINE_LENGTH = 88
67 DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950
68 DEFAULT_INCLUDES = r"\.pyi?$"
# Per-user cache directory, namespaced by Black's own version string.
69 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
71 STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters.
# Type aliases used throughout the module.
85 LN = Union[Leaf, Node]
86 Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
89 CacheInfo = Tuple[Timestamp, FileSize]
90 Cache = Dict[Path, CacheInfo]
# Styled console writers: `out` for emphasized status messages, `err` for red errors.
91 out = partial(click.secho, bold=True, err=True)
92 err = partial(click.secho, fg="red", err=True)
# Initialize blib2to3's pickled-grammar cache before any parsing happens.
94 pygram.initialize(CACHE_DIR)
95 syms = pygram.python_symbols
# Control-flow signal, not an error: the input was already formatted.
98 class NothingChanged(UserWarning):
99 """Raised when reformatted code is the same as source."""
# Root of the transformer-error hierarchy.
102 class CannotTransform(Exception):
103 """Base class for errors raised by Transformers."""
# Raised while splitting a line when no split fits within the line length.
106 class CannotSplit(CannotTransform):
107 """A readable split that fits the allotted line length is impossible."""
# Raised when every grammar attempted fails to parse the input.
110 class InvalidInput(ValueError):
111 """Raised when input source code fails all parse attempts."""
# Type variable for the error side of the Result union below.
115 E = TypeVar("E", bound=Exception)
# Rust-style Ok/Err wrappers.
# NOTE(review): the class bodies are only partly visible in this excerpt; each
# __init__ presumably stores its argument for later retrieval — confirm upstream.
118 class Ok(Generic[T]):
119 def __init__(self, value: T) -> None:
126 class Err(Generic[E]):
127 def __init__(self, e: E) -> None:
134 # The 'Result' return type is used to implement an error-handling model heavily
135 # influenced by that used by the Rust programming language
136 # (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
137 Result = Union[Ok[T], Err[E]]
138 TResult = Result[T, CannotTransform] # (T)ransform Result
139 TMatchResult = TResult[Index]
# How (and whether) formatted output is persisted; derived from CLI flags.
# NOTE(review): the enum members (NO/YES/DIFF/CHECK/COLOR_DIFF) and several
# lines of from_configuration are missing from this excerpt.
142 class WriteBack(Enum):
150 def from_configuration(
151 cls, *, check: bool, diff: bool, color: bool = False
# --check without --diff takes priority; --color only upgrades DIFF.
153 if check and not diff:
157 return cls.COLOR_DIFF
159 return cls.DIFF if diff else cls.YES
# Python versions Black's output can target (members not visible in this excerpt).
168 class TargetVersion(Enum):
177 def is_python2(self) -> bool:
# PY27 is the only Python 2 member.
178 return self is TargetVersion.PY27
# Versions implied by the deprecated --py36 flag.
181 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
# Members of the Feature enum (the `class Feature` header is outside this view;
# members 1, 2, and 7 are also missing from the excerpt).
185 # All string literals are unicode
188 NUMERIC_UNDERSCORES = 3
189 TRAILING_COMMA_IN_CALL = 4
190 TRAILING_COMMA_IN_DEF = 5
191 # The following two feature-flags are mutually exclusive, and exactly one should be
192 # set for every version of python.
193 ASYNC_IDENTIFIERS = 6
195 ASSIGNMENT_EXPRESSIONS = 8
196 POS_ONLY_ARGUMENTS = 9
# Feature availability per target version; consumed by supports_feature().
# NOTE(review): closing braces of the nested sets are missing from this excerpt.
199 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
200 TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
201 TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
202 TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
203 TargetVersion.PY35: {
204 Feature.UNICODE_LITERALS,
205 Feature.TRAILING_COMMA_IN_CALL,
206 Feature.ASYNC_IDENTIFIERS,
208 TargetVersion.PY36: {
209 Feature.UNICODE_LITERALS,
211 Feature.NUMERIC_UNDERSCORES,
212 Feature.TRAILING_COMMA_IN_CALL,
213 Feature.TRAILING_COMMA_IN_DEF,
214 Feature.ASYNC_IDENTIFIERS,
# PY37+ switch from ASYNC_IDENTIFIERS to ASYNC_KEYWORDS (async/await reserved).
216 TargetVersion.PY37: {
217 Feature.UNICODE_LITERALS,
219 Feature.NUMERIC_UNDERSCORES,
220 Feature.TRAILING_COMMA_IN_CALL,
221 Feature.TRAILING_COMMA_IN_DEF,
222 Feature.ASYNC_KEYWORDS,
224 TargetVersion.PY38: {
225 Feature.UNICODE_LITERALS,
227 Feature.NUMERIC_UNDERSCORES,
228 Feature.TRAILING_COMMA_IN_CALL,
229 Feature.TRAILING_COMMA_IN_DEF,
230 Feature.ASYNC_KEYWORDS,
231 Feature.ASSIGNMENT_EXPRESSIONS,
232 Feature.POS_ONLY_ARGUMENTS,
# Fields and a method of the Mode dataclass (its class header is outside this view).
239 target_versions: Set[TargetVersion] = field(default_factory=set)
240 line_length: int = DEFAULT_LINE_LENGTH
241 string_normalization: bool = True
244 def get_cache_key(self) -> str:
# Serialize the mode into a stable string used to segregate cache files.
245 if self.target_versions:
246 version_str = ",".join(
# Sort by enum value so the key is independent of set iteration order.
248 for version in sorted(self.target_versions, key=lambda v: v.value)
254 str(self.line_length),
255 str(int(self.string_normalization)),
256 str(int(self.is_pyi)),
258 return ".".join(parts)
261 # Legacy name, left for integrations.
def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
    """Return True if `feature` is available in every one of `target_versions`.

    Vacuously true when `target_versions` is empty.
    """
    for version in target_versions:
        if feature not in VERSION_TO_FEATURES[version]:
            return False
    return True
def find_pyproject_toml(path_search_start: str) -> Optional[str]:
    """Find the absolute filepath to a pyproject.toml if it exists"""
    root = find_project_root(path_search_start)
    candidate = root / "pyproject.toml"
    if candidate.is_file():
        return str(candidate)
    return None
def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
    """Parse a pyproject toml file, pulling out relevant parts for Black

    If parsing fails, will raise a toml.TomlDecodeError
    """
    pyproject_toml = toml.load(path_config)
    tool_section = pyproject_toml.get("tool", {})
    config = tool_section.get("black", {})
    # Normalize option names ("--target-version" / "target-version") into the
    # snake_case keys click expects ("target_version").
    return {
        key.replace("--", "").replace("-", "_"): value
        for key, value in config.items()
    }
286 def read_pyproject_toml(
287 ctx: click.Context, param: click.Parameter, value: Optional[str]
289 """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
291 Returns the path to a successfully found and read configuration file, None
# When no explicit --config was given, discover one from the source paths.
295 value = find_pyproject_toml(ctx.params.get("src", ()))
300 config = parse_pyproject_toml(value)
# Surface parse/IO problems as a click error bound to the config file path.
301 except (toml.TomlDecodeError, OSError) as e:
302 raise click.FileError(
303 filename=value, hint=f"Error reading configuration file: {e}"
309 target_version = config.get("target_version")
310 if target_version is not None and not isinstance(target_version, list):
311 raise click.BadOptionUsage(
# NOTE(review): the f-string below has no placeholders; a plain string would do.
312 "target-version", f"Config key target-version must be a list"
# The config file supplies defaults only; explicit CLI options still win.
315 default_map: Dict[str, Any] = {}
317 default_map.update(ctx.default_map)
318 default_map.update(config)
320 ctx.default_map = default_map
def target_version_option_callback(
    c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
) -> List[TargetVersion]:
    """Compute the target versions from a --target-version flag.

    This is its own function because mypy couldn't infer the type correctly
    when it was a lambda, causing mypyc trouble.
    """
    versions: List[TargetVersion] = []
    for raw in v:
        # CLI values are lowercase enum names (e.g. "py36" -> TargetVersion.PY36).
        versions.append(TargetVersion[raw.upper()])
    return versions
# The CLI entry point. NOTE(review): a large number of decorator and body lines
# are missing from this excerpt; only the visible lines are annotated.
335 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
336 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
341 default=DEFAULT_LINE_LENGTH,
342 help="How many characters per line to allow.",
348 type=click.Choice([v.name.lower() for v in TargetVersion]),
349 callback=target_version_option_callback,
352 "Python versions that should be supported by Black's output. [default: per-file"
360 "Allow using Python 3.6-only syntax on all input files. This will put trailing"
361 " commas in function signatures and calls also after *args and **kwargs."
362 " Deprecated; use --target-version instead. [default: per-file auto-detection]"
369 "Format all input files like typing stubs regardless of file extension (useful"
370 " when piping source on standard input)."
375 "--skip-string-normalization",
377 help="Don't normalize string quotes or prefixes.",
383 "Don't write the files back, just return the status. Return code 0 means"
384 " nothing would change. Return code 1 means some files would be reformatted."
385 " Return code 123 means there was an internal error."
391 help="Don't write the files back, just output a diff for each file on stdout.",
394 "--color/--no-color",
396 help="Show colored diff. Only applies when `--diff` is given.",
401 help="If --fast given, skip temporary sanity checks. [default: --safe]",
406 default=DEFAULT_INCLUDES,
408 "A regular expression that matches files and directories that should be"
409 " included on recursive searches. An empty value means all files are included"
410 " regardless of the name. Use forward slashes for directories on all platforms"
411 " (Windows, too). Exclusions are calculated first, inclusions later."
418 default=DEFAULT_EXCLUDES,
420 "A regular expression that matches files and directories that should be"
421 " excluded on recursive searches. An empty value means no paths are excluded."
422 " Use forward slashes for directories on all platforms (Windows, too). "
423 " Exclusions are calculated first, inclusions later."
432 "Don't emit non-error messages to stderr. Errors are still emitted; silence"
433 " those with 2>/dev/null."
441 "Also emit messages to stderr about files that were not changed or were ignored"
442 " due to --exclude=."
445 @click.version_option(version=__version__)
450 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
465 callback=read_pyproject_toml,
466 help="Read configuration from PATH.",
473 target_version: List[TargetVersion],
480 skip_string_normalization: bool,
485 src: Tuple[str, ...],
486 config: Optional[str],
488 """The uncompromising code formatter."""
489 write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
# --target-version and the deprecated --py36 are mutually exclusive.
492 err("Cannot use both --target-version and --py36")
495 versions = set(target_version)
498 "--py36 is deprecated and will be removed in a future version. Use"
499 " --target-version py36 instead."
501 versions = PY36_VERSIONS
503 # We'll autodetect later.
506 target_versions=versions,
507 line_length=line_length,
509 string_normalization=not skip_string_normalization,
511 if config and verbose:
512 out(f"Using configuration from {config}.", bold=False, fg="blue")
# -c/--code short-circuits file discovery: format the string and exit.
514 print(format_str(code, mode=mode))
517 include_regex = re_compile_maybe_verbose(include)
519 err(f"Invalid regular expression for include given: {include!r}")
522 exclude_regex = re_compile_maybe_verbose(exclude)
524 err(f"Invalid regular expression for exclude given: {exclude!r}")
526 report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
527 root = find_project_root(src)
528 sources: Set[Path] = set()
529 path_empty(src, quiet, verbose, ctx)
# Directories are walked recursively; explicit files bypass include/exclude.
534 gen_python_files_in_dir(
535 p, root, include_regex, exclude_regex, report, get_gitignore(root)
538 elif p.is_file() or s == "-":
539 # if a file was explicitly given, we don't care about its extension
542 err(f"invalid path: {s}")
543 if len(sources) == 0:
544 if verbose or not quiet:
545 out("No Python files are present to be formatted. Nothing to do 😴")
# A single file is formatted in-process; multiple files use worker processes.
548 if len(sources) == 1:
552 write_back=write_back,
558 sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
561 if verbose or not quiet:
562 out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
563 click.secho(str(report), err=True)
564 ctx.exit(report.return_code)
# Body of path_empty() (the `def` line and its exit call are outside this view).
568 src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context
571 Exit if there is no `src` provided for formatting
574 if verbose or not quiet:
575 out("No Path provided. Nothing to do 😴")
# reformat_one(): single-file path (the `def` line is outside this view).
580 src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
582 """Reformat a single file under `src` without spawning child processes.
584 `fast`, `write_back`, and `mode` options are passed to
585 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
# "-" means read from stdin and write to stdout.
589 if not src.is_file() and str(src) == "-":
590 if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
591 changed = Changed.YES
# DIFF runs never consult or update the on-disk cache.
594 if write_back != WriteBack.DIFF:
595 cache = read_cache(mode)
596 res_src = src.resolve()
# Cache hit (same mtime/size): nothing to do for this file.
597 if res_src in cache and cache[res_src] == get_cache_info(res_src):
598 changed = Changed.CACHED
599 if changed is not Changed.CACHED and format_file_in_place(
600 src, fast=fast, write_back=write_back, mode=mode
602 changed = Changed.YES
# Record the file in the cache when it was written back or verified clean.
603 if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
604 write_back is WriteBack.CHECK and changed is Changed.NO
606 write_cache(cache, [src], mode)
607 report.done(src, changed)
608 except Exception as exc:
# One bad file must not abort the run; the report collects the failure.
609 report.failed(src, str(exc))
# reformat_many(): multi-file path (the `def` line is outside this view).
613 sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
615 """Reformat multiple files using a ProcessPoolExecutor."""
616 loop = asyncio.get_event_loop()
# NOTE(review): os.cpu_count() can return None, which would make min() below
# raise TypeError — confirm whether this is guarded in the missing lines.
617 worker_count = os.cpu_count()
618 if sys.platform == "win32":
619 # Work around https://bugs.python.org/issue26903
620 worker_count = min(worker_count, 61)
621 executor = ProcessPoolExecutor(max_workers=worker_count)
623 loop.run_until_complete(
627 write_back=write_back,
639 async def schedule_formatting(
642 write_back: WriteBack,
645 loop: asyncio.AbstractEventLoop,
648 """Run formatting of `sources` in parallel using the provided `executor`.
650 (Use ProcessPoolExecutors for actual parallelism.)
652 `write_back`, `fast`, and `mode` options are passed to
653 :func:`format_file_in_place`.
# Skip files whose cached mtime/size still match; report them as CACHED.
656 if write_back != WriteBack.DIFF:
657 cache = read_cache(mode)
658 sources, cached = filter_cached(cache, sources)
659 for src in sorted(cached):
660 report.done(src, Changed.CACHED)
665 sources_to_cache = []
667 if write_back == WriteBack.DIFF:
668 # For diff output, we need locks to ensure we don't interleave output
669 # from different processes.
671 lock = manager.Lock()
673 asyncio.ensure_future(
674 loop.run_in_executor(
675 executor, format_file_in_place, src, fast, mode, write_back, lock
678 for src in sorted(sources)
680 pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
# Cancel outstanding work cleanly on Ctrl-C / SIGTERM where supported.
682 loop.add_signal_handler(signal.SIGINT, cancel, pending)
683 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
684 except NotImplementedError:
685 # There are no good alternatives for these on Windows.
688 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
690 src = tasks.pop(task)
692 cancelled.append(task)
693 elif task.exception():
694 report.failed(src, str(task.exception()))
696 changed = Changed.YES if task.result() else Changed.NO
697 # If the file was written back or was successfully checked as
698 # well-formatted, store this information in the cache.
699 if write_back is WriteBack.YES or (
700 write_back is WriteBack.CHECK and changed is Changed.NO
702 sources_to_cache.append(src)
703 report.done(src, changed)
# NOTE(review): the `loop=` argument to asyncio.gather is deprecated in 3.8
# and removed in 3.10.
705 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
707 write_cache(cache, sources_to_cache, mode)
710 def format_file_in_place(
714 write_back: WriteBack = WriteBack.NO,
715 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
717 """Format file under `src` path. Return True if changed.
719 If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
721 `mode` and `fast` options are passed to :func:`format_file_contents`.
# Stub files always get stub-style formatting regardless of the configured mode.
723 if src.suffix == ".pyi":
724 mode = replace(mode, is_pyi=True)
# `then` is the file's mtime, used in the diff header below.
726 then = datetime.utcfromtimestamp(src.stat().st_mtime)
727 with open(src, "rb") as buf:
728 src_contents, encoding, newline = decode_bytes(buf.read())
730 dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
731 except NothingChanged:
# Write back with the original encoding and newline convention preserved.
734 if write_back == WriteBack.YES:
735 with open(src, "w", encoding=encoding, newline=newline) as f:
736 f.write(dst_contents)
737 elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
738 now = datetime.utcnow()
739 src_name = f"{src}\t{then} +0000"
740 dst_name = f"{src}\t{now} +0000"
741 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
# NOTE(review): `write_back.COLOR_DIFF` accesses the enum member through the
# instance; it works, but `WriteBack.COLOR_DIFF` would be clearer.
743 if write_back == write_back.COLOR_DIFF:
744 diff_contents = color_diff(diff_contents)
# Serialize diff output across worker processes when a lock was provided.
746 with lock or nullcontext():
747 f = io.TextIOWrapper(
753 f = wrap_stream_for_windows(f)
754 f.write(diff_contents)
def color_diff(contents: str) -> str:
    """Inject ANSI color codes into a unified diff.

    Header lines (``+++``/``---``) become bold white, hunk markers (``@@``)
    cyan, additions green, and removals red; all other lines are unchanged.
    """
    lines = contents.split("\n")
    for i, line in enumerate(lines):
        if line.startswith("+++") or line.startswith("---"):
            line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
        if line.startswith("@@"):
            line = "\033[36m" + line + "\033[0m"  # cyan, reset
        if line.startswith("+"):
            line = "\033[32m" + line + "\033[0m"  # green, reset
        elif line.startswith("-"):
            line = "\033[31m" + line + "\033[0m"  # red, reset
        # Bug fix: the recolored line was computed but never stored, so the
        # function previously returned its input unchanged.
        lines[i] = line
    return "\n".join(lines)
776 def wrap_stream_for_windows(
778 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]:
780 Wrap the stream in colorama's wrap_stream so colors are shown on Windows.
782 If `colorama` is not found, then no change is made. If `colorama` does
783 exist, then it handles the logic to determine whether or not to change
# Imported lazily because colorama is an optional dependency.
787 from colorama import initialise
789 # We set `strip=False` so that we don't have to modify
790 # test_express_diff_with_color.
791 f = initialise.wrap_stream(
792 f, convert=None, strip=False, autoreset=False, wrap=True
795 # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object
796 # which does not have a `detach()` method. So we fake one.
797 f.detach = lambda *args, **kwargs: None # type: ignore
804 def format_stdin_to_stdout(
805 fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
807 """Format file on stdin. Return True if changed.
809 If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
810 write a diff to stdout. The `mode` argument is passed to
811 :func:`format_file_contents`.
813 then = datetime.utcnow()
814 src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
817 dst = format_file_contents(src, fast=fast, mode=mode)
820 except NothingChanged:
# Mirror stdin's encoding and newline convention on stdout.
824 f = io.TextIOWrapper(
825 sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
827 if write_back == WriteBack.YES:
829 elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
830 now = datetime.utcnow()
831 src_name = f"STDIN\t{then} +0000"
832 dst_name = f"STDOUT\t{now} +0000"
833 d = diff(src, dst, src_name, dst_name)
834 if write_back == WriteBack.COLOR_DIFF:
836 f = wrap_stream_for_windows(f)
841 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
842 """Reformat contents of a file and return new contents.
844 If `fast` is False, additionally confirm that the reformatted code is
845 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
846 `mode` is passed to :func:`format_str`.
# Whitespace-only input is treated as "nothing to do".
848 if src_contents.strip() == "":
851 dst_contents = format_str(src_contents, mode=mode)
852 if src_contents == dst_contents:
# Safety net (skipped with --fast): the result must be AST-equivalent to the
# input and formatting it again must be a no-op.
856 assert_equivalent(src_contents, dst_contents)
857 assert_stable(src_contents, dst_contents, mode=mode)
861 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
862 """Reformat a string and return new contents.
864 `mode` determines formatting options, such as how many characters per line are
868 >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
869 def f(arg: str = "") -> None:
872 A more complex example:
874 ... black.format_str(
875 ... "def f(arg:str='')->None: hey",
877 ... target_versions={black.TargetVersion.PY36},
879 ... string_normalization=False,
890 src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
892 future_imports = get_future_imports(src_node)
# Use explicit target versions if given; otherwise detect from the AST.
893 if mode.target_versions:
894 versions = mode.target_versions
896 versions = detect_target_versions(src_node)
897 normalize_fmt_off(src_node)
898 lines = LineGenerator(
899 remove_u_prefix="unicode_literals" in future_imports
900 or supports_feature(versions, Feature.UNICODE_LITERALS),
902 normalize_strings=mode.string_normalization,
904 elt = EmptyLineTracker(is_pyi=mode.is_pyi)
# Magic trailing commas are only honored when all targets support them.
907 split_line_features = {
909 for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
910 if supports_feature(versions, feature)
912 for current_line in lines.visit(src_node):
# `after` carries over from the previous iteration: blank lines owed after the
# preceding logical line are emitted before this one.
913 dst_contents.append(str(empty_line) * after)
914 before, after = elt.maybe_empty_lines(current_line)
915 dst_contents.append(str(empty_line) * before)
916 for line in transform_line(
918 line_length=mode.line_length,
919 normalize_strings=mode.string_normalization,
920 features=split_line_features,
922 dst_contents.append(str(line))
923 return "".join(dst_contents)
926 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
927 """Return a tuple of (decoded_contents, encoding, newline).
929 `newline` is either CRLF or LF but `decoded_contents` is decoded with
930 universal newlines (i.e. only contains LF).
932 srcbuf = io.BytesIO(src)
# Honor PEP 263 coding declarations / BOM when decoding.
933 encoding, lines = tokenize.detect_encoding(srcbuf.readline)
935 return "", encoding, "\n"
# The first line's ending determines the newline convention for the whole file.
937 newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
939 with io.TextIOWrapper(srcbuf, encoding) as tiow:
940 return tiow.read(), encoding, newline
# Return the blib2to3 grammars to try, most restrictive first, for the given
# target versions.
943 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
944 if not target_versions:
945 # No target_version specified, so try all grammars.
948 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
950 pygram.python_grammar_no_print_statement_no_exec_statement,
951 # Python 2.7 with future print_function import
952 pygram.python_grammar_no_print_statement,
954 pygram.python_grammar,
957 if all(version.is_python2() for version in target_versions):
958 # Python 2-only code, so try Python 2 grammars.
960 # Python 2.7 with future print_function import
961 pygram.python_grammar_no_print_statement,
963 pygram.python_grammar,
966 # Python 3-compatible code, so only try Python 3 grammar.
968 # If we have to parse both, try to parse async as a keyword first
969 if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
972 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
974 if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
976 grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
977 # At least one of the above branches must have been taken, because every Python
978 # version has exactly one of the two 'ASYNC_*' flags
982 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
983 """Given a string with source, return the lib2to3 Node."""
# The parser requires a trailing newline.
984 if src_txt[-1:] != "\n":
# Try each candidate grammar in turn; remember the last failure for reporting.
987 for grammar in get_grammars(set(target_versions)):
988 drv = driver.Driver(grammar, pytree.convert)
990 result = drv.parse_string(src_txt, True)
993 except ParseError as pe:
994 lineno, column = pe.context[1]
995 lines = src_txt.splitlines()
997 faulty_line = lines[lineno - 1]
999 faulty_line = "<line number missing in source>"
1000 exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
# A bare Leaf (e.g. a lone comment) is wrapped so callers always get a Node.
1004 if isinstance(result, Leaf):
1005 result = Node(syms.file_input, [result])
# NOTE(review): the body of this function is missing from this excerpt.
1009 def lib2to3_unparse(node: Node) -> str:
1010 """Given a lib2to3 node, return its string representation."""
1015 class Visitor(Generic[T]):
1016 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
1018 def visit(self, node: LN) -> Iterator[T]:
1019 """Main method to visit `node` and its children.
1021 It tries to find a `visit_*()` method for the given `node.type`, like
1022 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
1023 If no dedicated `visit_*()` method is found, chooses `visit_default()`
1026 Then yields objects of type `T` from the selected visitor.
# Leaf types resolve via the token table; Node types via type_repr().
1029 name = token.tok_name[node.type]
1031 name = str(type_repr(node.type))
1032 # We explicitly branch on whether a visitor exists (instead of
1033 # using self.visit_default as the default arg to getattr) in order
1034 # to save needing to create a bound method object and so mypyc can
1035 # generate a native call to visit_default.
1036 visitf = getattr(self, f"visit_{name}", None)
1038 yield from visitf(node)
1040 yield from self.visit_default(node)
1042 def visit_default(self, node: LN) -> Iterator[T]:
1043 """Default `visit_*()` implementation. Recurses to children of `node`."""
1044 if isinstance(node, Node):
1045 for child in node.children:
1046 yield from self.visit(child)
# Visitor that pretty-prints the lib2to3 tree with color-coded indentation.
1050 class DebugVisitor(Visitor[T]):
1053 def visit_default(self, node: LN) -> Iterator[T]:
1054 indent = " " * (2 * self.tree_depth)
1055 if isinstance(node, Node):
1056 _type = type_repr(node.type)
1057 out(f"{indent}{_type}", fg="yellow")
1058 self.tree_depth += 1
1059 for child in node.children:
1060 yield from self.visit(child)
1062 self.tree_depth -= 1
# Closing tag mirrors the opening line, XML-style.
1063 out(f"{indent}/{_type}", fg="yellow", bold=False)
1065 _type = token.tok_name.get(node.type, str(node.type))
1066 out(f"{indent}{_type}", fg="blue", nl=False)
1068 # We don't have to handle prefixes for `Node` objects since
1069 # that delegates to the first child anyway.
1070 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
1071 out(f" {node.value!r}", fg="blue", bold=False)
1074 def show(cls, code: Union[str, Leaf, Node]) -> None:
1075 """Pretty-print the lib2to3 AST of a given string of `code`.
1077 Convenience method for debugging.
1079 v: DebugVisitor[None] = DebugVisitor()
1080 if isinstance(code, str):
1081 code = lib2to3_parse(code)
# Token/symbol classification sets and split priorities used by the line
# splitter. NOTE(review): many set members are missing from this excerpt.
1085 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
1086 STATEMENT: Final = {
# STANDALONE_COMMENT is a synthetic token type invented by Black; register a
# printable name for it in the token table.
1096 STANDALONE_COMMENT: Final = 153
1097 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
1098 LOGIC_OPERATORS: Final = {"and", "or"}
1099 COMPARATORS: Final = {
1107 MATH_OPERATORS: Final = {
1123 STARS: Final = {token.STAR, token.DOUBLESTAR}
1124 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
1125 VARARGS_PARENTS: Final = {
1127 syms.argument, # double star in arglist
1128 syms.trailer, # single argument to call
1130 syms.varargslist, # lambdas
1132 UNPACKING_PARENTS: Final = {
1133 syms.atom, # single element of a list or set literal
1137 syms.testlist_star_expr,
1139 TEST_DESCENDANTS: Final = {
1156 ASSIGNMENTS: Final = {
# Delimiter split priorities: higher values are preferred split points.
1172 COMPREHENSION_PRIORITY: Final = 20
1173 COMMA_PRIORITY: Final = 18
1174 TERNARY_PRIORITY: Final = 16
1175 LOGIC_PRIORITY: Final = 14
1176 STRING_PRIORITY: Final = 12
1177 COMPARATOR_PRIORITY: Final = 10
1178 MATH_PRIORITIES: Final = {
1180 token.CIRCUMFLEX: 8,
1183 token.RIGHTSHIFT: 6,
1188 token.DOUBLESLASH: 4,
1192 token.DOUBLESTAR: 2,
1194 DOT_PRIORITY: Final = 1
1198 class BracketTracker:
1199 """Keeps track of brackets on a line."""
# Open brackets by (depth, matching-closing-token); popped when the closer arrives.
1202 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
# Candidate split points on this line: leaf id() -> split priority.
1203 delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
1204 previous: Optional[Leaf] = None
1205 _for_loop_depths: List[int] = field(default_factory=list)
1206 _lambda_argument_depths: List[int] = field(default_factory=list)
1208 def mark(self, leaf: Leaf) -> None:
1209 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1211 All leaves receive an int `bracket_depth` field that stores how deep
1212 within brackets a given leaf is. 0 means there are no enclosing brackets
1213 that started on this line.
1215 If a leaf is itself a closing bracket, it receives an `opening_bracket`
1216 field that it forms a pair with. This is a one-directional link to
1217 avoid reference cycles.
1219 If a leaf is a delimiter (a token on which Black can split the line if
1220 needed) and it's on depth 0, its `id()` is stored in the tracker's
1223 if leaf.type == token.COMMENT:
# Leaving a for-header or lambda-argument region restores the real depth first.
1226 self.maybe_decrement_after_for_loop_variable(leaf)
1227 self.maybe_decrement_after_lambda_arguments(leaf)
1228 if leaf.type in CLOSING_BRACKETS:
1230 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1231 leaf.opening_bracket = opening_bracket
1232 leaf.bracket_depth = self.depth
1234 delim = is_split_before_delimiter(leaf, self.previous)
1235 if delim and self.previous is not None:
# "Split before" delimiters are recorded against the preceding leaf.
1236 self.delimiters[id(self.previous)] = delim
1238 delim = is_split_after_delimiter(leaf, self.previous)
1240 self.delimiters[id(leaf)] = delim
1241 if leaf.type in OPENING_BRACKETS:
1242 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1244 self.previous = leaf
1245 self.maybe_increment_lambda_arguments(leaf)
1246 self.maybe_increment_for_loop_variable(leaf)
1248 def any_open_brackets(self) -> bool:
1249 """Return True if there is a yet-unmatched open bracket on the line."""
1250 return bool(self.bracket_match)
1252 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1253 """Return the highest priority of a delimiter found on the line.
1255 Values are consistent with what `is_split_*_delimiter()` return.
1256 Raises ValueError on no delimiters.
1258 return max(v for k, v in self.delimiters.items() if k not in exclude)
1260 def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1261 """Return the number of delimiters with the given `priority`.
1263 If no `priority` is passed, defaults to max priority on the line.
1265 if not self.delimiters:
1268 priority = priority or self.max_delimiter_priority()
1269 return sum(1 for p in self.delimiters.values() if p == priority)
1271 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1272 """In a for loop, or comprehension, the variables are often unpacks.
1274 To avoid splitting on the comma in this situation, increase the depth of
1275 tokens between `for` and `in`.
1277 if leaf.type == token.NAME and leaf.value == "for":
1279 self._for_loop_depths.append(self.depth)
1284 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1285 """See `maybe_increment_for_loop_variable` above for explanation."""
1287 self._for_loop_depths
1288 and self._for_loop_depths[-1] == self.depth
1289 and leaf.type == token.NAME
1290 and leaf.value == "in"
1293 self._for_loop_depths.pop()
1298 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1299 """In a lambda expression, there might be more than one argument.
1301 To avoid splitting on the comma in this situation, increase the depth of
1302 tokens between `lambda` and `:`.
1304 if leaf.type == token.NAME and leaf.value == "lambda":
1306 self._lambda_argument_depths.append(self.depth)
1311 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1312 """See `maybe_increment_lambda_arguments` above for explanation."""
1314 self._lambda_argument_depths
1315 and self._lambda_argument_depths[-1] == self.depth
1316 and leaf.type == token.COLON
1319 self._lambda_argument_depths.pop()
1324 def get_open_lsqb(self) -> Optional[Leaf]:
1325 """Return the most recent opening square bracket (if any)."""
# depth - 1 because the open bracket itself incremented the depth.
1326 return self.bracket_match.get((self.depth - 1, token.RSQB))
# Docstring and fields of the Line dataclass (its class header is outside this view).
1331 """Holds leaves and comments. Can be printed with `str(line)`."""
1334 leaves: List[Leaf] = field(default_factory=list)
1335 # keys ordered like `leaves`
1336 comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
1337 bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
# True when this line lives inside a bracketed expression split across lines.
1338 inside_brackets: bool = False
# True forces splitting on every delimiter (magic trailing comma behavior).
1339 should_explode: bool = False
1341 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1342 """Add a new `leaf` to the end of the line.
1344 Unless `preformatted` is True, the `leaf` will receive a new consistent
1345 whitespace prefix and metadata applied by :class:`BracketTracker`.
1346 Trailing commas are maybe removed, unpacked for loop variables are
1347 demoted from being delimiters.
1349 Inline comments are put aside.
1351 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
# `class C():` -> drop the useless empty parentheses before the colon.
1355 if token.COLON == leaf.type and self.is_class_paren_empty:
1356 del self.leaves[-2:]
1357 if self.leaves and not preformatted:
1358 # Note: at this point leaf.prefix should be empty except for
1359 # imports, for which we only preserve newlines.
1360 leaf.prefix += whitespace(
1361 leaf, complex_subscript=self.is_complex_subscript(leaf)
1363 if self.inside_brackets or not preformatted:
1364 self.bracket_tracker.mark(leaf)
1365 self.maybe_remove_trailing_comma(leaf)
# Inline comments are stored separately; everything else joins the line.
1366 if not self.append_comment(leaf):
1367 self.leaves.append(leaf)
def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
    """Like :func:`append()` but disallow invalid standalone comment structure.

    Raises ValueError when any `leaf` is appended after a standalone comment
    or when a standalone comment is not the first leaf on the line.
    """
    if self.bracket_tracker.depth == 0:
        if self.is_comment:
            raise ValueError("cannot append to standalone comments")

        if self.leaves and leaf.type == STANDALONE_COMMENT:
            raise ValueError(
                "cannot append standalone comments to a populated line"
            )

    self.append(leaf, preformatted=preformatted)
@property
def is_comment(self) -> bool:
    """Is this line a standalone comment?"""
    only_leaf = self.leaves[0] if len(self.leaves) == 1 else None
    return only_leaf is not None and only_leaf.type == STANDALONE_COMMENT
@property
def is_decorator(self) -> bool:
    """Is this line a decorator?"""
    if not self:
        return False
    return self.leaves[0].type == token.AT
@property
def is_import(self) -> bool:
    """Is this an import line?"""
    if not self:
        return False
    # Delegates to the module-level `is_import()` leaf predicate.
    return is_import(self.leaves[0])
@property
def is_class(self) -> bool:
    """Is this line a class definition?"""
    if not self:
        return False
    first = self.leaves[0]
    return first.type == token.NAME and first.value == "class"
@property
def is_stub_class(self) -> bool:
    """Is this line a class definition with a body consisting only of "..."?"""
    # A stub class renders as `class C: ...` — three trailing DOT leaves.
    ellipsis_leaves = [Leaf(token.DOT, ".") for _ in range(3)]
    return self.is_class and self.leaves[-3:] == ellipsis_leaves
@property
def is_collection_with_optional_trailing_comma(self) -> bool:
    """Is this line a collection literal with a trailing comma that's optional?

    Note that the trailing comma in a 1-tuple is not optional.
    """
    # NOTE(review): several short lines were elided in the provided listing
    # and were reconstructed here — confirm against the original file.
    if not self.leaves or len(self.leaves) < 4:
        return False

    # Look for and address a trailing colon.
    if self.leaves[-1].type == token.COLON:
        closer = self.leaves[-2]
        close_index = -2
    else:
        closer = self.leaves[-1]
        close_index = -1
    if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
        return False

    if closer.type == token.RPAR:
        # Tuples require an extra check, because if there's only
        # one element in the tuple removing the comma unmakes the
        # tuple.
        #
        # We also check for parens before looking for the trailing
        # comma because in some cases (eg assigning a dict
        # literal) the literal gets wrapped in temporary parens
        # during parsing. This case is covered by the
        # collections.py test data.
        opener = closer.opening_bracket
        for _open_index, leaf in enumerate(self.leaves):
            if leaf is opener:
                break

        else:
            # Couldn't find the matching opening paren, play it safe.
            return False

        commas = 0
        comma_depth = self.leaves[close_index - 1].bracket_depth
        for leaf in self.leaves[_open_index + 1 : close_index]:
            if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
                commas += 1
        if commas > 1:
            # We haven't looked yet for the trailing comma because
            # we might also have caught noop parens.
            return self.leaves[close_index - 1].type == token.COMMA

        elif commas == 1:
            return False  # it's either a one-tuple or didn't have a trailing comma

        if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
            close_index -= 1
            closer = self.leaves[close_index]
            if closer.type == token.RPAR:
                # TODO: this is a gut feeling. Will we ever see this?
                return False

    if self.leaves[close_index - 1].type != token.COMMA:
        return False

    return True
@property
def is_def(self) -> bool:
    """Is this a function definition? (Also returns True for async defs.)"""
    try:
        first_leaf = self.leaves[0]
    except IndexError:
        return False

    try:
        second_leaf: Optional[Leaf] = self.leaves[1]
    except IndexError:
        second_leaf = None
    return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
        first_leaf.type == token.ASYNC
        and second_leaf is not None
        and second_leaf.type == token.NAME
        and second_leaf.value == "def"
    )
@property
def is_class_paren_empty(self) -> bool:
    """Is this a class with no base classes but using parentheses?

    Those are unnecessary and should be removed.
    """
    # Exactly: NAME("class") NAME LPAR RPAR — i.e. `class C()`.
    return (
        bool(self)
        and len(self.leaves) == 4
        and self.is_class
        and self.leaves[2].type == token.LPAR
        and self.leaves[2].value == "("
        and self.leaves[3].type == token.RPAR
        and self.leaves[3].value == ")"
    )
@property
def is_triple_quoted_string(self) -> bool:
    """Is the line a triple quoted string?"""
    if not self:
        return False
    first = self.leaves[0]
    return first.type == token.STRING and first.value.startswith(('"""', "'''"))
def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
    """If so, needs to be split before emitting."""
    return any(
        leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit
        for leaf in self.leaves
    )
def contains_uncollapsable_type_comments(self) -> bool:
    """True if a type comment on this line would break if the line collapsed.

    NOTE(review): guard lines elided in the provided listing were
    reconstructed here — confirm against the original file.
    """
    ignored_ids = set()
    try:
        last_leaf = self.leaves[-1]
        ignored_ids.add(id(last_leaf))
        if last_leaf.type == token.COMMA or (
            last_leaf.type == token.RPAR and not last_leaf.value
        ):
            # When trailing commas or optional parens are inserted by Black for
            # consistency, comments after the previous last element are not moved
            # (they don't have to, rendering will still be correct). So we ignore
            # trailing commas and invisible.
            last_leaf = self.leaves[-2]
            ignored_ids.add(id(last_leaf))
    except IndexError:
        return False

    # A type comment is uncollapsable if it is attached to a leaf
    # that isn't at the end of the line (since that could cause it
    # to get associated to a different argument) or if there are
    # comments before it (since that could cause it to get hidden
    # behind a comment).
    comment_seen = False
    for leaf_id, comments in self.comments.items():
        for comment in comments:
            if is_type_comment(comment):
                if comment_seen or (
                    not is_type_comment(comment, " ignore")
                    and leaf_id not in ignored_ids
                ):
                    return True

            comment_seen = True

    return False
def contains_unsplittable_type_ignore(self) -> bool:
    """True when a trailing `# type: ignore` pins this line to one physical line."""
    if not self.leaves:
        return False

    # If a 'type: ignore' is attached to the end of a line, we
    # can't split the line, because we can't know which of the
    # subexpressions the ignore was meant to apply to.
    #
    # We only want this to apply to actual physical lines from the
    # original source, though: we don't want the presence of a
    # 'type: ignore' at the end of a multiline expression to
    # justify pushing it all onto one line. Thus we
    # (unfortunately) need to check the actual source lines and
    # only report an unsplittable 'type: ignore' if this line was
    # one line in the original code.

    # Grab the first and last line numbers, skipping generated leaves
    first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
    last_line = next(
        (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
    )

    if first_line == last_line:
        # We look at the last two leaves since a comma or an
        # invisible paren could have been added at the end of the
        # line.
        for node in self.leaves[-2:]:
            for comment in self.comments.get(id(node), []):
                if is_type_comment(comment, " ignore"):
                    return True

    return False
def contains_multiline_strings(self) -> bool:
    """True if any leaf on this line is a multiline string."""
    for leaf in self.leaves:
        if is_multiline_string(leaf):
            return True
    return False
def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
    """Remove trailing comma if there is one and it's safe.

    NOTE(review): guard/return lines elided in the provided listing were
    reconstructed here — confirm against the original file.
    """
    if not (self.leaves and self.leaves[-1].type == token.COMMA):
        return False

    # We remove trailing commas only in the case of importing a
    # single name from a module.
    if not (
        self.leaves
        and self.is_import
        and len(self.leaves) > 4
        and self.leaves[-1].type == token.COMMA
        and closing.type in CLOSING_BRACKETS
        and self.leaves[-4].type == token.NAME
        and (
            # regular `from foo import bar,`
            self.leaves[-4].value == "import"
            # `from foo import (bar as baz,)
            or (
                len(self.leaves) > 6
                and self.leaves[-6].value == "import"
                and self.leaves[-3].value == "as"
            )
            # `from foo import bar as baz,`
            or (
                len(self.leaves) > 5
                and self.leaves[-5].value == "import"
                and self.leaves[-3].value == "as"
            )
        )
        and closing.type == token.RPAR
    ):
        return False

    self.remove_trailing_comma()
    return True
def append_comment(self, comment: Leaf) -> bool:
    """Add an inline or standalone comment to the line.

    Returns True when the comment was attached to an existing leaf,
    False when the caller should treat it as a regular leaf instead.
    """
    if (
        comment.type == STANDALONE_COMMENT
        and self.bracket_tracker.any_open_brackets()
    ):
        comment.prefix = ""
        return False

    if comment.type != token.COMMENT:
        return False

    if not self.leaves:
        comment.type = STANDALONE_COMMENT
        comment.prefix = ""
        return False

    last_leaf = self.leaves[-1]
    if (
        last_leaf.type == token.RPAR
        and not last_leaf.value
        and last_leaf.parent
        and len(list(last_leaf.parent.leaves())) <= 3
        and not is_type_comment(comment)
    ):
        # Comments on an optional parens wrapping a single leaf should belong to
        # the wrapped node except if it's a type comment. Pinning the comment like
        # this avoids unstable formatting caused by comment migration.
        if len(self.leaves) < 2:
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        last_leaf = self.leaves[-2]
    self.comments.setdefault(id(last_leaf), []).append(comment)
    return True
def comments_after(self, leaf: Leaf) -> List[Leaf]:
    """Generate comments that should appear directly after `leaf`."""
    key = id(leaf)
    return self.comments.get(key, [])
def remove_trailing_comma(self) -> None:
    """Remove the trailing comma and moves the comments attached to it."""
    trailing_comma = self.leaves.pop()
    # Re-home any comments that were attached to the popped comma onto the
    # new last leaf so they are not lost.
    trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
    new_last = self.leaves[-1]
    self.comments.setdefault(id(new_last), []).extend(trailing_comma_comments)
def is_complex_subscript(self, leaf: Leaf) -> bool:
    """Return True iff `leaf` is part of a slice with non-trivial exprs."""
    open_lsqb = self.bracket_tracker.get_open_lsqb()
    if open_lsqb is None:
        return False

    subscript_start = open_lsqb.next_sibling
    if isinstance(subscript_start, Node):
        if subscript_start.type == syms.listmaker:
            # `[a, b]` is a list literal, not a subscript.
            return False

        if subscript_start.type == syms.subscriptlist:
            # Narrow to the subscript that actually contains `leaf`.
            subscript_start = child_towards(subscript_start, leaf)
    return subscript_start is not None and any(
        n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
    )
def clone(self) -> "Line":
    """Return an empty Line carrying over this line's positional state."""
    return Line(
        depth=self.depth,
        inside_brackets=self.inside_brackets,
        should_explode=self.should_explode,
    )
def __str__(self) -> str:
    """Render the line."""
    if not self:
        return "\n"

    # NOTE(review): listing collapsed whitespace; upstream uses a 4-space
    # indent unit here — confirm against the original file.
    indent = "    " * self.depth
    leaves = iter(self.leaves)
    first = next(leaves)
    res = f"{first.prefix}{indent}{first.value}"
    for leaf in leaves:
        res += str(leaf)
    for comment in itertools.chain.from_iterable(self.comments.values()):
        res += str(comment)
    return res + "\n"
def __bool__(self) -> bool:
    """Return True if the line has leaves or comments."""
    return bool(self.leaves) or bool(self.comments)
@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet. It assumes
    the prefix of the first leaf consists of optional newlines. Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    previous_line: Optional[Line] = None
    previous_after: int = 0
    previous_defs: List[int] = field(default_factory=list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).
        """
        before, after = self._maybe_empty_lines(current_line)
        before = (
            # Black should not insert empty lines at the beginning
            # of the file or module.
            0
            if self.previous_line is None
            else before - self.previous_after
        )
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        # NOTE(review): short guard lines elided in the provided listing were
        # reconstructed here — confirm against the original file.
        max_allowed = 1
        if current_line.depth == 0:
            max_allowed = 1 if self.is_pyi else 2
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = first_leaf.prefix.count("\n")
            before = min(before, max_allowed)
            first_leaf.prefix = ""
        else:
            before = 0
        depth = current_line.depth
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2
        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        if (
            self.previous_line
            and self.previous_line.is_import
            and not current_line.is_import
            and depth == self.previous_line.depth
        ):
            return (before or 1), 0

        if (
            self.previous_line
            and self.previous_line.is_class
            and current_line.is_triple_quoted_string
        ):
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)
        if self.previous_line is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if self.previous_line.is_decorator:
            return 0, 0

        if self.previous_line.depth < current_line.depth and (
            self.previous_line.is_class or self.previous_line.is_def
        ):
            return 0, 0

        if (
            self.previous_line.is_comment
            and self.previous_line.depth == current_line.depth
            and before == 0
        ):
            return 0, 0

        if self.is_pyi:
            if self.previous_line.depth > current_line.depth:
                newlines = 1
            elif current_line.is_class or self.previous_line.is_class:
                if current_line.is_stub_class and self.previous_line.is_stub_class:
                    # No blank line between classes with an empty body
                    newlines = 0
                else:
                    newlines = 1
            elif current_line.is_def and not self.previous_line.is_def:
                # Blank line between a block of functions and a block of non-functions
                newlines = 1
            else:
                newlines = 0
        else:
            newlines = 2
        if current_line.depth and newlines:
            newlines -= 1
        return newlines, newlines
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects. Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.

    NOTE(review): a number of short lines were elided in the provided listing
    and were reconstructed here — confirm against the original file.
    """

    is_pyi: bool = False
    normalize_strings: bool = True
    current_line: Line = field(default_factory=Line)
    remove_u_prefix: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, only emit if it makes sense.
        If the line is too long, split it first and then generate.

        If any lines were generated, set up a new current_line.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments. At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level. Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This methods puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.is_pyi and is_stub_suite(node):
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        # Check if it's a docstring
        if prev_siblings_are(
            leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
        ) and is_multiline_string(leaf):
            prefix = "    " * self.current_line.depth
            docstring = fix_docstring(leaf.value[3:-3], prefix)
            leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:]
            normalize_string_quotes(leaf)

        yield from self.visit_default(leaf)

    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        v = self.visit_stmt
        Ø: Final = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
# Node types that form a tuple without explicit parentheses.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Mapping of each opening bracket token to its matching closing token.
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Tokens that never get a space before them.
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    NOTE(review): many short `return` lines were elided in the provided
    listing and were reconstructed here — confirm against the original file.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    prev = leaf.prev_sibling
    if not prev:
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument. So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in VARARGS_SPECIALS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument. So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t in {token.NAME, token.NUMBER, token.STRING}:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    elif p.type == syms.sliceop:
        return NO

    return SPACE
def preceding_leaf(node: "Optional[LN]") -> "Optional[Leaf]":
    """Return the first leaf that precedes `node`, if any."""
    while node:
        res = node.prev_sibling
        if res:
            if isinstance(res, Leaf):
                return res

            try:
                # Last leaf of the preceding sibling subtree.
                return list(res.leaves())[-1]

            except IndexError:
                return None

        node = node.parent
    return None
def prev_siblings_are(
    node: "Optional[LN]", tokens: "List[Optional[NodeType]]"
) -> bool:
    """Return if the `node` and its previous siblings match types against the provided
    list of tokens; the provided `node`has its type matched against the last element in
    the list. `None` can be used as the first element to declare that the start of the
    list is anchored at the start of its parent's children."""
    if not tokens:
        return True

    if tokens[-1] is None:
        return node is None

    if not node:
        return False

    if node.type != tokens[-1]:
        return False

    # Recurse leftwards through the siblings with the remaining tokens.
    return prev_siblings_are(node.prev_sibling, tokens[:-1])
def child_towards(ancestor: "Node", descendant: "LN") -> "Optional[LN]":
    """Return the child of `ancestor` that contains `descendant`."""
    # Walk up from the descendant until the parent is the ancestor itself.
    node: "Optional[LN]" = descendant
    while node and node.parent != ancestor:
        node = node.parent
    return node
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or one of its ancestors that is the topmost container of it.

    By "container" we mean a node where `leaf` is the very first child.
    """
    same_prefix = leaf.prefix
    container: LN = leaf
    while container:
        parent = container.parent
        if parent is None:
            break

        if parent.children[0].prefix != same_prefix:
            # `leaf` is no longer the first child up here; stop.
            break

        if parent.type == syms.file_input:
            break

        if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
            break

        container = parent
    return container
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.
    """
    # Only the comma forces a break *after* itself.
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    Higher numbers are higher priority.

    NOTE(review): short `return 0` / header lines elided in the provided
    listing were reconstructed here — confirm against the original file.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    if leaf.type not in {token.NAME, token.ASYNC}:
        return 0

    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
        or leaf.type == token.ASYNC
    ):
        if (
            not isinstance(leaf.prev_sibling, Leaf)
            or leaf.prev_sibling.value != "async"
        ):
            return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
# Comment values that toggle formatting off/on (yapf spellings honored too).
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix. This happens
    in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments. We simply don't know what the correct parent should be.

    No matter though, we can live without this. We really only need to
    differentiate between inline and standalone comments. The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves. Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    is_endmarker = leaf.type == token.ENDMARKER
    for pc in list_comments(leaf.prefix, is_endmarker=is_endmarker):
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@dataclass(frozen=True)
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return []

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(prefix.split("\n")):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
    space between the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.
    """
    content = content.rstrip()
    if not content:
        # Empty or whitespace-only content normalizes to a bare hash sign.
        return "#"

    if content[0] == "#":
        content = content[1:]
    if content and content[0] not in " !:#'%":
        # Insert the single space separating the hash sign from the text,
        # except for the special comment forms listed in the docstring.
        content = " " + content
    return "#" + content
2582 normalize_strings: bool,
2583 features: Collection[Feature] = (),
2584 ) -> Iterator[Line]:
2585 """Transform a `line`, potentially splitting it into many lines.
2587 They should fit in the allotted `line_length` but might not be able to.
2589 `features` are syntactical features that may be used in the output.
2595 line_str = line_to_string(line)
2597 def init_st(ST: Type[StringTransformer]) -> StringTransformer:
2598 """Initialize StringTransformer"""
2599 return ST(line_length, normalize_strings)
2601 string_merge = init_st(StringMerger)
2602 string_paren_strip = init_st(StringParenStripper)
2603 string_split = init_st(StringSplitter)
2604 string_paren_wrap = init_st(StringParenWrapper)
2606 transformers: List[Transformer]
2608 not line.contains_uncollapsable_type_comments()
2609 and not line.should_explode
2610 and not line.is_collection_with_optional_trailing_comma
2612 is_line_short_enough(line, line_length=line_length, line_str=line_str)
2613 or line.contains_unsplittable_type_ignore()
2615 and not (line.contains_standalone_comments() and line.inside_brackets)
2617 # Only apply basic string preprocessing, since lines shouldn't be split here.
2618 transformers = [string_merge, string_paren_strip]
2620 transformers = [left_hand_split]
2623 def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2624 for omit in generate_trailers_to_omit(line, line_length):
2625 lines = list(right_hand_split(line, line_length, features, omit=omit))
2626 if is_line_short_enough(lines[0], line_length=line_length):
2630 # All splits failed, best effort split with no omits.
2631 # This mostly happens to multiline strings that are by definition
2632 # reported as not fitting a single line.
2633 # line_length=1 here was historically a bug that somehow became a feature.
2634 # See #762 and #781 for the full story.
2635 yield from right_hand_split(line, line_length=1, features=features)
2637 if line.inside_brackets:
2642 standalone_comment_split,
2656 for transform in transformers:
2657 # We are accumulating lines in `result` because we might want to abort
2658 # mission and return the original line in the end, or attempt a different
2660 result: List[Line] = []
2662 for l in transform(line, features):
2663 if str(l).strip("\n") == line_str:
2664 raise CannotTransform(
2665 "Line transformer returned an unchanged result"
2671 line_length=line_length,
2672 normalize_strings=normalize_strings,
2676 except CannotTransform:
2686 @dataclass # type: ignore
2687 class StringTransformer(ABC):
2689 An implementation of the Transformer protocol that relies on its
2690 subclasses overriding the template methods `do_match(...)` and
2691 `do_transform(...)`.
2693 This Transformer works exclusively on strings (for example, by merging
2696 The following sections can be found among the docstrings of each concrete
2697 StringTransformer subclass.
2700 Which requirements must be met of the given Line for this
2701 StringTransformer to be applied?
2704 If the given Line meets all of the above requirments, which string
2705 transformations can you expect to be applied to it by this
2709 What contractual agreements does this StringTransformer have with other
2710 StringTransfomers? Such collaborations should be eliminated/minimized
2711 as much as possible.
2715 normalize_strings: bool
2718 def do_match(self, line: Line) -> TMatchResult:
2721 * Ok(string_idx) such that `line.leaves[string_idx]` is our target
2722 string, if a match was able to be made.
2724 * Err(CannotTransform), if a match was not able to be made.
2728 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2731 * Ok(new_line) where new_line is the new transformed line.
2733 * Err(CannotTransform) if the transformation failed for some reason. The
2734 `do_match(...)` template method should usually be used to reject
2735 the form of the given Line, but in some cases it is difficult to
2736 know whether or not a Line meets the StringTransformer's
2737 requirements until the transformation is already midway.
2740 This method should NOT mutate @line directly, but it MAY mutate the
2741 Line's underlying Node structure. (WARNING: If the underlying Node
2742 structure IS altered, then this method should NOT be allowed to
2743 yield an CannotTransform after that point.)
2746 def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
2748 StringTransformer instances have a call signature that mirrors that of
2749 the Transformer type.
2752 CannotTransform(...) if the concrete StringTransformer class is unable
2755 # Optimization to avoid calling `self.do_match(...)` when the line does
2756 # not contain any string.
2757 if not any(leaf.type == token.STRING for leaf in line.leaves):
2758 raise CannotTransform("There are no strings in this line.")
2760 match_result = self.do_match(line)
2762 if isinstance(match_result, Err):
2763 cant_transform = match_result.err()
2764 raise CannotTransform(
2765 f"The string transformer {self.__class__.__name__} does not recognize"
2766 " this line as one that it can transform."
2767 ) from cant_transform
2769 string_idx = match_result.ok()
2771 for line_result in self.do_transform(line, string_idx):
2772 if isinstance(line_result, Err):
2773 cant_transform = line_result.err()
2774 raise CannotTransform(
2775 "StringTransformer failed while attempting to transform string."
2776 ) from cant_transform
2777 line = line_result.ok()
@dataclass(frozen=True)
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Examples:
        Consider the following string:
        ```
        f"Hi {user.name}."
        " This is a"
        " helpful message."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    has_prefix: bool  # True iff this substring carries a string prefix (e.g. 'f')
    break_idx: int  # index into the merged string value at which to split
class CustomSplitMapMixin:
    """
    This mixin class is used to map merged strings to a sequence of
    CustomSplits, which will then be used to re-split the strings iff none of
    the resultant substrings go over the configured max line length.
    """

    _Key = Tuple[StringID, str]
    # NOTE: defaultdict(tuple) means a lookup for an unmapped key yields ().
    _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)

    @staticmethod
    def _get_key(string: str) -> "CustomSplitMapMixin._Key":
        """
        Returns:
            A unique identifier that is used internally to map @string to a
            group of custom splits.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Custom Split Map Setter Method

        Side Effects:
            Adds a mapping from @string to the custom splits @custom_splits.
        """
        key = self._get_key(string)
        self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> List[CustomSplit]:
        """Custom Split Map Getter Method

        Returns:
            * A list of the custom splits that are mapped to @string, if any
            exist.
                OR
            * [], otherwise.

        Side Effects:
            Deletes the mapping between @string and its associated custom
            splits (which are returned to the caller).
        """
        key = self._get_key(string)

        custom_splits = self._CUSTOM_SPLIT_MAP[key]
        del self._CUSTOM_SPLIT_MAP[key]

        return list(custom_splits)

    def has_custom_splits(self, string: str) -> bool:
        """
        Returns:
            True iff @string is associated with a set of custom splits.
        """
        key = self._get_key(string)
        return key in self._CUSTOM_SPLIT_MAP
2866 class StringMerger(CustomSplitMapMixin, StringTransformer):
2867 """StringTransformer that merges strings together.
2870 (A) The line contains adjacent strings such that at most one substring
2871 has inline comments AND none of those inline comments are pragmas AND
2872 the set of all substring prefixes is either of length 1 or equal to
2873 {"", "f"} AND none of the substrings are raw strings (i.e. are prefixed
2876 (B) The line contains a string which uses line continuation backslashes.
2879 Depending on which of the two requirements above where met, either:
2881 (A) The string group associated with the target string is merged.
2883 (B) All line-continuation backslashes are removed from the target string.
2886 StringMerger provides custom split information to StringSplitter.
2889 def do_match(self, line: Line) -> TMatchResult:
2892 is_valid_index = is_valid_index_factory(LL)
2894 for (i, leaf) in enumerate(LL):
2896 leaf.type == token.STRING
2897 and is_valid_index(i + 1)
2898 and LL[i + 1].type == token.STRING
2902 if leaf.type == token.STRING and "\\\n" in leaf.value:
2905 return TErr("This line has no strings that need merging.")
2907 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2909 rblc_result = self.__remove_backslash_line_continuation_chars(
2910 new_line, string_idx
2912 if isinstance(rblc_result, Ok):
2913 new_line = rblc_result.ok()
2915 msg_result = self.__merge_string_group(new_line, string_idx)
2916 if isinstance(msg_result, Ok):
2917 new_line = msg_result.ok()
2919 if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
2920 msg_cant_transform = msg_result.err()
2921 rblc_cant_transform = rblc_result.err()
2922 cant_transform = CannotTransform(
2923 "StringMerger failed to merge any strings in this line."
2926 # Chain the errors together using `__cause__`.
2927 msg_cant_transform.__cause__ = rblc_cant_transform
2928 cant_transform.__cause__ = msg_cant_transform
2930 yield Err(cant_transform)
2935 def __remove_backslash_line_continuation_chars(
2936 line: Line, string_idx: int
2939 Merge strings that were split across multiple lines using
2940 line-continuation backslashes.
2943 Ok(new_line), if @line contains backslash line-continuation
2946 Err(CannotTransform), otherwise.
2950 string_leaf = LL[string_idx]
2952 string_leaf.type == token.STRING
2953 and "\\\n" in string_leaf.value
2954 and not has_triple_quotes(string_leaf.value)
2957 f"String leaf {string_leaf} does not contain any backslash line"
2958 " continuation characters."
2961 new_line = line.clone()
2962 new_line.comments = line.comments
2963 append_leaves(new_line, line, LL)
2965 new_string_leaf = new_line.leaves[string_idx]
2966 new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
2970 def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
2972 Merges string group (i.e. set of adjacent strings) where the first
2973 string in the group is `line.leaves[string_idx]`.
2976 Ok(new_line), if ALL of the validation checks found in
2977 __validate_msg(...) pass.
2979 Err(CannotTransform), otherwise.
2983 is_valid_index = is_valid_index_factory(LL)
2985 vresult = self.__validate_msg(line, string_idx)
2986 if isinstance(vresult, Err):
2989 # If the string group is wrapped inside an Atom node, we must make sure
2990 # to later replace that Atom with our new (merged) string leaf.
2991 atom_node = LL[string_idx].parent
2993 # We will place BREAK_MARK in between every two substrings that we
2994 # merge. We will then later go through our final result and use the
2995 # various instances of BREAK_MARK we find to add the right values to
2996 # the custom split map.
2997 BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
2999 QUOTE = LL[string_idx].value[-1]
3001 def make_naked(string: str, string_prefix: str) -> str:
3002 """Strip @string (i.e. make it a "naked" string)
3005 * assert_is_leaf_string(@string)
3008 A string that is identical to @string except that
3009 @string_prefix has been stripped, the surrounding QUOTE
3010 characters have been removed, and any remaining QUOTE
3011 characters have been escaped.
3013 assert_is_leaf_string(string)
3015 RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
3016 naked_string = string[len(string_prefix) + 1 : -1]
3017 naked_string = re.sub(
3018 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
3022 # Holds the CustomSplit objects that will later be added to the custom
3026 # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
3029 # Sets the 'prefix' variable. This is the prefix that the final merged
3031 next_str_idx = string_idx
3035 and is_valid_index(next_str_idx)
3036 and LL[next_str_idx].type == token.STRING
3038 prefix = get_string_prefix(LL[next_str_idx].value)
3041 # The next loop merges the string group. The final string will be
3044 # The following convenience variables are used:
3049 # NSS: naked next string
3053 next_str_idx = string_idx
3054 while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
3057 SS = LL[next_str_idx].value
3058 next_prefix = get_string_prefix(SS)
3060 # If this is an f-string group but this substring is not prefixed
3062 if "f" in prefix and "f" not in next_prefix:
3063 # Then we must escape any braces contained in this substring.
3064 SS = re.subf(r"(\{|\})", "{1}{1}", SS)
3066 NSS = make_naked(SS, next_prefix)
3068 has_prefix = bool(next_prefix)
3069 prefix_tracker.append(has_prefix)
3071 S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
3072 NS = make_naked(S, prefix)
3076 S_leaf = Leaf(token.STRING, S)
3077 if self.normalize_strings:
3078 normalize_string_quotes(S_leaf)
3080 # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
3081 temp_string = S_leaf.value[len(prefix) + 1 : -1]
3082 for has_prefix in prefix_tracker:
3083 mark_idx = temp_string.find(BREAK_MARK)
3086 ), "Logic error while filling the custom string breakpoint cache."
3088 temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
3089 breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
3090 custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
3092 string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
3094 if atom_node is not None:
3095 replace_child(atom_node, string_leaf)
3097 # Build the final line ('new_line') that this method will later return.
3098 new_line = line.clone()
3099 for (i, leaf) in enumerate(LL):
3101 new_line.append(string_leaf)
3103 if string_idx <= i < string_idx + num_of_strings:
3104 for comment_leaf in line.comments_after(LL[i]):
3105 new_line.append(comment_leaf, preformatted=True)
3108 append_leaves(new_line, line, [leaf])
3110 self.add_custom_splits(string_leaf.value, custom_splits)
3114 def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
3115 """Validate (M)erge (S)tring (G)roup
3117 Transform-time string validation logic for __merge_string_group(...).
3120 * Ok(None), if ALL validation checks (listed below) pass.
3122 * Err(CannotTransform), if any of the following are true:
3123 - The target string is not in a string group (i.e. it has no
3125 - The string group has more than one inline comment.
3126 - The string group has an inline comment that appears to be a pragma.
3127 - The set of all string prefixes in the string group is of
3128 length greater than one and is not equal to {"", "f"}.
3129 - The string group consists of raw strings.
3131 num_of_inline_string_comments = 0
3132 set_of_prefixes = set()
3134 for leaf in line.leaves[string_idx:]:
3135 if leaf.type != token.STRING:
3136 # If the string group is trailed by a comma, we count the
3137 # comments trailing the comma to be one of the string group's
3139 if leaf.type == token.COMMA and id(leaf) in line.comments:
3140 num_of_inline_string_comments += 1
3143 if has_triple_quotes(leaf.value):
3144 return TErr("StringMerger does NOT merge multiline strings.")
3147 prefix = get_string_prefix(leaf.value)
3149 return TErr("StringMerger does NOT merge raw strings.")
3151 set_of_prefixes.add(prefix)
3153 if id(leaf) in line.comments:
3154 num_of_inline_string_comments += 1
3155 if contains_pragma_comment(line.comments[id(leaf)]):
3156 return TErr("Cannot merge strings which have pragma comments.")
3158 if num_of_strings < 2:
3160 f"Not enough strings to merge (num_of_strings={num_of_strings})."
3163 if num_of_inline_string_comments > 1:
3165 f"Too many inline string comments ({num_of_inline_string_comments})."
3168 if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
3169 return TErr(f"Too many different prefixes ({set_of_prefixes}).")
3174 class StringParenStripper(StringTransformer):
3175 """StringTransformer that strips surrounding parentheses from strings.
3178 The line contains a string which is surrounded by parentheses and:
3179 - The target string is NOT the only argument to a function call).
3180 - The RPAR is NOT followed by an attribute access (i.e. a dot).
3183 The parentheses mentioned in the 'Requirements' section are stripped.
3186 StringParenStripper has its own inherent usefulness, but it is also
3187 relied on to clean up the parentheses created by StringParenWrapper (in
3188 the event that they are no longer needed).
3191 def do_match(self, line: Line) -> TMatchResult:
3194 is_valid_index = is_valid_index_factory(LL)
3196 for (idx, leaf) in enumerate(LL):
3197 # Should be a string...
3198 if leaf.type != token.STRING:
3201 # Should be preceded by a non-empty LPAR...
3203 not is_valid_index(idx - 1)
3204 or LL[idx - 1].type != token.LPAR
3205 or is_empty_lpar(LL[idx - 1])
3209 # That LPAR should NOT be preceded by a function name or a closing
3210 # bracket (which could be a function which returns a function or a
3211 # list/dictionary that contains a function)...
3212 if is_valid_index(idx - 2) and (
3213 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
3219 # Skip the string trailer, if one exists.
3220 string_parser = StringParser()
3221 next_idx = string_parser.parse(LL, string_idx)
3223 # Should be followed by a non-empty RPAR...
3225 is_valid_index(next_idx)
3226 and LL[next_idx].type == token.RPAR
3227 and not is_empty_rpar(LL[next_idx])
3229 # That RPAR should NOT be followed by a '.' symbol.
3230 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
3233 return Ok(string_idx)
3235 return TErr("This line has no strings wrapped in parens.")
3237 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3240 string_parser = StringParser()
3241 rpar_idx = string_parser.parse(LL, string_idx)
3243 for leaf in (LL[string_idx - 1], LL[rpar_idx]):
3244 if line.comments_after(leaf):
3246 "Will not strip parentheses which have comments attached to them."
3249 new_line = line.clone()
3250 new_line.comments = line.comments.copy()
3252 append_leaves(new_line, line, LL[: string_idx - 1])
3254 string_leaf = Leaf(token.STRING, LL[string_idx].value)
3255 LL[string_idx - 1].remove()
3256 replace_child(LL[string_idx], string_leaf)
3257 new_line.append(string_leaf)
3260 new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
3263 LL[rpar_idx].remove()
3268 class BaseStringSplitter(StringTransformer):
3270 Abstract class for StringTransformers which transform a Line's strings by splitting
3271 them or placing them on their own lines where necessary to avoid going over
3272 the configured line length.
3275 * The target string value is responsible for the line going over the
3276 line length limit. It follows that after all of black's other line
3277 split methods have been exhausted, this line (or one of the resulting
3278 lines after all line splits are performed) would still be over the
3279 line_length limit unless we split this string.
3281 * The target string is NOT a "pointless" string (i.e. a string that has
3282 no parent or siblings).
3284 * The target string is not followed by an inline comment that appears
3287 * The target string is not a multiline (i.e. triple-quote) string.
3291 def do_splitter_match(self, line: Line) -> TMatchResult:
3293 BaseStringSplitter asks its clients to override this method instead of
3294 `StringTransformer.do_match(...)`.
3296 Follows the same protocol as `StringTransformer.do_match(...)`.
3298 Refer to `help(StringTransformer.do_match)` for more information.
3301 def do_match(self, line: Line) -> TMatchResult:
3302 match_result = self.do_splitter_match(line)
3303 if isinstance(match_result, Err):
3306 string_idx = match_result.ok()
3307 vresult = self.__validate(line, string_idx)
3308 if isinstance(vresult, Err):
3313 def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3315 Checks that @line meets all of the requirements listed in this classes'
3316 docstring. Refer to `help(BaseStringSplitter)` for a detailed
3317 description of those requirements.
3320 * Ok(None), if ALL of the requirements are met.
3322 * Err(CannotTransform), if ANY of the requirements are NOT met.
3326 string_leaf = LL[string_idx]
3328 max_string_length = self.__get_max_string_length(line, string_idx)
3329 if len(string_leaf.value) <= max_string_length:
3331 "The string itself is not what is causing this line to be too long."
3334 if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3339 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3343 if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3344 line.comments[id(line.leaves[string_idx])]
3347 "Line appears to end with an inline pragma comment. Splitting the line"
3348 " could modify the pragma's behavior."
3351 if has_triple_quotes(string_leaf.value):
3352 return TErr("We cannot split multiline strings.")
3356 def __get_max_string_length(self, line: Line, string_idx: int) -> int:
3358 Calculates the max string length used when attempting to determine
3359 whether or not the target string is responsible for causing the line to
3360 go over the line length limit.
3362 WARNING: This method is tightly coupled to both StringSplitter and
3363 (especially) StringParenWrapper. There is probably a better way to
3364 accomplish what is being done here.
3367 max_string_length: such that `line.leaves[string_idx].value >
3368 max_string_length` implies that the target string IS responsible
3369 for causing this line to exceed the line length limit.
3373 is_valid_index = is_valid_index_factory(LL)
3375 # We use the shorthand "WMA4" in comments to abbreviate "We must
3376 # account for". When giving examples, we use STRING to mean some/any
3379 # Finally, we use the following convenience variables:
3381 # P: The leaf that is before the target string leaf.
3382 # N: The leaf that is after the target string leaf.
3383 # NN: The leaf that is after N.
3385 # WMA4 the whitespace at the beginning of the line.
3386 offset = line.depth * 4
3388 if is_valid_index(string_idx - 1):
3389 p_idx = string_idx - 1
3391 LL[string_idx - 1].type == token.LPAR
3392 and LL[string_idx - 1].value == ""
3395 # If the previous leaf is an empty LPAR placeholder, we should skip it.
3399 if P.type == token.PLUS:
3400 # WMA4 a space and a '+' character (e.g. `+ STRING`).
3403 if P.type == token.COMMA:
3404 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
3407 if P.type in [token.COLON, token.EQUAL, token.NAME]:
3408 # This conditional branch is meant to handle dictionary keys,
3409 # variable assignments, 'return STRING' statement lines, and
3410 # 'else STRING' ternary expression lines.
3412 # WMA4 a single space.
3415 # WMA4 the lengths of any leaves that came before that space.
3416 for leaf in LL[: p_idx + 1]:
3417 offset += len(str(leaf))
3419 if is_valid_index(string_idx + 1):
3420 N = LL[string_idx + 1]
3421 if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
3422 # If the next leaf is an empty RPAR placeholder, we should skip it.
3423 N = LL[string_idx + 2]
3425 if N.type == token.COMMA:
3426 # WMA4 a single comma at the end of the string (e.g `STRING,`).
3429 if is_valid_index(string_idx + 2):
3430 NN = LL[string_idx + 2]
3432 if N.type == token.DOT and NN.type == token.NAME:
3433 # This conditional branch is meant to handle method calls invoked
3434 # off of a string literal up to and including the LPAR character.
3436 # WMA4 the '.' character.
3440 is_valid_index(string_idx + 3)
3441 and LL[string_idx + 3].type == token.LPAR
3443 # WMA4 the left parenthesis character.
3446 # WMA4 the length of the method's name.
3447 offset += len(NN.value)
3449 has_comments = False
3450 for comment_leaf in line.comments_after(LL[string_idx]):
3451 if not has_comments:
3453 # WMA4 two spaces before the '#' character.
3456 # WMA4 the length of the inline comment.
3457 offset += len(comment_leaf.value)
3459 max_string_length = self.line_length - offset
3460 return max_string_length
3463 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3465 StringTransformer that splits "atom" strings (i.e. strings which exist on
3466 lines by themselves).
3469 * The line consists ONLY of a single string (with the exception of a
3470 '+' symbol which MAY exist at the start of the line), MAYBE a string
3471 trailer, and MAYBE a trailing comma.
3473 * All of the requirements listed in BaseStringSplitter's docstring.
3476 The string mentioned in the 'Requirements' section is split into as
3477 many substrings as necessary to adhere to the configured line length.
3479 In the final set of substrings, no substring should be smaller than
3480 MIN_SUBSTR_SIZE characters.
3482 The string will ONLY be split on spaces (i.e. each new substring should
3483 start with a space).
3485 If the string is an f-string, it will NOT be split in the middle of an
3486 f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3487 else bar()} is an f-expression).
3489 If the string that is being split has an associated set of custom split
3490 records and those custom splits will NOT result in any line going over
3491 the configured line length, those custom splits are used. Otherwise the
3492 string is split as late as possible (from left-to-right) while still
3493 adhering to the transformation rules listed above.
3496 StringSplitter relies on StringMerger to construct the appropriate
3497 CustomSplit objects and add them to the custom split map.
3501 # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
3509 (?<!\})(?:\}\})*\}(?!\})
3512 def do_splitter_match(self, line: Line) -> TMatchResult:
3515 is_valid_index = is_valid_index_factory(LL)
3519 # The first leaf MAY be a '+' symbol...
3520 if is_valid_index(idx) and LL[idx].type == token.PLUS:
3523 # The next/first leaf MAY be an empty LPAR...
3524 if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3527 # The next/first leaf MUST be a string...
3528 if not is_valid_index(idx) or LL[idx].type != token.STRING:
3529 return TErr("Line does not start with a string.")
3533 # Skip the string trailer, if one exists.
3534 string_parser = StringParser()
3535 idx = string_parser.parse(LL, string_idx)
3537 # That string MAY be followed by an empty RPAR...
3538 if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3541 # That string / empty RPAR leaf MAY be followed by a comma...
3542 if is_valid_index(idx) and LL[idx].type == token.COMMA:
3545 # But no more leaves are allowed...
3546 if is_valid_index(idx):
3547 return TErr("This line does not end with a string.")
3549 return Ok(string_idx)
3551 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3554 QUOTE = LL[string_idx].value[-1]
3556 is_valid_index = is_valid_index_factory(LL)
3557 insert_str_child = insert_str_child_factory(LL[string_idx])
3559 prefix = get_string_prefix(LL[string_idx].value)
3561 # We MAY choose to drop the 'f' prefix from substrings that don't
3562 # contain any f-expressions, but ONLY if the original f-string
3563 # containes at least one f-expression. Otherwise, we will alter the AST
3565 drop_pointless_f_prefix = ("f" in prefix) and re.search(
3566 self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
3569 first_string_line = True
3570 starts_with_plus = LL[0].type == token.PLUS
3572 def line_needs_plus() -> bool:
3573 return first_string_line and starts_with_plus
3575 def maybe_append_plus(new_line: Line) -> None:
3578 If @line starts with a plus and this is the first line we are
3579 constructing, this function appends a PLUS leaf to @new_line
3580 and replaces the old PLUS leaf in the node structure. Otherwise
3581 this function does nothing.
3583 if line_needs_plus():
3584 plus_leaf = Leaf(token.PLUS, "+")
3585 replace_child(LL[0], plus_leaf)
3586 new_line.append(plus_leaf)
3589 is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
3592 def max_last_string() -> int:
3595 The max allowed length of the string value used for the last
3596 line we will construct.
3598 result = self.line_length
3599 result -= line.depth * 4
3600 result -= 1 if ends_with_comma else 0
3601 result -= 2 if line_needs_plus() else 0
3604 # --- Calculate Max Break Index (for string value)
3605 # We start with the line length limit
3606 max_break_idx = self.line_length
3607 # The last index of a string of length N is N-1.
3609 # Leading whitespace is not present in the string value (e.g. Leaf.value).
3610 max_break_idx -= line.depth * 4
3611 if max_break_idx < 0:
3613 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
3618 # Check if StringMerger registered any custom splits.
3619 custom_splits = self.pop_custom_splits(LL[string_idx].value)
3620 # We use them ONLY if none of them would produce lines that exceed the
3622 use_custom_breakpoints = bool(
3624 and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
3627 # Temporary storage for the remaining chunk of the string line that
3628 # can't fit onto the line currently being constructed.
3629 rest_value = LL[string_idx].value
3631 def more_splits_should_be_made() -> bool:
3634 True iff `rest_value` (the remaining string value from the last
3635 split), should be split again.
3637 if use_custom_breakpoints:
3638 return len(custom_splits) > 1
3640 return len(rest_value) > max_last_string()
3642 string_line_results: List[Ok[Line]] = []
3643 while more_splits_should_be_made():
3644 if use_custom_breakpoints:
3645 # Custom User Split (manual)
3646 csplit = custom_splits.pop(0)
3647 break_idx = csplit.break_idx
3649 # Algorithmic Split (automatic)
3650 max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
3651 maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
3652 if maybe_break_idx is None:
3653 # If we are unable to algorthmically determine a good split
3654 # and this string has custom splits registered to it, we
3655 # fall back to using them--which means we have to start
3656 # over from the beginning.
3658 rest_value = LL[string_idx].value
3659 string_line_results = []
3660 first_string_line = True
3661 use_custom_breakpoints = True
3664 # Otherwise, we stop splitting here.
3667 break_idx = maybe_break_idx
3669 # --- Construct `next_value`
3670 next_value = rest_value[:break_idx] + QUOTE
3672 # Are we allowed to try to drop a pointless 'f' prefix?
3673 drop_pointless_f_prefix
3674 # If we are, will we be successful?
3675 and next_value != self.__normalize_f_string(next_value, prefix)
3677 # If the current custom split did NOT originally use a prefix,
3678 # then `csplit.break_idx` will be off by one after removing
3682 if use_custom_breakpoints and not csplit.has_prefix
3685 next_value = rest_value[:break_idx] + QUOTE
3686 next_value = self.__normalize_f_string(next_value, prefix)
3688 # --- Construct `next_leaf`
3689 next_leaf = Leaf(token.STRING, next_value)
3690 insert_str_child(next_leaf)
3691 self.__maybe_normalize_string_quotes(next_leaf)
3693 # --- Construct `next_line`
3694 next_line = line.clone()
3695 maybe_append_plus(next_line)
3696 next_line.append(next_leaf)
3697 string_line_results.append(Ok(next_line))
3699 rest_value = prefix + QUOTE + rest_value[break_idx:]
3700 first_string_line = False
3702 yield from string_line_results
3704 if drop_pointless_f_prefix:
3705 rest_value = self.__normalize_f_string(rest_value, prefix)
3707 rest_leaf = Leaf(token.STRING, rest_value)
3708 insert_str_child(rest_leaf)
3710 # NOTE: I could not find a test case that verifies that the following
3711 # line is actually necessary, but it seems to be. Otherwise we risk
3712 # not normalizing the last substring, right?
3713 self.__maybe_normalize_string_quotes(rest_leaf)
3715 last_line = line.clone()
3716 maybe_append_plus(last_line)
3718 # If there are any leaves to the right of the target string...
3719 if is_valid_index(string_idx + 1):
3720 # We use `temp_value` here to determine how long the last line
3721 # would be if we were to append all the leaves to the right of the
3722 # target string to the last string line.
3723 temp_value = rest_value
3724 for leaf in LL[string_idx + 1 :]:
3725 temp_value += str(leaf)
3726 if leaf.type == token.LPAR:
3729 # Try to fit them all on the same line with the last substring...
3731 len(temp_value) <= max_last_string()
3732 or LL[string_idx + 1].type == token.COMMA
3734 last_line.append(rest_leaf)
3735 append_leaves(last_line, line, LL[string_idx + 1 :])
3737 # Otherwise, place the last substring on one line and everything
3738 # else on a line below that...
3740 last_line.append(rest_leaf)
3743 non_string_line = line.clone()
3744 append_leaves(non_string_line, line, LL[string_idx + 1 :])
3745 yield Ok(non_string_line)
3746 # Else the target string was the last leaf...
3748 last_line.append(rest_leaf)
3749 last_line.comments = line.comments.copy()
3752 def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3754 This method contains the algorithm that StringSplitter uses to
3755 determine which character to split each string at.
3758 @string: The substring that we are attempting to split.
3759 @max_break_idx: The ideal break index. We will return this value if it
3760 meets all the necessary conditions. In the likely event that it
3761 doesn't we will try to find the closest index BELOW @max_break_idx
3762 that does. If that fails, we will expand our search by also
3763 considering all valid indices ABOVE @max_break_idx.
3766 * assert_is_leaf_string(@string)
3767 * 0 <= @max_break_idx < len(@string)
3770 break_idx, if an index is able to be found that meets all of the
3771 conditions listed in the 'Transformations' section of this classes'
3776 is_valid_index = is_valid_index_factory(string)
3778 assert is_valid_index(max_break_idx)
3779 assert_is_leaf_string(string)
3781 _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3783 def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3786 All ranges of @string which, if @string were to be split there,
3787 would result in the splitting of an f-expression (which is NOT
3790 nonlocal _fexpr_slices
3792 if _fexpr_slices is None:
3794 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3795 _fexpr_slices.append(match.span())
3797 yield from _fexpr_slices
3799 is_fstring = "f" in get_string_prefix(string)
3801 def breaks_fstring_expression(i: Index) -> bool:
3804 True iff returning @i would result in the splitting of an
3805 f-expression (which is NOT allowed).
3810 for (start, end) in fexpr_slices():
3811 if start <= i < end:
3816 def passes_all_checks(i: Index) -> bool:
3819 True iff ALL of the conditions listed in the 'Transformations'
3820 section of this classes' docstring would be be met by returning @i.
3822 is_space = string[i] == " "
3824 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3825 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3827 return is_space and is_big_enough and not breaks_fstring_expression(i)
3829 # First, we check all indices BELOW @max_break_idx.
3830 break_idx = max_break_idx
3831 while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
3834 if not passes_all_checks(break_idx):
3835 # If that fails, we check all indices ABOVE @max_break_idx.
3837 # If we are able to find a valid index here, the next line is going
3838 # to be longer than the specified line length, but it's probably
3839 # better than doing nothing at all.
3840 break_idx = max_break_idx + 1
3841 while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
3844 if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
3849 def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
3850 if self.normalize_strings:
3851 normalize_string_quotes(leaf)
3853 def __normalize_f_string(self, string: str, prefix: str) -> str:
3856 * assert_is_leaf_string(@string)
3859 * If @string is an f-string that contains no f-expressions, we
3860 return a string identical to @string except that the 'f' prefix
3861 has been stripped and all double braces (i.e. '{{' or '}}') have
3862 been normalized (i.e. turned into '{' or '}').
3864 * Otherwise, we return @string.
3866 assert_is_leaf_string(string)
3868 if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
3869 new_prefix = prefix.replace("f", "")
3871 temp = string[len(prefix) :]
3872 temp = re.sub(r"\{\{", "{", temp)
3873 temp = re.sub(r"\}\}", "}", temp)
3876 return f"{new_prefix}{new_string}"
class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
    """
    StringTransformer that splits non-"atom" strings (i.e. strings that do not
    exist on lines by themselves).

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
            OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
        z`) such that the line starts with `else <string>`, where <string> is
        some string.
            OR
        * The line is an assert statement, which ends with a string.
            OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
        <string>`) such that the variable is being assigned the value of some
        string.
            OR
        * The line is a dictionary key assignment where some valid key is being
        assigned the value of some string.

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses. In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        LL = line.leaves

        # Run the matchers in order; the first hit wins.
        string_idx = (
            self._return_match(LL)
            or self._else_match(LL)
            or self._assert_match(LL)
            or self._assign_match(LL)
            or self._dict_match(LL)
        )

        if string_idx is not None:
            string_value = line.leaves[string_idx].value
            # If the string has no spaces...
            if " " not in string_value:
                # And will still violate the line length limit when split...
                max_string_length = self.line_length - ((line.depth + 1) * 4)
                if len(string_value) > max_string_length:
                    # And has no associated custom splits...
                    if not self.has_custom_splits(string_value):
                        # Then we should NOT put this string on its own line.
                        return TErr(
                            "We do not wrap long strings in parentheses when the"
                            " resultant line would still be over the specified line"
                            " length and can't be split further by StringSplitter."
                        )
            return Ok(string_idx)

        return TErr("This line does not contain any non-atomic strings.")

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # The line must belong to a return/yield statement and its first leaf
        # must be the "return" or "yield" keyword...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # The line must belong to a ternary expression and its first leaf
        # must be the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # The line must belong to an assert statement and its first leaf
        # must be the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for (i, leaf) in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # The line must belong to an expression statement or be a function
        # argument AND its first leaf must contain a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for (i, leaf) in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement requirements listed in the 'Requirements' section of this
            class's docstring.
                OR
            None, otherwise.
        """
        # The line must belong to a dictionary key assignment...
        if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
            is_valid_index = is_valid_index_factory(LL)

            for (i, leaf) in enumerate(LL):
                # We MUST find a colon...
                if leaf.type == token.COLON:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = len(LL) - 1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            depth=line.depth + 1,
            inside_brackets=True,
            should_explode=line.should_explode,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                right_leaves.pop()

            if old_parens_exist:
                assert (
                    right_leaves and right_leaves[-1].type == token.RPAR
                ), "Apparently, old parentheses do NOT exist?!"
                old_rpar_leaf = right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
class StringParser:
    """
    A state machine that aids in parsing a string's "trailer", which can be
    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
    (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
    varY)`).

    NOTE: A new StringParser object MUST be instantiated for each string
    trailer we need to parse.

    Examples:
        We shall assume that `line` equals the `Line` object that corresponds
        to the following line of python code:

            x = "Some {}.".format("String") + some_other_string

        Furthermore, we will assume that `string_idx` is some index such that:

            assert line.leaves[string_idx].value == "Some {}."

        The following code snippet then holds:

            string_parser = StringParser()
            idx = string_parser.parse(line.leaves, string_idx)
            assert line.leaves[idx].type == token.PLUS
    """

    DEFAULT_TOKEN = -1

    # String Parser States
    START = 1
    DOT = 2
    NAME = 3
    PERCENT = 4
    SINGLE_FMT_ARG = 5
    LPAR = 6
    RPAR = 7
    DONE = 8

    # Lookup Table for Next State
    _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
        # A string trailer may start with '.' OR '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARS and nested RPARS are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in this
        # class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        # Current parser state; advanced by _next_state().
        self._state = self.START
        # Number of '(' tokens seen without a matching ')'.
        self._unmatched_lpars = 0

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf which is part of the
            string trailer, if a "trailer" exists.
                OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        idx = string_idx + 1
        while idx < len(leaves) and self._next_state(leaves[idx]):
            idx += 1

        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
            was directly after the string leaf in question (e.g. if our target
            string is `line.leaves[i]` then the first call to this method must
            be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
            MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is part of the string's trailer.
        """
        # We ignore empty LPAR or RPAR leaves.
        if is_empty_par(leaf):
            return True

        next_token = leaf.type
        if next_token == token.LPAR:
            self._unmatched_lpars += 1

        current_state = self._state

        if current_state == self.LPAR:
            # The LPAR parser state is a special case: we keep returning True
            # until the matching RPAR token is found.
            if next_token == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
        elif (current_state, next_token) in self._goto:
            # The lookup table matches the current state to the next token.
            self._state = self._goto[current_state, next_token]
        elif (current_state, self.DEFAULT_TOKEN) in self._goto:
            # Otherwise, fall back to the current state's default transition.
            self._state = self._goto[current_state, self.DEFAULT_TOKEN]
        else:
            # If no default has been assigned, then this parser has a logic
            # error.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        return self._state != self.DONE
def TErr(err_msg: str) -> Err[CannotTransform]:
    """
    Convenience function used when working with the TResult type.

    Wraps @err_msg in a CannotTransform error, which is in turn wrapped in an
    Err and returned.
    """
    return Err(CannotTransform(err_msg))
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint).
    """
    # `str.startswith` accepts a tuple of prefixes, so `any(...)` over the
    # comments replaces the manual loop-and-return-True pattern.
    return any(
        comment.value.startswith(("# type:", "# noqa", "# pylint:"))
        for comment in comment_list
    )
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Factory for a convenience function that is used to orphan @string_leaf
    and then insert multiple new leaves into the same part of the node
    structure that @string_leaf had originally occupied.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`. After running the snippet below, it follows
        that `string_leaf.parent is None` and that the children which were
        passed to `insert_str_child` occupy @string_leaf's old slot in `N`
        (in insertion order):

            insert_str_child = insert_str_child_factory(string_leaf)

            lpar = Leaf(token.LPAR, '(')
            insert_str_child(lpar)

            bar = Leaf(token.STRING, '"bar"')
            insert_str_child(bar)

            rpar = Leaf(token.RPAR, ')')
            insert_str_child(rpar)
    """
    # Capture the parent and slot index BEFORE orphaning @string_leaf.
    string_parent = string_leaf.parent
    string_child_idx = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        """Insert @child at the next position in @string_leaf's old slot."""
        nonlocal string_child_idx

        assert string_parent is not None
        assert string_child_idx is not None

        string_parent.insert_child(string_child_idx, child)
        string_child_idx += 1

    return insert_str_child
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string starts with three quotation characters (after any
        string prefix, e.g. 'f' or 'r', has been skipped).
    """
    stripped = string.lstrip(STRING_PREFIX_CHARS)
    return stripped.startswith(('"""', "'''"))
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
    """
    Returns:
        @node.parent.type, if @node is not None and has a parent.
            OR
        None, otherwise.
    """
    if node is None or node.parent is None:
        return None

    return node.parent.type
def is_empty_par(leaf: Leaf) -> bool:
    """Return True iff @leaf is an invisible (empty-valued) LPAR or RPAR."""
    return is_empty_lpar(leaf) or is_empty_rpar(leaf)
def is_empty_lpar(leaf: Leaf) -> bool:
    """Return True iff @leaf is an invisible (empty-valued) LPAR."""
    return leaf.type == token.LPAR and leaf.value == ""
def is_empty_rpar(leaf: Leaf) -> bool:
    """Return True iff @leaf is an invisible (empty-valued) RPAR."""
    return leaf.type == token.RPAR and leaf.value == ""
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """Build a predicate that reports whether an index is in range for @seq.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # len(seq) is read on every call on purpose: @seq may be mutated
        # after this factory runs.
        return 0 <= idx < len(seq)

    return is_valid_index
def line_to_string(line: Line) -> str:
    """Returns the string representation of @line.

    WARNING: This is known to be computationally expensive.
    """
    return str(line).strip("\n")
def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
    """
    Append leaves (taken from @old_line) to @new_line, making sure to fix the
    underlying Node structure where appropriate.

    All of the leaves in @leaves are duplicated. The duplicates are then
    appended to @new_line and used to replace their originals in the underlying
    Node structure. Any comments attached to the old leaves are reattached to
    the new leaves.

    Pre-conditions:
        set(@leaves) is a subset of set(@old_line.leaves).
    """
    for old_leaf in leaves:
        assert old_leaf in old_line.leaves

        # Duplicate the leaf, splice the copy into the node tree, and append
        # it to the new line.
        new_leaf = Leaf(old_leaf.type, old_leaf.value)
        replace_child(old_leaf, new_leaf)
        new_line.append(new_leaf)

        # Carry over any comments that trailed the original leaf.
        for comment_leaf in old_line.comments_after(old_leaf):
            new_line.append(comment_leaf, preformatted=True)
def replace_child(old_child: LN, new_child: LN) -> None:
    """
    Side Effects:
        * If @old_child.parent is set, replace @old_child with @new_child in
        @old_child's underlying Node structure.
            OR
        * Otherwise, this function does nothing.
    """
    parent = old_child.parent
    if not parent:
        return

    child_idx = old_child.remove()
    if child_idx is not None:
        parent.insert_child(child_idx, new_child)
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        @string's prefix (e.g. '', 'r', 'f', or 'rf'), lower-cased.
    """
    assert_is_leaf_string(string)

    prefix = ""
    pos = 0
    while string[pos] in STRING_PREFIX_CHARS:
        prefix += string[pos].lower()
        pos += 1

    return prefix
def assert_is_leaf_string(string: str) -> None:
    """
    Checks the pre-condition that @string has the format that you would expect
    of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
    token.STRING`. The precise pre-conditions that are checked are listed
    below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    dquote_idx = string.find('"')
    squote_idx = string.find("'")
    # Use whichever quote character appears first; a missing quote kind
    # reports -1, in which case max() selects the one that was found.
    if dquote_idx == -1 or squote_idx == -1:
        quote_idx = max(dquote_idx, squote_idx)
    else:
        quote_idx = min(squote_idx, dquote_idx)

    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in (
        "'",
        '"',
    ), f"{string!r} is missing an ending quote character (' or \")."
    assert set(string[:quote_idx]).issubset(
        set(STRING_PREFIX_CHARS)
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise. This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket: Optional[Leaf] = None
    for leaf in line.leaves:
        # Leaving the body: the close of the first bracket pair sends all
        # following leaves to the tail (or back to the head if the body was
        # empty).
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        ):
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        # Entering the body: the first opening bracket ends the head.
        if current_leaves is head_leaves:
            if leaf.type in OPENING_BRACKETS:
                matching_bracket = leaf
                current_leaves = body_leaves
    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    # Walk the line RIGHT-to-LEFT, so the "tail" is collected first and the
    # lists are reversed afterwards.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail. Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        omit = {id(closing_bracket), *omit}
        try:
            # Retry without the optional parentheses.
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit:
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to"
                    " satisfy the splitting algorithm because the head or the tail"
                    " contains multiline strings which by definition never fit one"
                    " line."
                )

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        if tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save {tail_len} characters is"
                " not worth it"
            )
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Return a new line with given `leaves` and respective comments from `original`.

    If `is_body` is True, the result line is one-indented inside brackets and as such
    has its first leaf's prefix normalized and a trailing comma added when expected.
    """
    result = Line(depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1
        if leaves:
            # Since body is a new indent level, remove spurious leading whitespace.
            normalize_prefix(leaves[0], inside_brackets=True)
            # Ensure a trailing comma for imports and standalone function arguments, but
            # be careful not to add one after any comments or within type annotations.
            # NOTE: the ambiguous single-letter loop variable `l` (PEP 8 / E741)
            # has been renamed to `leaf`.
            no_commas = (
                original.is_def
                and opening_bracket.value == "("
                and not any(leaf.type == token.COMMA for leaf in leaves)
            )

            if original.is_import or no_commas:
                for i in range(len(leaves) - 1, -1, -1):
                    if leaves[i].type == STANDALONE_COMMENT:
                        continue

                    if leaves[i].type != token.COMMA:
                        leaves.insert(i + 1, Leaf(token.COMMA, ","))
                    break

    # Populate the line with the leaves and their trailing comments.
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for comment_after in original.comments_after(leaf):
            result.append(comment_after, preformatted=True)
    if is_body:
        result.should_explode = should_explode(result, opening_bracket)
    return result
def dont_increase_indentation(split_func: Transformer) -> Transformer:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.
    """

    # NOTE(review): the `@wraps(split_func)` decorator and the `yield l`
    # re-emission inside the loop appear to be elided in this excerpt.
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for l in split_func(line, features):
            normalize_prefix(l.leaves[0], inside_brackets=True)

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Raises CannotSplit for an empty line, a line with no delimiters, or a
    lone attribute access split.
    """
    # NOTE(review): the try/except wrappers around the two lookups below
    # (IndexError / ValueError → CannotSplit) are elided in this excerpt.
    last_leaf = line.leaves[-1]
        raise CannotSplit("Line empty")

    bt = line.bracket_tracker
    delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
        raise CannotSplit("No delimiters found")

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
            current_line.append_safe(leaf, preformatted=True)
            # Appending was unsafe: start a fresh line at the same depth.
            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth:
            # A trailing comma is only safe when the target Python versions
            # support it after * / ** in the relevant context.
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features

        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            # Highest-priority delimiter reached: flush and start a new line.
            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
            current_line.append(Leaf(token.COMMA, ","))
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Raises CannotSplit if `line` contains no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
            current_line.append_safe(leaf, preformatted=True)
            # Appending was unsafe: flush and start a fresh line.
            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement."""
    # NOTE(review): bindings `p = leaf.parent` / `v = leaf.value` and the
    # surrounding `return bool(...)` are elided in this excerpt.
            (v == "import" and p and p.type == syms.import_name)
            or (v == "from" and p and p.type == syms.import_from)
def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
    """Return True if the given leaf is a special comment.
    Only returns true for type comments for now."""
    # NOTE(review): `t = leaf.type` / `v = leaf.value` bindings are elided here.
    return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if not inside_brackets:
        # Split on '#' so a trailing comment doesn't hide backslashes in the
        # whitespace part of the prefix.
        spl = leaf.prefix.split("#")
        if "\\" not in spl[0]:
            nl_count = spl[-1].count("\n")
            leaf.prefix = "\n" * nl_count
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Make all string prefixes lowercase.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Note: Mutates its argument.
    """
    match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    orig_prefix = match.group(1)
    new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
    # NOTE(review): the `if remove_u_prefix:` guard for the next line is elided
    # in this excerpt.
        new_prefix = new_prefix.replace("u", "")
    leaf.value = f"{new_prefix}{match.group(2)}"
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
    elif value[:3] == "'''":
    elif value[0] == '"':
    # NOTE(review): the orig_quote/new_quote assignments for each branch above
    # are elided in this excerpt.
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible

        # Do not introduce or remove backslashes in raw strings
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        matches = re.findall(
            (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:[^}]|$)  # A } followed by end of the string or a non-}
                # Do not introduce backslashes in interpolated expressions

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: avoid a run of four quotes at the end of the string
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_numeric_literal(leaf: Leaf) -> None:
    """Normalizes numeric (float, int, and complex) literals.

    All letters used in the representation are normalized to lowercase (except
    in Python 2 long literals).
    """
    text = leaf.value.lower()
    if text.startswith(("0o", "0b")):
        # Leave octal and binary literals alone.
    elif text.startswith("0x"):
        # Change hex literals to upper case.
        before, after = text[:2], text[2:]
        text = f"{before}{after.upper()}"
    # NOTE(review): the `elif "e" in text:` branch header is elided here —
    # the following block normalizes scientific notation.
        before, after = text.split("e")
        if after.startswith("-"):
        elif after.startswith("+"):
        before = format_float_or_int_string(before)
        text = f"{before}e{sign}{after}"
    elif text.endswith(("j", "l")):
        # Capitalize in "2L" because "l" looks too similar to "1".
        text = f"{format_float_or_int_string(number)}{suffix}"
        text = format_float_or_int_string(text)
def format_float_or_int_string(text: str) -> str:
    """Formats a float string like "1.0".

    Ensures both sides of the decimal point carry a digit ("1." -> "1.0",
    ".5" -> "0.5"). The early return for dot-less input is elided here.
    """
    before, after = text.split(".")
    return f"{before or 0}.{after or 0}"
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.

    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(child, parens_after=parens_after)

        # Add parentheses around long tuple unpacking in assignments.
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr

            if is_walrus_assignment(child):

            if child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(child, parent=node):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                wrap_in_parentheses(node, child, visible=False)

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
    # NOTE(review): the enclosing `while try_again:` loop is elided in this
    # excerpt; conversion repeats until no more pairs are found.
        try_again = convert_one_fmt_off_pair(node)
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                first.prefix = prefix[comment.consumed :]
                # Fold the ignored region into one hidden standalone comment value.
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                first_idx: Optional[int] = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,

            previous_consumed = comment.consumed
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    Stops at the end of the block.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if fmt_on(container):

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column):
            for child in container.children:
                if contains_fmt_on_at_column(child, leaf.column):
            # Whole container is ignored; advance to the next sibling.
            container = container.next_sibling
def fmt_on(container: LN) -> bool:
    """Return whether formatting is re-enabled (`# fmt: on`) in `container`'s prefix.

    The last matching comment wins; the flag assignments and final return are
    elided in this excerpt.
    """
    for comment in list_comments(container.prefix, is_endmarker=False):
        if comment.value in FMT_ON:
        elif comment.value in FMT_OFF:
def contains_fmt_on_at_column(container: LN, column: int) -> bool:
    """Return True if some child of `container` at the given column turns fmt on."""
    for child in container.children:
            isinstance(child, Node)
            and first_leaf_column(child) == column
            or isinstance(child, Leaf)
            and child.column == column
def first_leaf_column(node: Node) -> Optional[int]:
    """Return the column of the first leaf child of `node`, if any."""
    for child in node.children:
        if isinstance(child, Leaf):
def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.
    Additionally, remove repeated, adjacent invisible parens from the atom `node`
    as they are redundant.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    # Unsafe cases: not an atom, empty/one-tuples, yields outside expr_stmt,
    # or atoms containing top-level commas.
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or (is_yield(node) and parent.type != syms.expr_stmt)
        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY

    first = node.children[0]
    last = node.children[-1]
    if first.type == token.LPAR and last.type == token.RPAR:
        middle = node.children[1]
        # make parentheses invisible
        first.value = ""  # type: ignore
        last.value = ""  # type: ignore
        maybe_make_parens_invisible_in_atom(middle, parent=parent)

        if is_atom_with_invisible_parens(middle):
            # Strip the invisible parens from `middle` by replacing
            # it with the child in-between the invisible parens
            middle.replace(middle.children[1])
def is_atom_with_invisible_parens(node: LN) -> bool:
    """Given a `LN`, determines whether it's an atom `node` with invisible
    parens. Useful in dedupe-ing and normalizing parens.
    """
    if isinstance(node, Leaf) or node.type != syms.atom:

    first, last = node.children[0], node.children[-1]
    # Invisible parens are LPAR/RPAR leaves whose value is the empty string.
        isinstance(first, Leaf)
        and first.type == token.LPAR
        and first.value == ""
        and isinstance(last, Leaf)
        and last.type == token.RPAR
        and last.value == ""
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple."""
    # An empty tuple is an atom of exactly "(" ")".
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
    """Returns `wrapped` if `node` is of the shape ( wrapped ).

    Parenthesis can be optional. Returns None otherwise"""
    if len(node.children) != 3:

    lpar, wrapped, rpar = node.children
    if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
    """Wrap `child` in parentheses.

    This replaces `child` with an atom holding the parentheses and the old
    child. That requires moving the prefix.

    If `visible` is False, the leaves will be valueless (and thus invisible).
    """
    lpar = Leaf(token.LPAR, "(" if visible else "")
    rpar = Leaf(token.RPAR, ")" if visible else "")
    # Move the prefix from the child onto the new enclosing atom.
    prefix = child.prefix
    index = child.remove() or 0
    new_child = Node(syms.atom, [lpar, child, rpar])
    new_child.prefix = prefix
    parent.insert_child(index, new_child)
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: "( x , )" — unwrap and check for the comma.
        gexp = unwrap_singleton_parenthesis(node)
        if gexp is None or gexp.type != syms.testlist_gexp:

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: "x ," directly under an implicit-tuple node.
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
def is_walrus_assignment(node: LN) -> bool:
    """Return True iff `node` is of the shape ( test := test )"""
    wrapped = unwrap_singleton_parenthesis(node)
    if wrapped is None:
        return False

    return wrapped.type == syms.namedexpr_test
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression.

    Recurses through parenthesized atoms; the early `return True`/`return
    False` lines of each branch are elided in this excerpt.
    """
    if node.type == syms.yield_expr:

    if node.type == token.NAME and node.value == "yield":  # type: ignore

    if node.type != syms.atom:

    if len(node.children) != 3:

    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        return is_yield(expr)
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in VARARGS_SPECIALS or not leaf.parent:

    # NOTE(review): `p = leaf.parent` and the climb past a star_expr parent
    # are elided in this excerpt.
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.

    return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    value = leaf.value
    return has_triple_quotes(value) and "\n" in value
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body."""
    # A stub suite is NEWLINE INDENT <body> DEDENT where <body> is "...".
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:

    if len(node.children) != 2:

    child = node.children[0]
    # "..." parses as an atom of three DOT leaves.
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
def max_delimiter_priority_in_atom(node: LN) -> Priority:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    """
    if node.type != syms.atom:

    first = node.children[0]
    last = node.children[-1]
    if not (first.type == token.LPAR and last.type == token.RPAR):

    bt = BracketTracker()
    # Mark every leaf strictly inside the parentheses.
    for c in node.children[1:-1]:
        if isinstance(c, Leaf):
            for leaf in c.leaves():
        return bt.max_delimiter_priority()
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    # NOTE(review): the value-restoring assignments ("(" / ")") are elided in
    # this excerpt.
    if leaf.type == token.LPAR:
    elif leaf.type == token.RPAR:
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
    # Only explode collection literals / import lists.
        opening_bracket.parent
        and opening_bracket.parent.type in {syms.atom, syms.import_from}
        and opening_bracket.value in "[{("

        # Ignore a trailing comma when computing the max delimiter priority.
        last_leaf = line.leaves[-1]
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):

    return max_priority == COMMA_PRIORITY
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif n.type == token.SLASH:
            if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        # Trailing comma directly inside an argument list.
            n.type in {syms.typedargslist, syms.arglist}
            and n.children[-1].type == token.COMMA
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
                feature = Feature.TRAILING_COMMA_IN_CALL

            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)
def detect_target_versions(node: Node) -> Set[TargetVersion]:
    """Detect the version to target based on the nodes used."""
    features = get_features_used(node)
    # Keep only versions whose feature set covers everything the file uses.
        version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty, unless the line should explode.
    """
    omit: Set[LeafID] = set()

    length = 4 * line.depth
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    inner_brackets: Set[LeafID] = set()
    # Walk the leaves right-to-left, accumulating rendered length.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:

        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:

            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))

                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()

                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file."""
    imports: Set[str] = set()

    def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
        """Yield imported names from the children of an import_from node."""
        for child in children:
            if isinstance(child, Leaf):
                if child.type == token.NAME:

            elif child.type == syms.import_as_name:
                orig_name = child.children[0]
                assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
                assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
                yield orig_name.value

            elif child.type == syms.import_as_names:
                yield from get_imports_from_children(child.children)

                raise AssertionError("Invalid syntax parsing imports")

    # Only leading simple statements can hold __future__ imports.
    for child in node.children:
        if child.type != syms.simple_stmt:

        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE

        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":

            imports |= set(get_imports_from_children(first_child.children[3:]))
def get_gitignore(root: Path) -> PathSpec:
    """Return a PathSpec matching gitignore content if present."""
    patterns: List[str] = []
    gitignore_path = root / ".gitignore"
    if gitignore_path.is_file():
        with gitignore_path.open() as gitignore_file:
            patterns = gitignore_file.readlines()
    return PathSpec.from_lines("gitwildmatch", patterns)
def gen_python_files_in_dir(
    include: Pattern[str],
    exclude: Pattern[str],
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    # NOTE(review): the `path`, `root` and `report` parameters are elided from
    # the signature in this excerpt.
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        # First ignore files matching .gitignore
        if gitignore.match_file(child.as_posix()):
            report.path_ignored(child, "matches the .gitignore file content")

        # Then ignore with `exclude` option.
            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        except OSError as e:
            report.path_ignored(child, f"cannot be read because {e}")

            if child.is_symlink():
                report.path_ignored(
                    child, f"is a symbolic link that points outside {root}"

            # Directories get a trailing slash so `exclude` can match them.
            normalized_path += "/"

        exclude_match = exclude.search(normalized_path)
        if exclude_match and exclude_match.group(0):
            report.path_ignored(child, "matches the --exclude regular expression")

            yield from gen_python_files_in_dir(
                child, root, include, exclude, report, gitignore

        elif child.is_file():
            include_match = include.search(normalized_path)
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.
    """
    # NOTE(review): the empty-`srcs` guard and the `return directory` lines
    # after each marker check are elided in this excerpt.
        return Path("/").resolve()

    common_base = min(Path(src).resolve() for src in srcs)
    if common_base.is_dir():
        # Append a fake file so `parents` below returns `common_base_dir`, too.
        common_base /= "fake-file"
    for directory in common_base.parents:
        if (directory / ".git").exists():

        if (directory / ".hg").is_dir():

        if (directory / "pyproject.toml").is_file():
5785 """Provides a reformatting counter. Can be rendered with `str(report)`."""
5790 verbose: bool = False
5791 change_count: int = 0
5793 failure_count: int = 0
    def done(self, src: Path, changed: Changed) -> None:
        """Increment the counter for successful reformatting. Write out a message."""
        if changed is Changed.YES:
            reformatted = "would reformat" if self.check or self.diff else "reformatted"
            if self.verbose or not self.quiet:
                out(f"{reformatted} {src}")
            self.change_count += 1
            # Unchanged (or cached) file: only reported in verbose mode.
                if changed is Changed.NO:
                    msg = f"{src} already well formatted, good job."
                    msg = f"{src} wasn't modified on disk since last run."
                out(msg, bold=False)
            self.same_count += 1
5811 def failed(self, src: Path, message: str) -> None:
5812 """Increment the counter for failed reformatting. Write out a message."""
5813 err(f"error: cannot format {src}: {message}")
5814 self.failure_count += 1
    def path_ignored(self, path: Path, message: str) -> None:
        """Report a path that was skipped, together with the reason.

        The `if self.verbose:` guard for the output line is elided in this
        excerpt.
        """
            out(f"{path} ignored: {message}", bold=False)
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special return codes reserved by the shell.
        if self.failure_count:

        elif self.change_count and self.check:
    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        # Phrase the counters conditionally: --check/--diff report hypotheticals.
        if self.check or self.diff:
            reformatted = "would be reformatted"
            unchanged = "would be left unchanged"
            failed = "would fail to reformat"
            reformatted = "reformatted"
            unchanged = "left unchanged"
            failed = "failed to reformat"

        if self.change_count:
            s = "s" if self.change_count > 1 else ""
                click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
            s = "s" if self.same_count > 1 else ""
            report.append(f"{self.same_count} file{s} {unchanged}")
        if self.failure_count:
            s = "s" if self.failure_count > 1 else ""
                click.style(f"{self.failure_count} file{s} {failed}", fg="red")
        return ", ".join(report) + "."
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src`, trying newer Python grammars first and falling back to
    typed-ast (3.x, then 2.7) when parsing fails.

    The try/except SyntaxError wrappers around each parse attempt are elided
    in this excerpt.
    """
    filename = "<unknown>"
    if sys.version_info >= (3, 8):
        # TODO: support Python 4+ ;)
        for minor_version in range(sys.version_info[1], 4, -1):
                return ast.parse(src, filename, feature_version=(3, minor_version))
        for feature_version in (7, 6):
                return ast3.parse(src, filename, feature_version=feature_version)

    return ast27.parse(src)
def _fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant."""
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)
    # NOTE(review): the fall-through `return node` is elided in this excerpt.
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
    """Simple visitor generating strings to compare ASTs by content."""

    node = _fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        if isinstance(node, type_ignore_classes):
            value = getattr(node, field)
        except AttributeError:

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                    for item in item.elts:
                        yield from _stringify_ast(item, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from _stringify_ast(item, depth + 2)

        elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
            yield from _stringify_ast(value, depth + 2)

            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
                normalized = re.sub(r"\n[ \t]+", "\n ", value)
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Equivalence is checked by parsing both strings and comparing the
    stringified ASTs (see `_stringify_ast`).
    """
    try:
        src_ast = parse_ast(src)
    except Exception as exc:
        raise AssertionError(
            "cannot use --safe with this file; failed to parse source file. AST"
            f" error message: {exc}"
        )

    try:
        dst_ast = parse_ast(dst)
    except Exception as exc:
        # Dump the traceback and the offending output to a log file for the report.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
            " on https://github.com/psf/black/issues. This invalid output might be"
            f" helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_stringify_ast(src_ast))
    dst_ast_str = "\n".join(_stringify_ast(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            "INTERNAL ERROR: Black produced code that is not equivalent to the"
            " source. Please report a bug on https://github.com/psf/black/issues. "
            f" This diff might be helpful: {log}"
        ) from None
def assert_stable(src: str, dst: str, mode: Mode) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    Black must be idempotent: formatting its own output must be a no-op.
    """
    newdst = format_str(dst, mode=mode)
    if dst != newdst:
        # Dump both diffs so the bug report shows what changed on each pass.
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            "INTERNAL ERROR: Black produced different code on the second pass of the"
            " formatter. Please report a bug on https://github.com/psf/black/issues."
            f" This diff might be helpful: {log}"
        ) from None
@mypyc_attr(patchable=True)
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file.

    Each string in `output` is written in order; a trailing newline is
    appended to any chunk that doesn't already end with one.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
@contextmanager
def nullcontext() -> Iterator[None]:
    """Return an empty context manager.

    To be used like `nullcontext` in Python 3.7.
    """
    yield
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name`/`b_name` become the `---`/`+++` header labels; 5 lines of
    context are included. Returns an empty string when there is no diff.
    """
    import difflib

    a_lines = [line + "\n" for line in a.splitlines()]
    b_lines = [line + "\n" for line in b.splitlines()]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            # asyncio.Task.all_tasks was the pre-3.7 spelling.
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # Note: `gather` infers the loop from the tasks themselves; the
        # explicit `loop=` keyword was deprecated in 3.8 and removed in 3.10.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile a regular expression string in `regex`.

    If it contains newlines, use verbose mode.
    """
    if "\n" in regex:
        # Multi-line patterns are assumed to be written in verbose style.
        regex = "(?x)" + regex
    compiled: Pattern[str] = re.compile(regex)
    return compiled
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible."""
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        index -= 1
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Stops prematurely on multiline strings and standalone comments.
    The length of each leaf includes its prefix and any trailing comments.
    """
    op = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in op(line.leaves):
        length = len(leaf.prefix) + len(leaf.value)
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        for comment in line.comments_after(leaf):
            length += len(comment.value)

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    if not line_str:
        line_str = line_to_string(line)
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).
    """
    leaves = line.leaves
    if len(leaves) < 2:
        # A single leaf has nothing to split on.
        return False

    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        # Method chains hanging off a string literal: walk the leaves from
        # the end and make sure the chain has a splittable shape.
        call_count = 0
        dot_count = 0
        next = leaves[-1]
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False

                call_count += 1

            elif leaf.type == token.DOT:
                dot_count += 1

            elif leaf.type == token.NAME:
                # A name must be followed by a dot or a call/subscript.
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False

            elif leaf.type not in CLOSING_BRACKETS:
                return False

            if dot_count > 1 and call_count > 1:
                return False

            next = leaf

    return True
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        length = 4 * line.depth
        _index = -1
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                break

            length += leaf_length
            if length > line_length:
                break

            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                break

        else:
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
def get_cache_file(mode: Mode) -> Path:
    """Return the path of the pickle cache file for `mode`."""
    cache_filename = f"cache.{mode.get_cache_key()}.pickle"
    return CACHE_DIR / cache_filename
def read_cache(mode: Mode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (pickle.UnpicklingError, ValueError):
            # A corrupt cache is treated as empty; write_cache will rewrite it.
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not."""
    stat = path.stat()
    # mtime + size is a cheap fingerprint; any edit changes at least one.
    return stat.st_mtime, stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that modified on disk or are not in the
    cache. The other contains paths to non-modified files.
    """
    todo, done = set(), set()
    for src in sources:
        # Cache keys are resolved paths (see write_cache).
        src = src.resolve()
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
    """Update the cache file.

    The new cache is written to a temporary file first and atomically moved
    into place with os.replace. Any OSError is silently ignored: caching is
    best-effort and must never break formatting.
    """
    cache_file = get_cache_file(mode)
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
            pickle.dump(new_cache, f, protocol=4)
        os.replace(f.name, cache_file)
    except OSError:
        pass
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application. Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code. Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.
    """
    try:
        from click import core
        from click import _unicodefun  # type: ignore
    except ImportError:
        # Catch ImportError, not just its subclass ModuleNotFoundError:
        # newer Click releases dropped `_unicodefun`, and `from click import
        # _unicodefun` then raises a plain ImportError.
        return

    for module in (core, _unicodefun):
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
def patched_main() -> None:
    """Entry point used by the packaged executable.

    Enables multiprocessing support for frozen binaries and works around the
    Click ASCII-environment crash before delegating to `main`.
    """
    freeze_support()
    patch_click()
    main()
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent `docstring` so every continuation line starts with `prefix`.

    Follows the trimming algorithm from PEP 257:
    https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    """
    if not docstring:
        return docstring

    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = docstring.expandtabs().splitlines()
    # Determine minimum indentation (first line doesn't count):
    indent = sys.maxsize
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indent = min(indent, len(line) - len(stripped))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent < sys.maxsize:
        last_line_idx = len(lines) - 2
        for i, line in enumerate(lines[1:]):
            stripped_line = line[indent:].rstrip()
            if stripped_line or i == last_line_idx:
                trimmed.append(prefix + stripped_line)
            else:
                # Interior blank lines stay completely empty.
                trimmed.append("")
    # Return a single string:
    return "\n".join(trimmed)
6375 if __name__ == "__main__":