All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you would read over the Git project's submission guidelines and adhere to them,
I would be especially grateful.
2 from asyncio.base_events import BaseEventLoop
3 from concurrent.futures import Executor, ProcessPoolExecutor
4 from datetime import datetime
5 from enum import Enum, Flag
6 from functools import lru_cache, partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
38 from appdirs import user_cache_dir
39 from attr import dataclass, Factory
44 from blib2to3.pytree import Node, Leaf, type_repr
45 from blib2to3 import pygram, pytree
46 from blib2to3.pgen2 import driver, token
47 from blib2to3.pgen2.parse import ParseError
50 __version__ = "18.6b4"
51 DEFAULT_LINE_LENGTH = 88
53 r"/(\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
55 DEFAULT_INCLUDES = r"\.pyi?$"
56 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
68 LN = Union[Leaf, Node]
69 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
72 CacheInfo = Tuple[Timestamp, FileSize]
73 Cache = Dict[Path, CacheInfo]
74 out = partial(click.secho, bold=True, err=True)
75 err = partial(click.secho, fg="red", err=True)
77 pygram.initialize(CACHE_DIR)
78 syms = pygram.python_symbols
class NothingChanged(UserWarning):
    """Raised when reformatting produced output identical to the input."""
class CannotSplit(Exception):
    """Raised when no readable split fitting the allotted line length exists."""
class InvalidInput(ValueError):
    """Raised when the input source code fails every parse attempt."""
93 class WriteBack(Enum):
100 def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
101 if check and not diff:
104 return cls.DIFF if diff else cls.YES
113 class FileMode(Flag):
117 NO_STRING_NORMALIZATION = 4
118 NO_NUMERIC_UNDERSCORE_NORMALIZATION = 8
121 def from_configuration(
126 skip_string_normalization: bool,
127 skip_numeric_underscore_normalization: bool,
129 mode = cls.AUTO_DETECT
134 if skip_string_normalization:
135 mode |= cls.NO_STRING_NORMALIZATION
136 if skip_numeric_underscore_normalization:
137 mode |= cls.NO_NUMERIC_UNDERSCORE_NORMALIZATION
141 def read_pyproject_toml(
142 ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
144 """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
146 Returns the path to a successfully found and read configuration file, None
149 assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
151 root = find_project_root(ctx.params.get("src", ()))
152 path = root / "pyproject.toml"
159 pyproject_toml = toml.load(value)
160 config = pyproject_toml.get("tool", {}).get("black", {})
161 except (toml.TomlDecodeError, OSError) as e:
162 raise click.BadOptionUsage(f"Error reading configuration file: {e}", ctx)
167 if ctx.default_map is None:
169 ctx.default_map.update( # type: ignore # bad types in .pyi
170 {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
175 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
180 default=DEFAULT_LINE_LENGTH,
181 help="How many characters per line to allow.",
188 "Allow using Python 3.6-only syntax on all input files. This will put "
189 "trailing commas in function signatures and calls also after *args and "
190 "**kwargs. [default: per-file auto-detection]"
197 "Format all input files like typing stubs regardless of file extension "
198 "(useful when piping source on standard input)."
203 "--skip-string-normalization",
205 help="Don't normalize string quotes or prefixes.",
209 "--skip-numeric-underscore-normalization",
211 help="Don't normalize underscores in numeric literals.",
217 "Don't write the files back, just return the status. Return code 0 "
218 "means nothing would change. Return code 1 means some files would be "
219 "reformatted. Return code 123 means there was an internal error."
225 help="Don't write the files back, just output a diff for each file on stdout.",
230 help="If --fast given, skip temporary sanity checks. [default: --safe]",
235 default=DEFAULT_INCLUDES,
237 "A regular expression that matches files and directories that should be "
238 "included on recursive searches. An empty value means all files are "
239 "included regardless of the name. Use forward slashes for directories on "
240 "all platforms (Windows, too). Exclusions are calculated first, inclusions "
248 default=DEFAULT_EXCLUDES,
250 "A regular expression that matches files and directories that should be "
251 "excluded on recursive searches. An empty value means no paths are excluded. "
252 "Use forward slashes for directories on all platforms (Windows, too). "
253 "Exclusions are calculated first, inclusions later."
262 "Don't emit non-error messages to stderr. Errors are still emitted, "
263 "silence those with 2>/dev/null."
271 "Also emit messages to stderr about files that were not changed or were "
272 "ignored due to --exclude=."
275 @click.version_option(version=__version__)
280 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
287 exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
290 callback=read_pyproject_toml,
291 help="Read configuration from PATH.",
302 skip_string_normalization: bool,
303 skip_numeric_underscore_normalization: bool,
309 config: Optional[str],
311 """The uncompromising code formatter."""
312 write_back = WriteBack.from_configuration(check=check, diff=diff)
313 mode = FileMode.from_configuration(
316 skip_string_normalization=skip_string_normalization,
317 skip_numeric_underscore_normalization=skip_numeric_underscore_normalization,
319 if config and verbose:
320 out(f"Using configuration from {config}.", bold=False, fg="blue")
322 include_regex = re_compile_maybe_verbose(include)
324 err(f"Invalid regular expression for include given: {include!r}")
327 exclude_regex = re_compile_maybe_verbose(exclude)
329 err(f"Invalid regular expression for exclude given: {exclude!r}")
331 report = Report(check=check, quiet=quiet, verbose=verbose)
332 root = find_project_root(src)
333 sources: Set[Path] = set()
338 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
340 elif p.is_file() or s == "-":
341 # if a file was explicitly given, we don't care about its extension
344 err(f"invalid path: {s}")
345 if len(sources) == 0:
346 if verbose or not quiet:
347 out("No paths given. Nothing to do 😴")
350 if len(sources) == 1:
353 line_length=line_length,
355 write_back=write_back,
360 loop = asyncio.get_event_loop()
361 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
363 loop.run_until_complete(
366 line_length=line_length,
368 write_back=write_back,
377 if verbose or not quiet:
378 bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
379 out(f"All done! {bang}")
380 click.secho(str(report), err=True)
381 ctx.exit(report.return_code)
388 write_back: WriteBack,
392 """Reformat a single file under `src` without spawning child processes.
394 If `quiet` is True, non-error messages are not output. `line_length`,
395 `write_back`, `fast` and `pyi` options are passed to
396 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
400 if not src.is_file() and str(src) == "-":
401 if format_stdin_to_stdout(
402 line_length=line_length, fast=fast, write_back=write_back, mode=mode
404 changed = Changed.YES
407 if write_back != WriteBack.DIFF:
408 cache = read_cache(line_length, mode)
409 res_src = src.resolve()
410 if res_src in cache and cache[res_src] == get_cache_info(res_src):
411 changed = Changed.CACHED
412 if changed is not Changed.CACHED and format_file_in_place(
414 line_length=line_length,
416 write_back=write_back,
419 changed = Changed.YES
420 if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
421 write_back is WriteBack.CHECK and changed is Changed.NO
423 write_cache(cache, [src], line_length, mode)
424 report.done(src, changed)
425 except Exception as exc:
426 report.failed(src, str(exc))
429 async def schedule_formatting(
433 write_back: WriteBack,
439 """Run formatting of `sources` in parallel using the provided `executor`.
441 (Use ProcessPoolExecutors for actual parallelism.)
443 `line_length`, `write_back`, `fast`, and `pyi` options are passed to
444 :func:`format_file_in_place`.
447 if write_back != WriteBack.DIFF:
448 cache = read_cache(line_length, mode)
449 sources, cached = filter_cached(cache, sources)
450 for src in sorted(cached):
451 report.done(src, Changed.CACHED)
456 sources_to_cache = []
458 if write_back == WriteBack.DIFF:
459 # For diff output, we need locks to ensure we don't interleave output
460 # from different processes.
462 lock = manager.Lock()
464 loop.run_in_executor(
466 format_file_in_place,
474 for src in sorted(sources)
476 pending: Iterable[asyncio.Task] = tasks.keys()
478 loop.add_signal_handler(signal.SIGINT, cancel, pending)
479 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
480 except NotImplementedError:
481 # There are no good alternatives for these on Windows.
484 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
486 src = tasks.pop(task)
488 cancelled.append(task)
489 elif task.exception():
490 report.failed(src, str(task.exception()))
492 changed = Changed.YES if task.result() else Changed.NO
493 # If the file was written back or was successfully checked as
494 # well-formatted, store this information in the cache.
495 if write_back is WriteBack.YES or (
496 write_back is WriteBack.CHECK and changed is Changed.NO
498 sources_to_cache.append(src)
499 report.done(src, changed)
501 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
503 write_cache(cache, sources_to_cache, line_length, mode)
506 def format_file_in_place(
510 write_back: WriteBack = WriteBack.NO,
511 mode: FileMode = FileMode.AUTO_DETECT,
512 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
514 """Format file under `src` path. Return True if changed.
516 If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
518 `line_length` and `fast` options are passed to :func:`format_file_contents`.
520 if src.suffix == ".pyi":
523 then = datetime.utcfromtimestamp(src.stat().st_mtime)
524 with open(src, "rb") as buf:
525 src_contents, encoding, newline = decode_bytes(buf.read())
527 dst_contents = format_file_contents(
528 src_contents, line_length=line_length, fast=fast, mode=mode
530 except NothingChanged:
533 if write_back == write_back.YES:
534 with open(src, "w", encoding=encoding, newline=newline) as f:
535 f.write(dst_contents)
536 elif write_back == write_back.DIFF:
537 now = datetime.utcnow()
538 src_name = f"{src}\t{then} +0000"
539 dst_name = f"{src}\t{now} +0000"
540 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
544 f = io.TextIOWrapper(
550 f.write(diff_contents)
558 def format_stdin_to_stdout(
561 write_back: WriteBack = WriteBack.NO,
562 mode: FileMode = FileMode.AUTO_DETECT,
564 """Format file on stdin. Return True if changed.
566 If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
567 write a diff to stdout.
568 `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
569 :func:`format_file_contents`.
571 then = datetime.utcnow()
572 src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
575 dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
578 except NothingChanged:
582 f = io.TextIOWrapper(
583 sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
585 if write_back == WriteBack.YES:
587 elif write_back == WriteBack.DIFF:
588 now = datetime.utcnow()
589 src_name = f"STDIN\t{then} +0000"
590 dst_name = f"STDOUT\t{now} +0000"
591 f.write(diff(src, dst, src_name, dst_name))
595 def format_file_contents(
600 mode: FileMode = FileMode.AUTO_DETECT,
602 """Reformat contents a file and return new contents.
604 If `fast` is False, additionally confirm that the reformatted code is
605 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
606 `line_length` is passed to :func:`format_str`.
608 if src_contents.strip() == "":
611 dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
612 if src_contents == dst_contents:
616 assert_equivalent(src_contents, dst_contents)
617 assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
622 src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
624 """Reformat a string and return new contents.
626 `line_length` determines how many characters per line are allowed.
628 src_node = lib2to3_parse(src_contents)
630 future_imports = get_future_imports(src_node)
631 is_pyi = bool(mode & FileMode.PYI)
632 py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
633 normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
634 normalize_fmt_off(src_node)
635 lines = LineGenerator(
636 remove_u_prefix=py36 or "unicode_literals" in future_imports,
638 normalize_strings=normalize_strings,
639 allow_underscores=py36
640 and not bool(mode & FileMode.NO_NUMERIC_UNDERSCORE_NORMALIZATION),
642 elt = EmptyLineTracker(is_pyi=is_pyi)
645 for current_line in lines.visit(src_node):
646 for _ in range(after):
647 dst_contents += str(empty_line)
648 before, after = elt.maybe_empty_lines(current_line)
649 for _ in range(before):
650 dst_contents += str(empty_line)
651 for line in split_line(current_line, line_length=line_length, py36=py36):
652 dst_contents += str(line)
656 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
657 """Return a tuple of (decoded_contents, encoding, newline).
659 `newline` is either CRLF or LF but `decoded_contents` is decoded with
660 universal newlines (i.e. only contains LF).
662 srcbuf = io.BytesIO(src)
663 encoding, lines = tokenize.detect_encoding(srcbuf.readline)
665 return "", encoding, "\n"
667 newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
669 with io.TextIOWrapper(srcbuf, encoding) as tiow:
670 return tiow.read(), encoding, newline
674 pygram.python_grammar_no_print_statement_no_exec_statement,
675 pygram.python_grammar_no_print_statement,
676 pygram.python_grammar,
680 def lib2to3_parse(src_txt: str) -> Node:
681 """Given a string with source, return the lib2to3 Node."""
682 grammar = pygram.python_grammar_no_print_statement
683 if src_txt[-1:] != "\n":
685 for grammar in GRAMMARS:
686 drv = driver.Driver(grammar, pytree.convert)
688 result = drv.parse_string(src_txt, True)
691 except ParseError as pe:
692 lineno, column = pe.context[1]
693 lines = src_txt.splitlines()
695 faulty_line = lines[lineno - 1]
697 faulty_line = "<line number missing in source>"
698 exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
702 if isinstance(result, Leaf):
703 result = Node(syms.file_input, [result])
707 def lib2to3_unparse(node: Node) -> str:
708 """Given a lib2to3 node, return its string representation."""
716 class Visitor(Generic[T]):
717 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
719 def visit(self, node: LN) -> Iterator[T]:
720 """Main method to visit `node` and its children.
722 It tries to find a `visit_*()` method for the given `node.type`, like
723 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
724 If no dedicated `visit_*()` method is found, chooses `visit_default()`
727 Then yields objects of type `T` from the selected visitor.
730 name = token.tok_name[node.type]
732 name = type_repr(node.type)
733 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
735 def visit_default(self, node: LN) -> Iterator[T]:
736 """Default `visit_*()` implementation. Recurses to children of `node`."""
737 if isinstance(node, Node):
738 for child in node.children:
739 yield from self.visit(child)
743 class DebugVisitor(Visitor[T]):
746 def visit_default(self, node: LN) -> Iterator[T]:
747 indent = " " * (2 * self.tree_depth)
748 if isinstance(node, Node):
749 _type = type_repr(node.type)
750 out(f"{indent}{_type}", fg="yellow")
752 for child in node.children:
753 yield from self.visit(child)
756 out(f"{indent}/{_type}", fg="yellow", bold=False)
758 _type = token.tok_name.get(node.type, str(node.type))
759 out(f"{indent}{_type}", fg="blue", nl=False)
761 # We don't have to handle prefixes for `Node` objects since
762 # that delegates to the first child anyway.
763 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
764 out(f" {node.value!r}", fg="blue", bold=False)
767 def show(cls, code: Union[str, Leaf, Node]) -> None:
768 """Pretty-print the lib2to3 AST of a given string of `code`.
770 Convenience method for debugging.
772 v: DebugVisitor[None] = DebugVisitor()
773 if isinstance(code, str):
774 code = lib2to3_parse(code)
778 KEYWORDS = set(keyword.kwlist)
779 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
780 FLOW_CONTROL = {"return", "raise", "break", "continue"}
791 STANDALONE_COMMENT = 153
792 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
793 LOGIC_OPERATORS = {"and", "or"}
818 STARS = {token.STAR, token.DOUBLESTAR}
821 syms.argument, # double star in arglist
822 syms.trailer, # single argument to call
824 syms.varargslist, # lambdas
826 UNPACKING_PARENTS = {
827 syms.atom, # single element of a list or set literal
831 syms.testlist_star_expr,
866 COMPREHENSION_PRIORITY = 20
868 TERNARY_PRIORITY = 16
871 COMPARATOR_PRIORITY = 10
882 token.DOUBLESLASH: 4,
892 class BracketTracker:
893 """Keeps track of brackets on a line."""
896 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
897 delimiters: Dict[LeafID, Priority] = Factory(dict)
898 previous: Optional[Leaf] = None
899 _for_loop_depths: List[int] = Factory(list)
900 _lambda_argument_depths: List[int] = Factory(list)
902 def mark(self, leaf: Leaf) -> None:
903 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
905 All leaves receive an int `bracket_depth` field that stores how deep
906 within brackets a given leaf is. 0 means there are no enclosing brackets
907 that started on this line.
909 If a leaf is itself a closing bracket, it receives an `opening_bracket`
910 field that it forms a pair with. This is a one-directional link to
911 avoid reference cycles.
913 If a leaf is a delimiter (a token on which Black can split the line if
914 needed) and it's on depth 0, its `id()` is stored in the tracker's
917 if leaf.type == token.COMMENT:
920 self.maybe_decrement_after_for_loop_variable(leaf)
921 self.maybe_decrement_after_lambda_arguments(leaf)
922 if leaf.type in CLOSING_BRACKETS:
924 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
925 leaf.opening_bracket = opening_bracket
926 leaf.bracket_depth = self.depth
928 delim = is_split_before_delimiter(leaf, self.previous)
929 if delim and self.previous is not None:
930 self.delimiters[id(self.previous)] = delim
932 delim = is_split_after_delimiter(leaf, self.previous)
934 self.delimiters[id(leaf)] = delim
935 if leaf.type in OPENING_BRACKETS:
936 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
939 self.maybe_increment_lambda_arguments(leaf)
940 self.maybe_increment_for_loop_variable(leaf)
942 def any_open_brackets(self) -> bool:
943 """Return True if there is an yet unmatched open bracket on the line."""
944 return bool(self.bracket_match)
946 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
947 """Return the highest priority of a delimiter found on the line.
949 Values are consistent with what `is_split_*_delimiter()` return.
950 Raises ValueError on no delimiters.
952 return max(v for k, v in self.delimiters.items() if k not in exclude)
954 def delimiter_count_with_priority(self, priority: int = 0) -> int:
955 """Return the number of delimiters with the given `priority`.
957 If no `priority` is passed, defaults to max priority on the line.
959 if not self.delimiters:
962 priority = priority or self.max_delimiter_priority()
963 return sum(1 for p in self.delimiters.values() if p == priority)
965 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
966 """In a for loop, or comprehension, the variables are often unpacks.
968 To avoid splitting on the comma in this situation, increase the depth of
969 tokens between `for` and `in`.
971 if leaf.type == token.NAME and leaf.value == "for":
973 self._for_loop_depths.append(self.depth)
978 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
979 """See `maybe_increment_for_loop_variable` above for explanation."""
981 self._for_loop_depths
982 and self._for_loop_depths[-1] == self.depth
983 and leaf.type == token.NAME
984 and leaf.value == "in"
987 self._for_loop_depths.pop()
992 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
993 """In a lambda expression, there might be more than one argument.
995 To avoid splitting on the comma in this situation, increase the depth of
996 tokens between `lambda` and `:`.
998 if leaf.type == token.NAME and leaf.value == "lambda":
1000 self._lambda_argument_depths.append(self.depth)
1005 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1006 """See `maybe_increment_lambda_arguments` above for explanation."""
1008 self._lambda_argument_depths
1009 and self._lambda_argument_depths[-1] == self.depth
1010 and leaf.type == token.COLON
1013 self._lambda_argument_depths.pop()
1018 def get_open_lsqb(self) -> Optional[Leaf]:
1019 """Return the most recent opening square bracket (if any)."""
1020 return self.bracket_match.get((self.depth - 1, token.RSQB))
1025 """Holds leaves and comments. Can be printed with `str(line)`."""
1028 leaves: List[Leaf] = Factory(list)
1029 comments: List[Tuple[Index, Leaf]] = Factory(list)
1030 bracket_tracker: BracketTracker = Factory(BracketTracker)
1031 inside_brackets: bool = False
1032 should_explode: bool = False
1034 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1035 """Add a new `leaf` to the end of the line.
1037 Unless `preformatted` is True, the `leaf` will receive a new consistent
1038 whitespace prefix and metadata applied by :class:`BracketTracker`.
1039 Trailing commas are maybe removed, unpacked for loop variables are
1040 demoted from being delimiters.
1042 Inline comments are put aside.
1044 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1048 if token.COLON == leaf.type and self.is_class_paren_empty:
1049 del self.leaves[-2:]
1050 if self.leaves and not preformatted:
1051 # Note: at this point leaf.prefix should be empty except for
1052 # imports, for which we only preserve newlines.
1053 leaf.prefix += whitespace(
1054 leaf, complex_subscript=self.is_complex_subscript(leaf)
1056 if self.inside_brackets or not preformatted:
1057 self.bracket_tracker.mark(leaf)
1058 self.maybe_remove_trailing_comma(leaf)
1059 if not self.append_comment(leaf):
1060 self.leaves.append(leaf)
1062 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1063 """Like :func:`append()` but disallow invalid standalone comment structure.
1065 Raises ValueError when any `leaf` is appended after a standalone comment
1066 or when a standalone comment is not the first leaf on the line.
1068 if self.bracket_tracker.depth == 0:
1070 raise ValueError("cannot append to standalone comments")
1072 if self.leaves and leaf.type == STANDALONE_COMMENT:
1074 "cannot append standalone comments to a populated line"
1077 self.append(leaf, preformatted=preformatted)
1080 def is_comment(self) -> bool:
1081 """Is this line a standalone comment?"""
1082 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1085 def is_decorator(self) -> bool:
1086 """Is this line a decorator?"""
1087 return bool(self) and self.leaves[0].type == token.AT
1090 def is_import(self) -> bool:
1091 """Is this an import line?"""
1092 return bool(self) and is_import(self.leaves[0])
1095 def is_class(self) -> bool:
1096 """Is this line a class definition?"""
1099 and self.leaves[0].type == token.NAME
1100 and self.leaves[0].value == "class"
1104 def is_stub_class(self) -> bool:
1105 """Is this line a class definition with a body consisting only of "..."?"""
1106 return self.is_class and self.leaves[-3:] == [
1107 Leaf(token.DOT, ".") for _ in range(3)
1111 def is_def(self) -> bool:
1112 """Is this a function definition? (Also returns True for async defs.)"""
1114 first_leaf = self.leaves[0]
1119 second_leaf: Optional[Leaf] = self.leaves[1]
1122 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1123 first_leaf.type == token.ASYNC
1124 and second_leaf is not None
1125 and second_leaf.type == token.NAME
1126 and second_leaf.value == "def"
1130 def is_class_paren_empty(self) -> bool:
1131 """Is this a class with no base classes but using parentheses?
1133 Those are unnecessary and should be removed.
1137 and len(self.leaves) == 4
1139 and self.leaves[2].type == token.LPAR
1140 and self.leaves[2].value == "("
1141 and self.leaves[3].type == token.RPAR
1142 and self.leaves[3].value == ")"
1146 def is_triple_quoted_string(self) -> bool:
1147 """Is the line a triple quoted string?"""
1150 and self.leaves[0].type == token.STRING
1151 and self.leaves[0].value.startswith(('"""', "'''"))
1154 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1155 """If so, needs to be split before emitting."""
1156 for leaf in self.leaves:
1157 if leaf.type == STANDALONE_COMMENT:
1158 if leaf.bracket_depth <= depth_limit:
1163 def contains_multiline_strings(self) -> bool:
1164 for leaf in self.leaves:
1165 if is_multiline_string(leaf):
1170 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1171 """Remove trailing comma if there is one and it's safe."""
1174 and self.leaves[-1].type == token.COMMA
1175 and closing.type in CLOSING_BRACKETS
1179 if closing.type == token.RBRACE:
1180 self.remove_trailing_comma()
1183 if closing.type == token.RSQB:
1184 comma = self.leaves[-1]
1185 if comma.parent and comma.parent.type == syms.listmaker:
1186 self.remove_trailing_comma()
1189 # For parens let's check if it's safe to remove the comma.
1190 # Imports are always safe.
1192 self.remove_trailing_comma()
1195 # Otherwise, if the trailing one is the only one, we might mistakenly
1196 # change a tuple into a different type by removing the comma.
1197 depth = closing.bracket_depth + 1
1199 opening = closing.opening_bracket
1200 for _opening_index, leaf in enumerate(self.leaves):
1207 for leaf in self.leaves[_opening_index + 1 :]:
1211 bracket_depth = leaf.bracket_depth
1212 if bracket_depth == depth and leaf.type == token.COMMA:
1214 if leaf.parent and leaf.parent.type == syms.arglist:
1219 self.remove_trailing_comma()
1224 def append_comment(self, comment: Leaf) -> bool:
1225 """Add an inline or standalone comment to the line."""
1227 comment.type == STANDALONE_COMMENT
1228 and self.bracket_tracker.any_open_brackets()
1233 if comment.type != token.COMMENT:
1236 after = len(self.leaves) - 1
1238 comment.type = STANDALONE_COMMENT
1243 self.comments.append((after, comment))
1246 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1247 """Generate comments that should appear directly after `leaf`.
1249 Provide a non-negative leaf `_index` to speed up the function.
1251 if not self.comments:
1255 for _index, _leaf in enumerate(self.leaves):
1262 for index, comment_after in self.comments:
1266 def remove_trailing_comma(self) -> None:
1267 """Remove the trailing comma and moves the comments attached to it."""
1268 comma_index = len(self.leaves) - 1
1269 for i in range(len(self.comments)):
1270 comment_index, comment = self.comments[i]
1271 if comment_index == comma_index:
1272 self.comments[i] = (comma_index - 1, comment)
1275 def is_complex_subscript(self, leaf: Leaf) -> bool:
1276 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1277 open_lsqb = self.bracket_tracker.get_open_lsqb()
1278 if open_lsqb is None:
1281 subscript_start = open_lsqb.next_sibling
1283 if isinstance(subscript_start, Node):
1284 if subscript_start.type == syms.listmaker:
1287 if subscript_start.type == syms.subscriptlist:
1288 subscript_start = child_towards(subscript_start, leaf)
1289 return subscript_start is not None and any(
1290 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1293 def __str__(self) -> str:
1294 """Render the line."""
1298 indent = " " * self.depth
1299 leaves = iter(self.leaves)
1300 first = next(leaves)
1301 res = f"{first.prefix}{indent}{first.value}"
1304 for _, comment in self.comments:
1308 def __bool__(self) -> bool:
1309 """Return True if the line has leaves or comments."""
1310 return bool(self.leaves or self.comments)
1314 class EmptyLineTracker:
1315 """Provides a stateful method that returns the number of potential extra
1316 empty lines needed before and after the currently processed line.
1318 Note: this tracker works on lines that haven't been split yet. It assumes
1319 the prefix of the first leaf consists of optional newlines. Those newlines
1320 are consumed by `maybe_empty_lines()` and included in the computation.
1323 is_pyi: bool = False
1324 previous_line: Optional[Line] = None
1325 previous_after: int = 0
1326 previous_defs: List[int] = Factory(list)
1328 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1329 """Return the number of extra empty lines before and after the `current_line`.
1331 This is for separating `def`, `async def` and `class` with extra empty
1332 lines (two on module-level).
1334 before, after = self._maybe_empty_lines(current_line)
1335 before -= self.previous_after
1336 self.previous_after = after
1337 self.previous_line = current_line
1338 return before, after
1340 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1342 if current_line.depth == 0:
1343 max_allowed = 1 if self.is_pyi else 2
1344 if current_line.leaves:
1345 # Consume the first leaf's extra newlines.
1346 first_leaf = current_line.leaves[0]
1347 before = first_leaf.prefix.count("\n")
1348 before = min(before, max_allowed)
1349 first_leaf.prefix = ""
1352 depth = current_line.depth
1353 while self.previous_defs and self.previous_defs[-1] >= depth:
1354 self.previous_defs.pop()
1356 before = 0 if depth else 1
1358 before = 1 if depth else 2
1359 if current_line.is_decorator or current_line.is_def or current_line.is_class:
1360 return self._maybe_empty_lines_for_class_or_def(current_line, before)
1364 and self.previous_line.is_import
1365 and not current_line.is_import
1366 and depth == self.previous_line.depth
1368 return (before or 1), 0
1372 and self.previous_line.is_class
1373 and current_line.is_triple_quoted_string
1379 def _maybe_empty_lines_for_class_or_def(
1380 self, current_line: Line, before: int
1381 ) -> Tuple[int, int]:
1382 if not current_line.is_decorator:
1383 self.previous_defs.append(current_line.depth)
1384 if self.previous_line is None:
1385 # Don't insert empty lines before the first line in the file.
1388 if self.previous_line.is_decorator:
1391 if self.previous_line.depth < current_line.depth and (
1392 self.previous_line.is_class or self.previous_line.is_def
1397 self.previous_line.is_comment
1398 and self.previous_line.depth == current_line.depth
1404 if self.previous_line.depth > current_line.depth:
1406 elif current_line.is_class or self.previous_line.is_class:
1407 if current_line.is_stub_class and self.previous_line.is_stub_class:
1408 # No blank line between classes with an empty body
1412 elif current_line.is_def and not self.previous_line.is_def:
1413 # Blank line between a block of functions and a block of non-functions
1419 if current_line.depth and newlines:
1425 class LineGenerator(Visitor[Line]):
1426 """Generates reformatted Line objects. Empty lines are not emitted.
1428 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1429 in ways that will no longer stringify to valid Python code on the tree.
1432 is_pyi: bool = False
1433 normalize_strings: bool = True
1434 current_line: Line = Factory(Line)
1435 remove_u_prefix: bool = False
1436 allow_underscores: bool = False
1438 def line(self, indent: int = 0) -> Iterator[Line]:
1441 If the line is empty, only emit if it makes sense.
1442 If the line is too long, split it first and then generate.
1444 If any lines were generated, set up a new current_line.
1446 if not self.current_line:
1447 self.current_line.depth += indent
1448 return # Line is empty, don't emit. Creating a new one unnecessary.
1450 complete_line = self.current_line
1451 self.current_line = Line(depth=complete_line.depth + indent)
1454 def visit_default(self, node: LN) -> Iterator[Line]:
1455 """Default `visit_*()` implementation. Recurses to children of `node`."""
1456 if isinstance(node, Leaf):
1457 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1458 for comment in generate_comments(node):
1459 if any_open_brackets:
1460 # any comment within brackets is subject to splitting
1461 self.current_line.append(comment)
1462 elif comment.type == token.COMMENT:
1463 # regular trailing comment
1464 self.current_line.append(comment)
1465 yield from self.line()
1468 # regular standalone comment
1469 yield from self.line()
1471 self.current_line.append(comment)
1472 yield from self.line()
1474 normalize_prefix(node, inside_brackets=any_open_brackets)
1475 if self.normalize_strings and node.type == token.STRING:
1476 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1477 normalize_string_quotes(node)
1478 if node.type == token.NUMBER:
1479 normalize_numeric_literal(node, self.allow_underscores)
1480 if node.type not in WHITESPACE:
1481 self.current_line.append(node)
1482 yield from super().visit_default(node)
1484 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1485 """Increase indentation level, maybe yield a line."""
1486 # In blib2to3 INDENT never holds comments.
1487 yield from self.line(+1)
1488 yield from self.visit_default(node)
1490 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1491 """Decrease indentation level, maybe yield a line."""
1492 # The current line might still wait for trailing comments. At DEDENT time
1493 # there won't be any (they would be prefixes on the preceding NEWLINE).
1494 # Emit the line then.
1495 yield from self.line()
1497 # While DEDENT has no value, its prefix may contain standalone comments
1498 # that belong to the current indentation level. Get 'em.
1499 yield from self.visit_default(node)
1501 # Finally, emit the dedent.
1502 yield from self.line(-1)
1505 self, node: Node, keywords: Set[str], parens: Set[str]
1506 ) -> Iterator[Line]:
1507 """Visit a statement.
1509 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1510 `def`, `with`, `class`, `assert` and assignments.
1512 The relevant Python language `keywords` for a given statement will be
1513 NAME leaves within it. This methods puts those on a separate line.
1515 `parens` holds a set of string leaf values immediately after which
1516 invisible parens should be put.
1518 normalize_invisible_parens(node, parens_after=parens)
1519 for child in node.children:
1520 if child.type == token.NAME and child.value in keywords: # type: ignore
1521 yield from self.line()
1523 yield from self.visit(child)
1525 def visit_suite(self, node: Node) -> Iterator[Line]:
1526 """Visit a suite."""
1527 if self.is_pyi and is_stub_suite(node):
1528 yield from self.visit(node.children[2])
# NOTE(review): original line 1529 is absent from this excerpt (the numbering
# jumps 1528 -> 1530); the statement below presumably sits in an `else:`
# branch -- confirm against the full file before editing.
1530 yield from self.visit_default(node)
1532 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1533 """Visit a statement without nested statements."""
1534 is_suite_like = node.parent and node.parent.type in STATEMENT
1536 if self.is_pyi and is_stub_body(node):
1537 yield from self.visit_default(node)
1539 yield from self.line(+1)
1540 yield from self.visit_default(node)
1541 yield from self.line(-1)
1544 if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1545 yield from self.line()
1546 yield from self.visit_default(node)
1548 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1549 """Visit `async def`, `async for`, `async with`."""
1550 yield from self.line()
1552 children = iter(node.children)
1553 for child in children:
1554 yield from self.visit(child)
1556 if child.type == token.ASYNC:
1559 internal_stmt = next(children)
1560 for child in internal_stmt.children:
1561 yield from self.visit(child)
1563 def visit_decorators(self, node: Node) -> Iterator[Line]:
1564 """Visit decorators."""
1565 for child in node.children:
1566 yield from self.line()
1567 yield from self.visit(child)
1569 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1570 """Remove a semicolon and put the other statement on a separate line."""
1571 yield from self.line()
1573 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1574 """End of file. Process outstanding comments and end with a newline."""
1575 yield from self.visit_default(leaf)
1576 yield from self.line()
1578 def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
1579 if not self.current_line.bracket_tracker.any_open_brackets():
1580 yield from self.line()
1581 yield from self.visit_default(leaf)
1583 def __attrs_post_init__(self) -> None:
1584 """You are in a twisty little maze of passages."""
1587 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1588 self.visit_if_stmt = partial(
1589 v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1591 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1592 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1593 self.visit_try_stmt = partial(
1594 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1596 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1597 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1598 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1599 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1600 self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1601 self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1602 self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1603 self.visit_async_funcdef = self.visit_async_stmt
1604 self.visit_decorated = self.visit_decorators
# Token-type lookup tables.  BRACKET maps each opening bracket token to its
# closing counterpart; the derived sets are consulted by `whitespace()` and
# the left/right-hand split functions below for O(1) membership tests.
# NOTE(review): IMPLICIT_TUPLE is not referenced anywhere in this excerpt --
# presumably the grammar nodes for unparenthesized expression lists; confirm
# its use sites in the full file.
1607 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1608 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1609 OPENING_BRACKETS = set(BRACKET.keys())
1610 CLOSING_BRACKETS = set(BRACKET.values())
1611 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1612 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1615 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1616 """Return whitespace prefix if needed for the given `leaf`.
1618 `complex_subscript` signals whether the given leaf is part of a subscription
1619 which has non-trivial arguments, like arithmetic expressions or function calls.
1627 if t in ALWAYS_NO_SPACE:
1630 if t == token.COMMENT:
1633 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1634 if t == token.COLON and p.type not in {
1641 prev = leaf.prev_sibling
1643 prevp = preceding_leaf(p)
1644 if not prevp or prevp.type in OPENING_BRACKETS:
1647 if t == token.COLON:
1648 if prevp.type == token.COLON:
1651 elif prevp.type != token.COMMA and not complex_subscript:
1656 if prevp.type == token.EQUAL:
1658 if prevp.parent.type in {
1666 elif prevp.parent.type == syms.typedargslist:
1667 # A bit hacky: if the equal sign has whitespace, it means we
1668 # previously found it's a typed argument. So, we're using
1672 elif prevp.type in STARS:
1673 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1676 elif prevp.type == token.COLON:
1677 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1678 return SPACE if complex_subscript else NO
1682 and prevp.parent.type == syms.factor
1683 and prevp.type in MATH_OPERATORS
1688 prevp.type == token.RIGHTSHIFT
1690 and prevp.parent.type == syms.shift_expr
1691 and prevp.prev_sibling
1692 and prevp.prev_sibling.type == token.NAME
1693 and prevp.prev_sibling.value == "print" # type: ignore
1695 # Python 2 print chevron
1698 elif prev.type in OPENING_BRACKETS:
1701 if p.type in {syms.parameters, syms.arglist}:
1702 # untyped function signatures or calls
1703 if not prev or prev.type != token.COMMA:
1706 elif p.type == syms.varargslist:
1708 if prev and prev.type != token.COMMA:
1711 elif p.type == syms.typedargslist:
1712 # typed function signatures
1716 if t == token.EQUAL:
1717 if prev.type != syms.tname:
1720 elif prev.type == token.EQUAL:
1721 # A bit hacky: if the equal sign has whitespace, it means we
1722 # previously found it's a typed argument. So, we're using that, too.
1725 elif prev.type != token.COMMA:
1728 elif p.type == syms.tname:
1731 prevp = preceding_leaf(p)
1732 if not prevp or prevp.type != token.COMMA:
1735 elif p.type == syms.trailer:
1736 # attributes and calls
1737 if t == token.LPAR or t == token.RPAR:
1742 prevp = preceding_leaf(p)
1743 if not prevp or prevp.type != token.NUMBER:
1746 elif t == token.LSQB:
1749 elif prev.type != token.COMMA:
1752 elif p.type == syms.argument:
1754 if t == token.EQUAL:
1758 prevp = preceding_leaf(p)
1759 if not prevp or prevp.type == token.LPAR:
1762 elif prev.type in {token.EQUAL} | STARS:
1765 elif p.type == syms.decorator:
1769 elif p.type == syms.dotted_name:
1773 prevp = preceding_leaf(p)
1774 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1777 elif p.type == syms.classdef:
1781 if prev and prev.type == token.LPAR:
1784 elif p.type in {syms.subscript, syms.sliceop}:
1787 assert p.parent is not None, "subscripts are always parented"
1788 if p.parent.type == syms.subscriptlist:
1793 elif not complex_subscript:
1796 elif p.type == syms.atom:
1797 if prev and t == token.DOT:
1798 # dots, but not the first one.
1801 elif p.type == syms.dictsetmaker:
1803 if prev and prev.type == token.DOUBLESTAR:
1806 elif p.type in {syms.factor, syms.star_expr}:
1809 prevp = preceding_leaf(p)
1810 if not prevp or prevp.type in OPENING_BRACKETS:
1813 prevp_parent = prevp.parent
1814 assert prevp_parent is not None
1815 if prevp.type == token.COLON and prevp_parent.type in {
1821 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1824 elif t in {token.NAME, token.NUMBER, token.STRING}:
1827 elif p.type == syms.import_from:
1829 if prev and prev.type == token.DOT:
1832 elif t == token.NAME:
1836 if prev and prev.type == token.DOT:
1839 elif p.type == syms.sliceop:
1845 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1846 """Return the first leaf that precedes `node`, if any."""
1848 res = node.prev_sibling
1850 if isinstance(res, Leaf):
1854 return list(res.leaves())[-1]
1863 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1864 """Return the child of `ancestor` that contains `descendant`."""
1865 node: Optional[LN] = descendant
1866 while node and node.parent != ancestor:
1871 def container_of(leaf: Leaf) -> LN:
1872 """Return `leaf` or one of its ancestors that is the topmost container of it.
1874 By "container" we mean a node where `leaf` is the very first child.
1876 same_prefix = leaf.prefix
1877 container: LN = leaf
1879 parent = container.parent
1883 if parent.children[0].prefix != same_prefix:
1886 if parent.type == syms.file_input:
1889 if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
1896 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1897 """Return the priority of the `leaf` delimiter, given a line break after it.
1899 The delimiter priorities returned here are from those delimiters that would
1900 cause a line break after themselves.
1902 Higher numbers are higher priority.
1904 if leaf.type == token.COMMA:
1905 return COMMA_PRIORITY
1910 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1911 """Return the priority of the `leaf` delimiter, given a line break before it.
1913 The delimiter priorities returned here are from those delimiters that would
1914 cause a line break before themselves.
1916 Higher numbers are higher priority.
1918 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1919 # * and ** might also be MATH_OPERATORS but in this case they are not.
1920 # Don't treat them as a delimiter.
1924 leaf.type == token.DOT
1926 and leaf.parent.type not in {syms.import_from, syms.dotted_name}
1927 and (previous is None or previous.type in CLOSING_BRACKETS)
1932 leaf.type in MATH_OPERATORS
1934 and leaf.parent.type not in {syms.factor, syms.star_expr}
1936 return MATH_PRIORITIES[leaf.type]
1938 if leaf.type in COMPARATORS:
1939 return COMPARATOR_PRIORITY
1942 leaf.type == token.STRING
1943 and previous is not None
1944 and previous.type == token.STRING
1946 return STRING_PRIORITY
1948 if leaf.type not in {token.NAME, token.ASYNC}:
1954 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1955 or leaf.type == token.ASYNC
1958 not isinstance(leaf.prev_sibling, Leaf)
1959 or leaf.prev_sibling.value != "async"
1961 return COMPREHENSION_PRIORITY
1966 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1968 return COMPREHENSION_PRIORITY
1970 if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
1971 return TERNARY_PRIORITY
1973 if leaf.value == "is":
1974 return COMPARATOR_PRIORITY
1979 and leaf.parent.type in {syms.comp_op, syms.comparison}
1981 previous is not None
1982 and previous.type == token.NAME
1983 and previous.value == "not"
1986 return COMPARATOR_PRIORITY
1991 and leaf.parent.type == syms.comp_op
1993 previous is not None
1994 and previous.type == token.NAME
1995 and previous.value == "is"
1998 return COMPARATOR_PRIORITY
2000 if leaf.value in LOGIC_OPERATORS and leaf.parent:
2001 return LOGIC_PRIORITY
# Exact comment strings recognized as formatting toggles; the yapf spellings
# are accepted alongside Black's own.  NOTE(review): the consuming code is
# not part of this excerpt -- presumably these disable/re-enable formatting
# for the region between them; verify against the full file.
2006 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2007 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield comment leaves parsed out of `leaf`'s whitespace prefix.

    lib2to3 stores comments inside the whitespace prefix of the token that
    follows them (see `pgen2/driver.py:Driver.parse_tokens()`).  That keeps
    the grammar simple -- no comment placements to encode -- but it means
    comments don't "belong" anywhere, so the leaves produced here are simple
    and parentless; we simply don't know what the correct parent should be.

    We only need to tell inline comments apart from standalone ones (those
    that don't share their line with any code): inline comments come out as
    regular token.COMMENT leaves, while standalone comments are emitted with
    the fake STANDALONE_COMMENT token identifier.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        yield Leaf(proto.type, proto.value, prefix="\n" * proto.newlines)
2035 """Describes a piece of syntax that is a comment.
2037 It's not a :class:`blib2to3.pytree.Leaf` so that:
2039 * it can be cached (`Leaf` objects should not be reused more than once as
2040 they store their lineno, column, prefix, and parent information);
2041 * `newlines` and `consumed` fields are kept separate from the `value`. This
2042 simplifies handling of special marker comments like ``# fmt: off/on``.
2045 type: int # token.COMMENT or STANDALONE_COMMENT
2046 value: str # content of the comment
2047 newlines: int # how many newlines before the comment
2048 consumed: int # how many characters of the original leaf's prefix did we consume
2051 @lru_cache(maxsize=4096)
2052 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2053 """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2054 result: List[ProtoComment] = []
2055 if not prefix or "#" not in prefix:
2060 for index, line in enumerate(prefix.split("\n")):
2061 consumed += len(line) + 1 # adding the length of the split '\n'
2062 line = line.lstrip()
2065 if not line.startswith("#"):
2068 if index == 0 and not is_endmarker:
2069 comment_type = token.COMMENT # simple trailing comment
2071 comment_type = STANDALONE_COMMENT
2072 comment = make_comment(line)
2075 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2082 def make_comment(content: str) -> str:
2083 """Return a consistently formatted comment from the given `content` string.
2085 All comments (except for "##", "#!", "#:") should have a single space between
2086 the hash sign and the content.
2088 If `content` didn't start with a hash sign, one is provided.
# NOTE(review): original lines 2084/2087/2089 and 2091-2093 are missing from
# this excerpt.  2089 is presumably the docstring terminator, and 2091-2093
# most likely guard against empty `content` -- without such a guard the
# `content[0]` subscript below would raise IndexError on "".  Confirm
# against the full file.
2090 content = content.rstrip()
2094 if content[0] == "#":
2095 content = content[1:]
2096 if content and content[0] not in " !:#":
2097 content = " " + content
2098 return "#" + content
2102 line: Line, line_length: int, inner: bool = False, py36: bool = False
2103 ) -> Iterator[Line]:
2104 """Split a `line` into potentially many lines.
2106 They should fit in the allotted `line_length` but might not be able to.
2107 `inner` signifies that there were a pair of brackets somewhere around the
2108 current `line`, possibly transitively. This means we can fallback to splitting
2109 by delimiters if the LHS/RHS don't yield any results.
2111 If `py36` is True, splitting may generate syntax that is only compatible
2112 with Python 3.6 and later.
2118 line_str = str(line).strip("\n")
2119 if not line.should_explode and is_line_short_enough(
2120 line, line_length=line_length, line_str=line_str
2125 split_funcs: List[SplitFunc]
2127 split_funcs = [left_hand_split]
2130 def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
2131 for omit in generate_trailers_to_omit(line, line_length):
2132 lines = list(right_hand_split(line, line_length, py36, omit=omit))
2133 if is_line_short_enough(lines[0], line_length=line_length):
2137 # All splits failed, best effort split with no omits.
2138 # This mostly happens to multiline strings that are by definition
2139 # reported as not fitting a single line.
2140 yield from right_hand_split(line, py36)
2142 if line.inside_brackets:
2143 split_funcs = [delimiter_split, standalone_comment_split, rhs]
2146 for split_func in split_funcs:
2147 # We are accumulating lines in `result` because we might want to abort
2148 # mission and return the original line in the end, or attempt a different
2150 result: List[Line] = []
2152 for l in split_func(line, py36):
2153 if str(l).strip("\n") == line_str:
2154 raise CannotSplit("Split function returned an unchanged result")
2157 split_line(l, line_length=line_length, inner=True, py36=py36)
2159 except CannotSplit as cs:
2170 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
2171 """Split line into many lines, starting with the first matching bracket pair.
2173 Note: this usually looks weird, only use this for function definitions.
2174 Prefer RHS otherwise. This is why this function is not symmetrical with
2175 :func:`right_hand_split` which also handles optional parentheses.
2177 head = Line(depth=line.depth)
2178 body = Line(depth=line.depth + 1, inside_brackets=True)
2179 tail = Line(depth=line.depth)
2180 tail_leaves: List[Leaf] = []
2181 body_leaves: List[Leaf] = []
2182 head_leaves: List[Leaf] = []
2183 current_leaves = head_leaves
2184 matching_bracket = None
2185 for leaf in line.leaves:
2187 current_leaves is body_leaves
2188 and leaf.type in CLOSING_BRACKETS
2189 and leaf.opening_bracket is matching_bracket
2191 current_leaves = tail_leaves if body_leaves else head_leaves
2192 current_leaves.append(leaf)
2193 if current_leaves is head_leaves:
2194 if leaf.type in OPENING_BRACKETS:
2195 matching_bracket = leaf
2196 current_leaves = body_leaves
2197 # Since body is a new indent level, remove spurious leading whitespace.
2199 normalize_prefix(body_leaves[0], inside_brackets=True)
2200 # Build the new lines.
2201 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2203 result.append(leaf, preformatted=True)
2204 for comment_after in line.comments_after(leaf):
2205 result.append(comment_after, preformatted=True)
2206 bracket_split_succeeded_or_raise(head, body, tail)
2207 for result in (head, body, tail):
2212 def right_hand_split( # noqa C901
2213 line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
2214 ) -> Iterator[Line]:
2215 """Split line into many lines, starting with the last matching bracket pair.
2217 If the split was by optional parentheses, attempt splitting without them, too.
2218 `omit` is a collection of closing bracket IDs that shouldn't be considered for
2221 Note: running this function modifies `bracket_depth` on the leaves of `line`.
2223 head = Line(depth=line.depth)
2224 body = Line(depth=line.depth + 1, inside_brackets=True)
2225 tail = Line(depth=line.depth)
2226 tail_leaves: List[Leaf] = []
2227 body_leaves: List[Leaf] = []
2228 head_leaves: List[Leaf] = []
2229 current_leaves = tail_leaves
2230 opening_bracket = None
2231 closing_bracket = None
2232 for leaf in reversed(line.leaves):
2233 if current_leaves is body_leaves:
2234 if leaf is opening_bracket:
2235 current_leaves = head_leaves if body_leaves else tail_leaves
2236 current_leaves.append(leaf)
2237 if current_leaves is tail_leaves:
2238 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2239 opening_bracket = leaf.opening_bracket
2240 closing_bracket = leaf
2241 current_leaves = body_leaves
2242 tail_leaves.reverse()
2243 body_leaves.reverse()
2244 head_leaves.reverse()
2245 # Since body is a new indent level, remove spurious leading whitespace.
2247 normalize_prefix(body_leaves[0], inside_brackets=True)
2249 # No `head` means the split failed. Either `tail` has all content or
2250 # the matching `opening_bracket` wasn't available on `line` anymore.
2251 raise CannotSplit("No brackets found")
2253 if line.is_import and len(body_leaves) == 1:
2254 body_leaves.append(Leaf(token.COMMA, ","))
2256 # Build the new lines.
2257 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2259 result.append(leaf, preformatted=True)
2260 for comment_after in line.comments_after(leaf):
2261 result.append(comment_after, preformatted=True)
2262 assert opening_bracket and closing_bracket
2263 body.should_explode = should_explode(body, opening_bracket)
2264 bracket_split_succeeded_or_raise(head, body, tail)
2266 # the body shouldn't be exploded
2267 not body.should_explode
2268 # the opening bracket is an optional paren
2269 and opening_bracket.type == token.LPAR
2270 and not opening_bracket.value
2271 # the closing bracket is an optional paren
2272 and closing_bracket.type == token.RPAR
2273 and not closing_bracket.value
2274 # it's not an import (optional parens are the only thing we can split on
2275 # in this case; attempting a split without them is a waste of time)
2276 and not line.is_import
2277 # there are no standalone comments in the body
2278 and not body.contains_standalone_comments(0)
2279 # and we can actually remove the parens
2280 and can_omit_invisible_parens(body, line_length)
2282 omit = {id(closing_bracket), *omit}
2284 yield from right_hand_split(line, line_length, py36=py36, omit=omit)
2290 or is_line_short_enough(body, line_length=line_length)
2293 "Splitting failed, body is still too long and can't be split."
2296 elif head.contains_multiline_strings() or tail.contains_multiline_strings():
2298 "The current optional pair of parentheses is bound to fail to "
2299 "satisfy the splitting algorithm because the head or the tail "
2300 "contains multiline strings which by definition never fit one "
2304 ensure_visible(opening_bracket)
2305 ensure_visible(closing_bracket)
2306 for result in (head, body, tail):
2311 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2312 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2314 Do nothing otherwise.
2316 A left- or right-hand split is based on a pair of brackets. Content before
2317 (and including) the opening bracket is left on one line, content inside the
2318 brackets is put on a separate line, and finally content starting with and
2319 following the closing bracket is put on a separate line.
2321 Those are called `head`, `body`, and `tail`, respectively. If the split
2322 produced the same line (all content in `head`) or ended up with an empty `body`
2323 and the `tail` is just the closing bracket, then it's considered failed.
2325 tail_len = len(str(tail).strip())
2328 raise CannotSplit("Splitting brackets produced the same line")
2332 f"Splitting brackets on an empty body to save "
2333 f"{tail_len} characters is not worth it"
2337 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2338 """Normalize prefix of the first leaf in every line returned by `split_func`.
2340 This is a decorator over relevant split functions.
2344 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
2345 for l in split_func(line, py36):
2346 normalize_prefix(l.leaves[0], inside_brackets=True)
2349 return split_wrapper
2352 @dont_increase_indentation
2353 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
2354 """Split according to delimiters of the highest priority.
2356 If `py36` is True, the split will add trailing commas also in function
2357 signatures that contain `*` and `**`.
2360 last_leaf = line.leaves[-1]
2362 raise CannotSplit("Line empty")
2364 bt = line.bracket_tracker
2366 delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2368 raise CannotSplit("No delimiters found")
2370 if delimiter_priority == DOT_PRIORITY:
2371 if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2372 raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2374 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2375 lowest_depth = sys.maxsize
2376 trailing_comma_safe = True
2378 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2379 """Append `leaf` to current line or to new line if appending impossible."""
2380 nonlocal current_line
2382 current_line.append_safe(leaf, preformatted=True)
2383 except ValueError as ve:
2386 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2387 current_line.append(leaf)
2389 for index, leaf in enumerate(line.leaves):
2390 yield from append_to_line(leaf)
2392 for comment_after in line.comments_after(leaf, index):
2393 yield from append_to_line(comment_after)
2395 lowest_depth = min(lowest_depth, leaf.bracket_depth)
2396 if leaf.bracket_depth == lowest_depth and is_vararg(
2397 leaf, within=VARARGS_PARENTS
2399 trailing_comma_safe = trailing_comma_safe and py36
2400 leaf_priority = bt.delimiters.get(id(leaf))
2401 if leaf_priority == delimiter_priority:
2404 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2408 and delimiter_priority == COMMA_PRIORITY
2409 and current_line.leaves[-1].type != token.COMMA
2410 and current_line.leaves[-1].type != STANDALONE_COMMENT
2412 current_line.append(Leaf(token.COMMA, ","))
2416 @dont_increase_indentation
2417 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2418 """Split standalone comments from the rest of the line."""
2419 if not line.contains_standalone_comments(0):
2420 raise CannotSplit("Line does not have any standalone comments")
2422 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2424 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2425 """Append `leaf` to current line or to new line if appending impossible."""
2426 nonlocal current_line
2428 current_line.append_safe(leaf, preformatted=True)
2429 except ValueError as ve:
2432 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2433 current_line.append(leaf)
2435 for index, leaf in enumerate(line.leaves):
2436 yield from append_to_line(leaf)
2438 for comment_after in line.comments_after(leaf, index):
2439 yield from append_to_line(comment_after)
2445 def is_import(leaf: Leaf) -> bool:
2446 """Return True if the given leaf starts an import statement."""
2453 (v == "import" and p and p.type == syms.import_name)
2454 or (v == "from" and p and p.type == syms.import_from)
2459 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2460 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2463 Note: don't use backslashes for formatting or you'll lose your voting rights.
2465 if not inside_brackets:
2466 spl = leaf.prefix.split("#")
2467 if "\\" not in spl[0]:
2468 nl_count = spl[-1].count("\n")
2471 leaf.prefix = "\n" * nl_count
2477 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2478 """Make all string prefixes lowercase.
2480 If remove_u_prefix is given, also removes any u prefix from the string.
2482 Note: Mutates its argument.
2484 match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2485 assert match is not None, f"failed to match string {leaf.value!r}"
2486 orig_prefix = match.group(1)
2487 new_prefix = orig_prefix.lower()
# NOTE(review): original line 2488 is missing from this excerpt.  Per the
# docstring, the u-stripping below should only run when `remove_u_prefix` is
# true, so 2488 is presumably `if remove_u_prefix:` guarding the next line --
# confirm against the full file.
2489 new_prefix = new_prefix.replace("u", "")
2490 leaf.value = f"{new_prefix}{match.group(2)}"
2493 def normalize_string_quotes(leaf: Leaf) -> None:
2494 """Prefer double quotes but only if it doesn't cause more escaping.
2496 Adds or removes backslashes as appropriate. Doesn't parse and fix
2497 strings nested in f-strings (yet).
2499 Note: Mutates its argument.
2501 value = leaf.value.lstrip("furbFURB")
2502 if value[:3] == '"""':
2505 elif value[:3] == "'''":
2508 elif value[0] == '"':
2514 first_quote_pos = leaf.value.find(orig_quote)
2515 if first_quote_pos == -1:
2516 return # There's an internal error
2518 prefix = leaf.value[:first_quote_pos]
2519 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2520 escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
2521 escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
2522 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2523 if "r" in prefix.casefold():
2524 if unescaped_new_quote.search(body):
2525 # There's at least one unescaped new_quote in this raw string
2526 # so converting is impossible
2529 # Do not introduce or remove backslashes in raw strings
2532 # remove unnecessary escapes
2533 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2534 if body != new_body:
2535 # Consider the string without unnecessary escapes as the original
2537 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2538 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2539 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2540 if "f" in prefix.casefold():
2541 matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
2544 # Do not introduce backslashes in interpolated expressions
2546 if new_quote == '"""' and new_body[-1:] == '"':
2548 new_body = new_body[:-1] + '\\"'
2549 orig_escape_count = body.count("\\")
2550 new_escape_count = new_body.count("\\")
2551 if new_escape_count > orig_escape_count:
2552 return # Do not introduce more escaping
2554 if new_escape_count == orig_escape_count and orig_quote == '"':
2555 return # Prefer double quotes
2557 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
    """Normalizes numeric (float, int, and complex) literals.

    All letters used in the representation are normalized to lowercase (except
    in Python 2 long literals), and long number literals are split using underscores.
    """
    text = leaf.value.lower()
    if text.startswith(("0o", "0b")):
        # Leave octal and binary literals alone.
        pass
    elif text.startswith("0x"):
        # Change hex literals to upper case.
        before, after = text[:2], text[2:]
        text = f"{before}{after.upper()}"
    elif "e" in text:
        # Scientific notation: normalize mantissa and exponent separately,
        # dropping a redundant "+" sign on the exponent.
        before, after = text.split("e")
        sign = ""
        if after.startswith("-"):
            after = after[1:]
            sign = "-"
        elif after.startswith("+"):
            after = after[1:]
        before = format_float_or_int_string(before, allow_underscores)
        after = format_int_string(after, allow_underscores)
        text = f"{before}e{sign}{after}"
    elif text.endswith(("j", "l")):
        number = text[:-1]
        suffix = text[-1]
        # Capitalize in "2L" because "l" looks too similar to "1".
        if suffix == "l":
            suffix = "L"
        text = f"{format_float_or_int_string(number, allow_underscores)}{suffix}"
    else:
        text = format_float_or_int_string(text, allow_underscores)
    leaf.value = text
def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
    """Formats a float string like "1.0"."""
    if "." not in text:
        return format_int_string(text, allow_underscores)

    before, after = text.split(".")
    # An empty integer or fractional part (".5", "5.") is normalized to "0".
    before = format_int_string(before, allow_underscores) if before else "0"
    if after:
        after = format_int_string(after, allow_underscores, count_from_end=False)
    else:
        after = "0"
    return f"{before}.{after}"


def format_int_string(
    text: str, allow_underscores: bool, count_from_end: bool = True
) -> str:
    """Normalizes underscores in a string to e.g. 1_000_000.

    Input must be a string of digits and optional underscores.
    If count_from_end is False, we add underscores after groups of three digits
    counting from the beginning instead of the end of the strings. This is used
    for the fractional part of float literals.
    """
    if not allow_underscores:
        return text

    text = text.replace("_", "")
    if len(text) <= 5:
        # No underscores for numbers <= 5 digits long.
        return text

    if count_from_end:
        # Avoid removing leading zeros, which are important if we're formatting
        # part of a number like "0.001".
        return format(int("1" + text), "3_")[1:].lstrip("_")
    else:
        return "_".join(text[i : i + 3] for i in range(0, len(text), 3))
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    check_lpar = False
    for index, child in enumerate(list(node.children)):
        if check_lpar:
            if child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(child):
                    lpar = Leaf(token.LPAR, "")
                    rpar = Leaf(token.RPAR, "")
                    index = child.remove() or 0
                    node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
    # Keep converting pairs until a full pass finds nothing more to convert;
    # each conversion mutates the tree, invalidating the previous traversal.
    try_again = True
    while try_again:
        try_again = convert_one_fmt_off_pair(node)
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                first.prefix = prefix[comment.consumed :]
                # Freeze the ignored region verbatim inside one standalone
                # comment leaf so later formatting passes leave it untouched.
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                first_idx = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                return True

            previous_consumed = comment.consumed

    return False
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    Stops at the end of the block.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        for comment in list_comments(container.prefix, is_endmarker=False):
            if comment.value in FMT_ON:
                # `# fmt: on` found: the ignored region ends here.
                return

        yield container

        container = container.next_sibling
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    if (
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or is_yield(node)
        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
    ):
        return False

    first = node.children[0]
    last = node.children[-1]
    if first.type == token.LPAR and last.type == token.RPAR:
        # make parentheses invisible
        first.value = ""  # type: ignore
        last.value = ""  # type: ignore
        if len(node.children) > 1:
            # recurse into the wrapped expression
            maybe_make_parens_invisible_in_atom(node.children[1])
        return False

    return True
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple."""
    # An empty tuple is an atom of exactly "(" followed by ")".
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: "(elem,)".
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if not (
            lpar.type == token.LPAR
            and gexp.type == syms.testlist_gexp
            and rpar.type == token.RPAR
        ):
            return False

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: "elem," (an implicit tuple ending in a trailing comma).
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression."""
    if node.type == syms.yield_expr:
        return True

    if node.type == token.NAME and node.value == "yield":  # type: ignore
        # A bare `yield` statement with no value.
        return True

    if node.type != syms.atom:
        return False

    if len(node.children) != 3:
        return False

    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        # A parenthesized yield: look through the parentheses.
        return is_yield(expr)

    return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    # Strip any string prefix (f/u/r/b in either case) before inspecting quotes.
    without_prefix = leaf.value.lstrip("furbFURB")
    if "\n" not in without_prefix:
        return False

    return without_prefix[:3] in {'"""', "'''"}
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body."""
    # A stub suite is exactly NEWLINE, INDENT, <body>, DEDENT.
    if (
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT
    ):
        return False

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    child = node.children[0]

    # "..." parses as an atom of three DOT leaves.
    return (
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
    )
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    """
    if node.type != syms.atom:
        return 0

    first = node.children[0]
    last = node.children[-1]
    if not (first.type == token.LPAR and last.type == token.RPAR):
        return 0

    bt = BracketTracker()
    for c in node.children[1:-1]:
        if isinstance(c, Leaf):
            bt.mark(c)
        else:
            for leaf in c.leaves():
                bt.mark(leaf)
    try:
        return bt.max_delimiter_priority()

    except ValueError:
        # No delimiters were marked inside the parentheses.
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
    if not (
        opening_bracket.parent
        and opening_bracket.parent.type in {syms.atom, syms.import_from}
        and opening_bracket.value in "[{("
    ):
        return False

    try:
        # Exclude a trailing comma so it doesn't count as the max delimiter.
        last_leaf = line.leaves[-1]
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        return False

    return max_priority == COMMA_PRIORITY
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - underscores in numeric literals; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                return True

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty.
    """
    omit: Set[LeafID] = set()
    yield omit

    length = 4 * line.depth
    opening_bracket = None
    closing_bracket = None
    inner_brackets: Set[LeafID] = set()
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file."""
    imports: Set[str] = set()

    def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
        # Yield imported names from the children of an import_from node.
        for child in children:
            if isinstance(child, Leaf):
                if child.type == token.NAME:
                    yield child.value
            elif child.type == syms.import_as_name:
                orig_name = child.children[0]
                assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
                assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
                yield orig_name.value
            elif child.type == syms.import_as_names:
                yield from get_imports_from_children(child.children)
            else:
                assert False, "Invalid syntax parsing imports"

    for child in node.children:
        # __future__ imports must come first; stop at the first
        # non-import statement (a docstring is allowed before them).
        if child.type != syms.simple_stmt:
            break

        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue

            break

        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break

            imports |= set(get_imports_from_children(first_child.children[3:]))
        else:
            break

    return imports
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        try:
            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        except ValueError:
            if child.is_symlink():
                report.path_ignored(
                    child, f"is a symbolic link that points outside {root}"
                )
                continue

            raise

        if child.is_dir():
            # Directories get a trailing slash so the exclude regex can
            # distinguish them from files.
            normalized_path += "/"
        exclude_match = exclude.search(normalized_path)
        if exclude_match and exclude_match.group(0):
            report.path_ignored(child, f"matches the --exclude regular expression")
            continue

        if child.is_dir():
            yield from gen_python_files_in_dir(child, root, include, exclude, report)

        elif child.is_file():
            include_match = include.search(normalized_path)
            if include_match:
                yield child
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.
    """
    if not srcs:
        return Path("/").resolve()

    common_base = min(Path(src).resolve() for src in srcs)
    if common_base.is_dir():
        # Append a fake file so `parents` below returns `common_base_dir`, too.
        common_base /= "fake-file"
    for directory in common_base.parents:
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # Fell off the top of the tree: `directory` is now the filesystem root.
    return directory
3168 """Provides a reformatting counter. Can be rendered with `str(report)`."""
3172 verbose: bool = False
3173 change_count: int = 0
3175 failure_count: int = 0
def done(self, src: Path, changed: Changed) -> None:
    """Increment the counter for successful reformatting. Write out a message."""
    if changed is Changed.YES:
        reformatted = "would reformat" if self.check else "reformatted"
        if self.verbose or not self.quiet:
            out(f"{reformatted} {src}")
        self.change_count += 1
    else:
        if self.verbose:
            if changed is Changed.NO:
                msg = f"{src} already well formatted, good job."
            else:
                msg = f"{src} wasn't modified on disk since last run."
            out(msg, bold=False)
        self.same_count += 1
def failed(self, src: Path, message: str) -> None:
    """Increment the counter for failed reformatting. Write out a message."""
    error_line = f"error: cannot format {src}: {message}"
    err(error_line)
    self.failure_count += 1
def path_ignored(self, path: Path, message: str) -> None:
    """Report a path that was skipped, but only in verbose mode."""
    if self.verbose:
        out(f"{path} ignored: {message}", bold=False)
@property
def return_code(self) -> int:
    """Return the exit code that the app should use.

    This considers the current state of changed files and failures:
    - if there were any failures, return 123;
    - if any files were changed and --check is being used, return 1;
    - otherwise return 0.
    """
    # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
    # 126 we have special return codes reserved by the shell.
    if self.failure_count:
        return 123

    elif self.change_count and self.check:
        return 1

    return 0
def __str__(self) -> str:
    """Render a color report of the current state.

    Use `click.unstyle` to remove colors.
    """
    if self.check:
        reformatted = "would be reformatted"
        unchanged = "would be left unchanged"
        failed = "would fail to reformat"
    else:
        reformatted = "reformatted"
        unchanged = "left unchanged"
        failed = "failed to reformat"
    report = []
    if self.change_count:
        s = "s" if self.change_count > 1 else ""
        report.append(
            click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
        )
    if self.same_count:
        s = "s" if self.same_count > 1 else ""
        report.append(f"{self.same_count} file{s} {unchanged}")
    if self.failure_count:
        s = "s" if self.failure_count > 1 else ""
        report.append(
            click.style(f"{self.failure_count} file{s} {failed}", fg="red")
        )
    return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent."""

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{' ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if `dst` reformats differently the second time."""
    newdst = format_str(dst, line_length=line_length, mode=mode)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            # Keep each chunk on its own line for readable logs.
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`."""
    import difflib

    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
        )
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` two times in a row on `original`.

    String normalization uses this to perform replaces on overlapping matches
    that a single pass would miss.
    """
    result = original
    for _ in range(2):
        result = regex.sub(replacement, result)
    return result
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile a regular expression string in `regex`.

    If it contains newlines, use verbose mode.
    """
    if "\n" in regex:
        regex = "(?x)" + regex
    return re.compile(regex)
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible."""
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        index -= 1
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Stops prematurely on multiline strings and standalone comments.
    """
    op = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in op(line.leaves):
        length = len(leaf.prefix) + len(leaf.value)
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comment: Optional[Leaf]
        for comment in line.comments_after(leaf, index):
            # Inline comments count towards the rendered length.
            length += len(comment.value)

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    if not line_str:
        line_str = str(line).strip("\n")
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        # A string followed by attribute access: only splittable if it looks
        # like a well-formed method-call chain.
        call_count = 0
        dot_count = 0
        next = leaves[-1]
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False

                call_count += 1
            elif leaf.type == token.DOT:
                dot_count += 1
            elif leaf.type == token.NAME:
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False

            elif leaf.type not in CLOSING_BRACKETS:
                return False

            if dot_count > 1 and call_count > 1:
                return False

            next = leaf

    return True
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                break

            length += leaf_length
            if length > line_length:
                break

            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                break

        else:
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer  # last trailer was not indexing
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the pickle cache for the given formatting options."""
    cache_filename = f"cache.{line_length}.{mode.value}.pickle"
    return CACHE_DIR / cache_filename
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except pickle.UnpicklingError:
            # Corrupt cache: pretend it's empty; write_cache will rewrite it.
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not."""
    stat = path.stat()
    return stat.st_mtime, stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that modified on disk or are not in the
    cache. The other contains paths to non-modified files.
    """
    todo, done = set(), set()
    for src in sources:
        src = src.resolve()
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file."""
    cache_file = get_cache_file(line_length, mode)
    try:
        if not CACHE_DIR.exists():
            CACHE_DIR.mkdir(parents=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Caching is best-effort; failure to write must not break formatting.
        pass
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application. Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code. Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.
    """
    try:
        from click import core
        from click import _unicodefun  # type: ignore
    except ModuleNotFoundError:
        return

    for module in (core, _unicodefun):
        if hasattr(module, "_verify_python3_env"):
            # Disable Click's ASCII-locale check.
            module._verify_python3_env = lambda: None
3656 if __name__ == "__main__":