All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 from asyncio.base_events import BaseEventLoop
3 from concurrent.futures import Executor, ProcessPoolExecutor
4 from datetime import datetime
5 from enum import Enum, Flag
6 from functools import partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
38 from appdirs import user_cache_dir
39 from attr import dataclass, Factory
43 from blib2to3.pytree import Node, Leaf, type_repr
44 from blib2to3 import pygram, pytree
45 from blib2to3.pgen2 import driver, token
46 from blib2to3.pgen2.parse import ParseError
49 __version__ = "18.6b0"
50 DEFAULT_LINE_LENGTH = 88
52 r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
54 DEFAULT_INCLUDES = r"\.pyi?$"
55 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
67 LN = Union[Leaf, Node]
68 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
71 CacheInfo = Tuple[Timestamp, FileSize]
72 Cache = Dict[Path, CacheInfo]
73 out = partial(click.secho, bold=True, err=True)
74 err = partial(click.secho, fg="red", err=True)
76 pygram.initialize(CACHE_DIR)
77 syms = pygram.python_symbols
class NothingChanged(UserWarning):
    """Raised when reformatted code is the same as source.

    The handlers around calls to :func:`format_file_contents` catch this.
    """
84 class CannotSplit(Exception):
85 """A readable split that fits the allotted line length is impossible.
87 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
88 :func:`delimiter_split`.
92 class FormatError(Exception):
93 """Base exception for `# fmt: on` and `# fmt: off` handling.
95 It holds the number of characters of the prefix consumed before the format
96 control comment appeared.
99 def __init__(self, consumed: int) -> None:
100 super().__init__(consumed)
101 self.consumed = consumed
def trim_prefix(self, leaf: Leaf) -> None:
    """Drop the already consumed part from the front of `leaf.prefix`.

    Mutates `leaf` in place.
    """
    remainder = leaf.prefix[self.consumed :]
    leaf.prefix = remainder
def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
    """Return a new NEWLINE Leaf holding the consumed part of the prefix.

    `leaf` itself is left untouched; only its prefix is read.
    """
    return Leaf(token.NEWLINE, leaf.prefix[: self.consumed])
class FormatOn(FormatError):
    """Signals that a comment like `# fmt: on` was found in the file."""
class FormatOff(FormatError):
    """Signals that a comment like `# fmt: off` was found in the file."""
120 class WriteBack(Enum):
126 def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
127 if check and not diff:
130 return cls.DIFF if diff else cls.YES
139 class FileMode(Flag):
143 NO_STRING_NORMALIZATION = 4
146 def from_configuration(
147 cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
149 mode = cls.AUTO_DETECT
154 if skip_string_normalization:
155 mode |= cls.NO_STRING_NORMALIZATION
164 default=DEFAULT_LINE_LENGTH,
165 help="How many character per line to allow.",
172 "Allow using Python 3.6-only syntax on all input files. This will put "
173 "trailing commas in function signatures and calls also after *args and "
174 "**kwargs. [default: per-file auto-detection]"
181 "Format all input files like typing stubs regardless of file extension "
182 "(useful when piping source on standard input)."
187 "--skip-string-normalization",
189 help="Don't normalize string quotes or prefixes.",
195 "Don't write the files back, just return the status. Return code 0 "
196 "means nothing would change. Return code 1 means some files would be "
197 "reformatted. Return code 123 means there was an internal error."
203 help="Don't write the files back, just output a diff for each file on stdout.",
208 help="If --fast given, skip temporary sanity checks. [default: --safe]",
213 default=DEFAULT_INCLUDES,
215 "A regular expression that matches files and directories that should be "
216 "included on recursive searches. An empty value means all files are "
217 "included regardless of the name. Use forward slashes for directories on "
218 "all platforms (Windows, too). Exclusions are calculated first, inclusions "
226 default=DEFAULT_EXCLUDES,
228 "A regular expression that matches files and directories that should be "
229 "excluded on recursive searches. An empty value means no paths are excluded. "
230 "Use forward slashes for directories on all platforms (Windows, too). "
231 "Exclusions are calculated first, inclusions later."
240 "Don't emit non-error messages to stderr. Errors are still emitted, "
241 "silence those with 2>/dev/null."
249 "Also emit messages to stderr about files that were not changed or were "
250 "ignored due to --exclude=."
253 @click.version_option(version=__version__)
258 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
270 skip_string_normalization: bool,
277 """The uncompromising code formatter."""
278 write_back = WriteBack.from_configuration(check=check, diff=diff)
279 mode = FileMode.from_configuration(
280 py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
282 report = Report(check=check, quiet=quiet, verbose=verbose)
283 sources: Set[Path] = set()
285 include_regex = re.compile(include)
287 err(f"Invalid regular expression for include given: {include!r}")
290 exclude_regex = re.compile(exclude)
292 err(f"Invalid regular expression for exclude given: {exclude!r}")
294 root = find_project_root(src)
299 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
301 elif p.is_file() or s == "-":
302 # if a file was explicitly given, we don't care about its extension
305 err(f"invalid path: {s}")
306 if len(sources) == 0:
307 if verbose or not quiet:
308 out("No paths given. Nothing to do 😴")
312 elif len(sources) == 1:
315 line_length=line_length,
317 write_back=write_back,
322 loop = asyncio.get_event_loop()
323 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
325 loop.run_until_complete(
328 line_length=line_length,
330 write_back=write_back,
339 if verbose or not quiet:
340 out("All done! ✨ 🍰 ✨")
341 click.secho(str(report), err=True)
342 ctx.exit(report.return_code)
349 write_back: WriteBack,
353 """Reformat a single file under `src` without spawning child processes.
355 If `quiet` is True, non-error messages are not output. `line_length`,
356 `write_back`, `fast` and `mode` options are passed to
357 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
361 if not src.is_file() and str(src) == "-":
362 if format_stdin_to_stdout(
363 line_length=line_length, fast=fast, write_back=write_back, mode=mode
365 changed = Changed.YES
368 if write_back != WriteBack.DIFF:
369 cache = read_cache(line_length, mode)
370 res_src = src.resolve()
371 if res_src in cache and cache[res_src] == get_cache_info(res_src):
372 changed = Changed.CACHED
373 if changed is not Changed.CACHED and format_file_in_place(
375 line_length=line_length,
377 write_back=write_back,
380 changed = Changed.YES
381 if write_back == WriteBack.YES and changed is not Changed.NO:
382 write_cache(cache, [src], line_length, mode)
383 report.done(src, changed)
384 except Exception as exc:
385 report.failed(src, str(exc))
388 async def schedule_formatting(
392 write_back: WriteBack,
398 """Run formatting of `sources` in parallel using the provided `executor`.
400 (Use ProcessPoolExecutors for actual parallelism.)
402 `line_length`, `write_back`, `fast`, and `pyi` options are passed to
403 :func:`format_file_in_place`.
406 if write_back != WriteBack.DIFF:
407 cache = read_cache(line_length, mode)
408 sources, cached = filter_cached(cache, sources)
409 for src in sorted(cached):
410 report.done(src, Changed.CACHED)
415 if write_back == WriteBack.DIFF:
416 # For diff output, we need locks to ensure we don't interleave output
417 # from different processes.
419 lock = manager.Lock()
421 loop.run_in_executor(
423 format_file_in_place,
431 for src in sorted(sources)
433 pending: Iterable[asyncio.Task] = tasks.keys()
435 loop.add_signal_handler(signal.SIGINT, cancel, pending)
436 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
437 except NotImplementedError:
438 # There are no good alternatives for these on Windows
441 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
443 src = tasks.pop(task)
445 cancelled.append(task)
446 elif task.exception():
447 report.failed(src, str(task.exception()))
449 formatted.append(src)
450 report.done(src, Changed.YES if task.result() else Changed.NO)
452 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
453 if write_back == WriteBack.YES and formatted:
454 write_cache(cache, formatted, line_length, mode)
457 def format_file_in_place(
461 write_back: WriteBack = WriteBack.NO,
462 mode: FileMode = FileMode.AUTO_DETECT,
463 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
465 """Format file under `src` path. Return True if changed.
467 If `write_back` is YES, write reformatted code back to the file; if it is DIFF, emit a diff instead.
468 `line_length` and `fast` options are passed to :func:`format_file_contents`.
470 if src.suffix == ".pyi":
473 then = datetime.utcfromtimestamp(src.stat().st_mtime)
474 with open(src, "rb") as buf:
475 src_contents, encoding, newline = decode_bytes(buf.read())
477 dst_contents = format_file_contents(
478 src_contents, line_length=line_length, fast=fast, mode=mode
480 except NothingChanged:
483 if write_back == write_back.YES:
484 with open(src, "w", encoding=encoding, newline=newline) as f:
485 f.write(dst_contents)
486 elif write_back == write_back.DIFF:
487 now = datetime.utcnow()
488 src_name = f"{src}\t{then} +0000"
489 dst_name = f"{src}\t{now} +0000"
490 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
494 f = io.TextIOWrapper(
500 f.write(diff_contents)
508 def format_stdin_to_stdout(
511 write_back: WriteBack = WriteBack.NO,
512 mode: FileMode = FileMode.AUTO_DETECT,
514 """Format file on stdin. Return True if changed.
516 If `write_back` is YES, write reformatted code back to stdout; if it is DIFF, write a diff to stdout instead.
517 `line_length`, `fast`, and `mode` arguments are passed to
518 :func:`format_file_contents`.
520 then = datetime.utcnow()
521 src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
524 dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
527 except NothingChanged:
531 f = io.TextIOWrapper(
532 sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
534 if write_back == WriteBack.YES:
536 elif write_back == WriteBack.DIFF:
537 now = datetime.utcnow()
538 src_name = f"STDIN\t{then} +0000"
539 dst_name = f"STDOUT\t{now} +0000"
540 f.write(diff(src, dst, src_name, dst_name))
544 def format_file_contents(
549 mode: FileMode = FileMode.AUTO_DETECT,
551 """Reformat contents of a file and return new contents.
553 If `fast` is False, additionally confirm that the reformatted code is
554 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
555 `line_length` is passed to :func:`format_str`.
557 if src_contents.strip() == "":
560 dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
561 if src_contents == dst_contents:
565 assert_equivalent(src_contents, dst_contents)
566 assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
571 src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
573 """Reformat a string and return new contents.
575 `line_length` determines how many characters per line are allowed.
577 src_node = lib2to3_parse(src_contents)
579 future_imports = get_future_imports(src_node)
580 is_pyi = bool(mode & FileMode.PYI)
581 py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
582 normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
583 lines = LineGenerator(
584 remove_u_prefix=py36 or "unicode_literals" in future_imports,
586 normalize_strings=normalize_strings,
588 elt = EmptyLineTracker(is_pyi=is_pyi)
591 for current_line in lines.visit(src_node):
592 for _ in range(after):
593 dst_contents += str(empty_line)
594 before, after = elt.maybe_empty_lines(current_line)
595 for _ in range(before):
596 dst_contents += str(empty_line)
597 for line in split_line(current_line, line_length=line_length, py36=py36):
598 dst_contents += str(line)
602 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
603 """Return a tuple of (decoded_contents, encoding, newline).
605 `newline` is either CRLF or LF but `decoded_contents` is decoded with
606 universal newlines (i.e. only contains LF).
608 srcbuf = io.BytesIO(src)
609 encoding, lines = tokenize.detect_encoding(srcbuf.readline)
611 return "", encoding, "\n"
613 newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
615 with io.TextIOWrapper(srcbuf, encoding) as tiow:
616 return tiow.read(), encoding, newline
620 pygram.python_grammar_no_print_statement_no_exec_statement,
621 pygram.python_grammar_no_print_statement,
622 pygram.python_grammar,
626 def lib2to3_parse(src_txt: str) -> Node:
627 """Given a string with source, return the lib2to3 Node."""
628 grammar = pygram.python_grammar_no_print_statement
629 if src_txt[-1:] != "\n":
631 for grammar in GRAMMARS:
632 drv = driver.Driver(grammar, pytree.convert)
634 result = drv.parse_string(src_txt, True)
637 except ParseError as pe:
638 lineno, column = pe.context[1]
639 lines = src_txt.splitlines()
641 faulty_line = lines[lineno - 1]
643 faulty_line = "<line number missing in source>"
644 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
648 if isinstance(result, Leaf):
649 result = Node(syms.file_input, [result])
653 def lib2to3_unparse(node: Node) -> str:
654 """Given a lib2to3 node, return its string representation."""
662 class Visitor(Generic[T]):
663 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
665 def visit(self, node: LN) -> Iterator[T]:
666 """Main method to visit `node` and its children.
668 It tries to find a `visit_*()` method for the given `node.type`, like
669 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
670 If no dedicated `visit_*()` method is found, chooses `visit_default()`
673 Then yields objects of type `T` from the selected visitor.
676 name = token.tok_name[node.type]
678 name = type_repr(node.type)
679 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Fallback `visit_*()` handler: visit the children of `node` in order.

    Only `Node` instances are recursed into; for anything else nothing is
    yielded.
    """
    if not isinstance(node, Node):
        return

    for child in node.children:
        yield from self.visit(child)
689 class DebugVisitor(Visitor[T]):
692 def visit_default(self, node: LN) -> Iterator[T]:
693 indent = " " * (2 * self.tree_depth)
694 if isinstance(node, Node):
695 _type = type_repr(node.type)
696 out(f"{indent}{_type}", fg="yellow")
698 for child in node.children:
699 yield from self.visit(child)
702 out(f"{indent}/{_type}", fg="yellow", bold=False)
704 _type = token.tok_name.get(node.type, str(node.type))
705 out(f"{indent}{_type}", fg="blue", nl=False)
707 # We don't have to handle prefixes for `Node` objects since
708 # that delegates to the first child anyway.
709 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
710 out(f" {node.value!r}", fg="blue", bold=False)
713 def show(cls, code: str) -> None:
714 """Pretty-print the lib2to3 AST of a given string of `code`.
716 Convenience method for debugging.
718 v: DebugVisitor[None] = DebugVisitor()
719 list(v.visit(lib2to3_parse(code)))
722 KEYWORDS = set(keyword.kwlist)
723 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
724 FLOW_CONTROL = {"return", "raise", "break", "continue"}
735 STANDALONE_COMMENT = 153
736 LOGIC_OPERATORS = {"and", "or"}
761 STARS = {token.STAR, token.DOUBLESTAR}
764 syms.argument, # double star in arglist
765 syms.trailer, # single argument to call
767 syms.varargslist, # lambdas
769 UNPACKING_PARENTS = {
770 syms.atom, # single element of a list or set literal
774 syms.testlist_star_expr,
809 COMPREHENSION_PRIORITY = 20
811 TERNARY_PRIORITY = 16
814 COMPARATOR_PRIORITY = 10
825 token.DOUBLESLASH: 4,
835 class BracketTracker:
836 """Keeps track of brackets on a line."""
839 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
840 delimiters: Dict[LeafID, Priority] = Factory(dict)
841 previous: Optional[Leaf] = None
842 _for_loop_variable: int = 0
843 _lambda_arguments: int = 0
845 def mark(self, leaf: Leaf) -> None:
846 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
848 All leaves receive an int `bracket_depth` field that stores how deep
849 within brackets a given leaf is. 0 means there are no enclosing brackets
850 that started on this line.
852 If a leaf is itself a closing bracket, it receives an `opening_bracket`
853 field that it forms a pair with. This is a one-directional link to
854 avoid reference cycles.
856 If a leaf is a delimiter (a token on which Black can split the line if
857 needed) and it's on depth 0, its `id()` is stored in the tracker's
860 if leaf.type == token.COMMENT:
863 self.maybe_decrement_after_for_loop_variable(leaf)
864 self.maybe_decrement_after_lambda_arguments(leaf)
865 if leaf.type in CLOSING_BRACKETS:
867 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
868 leaf.opening_bracket = opening_bracket
869 leaf.bracket_depth = self.depth
871 delim = is_split_before_delimiter(leaf, self.previous)
872 if delim and self.previous is not None:
873 self.delimiters[id(self.previous)] = delim
875 delim = is_split_after_delimiter(leaf, self.previous)
877 self.delimiters[id(leaf)] = delim
878 if leaf.type in OPENING_BRACKETS:
879 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
882 self.maybe_increment_lambda_arguments(leaf)
883 self.maybe_increment_for_loop_variable(leaf)
def any_open_brackets(self) -> bool:
    """Return True if there is a yet unmatched open bracket on the line."""
    # `bracket_match` only holds opening brackets that have not been closed.
    return len(self.bracket_match) > 0
889 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
890 """Return the highest priority of a delimiter found on the line.
892 Values are consistent with what `is_split_*_delimiter()` return.
893 Raises ValueError on no delimiters.
895 return max(v for k, v in self.delimiters.items() if k not in exclude)
897 def delimiter_count_with_priority(self, priority: int = 0) -> int:
898 """Return the number of delimiters with the given `priority`.
900 If no `priority` is passed, defaults to max priority on the line.
902 if not self.delimiters:
905 priority = priority or self.max_delimiter_priority()
906 return sum(1 for p in self.delimiters.values() if p == priority)
908 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
909 """In a for loop, or comprehension, the variables are often unpacks.
911 To avoid splitting on the comma in this situation, increase the depth of
912 tokens between `for` and `in`.
914 if leaf.type == token.NAME and leaf.value == "for":
916 self._for_loop_variable += 1
921 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
922 """See `maybe_increment_for_loop_variable` above for explanation."""
923 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
925 self._for_loop_variable -= 1
930 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
931 """In a lambda expression, there might be more than one argument.
933 To avoid splitting on the comma in this situation, increase the depth of
934 tokens between `lambda` and `:`.
936 if leaf.type == token.NAME and leaf.value == "lambda":
938 self._lambda_arguments += 1
943 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
944 """See `maybe_increment_lambda_arguments` above for explanation."""
945 if self._lambda_arguments and leaf.type == token.COLON:
947 self._lambda_arguments -= 1
def get_open_lsqb(self) -> Optional[Leaf]:
    """Return the most recent opening square bracket, or None if there is none."""
    # Entries are keyed by (depth, closing bracket type); look one level
    # below the current depth for a pending `]`.
    key = (self.depth - 1, token.RSQB)
    return self.bracket_match.get(key)
959 """Holds leaves and comments. Can be printed with `str(line)`."""
962 leaves: List[Leaf] = Factory(list)
963 comments: List[Tuple[Index, Leaf]] = Factory(list)
964 bracket_tracker: BracketTracker = Factory(BracketTracker)
965 inside_brackets: bool = False
966 should_explode: bool = False
968 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
969 """Add a new `leaf` to the end of the line.
971 Unless `preformatted` is True, the `leaf` will receive a new consistent
972 whitespace prefix and metadata applied by :class:`BracketTracker`.
973 Trailing commas are maybe removed, unpacked for loop variables are
974 demoted from being delimiters.
976 Inline comments are put aside.
978 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
982 if token.COLON == leaf.type and self.is_class_paren_empty:
984 if self.leaves and not preformatted:
985 # Note: at this point leaf.prefix should be empty except for
986 # imports, for which we only preserve newlines.
987 leaf.prefix += whitespace(
988 leaf, complex_subscript=self.is_complex_subscript(leaf)
990 if self.inside_brackets or not preformatted:
991 self.bracket_tracker.mark(leaf)
992 self.maybe_remove_trailing_comma(leaf)
993 if not self.append_comment(leaf):
994 self.leaves.append(leaf)
996 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
997 """Like :func:`append()` but disallow invalid standalone comment structure.
999 Raises ValueError when any `leaf` is appended after a standalone comment
1000 or when a standalone comment is not the first leaf on the line.
1002 if self.bracket_tracker.depth == 0:
1004 raise ValueError("cannot append to standalone comments")
1006 if self.leaves and leaf.type == STANDALONE_COMMENT:
1008 "cannot append standalone comments to a populated line"
1011 self.append(leaf, preformatted=preformatted)
def is_comment(self) -> bool:
    """Is this line made up of exactly one standalone comment?"""
    if len(self.leaves) != 1:
        return False

    return self.leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """Does this line start with `@`, i.e. is it a decorator?"""
    if not self:
        return False

    return self.leaves[0].type == token.AT
def is_import(self) -> bool:
    """Is this an import line? Decided by the line's first leaf."""
    if not self:
        return False

    # Refers to the module-level `is_import()` helper, not this method.
    return is_import(self.leaves[0])
1029 def is_class(self) -> bool:
1030 """Is this line a class definition?"""
1033 and self.leaves[0].type == token.NAME
1034 and self.leaves[0].value == "class"
1038 def is_stub_class(self) -> bool:
1039 """Is this line a class definition with a body consisting only of "..."?"""
1040 return self.is_class and self.leaves[-3:] == [
1041 Leaf(token.DOT, ".") for _ in range(3)
1045 def is_def(self) -> bool:
1046 """Is this a function definition? (Also returns True for async defs.)"""
1048 first_leaf = self.leaves[0]
1053 second_leaf: Optional[Leaf] = self.leaves[1]
1056 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1057 first_leaf.type == token.ASYNC
1058 and second_leaf is not None
1059 and second_leaf.type == token.NAME
1060 and second_leaf.value == "def"
1064 def is_class_paren_empty(self) -> bool:
1065 """Is this a class with no base classes but using parentheses?
1067 Those are unnecessary and should be removed.
1071 and len(self.leaves) == 4
1073 and self.leaves[2].type == token.LPAR
1074 and self.leaves[2].value == "("
1075 and self.leaves[3].type == token.RPAR
1076 and self.leaves[3].value == ")"
1080 def is_triple_quoted_string(self) -> bool:
1081 """Is the line a triple quoted string?"""
1084 and self.leaves[0].type == token.STRING
1085 and self.leaves[0].value.startswith(('"""', "'''"))
1088 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1089 """If so, needs to be split before emitting."""
1090 for leaf in self.leaves:
1091 if leaf.type == STANDALONE_COMMENT:
1092 if leaf.bracket_depth <= depth_limit:
1097 def contains_multiline_strings(self) -> bool:
1098 for leaf in self.leaves:
1099 if is_multiline_string(leaf):
1104 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1105 """Remove trailing comma if there is one and it's safe."""
1108 and self.leaves[-1].type == token.COMMA
1109 and closing.type in CLOSING_BRACKETS
1113 if closing.type == token.RBRACE:
1114 self.remove_trailing_comma()
1117 if closing.type == token.RSQB:
1118 comma = self.leaves[-1]
1119 if comma.parent and comma.parent.type == syms.listmaker:
1120 self.remove_trailing_comma()
1123 # For parens let's check if it's safe to remove the comma.
1124 # Imports are always safe.
1126 self.remove_trailing_comma()
1129 # Otherwise, if the trailing one is the only one, we might mistakenly
1130 # change a tuple into a different type by removing the comma.
1131 depth = closing.bracket_depth + 1
1133 opening = closing.opening_bracket
1134 for _opening_index, leaf in enumerate(self.leaves):
1141 for leaf in self.leaves[_opening_index + 1 :]:
1145 bracket_depth = leaf.bracket_depth
1146 if bracket_depth == depth and leaf.type == token.COMMA:
1148 if leaf.parent and leaf.parent.type == syms.arglist:
1153 self.remove_trailing_comma()
1158 def append_comment(self, comment: Leaf) -> bool:
1159 """Add an inline or standalone comment to the line."""
1161 comment.type == STANDALONE_COMMENT
1162 and self.bracket_tracker.any_open_brackets()
1167 if comment.type != token.COMMENT:
1170 after = len(self.leaves) - 1
1172 comment.type = STANDALONE_COMMENT
1177 self.comments.append((after, comment))
1180 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1181 """Generate comments that should appear directly after `leaf`.
1183 Provide a non-negative leaf `_index` to speed up the function.
1186 for _index, _leaf in enumerate(self.leaves):
1193 for index, comment_after in self.comments:
1197 def remove_trailing_comma(self) -> None:
1198 """Remove the trailing comma and move the comments attached to it."""
1199 comma_index = len(self.leaves) - 1
1200 for i in range(len(self.comments)):
1201 comment_index, comment = self.comments[i]
1202 if comment_index == comma_index:
1203 self.comments[i] = (comma_index - 1, comment)
1206 def is_complex_subscript(self, leaf: Leaf) -> bool:
1207 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1209 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
1211 if open_lsqb is None:
1214 subscript_start = open_lsqb.next_sibling
1216 isinstance(subscript_start, Node)
1217 and subscript_start.type == syms.subscriptlist
1219 subscript_start = child_towards(subscript_start, leaf)
1220 return subscript_start is not None and any(
1221 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1224 def __str__(self) -> str:
1225 """Render the line."""
1229 indent = " " * self.depth
1230 leaves = iter(self.leaves)
1231 first = next(leaves)
1232 res = f"{first.prefix}{indent}{first.value}"
1235 for _, comment in self.comments:
1239 def __bool__(self) -> bool:
1240 """Return True if the line has leaves or comments."""
1241 return bool(self.leaves or self.comments)
1244 class UnformattedLines(Line):
1245 """Just like :class:`Line` but stores lines which aren't reformatted."""
1247 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1248 """Just add a new `leaf` to the end of the lines.
1250 The `preformatted` argument is ignored.
1252 Keeps track of indentation `depth`, which is useful when the user
1253 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
1256 list(generate_comments(leaf))
1257 except FormatOn as f_on:
1258 self.leaves.append(f_on.leaf_from_consumed(leaf))
1261 self.leaves.append(leaf)
1262 if leaf.type == token.INDENT:
1264 elif leaf.type == token.DEDENT:
1267 def __str__(self) -> str:
1268 """Render unformatted lines from leaves which were added with `append()`.
1270 `depth` is not used for indentation in this case.
1276 for leaf in self.leaves:
def append_comment(self, comment: Leaf) -> bool:
    """Unsupported in this class: always raises `NotImplementedError`."""
    message = "Unformatted lines don't store comments separately."
    raise NotImplementedError(message)
1284 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1285 """Does nothing and returns False."""
1288 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1289 """Does nothing and returns False."""
1294 class EmptyLineTracker:
1295 """Provides a stateful method that returns the number of potential extra
1296 empty lines needed before and after the currently processed line.
1298 Note: this tracker works on lines that haven't been split yet. It assumes
1299 the prefix of the first leaf consists of optional newlines. Those newlines
1300 are consumed by `maybe_empty_lines()` and included in the computation.
1303 is_pyi: bool = False
1304 previous_line: Optional[Line] = None
1305 previous_after: int = 0
1306 previous_defs: List[int] = Factory(list)
1308 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1309 """Return the number of extra empty lines before and after the `current_line`.
1311 This is for separating `def`, `async def` and `class` with extra empty
1312 lines (two on module-level).
1314 if isinstance(current_line, UnformattedLines):
1317 before, after = self._maybe_empty_lines(current_line)
1318 before -= self.previous_after
1319 self.previous_after = after
1320 self.previous_line = current_line
1321 return before, after
1323 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1325 if current_line.depth == 0:
1326 max_allowed = 1 if self.is_pyi else 2
1327 if current_line.leaves:
1328 # Consume the first leaf's extra newlines.
1329 first_leaf = current_line.leaves[0]
1330 before = first_leaf.prefix.count("\n")
1331 before = min(before, max_allowed)
1332 first_leaf.prefix = ""
1335 depth = current_line.depth
1336 while self.previous_defs and self.previous_defs[-1] >= depth:
1337 self.previous_defs.pop()
1339 before = 0 if depth else 1
1341 before = 1 if depth else 2
1342 is_decorator = current_line.is_decorator
1343 if is_decorator or current_line.is_def or current_line.is_class:
1344 if not is_decorator:
1345 self.previous_defs.append(depth)
1346 if self.previous_line is None:
1347 # Don't insert empty lines before the first line in the file.
1350 if self.previous_line.is_decorator:
1353 if self.previous_line.depth < current_line.depth and (
1354 self.previous_line.is_class or self.previous_line.is_def
1359 self.previous_line.is_comment
1360 and self.previous_line.depth == current_line.depth
1366 if self.previous_line.depth > current_line.depth:
1368 elif current_line.is_class or self.previous_line.is_class:
1369 if current_line.is_stub_class and self.previous_line.is_stub_class:
1377 if current_line.depth and newlines:
1383 and self.previous_line.is_import
1384 and not current_line.is_import
1385 and depth == self.previous_line.depth
1387 return (before or 1), 0
1391 and self.previous_line.is_class
1392 and current_line.is_triple_quoted_string
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    # Enables the stub-specific branches in `visit_suite` and `visit_simple_stmt`.
    is_pyi: bool = False
    # When true, STRING leaves get their prefix and quotes normalized.
    normalize_strings: bool = True
    # The line currently being accumulated; `line()` swaps in a fresh one.
    current_line: Line = Factory(Line)
    # Forwarded to `normalize_string_prefix()`.
    remove_u_prefix: bool = False

    def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, only emit if it makes sense.
        If the line is too long, split it first and then generate.

        If any lines were generated, set up a new current_line.

        `indent` is added to the depth of the line that follows; `type` is the
        class used to build that line.
        """
        if not self.current_line:
            if self.current_line.__class__ == type:
                self.current_line.depth += indent
            else:
                self.current_line = type(depth=self.current_line.depth + indent)
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = type(depth=complete_line.depth + indent)
        yield complete_line

    def visit(self, node: LN) -> Iterator[Line]:
        """Main method to visit `node` and its children.

        Yields :class:`Line` objects.
        """
        if isinstance(self.current_line, UnformattedLines):
            # File contained `# fmt: off`
            yield from self.visit_unformatted(node)

        else:
            yield from super().visit(node)

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            try:
                for comment in generate_comments(node):
                    if any_open_brackets:
                        # any comment within brackets is subject to splitting
                        self.current_line.append(comment)
                    elif comment.type == token.COMMENT:
                        # regular trailing comment
                        self.current_line.append(comment)
                        yield from self.line()

                    else:
                        # regular standalone comment
                        yield from self.line()

                        self.current_line.append(comment)
                        yield from self.line()

            except FormatOff as f_off:
                # Switch to an unformatted line and revisit the same node with
                # the already consumed part of its prefix trimmed.
                f_off.trim_prefix(node)
                yield from self.line(type=UnformattedLines)
                yield from self.visit(node)

            except FormatOn as f_on:
                # This only happens here if somebody says "fmt: on" multiple
                # times in a row.
                f_on.trim_prefix(node)
                yield from self.visit_default(node)

            else:
                normalize_prefix(node, inside_brackets=any_open_brackets)
                if self.normalize_strings and node.type == token.STRING:
                    normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                    normalize_string_quotes(node)
                if node.type not in WHITESPACE:
                    self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This methods puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.is_pyi and is_stub_suite(node):
            # Only the third child is visited; the other children are skipped.
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                # Emit the statement one level deeper, then restore the depth.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The child following ASYNC is the wrapped statement; visit its children
        # directly so that they continue the line started above.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_unformatted(self, node: LN) -> Iterator[Line]:
        """Used when file contained a `# fmt: off`."""
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)

        else:
            try:
                self.current_line.append(node)
            except FormatOn as f_on:
                # Formatting was switched back on: flush the unformatted line
                # and revisit the node through the regular path.
                f_on.trim_prefix(node)
                yield from self.line()
                yield from self.visit(node)

            if node.type == token.ENDMARKER:
                # somebody decided not to put a final `# fmt: on`
                yield from self.line()

    def __attrs_post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        # Every statement visitor below is `visit_stmt` with preset arguments.
        v = self.visit_stmt
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
# Node types that `is_one_tuple()` accepts as tuples written without parentheses.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token type to its closing counterpart.
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Token types for which `whitespace()` returns an empty prefix unconditionally.
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is an empty string, a single space, two spaces (for comments), or
    the prefix already present on a neighbouring `=` leaf.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    prev = leaf.prev_sibling
    if not prev:
        # `leaf` is the first child of its parent: decide based on the leaf that
        # precedes the parent instead.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in STARS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | STARS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t == token.NAME or t == token.NUMBER:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    elif p.type == syms.sliceop:
        return NO

    return SPACE
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling, then returns that sibling itself (if it is a leaf) or its last
    leaf.  Returns None when no ancestor has a previous sibling, or when the
    sibling found contains no leaves.
    """
    while node:
        res = node.prev_sibling
        if res:
            if isinstance(res, Leaf):
                return res

            try:
                return list(res.leaves())[-1]

            except IndexError:
                # The previous sibling has no leaves at all.
                return None

        node = node.parent
    return None
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Climbs the parent chain starting at `descendant` and stops at the node
    whose parent is `ancestor`.  Returns None if the chain ends before
    `ancestor` is reached.
    """
    node: Optional[LN] = descendant
    while node and node.parent != ancestor:
        node = node.parent
    return node
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.  Returns 0 when `leaf` is not such a
    delimiter.  `previous` is accepted for symmetry with
    :func:`is_split_before_delimiter` and is not consulted here.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    `previous` is the leaf preceding `leaf`, if known; it is used to tell
    `not in` / `is not` apart from plain `in` / `not`, and to detect adjacent
    string literals.

    Higher numbers are higher priority.  Returns 0 when `leaf` is not such a
    delimiter.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    # Everything below concerns keyword-like NAME leaves only.
    if leaf.type != token.NAME:
        return 0

    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
    ):
        return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        # the `in` of a `not in` is not a split point of its own
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        # the `not` of an `is not` is not a split point of its own
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.

    Raises :exc:`FormatOn` / :exc:`FormatOff` right after yielding the
    corresponding control comment; the exception carries the number of prefix
    characters consumed up to and including that comment's line.
    """
    p = leaf.prefix
    if not p:
        return

    if "#" not in p:
        return

    consumed = 0
    nlines = 0
    for index, line in enumerate(p.split("\n")):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            # Count blank lines so they can be re-attached to the next comment.
            nlines += 1
        if not line.startswith("#"):
            continue

        if index == 0 and leaf.type != token.ENDMARKER:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        yield Leaf(comment_type, comment, prefix="\n" * nlines)

        if comment in {"# fmt: on", "# yapf: enable"}:
            raise FormatOn(consumed)

        if comment in {"# fmt: off", "# yapf: disable"}:
            if comment_type == STANDALONE_COMMENT:
                raise FormatOff(consumed)

            prev = preceding_leaf(leaf)
            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
                raise FormatOff(consumed)

        nlines = 0
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.  Trailing
    whitespace is stripped; empty or whitespace-only `content` yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to index below; an empty comment is just the hash sign.
        return "#"

    if content[0] == "#":
        content = content[1:]
    if content and content[0] not in " !:#":
        content = " " + content
    return "#" + content
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    If no split function succeeds, `line` is yielded unchanged.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if not line.should_explode and is_line_short_enough(
        line, line_length=line_length, line_str=line_str
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    else:

        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, line_length, py36, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            # NOTE: `line_length` must be passed explicitly here; handing over
            # `py36` positionally would bind it to `line_length`.
            yield from right_hand_split(line, line_length, py36=py36)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for split in split_func(line, py36):
                if str(split).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(split, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            continue

        else:
            yield from result
            break

    else:
        yield line
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.

    Yields the non-empty lines among head, body and tail, in that order.
    Raises :exc:`CannotSplit` via :func:`bracket_split_succeeded_or_raise`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        ):
            # An empty body means there is nothing to split off; keep
            # collecting into the head instead.
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        if current_leaves is head_leaves:
            if leaf.type in OPENING_BRACKETS:
                matching_bracket = leaf
                current_leaves = body_leaves
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def right_hand_split(
    line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty lines among head, body and tail, in that order.
    Raises :exc:`CannotSplit` when no usable bracket pair is found or the
    resulting split is not acceptable.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Scan from the end: tail first, then body, then head.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    if not head_leaves:
        # No `head` means the split failed. Either `tail` has all content or
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    assert opening_bracket and closing_bracket
    body.should_explode = should_explode(body, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, py36=py36, omit=omit)
            return

        except CannotSplit:
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm becase the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    # Falling through means the split on this bracket pair is kept, so the
    # brackets are made visible.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        elif tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save "
                f"{tail_len} characters is not worth it"
            )
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.  The wrapper keeps the
    `(line, py36)` call signature and yields the same lines as `split_func`,
    after resetting the prefix of each line's first leaf.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for split in split_func(line, py36):
            normalize_prefix(split.leaves[0], inside_brackets=True)
            yield split

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises :exc:`CannotSplit` when `line` is empty, has no delimiters, or when
    the only delimiter is a single attribute-access dot.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    bt = line.bracket_tracker
    try:
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth and is_vararg(
            leaf, within=VARARGS_PARENTS
        ):
            # A vararg at the outermost depth: a trailing comma is only added
            # when `py36` is set.
            trailing_comma_safe = trailing_comma_safe and py36
        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Raises :exc:`CannotSplit` if `line` contains no standalone comments.
    `py36` is accepted to match the common split-function signature and is not
    used here.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

    if current_line:
        yield current_line
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is, `leaf` is the NAME `import` directly under an `import_name` node,
    or the NAME `from` directly under an `import_from` node.
    """
    p = leaf.parent
    t = leaf.type
    v = leaf.value
    return bool(
        t == token.NAME
        and (
            (v == "import" and p and p.type == syms.import_name)
            or (v == "from" and p and p.type == syms.import_from)
        )
    )
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    Note: Mutates `leaf.prefix` in place.
    """
    if not inside_brackets:
        spl = leaf.prefix.split("#")
        if "\\" not in spl[0]:
            # Only newlines after the last "#" are counted; when the prefix
            # held a comment, one of them is the newline ending that comment.
            nl_count = spl[-1].count("\n")
            if len(spl) > 1:
                nl_count -= 1
            leaf.prefix = "\n" * nl_count
            return

    leaf.prefix = ""
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Make all string prefixes lowercase.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Note: Mutates its argument.
    """
    # Group 1 is the run of prefix letters, group 2 everything that follows.
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    orig_prefix = match.group(1)
    new_prefix = orig_prefix.lower()
    if remove_u_prefix:
        new_prefix = new_prefix.replace("u", "")
    leaf.value = f"{new_prefix}{match.group(2)}"
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Strings already delimited by triple double quotes are left untouched.

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary quotes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary quotes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    # Slice instead of index: the body is empty for a string such as `''''''`.
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in `"` would merge with the closing `"""`
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    Note: Mutates `node` in place.
    """
    try:
        # Exhaust the generator purely to find out whether it raises.
        list(generate_comments(node))
    except FormatOff:
        return  # This `node` has a prefix with `# fmt: off`, don't mess with parens.

    check_lpar = False
    # Iterate over a copy: children of `node` are replaced inside the loop.
    for index, child in enumerate(list(node.children)):
        if check_lpar:
            if child.type == syms.atom:
                maybe_make_parens_invisible_in_atom(child)
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.

    Returns True if the outermost parentheses of `node` were made invisible,
    False if `node` was left untouched.
    """
    if (
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or is_yield(node)
        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
    ):
        return False

    first = node.children[0]
    last = node.children[-1]
    if first.type == token.LPAR and last.type == token.RPAR:
        # make parentheses invisible
        first.value = ""  # type: ignore
        last.value = ""  # type: ignore
        if len(node.children) > 1:
            # Handle nested redundant parentheses as well.
            maybe_make_parens_invisible_in_atom(node.children[1])
        return True

    return False
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is an atom with exactly two children: a left and a right parenthesis.
    """
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: `(` + testlist_gexp + `)`.
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if not (
            lpar.type == token.LPAR
            and gexp.type == syms.testlist_gexp
            and rpar.type == token.RPAR
        ):
            return False

        # A single element followed by a trailing comma.
        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: an implicit tuple node made of one element and a comma.
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression.

    Parenthesized yields are recognized too: an atom of the shape
    `(` expr `)` is unwrapped and its content checked recursively.
    """
    if node.type == syms.yield_expr:
        return True

    # A bare `yield` keyword without a value is a NAME leaf.
    if node.type == token.NAME and node.value == "yield":  # type: ignore
        return True

    if node.type != syms.atom:
        return False

    if len(node.children) != 3:
        return False

    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        return is_yield(expr)

    return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines.

    Only `leaf.value` is inspected: after dropping any leading string-prefix
    characters it must open with a triple quote and contain a newline.
    """
    value = leaf.value.lstrip("furbFURB")
    return value[:3] in {'"""', "'''"} and "\n" in value
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body.

    The suite must consist of exactly NEWLINE, INDENT, one statement and
    DEDENT, and that single statement must satisfy :func:`is_stub_body`.
    """
    if (
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT
    ):
        return False

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    # Expect exactly two children: the expression and one more node after it.
    if len(node.children) != 2:
        return False

    child = node.children[0]
    # An ellipsis is an atom made of exactly three "." leaves.
    return (
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
    )
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    Also returns 0 when the tracker raises ValueError for the contents.
    """
    if node.type != syms.atom:
        return 0

    first = node.children[0]
    last = node.children[-1]
    if not (first.type == token.LPAR and last.type == token.RPAR):
        return 0

    # Feed every leaf between the enclosing parentheses to a fresh tracker.
    bt = BracketTracker()
    for c in node.children[1:-1]:
        if isinstance(c, Leaf):
            bt.mark(c)
        else:
            for leaf in c.leaves():
                bt.mark(leaf)
    try:
        return bt.max_delimiter_priority()

    except ValueError:
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).

    Leaves that are neither a left nor a right parenthesis are left untouched.
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an import-from
    statement and the highest delimiter priority on `line` (ignoring a trailing
    comma) is the comma priority.
    """
    # NOTE: `value in "[{("` is a substring test, so an invisible parenthesis
    # (empty value) passes it as well.
    if not (
        opening_bracket.parent
        and opening_bracket.parent.type in {syms.atom, syms.import_from}
        and opening_bracket.value in "[{("
    ):
        return False

    try:
        last_leaf = line.leaves[-1]
        # A trailing comma must not count as a delimiter on its own.
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        # No leaves at all, or no delimiters to take the maximum of.
        return False

    return max_priority == COMMA_PRIORITY
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # String prefixes are case-insensitive and may mix cases (`rF`,
            # `Fr`, ...), so normalize before comparing.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # The argument list ends with a comma; that is only 3.6+ syntax
            # when a star argument is present.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty.

    Note that the very same set object is yielded every time and keeps being
    updated; copy it if a snapshot is needed.
    """

    omit: Set[LeafID] = set()
    yield omit

    length = 4 * line.depth
    opening_bracket = None
    closing_bracket = None
    optional_brackets: Set[LeafID] = set()
    inner_brackets: Set[LeafID] = set()
    # Walk the line backwards, accumulating the rendered length of the tail.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # `leaf_length` exceeds prefix + value only when comment text was
        # added on top by `enumerate_with_length`.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        optional_brackets.discard(id(leaf))
        if opening_bracket:
            # Inside a bracket pair: wait for its opening bracket, remembering
            # any nested closing brackets along the way.
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                # NOTE(review): `opening_bracket` is always falsy in this
                # branch, so `optional_brackets` looks write-only here; kept
                # as is, confirm before relying on it.
                optional_brackets.add(id(opening_bracket))
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit
            closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file.

    Only the leading statements of `node` are examined: scanning stops at the
    first statement that is neither a docstring nor a `from __future__ import`.
    """
    imports = set()
    for child in node.children:
        if child.type != syms.simple_stmt:
            break
        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue
            else:
                break
        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break
            # Children from index 3 on hold the imported names, either as
            # plain leaves or grouped in an `import_as_names` node.
            for import_from_child in first_child.children[3:]:
                if isinstance(import_from_child, Leaf):
                    if import_from_child.type == token.NAME:
                        imports.add(import_from_child.value)
                else:
                    assert import_from_child.type == syms.import_as_names
                    for leaf in import_from_child.children:
                        if isinstance(leaf, Leaf) and leaf.type == token.NAME:
                            imports.add(leaf.value)
        else:
            break
    return imports
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Both regexes are searched against the child's resolved path relative to
    `root`, written POSIX-style with a leading "/" (and a trailing "/" for
    directories). `root` must be absolute.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        if child.is_dir():
            normalized_path += "/"
        exclude_match = exclude.search(normalized_path)
        # An empty match (e.g. from an empty pattern) doesn't exclude anything.
        if exclude_match and exclude_match.group(0):
            report.path_ignored(child, f"matches --exclude={exclude.pattern}")
            continue

        if child.is_dir():
            yield from gen_python_files_in_dir(child, root, include, exclude, report)

        elif child.is_file():
            include_match = include.search(normalized_path)
            if include_match:
                yield child
def find_project_root(srcs: List[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.
    """
    if not srcs:
        return Path("/").resolve()

    # Collect, for every source, the directories that contain it (a directory
    # source counts as containing itself). The deepest directory shared by all
    # of them is the true common base; picking the smallest path instead could
    # select a root that doesn't contain every source.
    containing_dirs = []
    for src in srcs:
        resolved = Path(src).resolve()
        candidates = set(resolved.parents)
        if resolved.is_dir():
            candidates.add(resolved)
        containing_dirs.append(candidates)
    shared = set.intersection(*containing_dirs)
    if not shared:
        # Nothing in common (e.g. sources on different drives).
        return Path("/").resolve()

    common_base = max(shared, key=lambda candidate: len(candidate.parts))
    directory = common_base
    for directory in (common_base, *common_base.parents):
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker found: `directory` is now the top-most parent.
    return directory
2939 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2943 verbose: bool = False
2944 change_count: int = 0
2946 failure_count: int = 0
2948 def done(self, src: Path, changed: Changed) -> None:
2949 """Increment the counter for successful reformatting. Write out a message."""
2950 if changed is Changed.YES:
2951 reformatted = "would reformat" if self.check else "reformatted"
2952 if self.verbose or not self.quiet:
2953 out(f"{reformatted} {src}")
2954 self.change_count += 1
2957 if changed is Changed.NO:
2958 msg = f"{src} already well formatted, good job."
2960 msg = f"{src} wasn't modified on disk since last run."
2961 out(msg, bold=False)
2962 self.same_count += 1
2964 def failed(self, src: Path, message: str) -> None:
2965 """Increment the counter for failed reformatting. Write out a message."""
2966 err(f"error: cannot format {src}: {message}")
2967 self.failure_count += 1
2969 def path_ignored(self, path: Path, message: str) -> None:
2971 out(f"{path} ignored: {message}", bold=False)
2974 def return_code(self) -> int:
2975 """Return the exit code that the app should use.
2977 This considers the current state of changed files and failures:
2978 - if there were any failures, return 123;
2979 - if any files were changed and --check is being used, return 1;
2980 - otherwise return 0.
2982 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2983 # 126 we have special returncodes reserved by the shell.
2984 if self.failure_count:
2987 elif self.change_count and self.check:
2992 def __str__(self) -> str:
2993 """Render a color report of the current state.
2995 Use `click.unstyle` to remove colors.
2998 reformatted = "would be reformatted"
2999 unchanged = "would be left unchanged"
3000 failed = "would fail to reformat"
3002 reformatted = "reformatted"
3003 unchanged = "left unchanged"
3004 failed = "failed to reformat"
3006 if self.change_count:
3007 s = "s" if self.change_count > 1 else ""
3009 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
3012 s = "s" if self.same_count > 1 else ""
3013 report.append(f"{self.same_count} file{s} {unchanged}")
3014 if self.failure_count:
3015 s = "s" if self.failure_count > 1 else ""
3017 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
3019 return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module and their trees are
    compared through a textual dump. An AssertionError is also raised when
    either string fails to parse.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                # Optional fields may be absent on a given node.
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the offending output around for a bug report.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    `src` is only used to make the diff written to the log more informative.
    """
    newdst = format_str(dst, line_length=line_length, mode=mode)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file.

    Every chunk is terminated with a newline if it doesn't end with one
    already. The file is not deleted automatically.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header. Five lines
    of context are shown around each change.
    """
    import difflib

    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.all_tasks` replaces `asyncio.Task.all_tasks`, which newer
        # Python versions no longer provide; fall back for older ones.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=`: `gather` takes the loop from the tasks it is
        # given, and newer Python versions reject the argument.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    return regex.sub(replacement, regex.sub(replacement, original))
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible.

    Yields `(index, element)` pairs from the last element to the first, with
    `index` being the element's position in `sequence`.
    """
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        index -= 1
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Each item is `(index, leaf, length)` where `length` covers the leaf's
    prefix and value plus the value of every comment attached after it.
    With `reversed` set, leaves are visited from last to first.

    Stops prematurely on multiline strings and standalone comments.
    """
    op = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in op(line.leaves):
        length = len(leaf.prefix) + len(leaf.value)
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comment: Optional[Leaf]
        for comment in line.comments_after(leaf, index):
            length += len(comment.value)

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    A rendering that contains a newline, or a line holding standalone comments,
    is never considered short enough.
    """
    if not line_str:
        line_str = str(line).strip("\n")
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    # Only lines that start with a string followed by an attribute access are
    # examined; everything else is assumed to be splittable.
    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        call_count = 0
        dot_count = 0
        # NOTE(review): `next` shadows the builtin and is never advanced in
        # the loop, so every comparison is against the line's last leaf.
        # Kept as is; confirm whether that is intended.
        next = leaves[-1]
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False

                call_count += 1
            elif leaf.type == token.DOT:
                dot_count += 1
            elif leaf.type == token.NAME:
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False

            elif leaf.type not in CLOSING_BRACKETS:
                return False

            if dot_count > 1 and call_count > 1:
                return False

    return True
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # Measure what follows the leading bracket pair: `remainder` is
        # switched on at the bracket closing `first`.
        remainder = False
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # a closing square bracket only counts outside of a trailer
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # not needed.
            return True

        # Measure everything up to and including the bracket that opens the
        # trailing pair.
        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the cache file for the given formatting options.

    The file lives in `CACHE_DIR`; its name encodes `line_length` and whether
    the PYI and PYTHON36 flags are set in `mode`.
    """
    pyi = bool(mode & FileMode.PYI)
    py36 = bool(mode & FileMode.PYTHON36)
    return (
        CACHE_DIR
        / f"cache.{line_length}{'.pyi' if pyi else ''}{'.py36' if py36 else ''}.pickle"
    )
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    An empty cache is returned when the file is missing or cannot be unpickled.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            # NOTE: the cache is a pickle; it is only meant to be read back
            # from the user's own cache directory.
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            ValueError,
        ):
            # A truncated, empty, or otherwise damaged file raises more than
            # just UnpicklingError; treat all of those as "no cache".
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not.

    That is the file's modification time and its size, as reported by
    `path.stat()`.
    """
    stat = path.stat()
    return stat.st_mtime, stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files. Paths in both
    sets are resolved.
    """
    todo, done = set(), set()
    for src in sources:
        src = src.resolve()
        # A missing entry yields None, which never equals fresh cache info.
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file.

    Entries for `sources` (keyed by resolved path) are merged over `cache` and
    the result is pickled to the cache file for `line_length` and `mode`.
    Writing is best-effort: any OSError is silently ignored.
    """
    cache_file = get_cache_file(line_length, mode)
    try:
        # `exist_ok` keeps a concurrent run that creates the directory first
        # from aborting this write.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        pass
3400 if __name__ == "__main__":