All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 from asyncio.base_events import BaseEventLoop
3 from concurrent.futures import Executor, ProcessPoolExecutor
4 from datetime import datetime
5 from enum import Enum, Flag
6 from functools import partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
38 from appdirs import user_cache_dir
39 from attr import dataclass, Factory
43 from blib2to3.pytree import Node, Leaf, type_repr
44 from blib2to3 import pygram, pytree
45 from blib2to3.pgen2 import driver, token
46 from blib2to3.pgen2.parse import ParseError
49 __version__ = "18.5b1"
50 DEFAULT_LINE_LENGTH = 88
52 r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
54 DEFAULT_INCLUDES = r"\.pyi?$"
55 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
67 LN = Union[Leaf, Node]
68 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
71 CacheInfo = Tuple[Timestamp, FileSize]
72 Cache = Dict[Path, CacheInfo]
73 out = partial(click.secho, bold=True, err=True)
74 err = partial(click.secho, fg="red", err=True)
76 pygram.initialize(CACHE_DIR)
77 syms = pygram.python_symbols
class NothingChanged(UserWarning):
    """Warning raised by :func:`format_file` when the reformatted code
    comes out identical to the source it was given."""
class CannotSplit(Exception):
    """Signals that no readable split fitting the allotted line length exists.

    Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
    :func:`delimiter_split`.
    """
class FormatError(Exception):
    """Base exception for `# fmt: on` and `# fmt: off` handling.

    It holds the number of bytes of the prefix consumed before the format
    control comment appeared.
    """

    def __init__(self, consumed: int) -> None:
        super().__init__(consumed)
        # Byte count of the prefix consumed before the control comment.
        self.consumed = consumed

    def trim_prefix(self, leaf: Leaf) -> None:
        """Drop the already-consumed part of `leaf`'s prefix, in place."""
        leaf.prefix = leaf.prefix[self.consumed :]

    def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
        """Return a new NEWLINE Leaf carrying the consumed prefix part."""
        consumed_part = leaf.prefix[: self.consumed]
        return Leaf(token.NEWLINE, consumed_part)
class FormatOn(FormatError):
    """Signals that a comment like `# fmt: on` was found in the file."""
class FormatOff(FormatError):
    """Signals that a comment like `# fmt: off` was found in the file."""
120 class WriteBack(Enum):
126 def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
127 if check and not diff:
130 return cls.DIFF if diff else cls.YES
139 class FileMode(Flag):
143 NO_STRING_NORMALIZATION = 4
146 def from_configuration(
147 cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
149 mode = cls.AUTO_DETECT
154 if skip_string_normalization:
155 mode |= cls.NO_STRING_NORMALIZATION
164 default=DEFAULT_LINE_LENGTH,
165 help="How many character per line to allow.",
172 "Allow using Python 3.6-only syntax on all input files. This will put "
173 "trailing commas in function signatures and calls also after *args and "
174 "**kwargs. [default: per-file auto-detection]"
181 "Format all input files like typing stubs regardless of file extension "
182 "(useful when piping source on standard input)."
187 "--skip-string-normalization",
189 help="Don't normalize string quotes or prefixes.",
195 "Don't write the files back, just return the status. Return code 0 "
196 "means nothing would change. Return code 1 means some files would be "
197 "reformatted. Return code 123 means there was an internal error."
203 help="Don't write the files back, just output a diff for each file on stdout.",
208 help="If --fast given, skip temporary sanity checks. [default: --safe]",
213 default=DEFAULT_INCLUDES,
215 "A regular expression that matches files and directories that should be "
216 "included on recursive searches. An empty value means all files are "
217 "included regardless of the name. Use forward slashes for directories on "
218 "all platforms (Windows, too). Exclusions are calculated first, inclusions "
226 default=DEFAULT_EXCLUDES,
228 "A regular expression that matches files and directories that should be "
229 "excluded on recursive searches. An empty value means no paths are excluded. "
230 "Use forward slashes for directories on all platforms (Windows, too). "
231 "Exclusions are calculated first, inclusions later."
240 "Don't emit non-error messages to stderr. Errors are still emitted, "
241 "silence those with 2>/dev/null."
249 "Also emit messages to stderr about files that were not changed or were "
250 "ignored due to --exclude=."
253 @click.version_option(version=__version__)
258 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
270 skip_string_normalization: bool,
277 """The uncompromising code formatter."""
278 write_back = WriteBack.from_configuration(check=check, diff=diff)
279 mode = FileMode.from_configuration(
280 py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
282 report = Report(check=check, quiet=quiet, verbose=verbose)
283 sources: Set[Path] = set()
285 include_regex = re.compile(include)
287 err(f"Invalid regular expression for include given: {include!r}")
290 exclude_regex = re.compile(exclude)
292 err(f"Invalid regular expression for exclude given: {exclude!r}")
294 root = find_project_root(src)
299 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
301 elif p.is_file() or s == "-":
302 # if a file was explicitly given, we don't care about its extension
305 err(f"invalid path: {s}")
306 if len(sources) == 0:
307 if verbose or not quiet:
308 out("No paths given. Nothing to do 😴")
312 elif len(sources) == 1:
315 line_length=line_length,
317 write_back=write_back,
322 loop = asyncio.get_event_loop()
323 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
325 loop.run_until_complete(
328 line_length=line_length,
330 write_back=write_back,
339 if verbose or not quiet:
340 out("All done! ✨ 🍰 ✨")
341 click.echo(str(report))
342 ctx.exit(report.return_code)
349 write_back: WriteBack,
353 """Reformat a single file under `src` without spawning child processes.
355 If `quiet` is True, non-error messages are not output. `line_length`,
356 `write_back`, `fast` and `pyi` options are passed to
357 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
361 if not src.is_file() and str(src) == "-":
362 if format_stdin_to_stdout(
363 line_length=line_length, fast=fast, write_back=write_back, mode=mode
365 changed = Changed.YES
368 if write_back != WriteBack.DIFF:
369 cache = read_cache(line_length, mode)
370 res_src = src.resolve()
371 if res_src in cache and cache[res_src] == get_cache_info(res_src):
372 changed = Changed.CACHED
373 if changed is not Changed.CACHED and format_file_in_place(
375 line_length=line_length,
377 write_back=write_back,
380 changed = Changed.YES
381 if write_back == WriteBack.YES and changed is not Changed.NO:
382 write_cache(cache, [src], line_length, mode)
383 report.done(src, changed)
384 except Exception as exc:
385 report.failed(src, str(exc))
388 async def schedule_formatting(
392 write_back: WriteBack,
398 """Run formatting of `sources` in parallel using the provided `executor`.
400 (Use ProcessPoolExecutors for actual parallelism.)
402 `line_length`, `write_back`, `fast`, and `pyi` options are passed to
403 :func:`format_file_in_place`.
406 if write_back != WriteBack.DIFF:
407 cache = read_cache(line_length, mode)
408 sources, cached = filter_cached(cache, sources)
409 for src in sorted(cached):
410 report.done(src, Changed.CACHED)
415 if write_back == WriteBack.DIFF:
416 # For diff output, we need locks to ensure we don't interleave output
417 # from different processes.
419 lock = manager.Lock()
421 loop.run_in_executor(
423 format_file_in_place,
431 for src in sorted(sources)
433 pending: Iterable[asyncio.Task] = tasks.keys()
435 loop.add_signal_handler(signal.SIGINT, cancel, pending)
436 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
437 except NotImplementedError:
438 # There are no good alternatives for these on Windows
441 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
443 src = tasks.pop(task)
445 cancelled.append(task)
446 elif task.exception():
447 report.failed(src, str(task.exception()))
449 formatted.append(src)
450 report.done(src, Changed.YES if task.result() else Changed.NO)
452 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
453 if write_back == WriteBack.YES and formatted:
454 write_cache(cache, formatted, line_length, mode)
457 def format_file_in_place(
461 write_back: WriteBack = WriteBack.NO,
462 mode: FileMode = FileMode.AUTO_DETECT,
463 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
465 """Format file under `src` path. Return True if changed.
467 If `write_back` is True, write reformatted code back to stdout.
468 `line_length` and `fast` options are passed to :func:`format_file_contents`.
470 if src.suffix == ".pyi":
473 then = datetime.utcfromtimestamp(src.stat().st_mtime)
474 with open(src, "rb") as buf:
475 src_contents, encoding, newline = decode_bytes(buf.read())
477 dst_contents = format_file_contents(
478 src_contents, line_length=line_length, fast=fast, mode=mode
480 except NothingChanged:
483 if write_back == write_back.YES:
484 with open(src, "w", encoding=encoding, newline=newline) as f:
485 f.write(dst_contents)
486 elif write_back == write_back.DIFF:
487 now = datetime.utcnow()
488 src_name = f"{src}\t{then} +0000"
489 dst_name = f"{src}\t{now} +0000"
490 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
494 f = io.TextIOWrapper(
500 f.write(diff_contents)
508 def format_stdin_to_stdout(
511 write_back: WriteBack = WriteBack.NO,
512 mode: FileMode = FileMode.AUTO_DETECT,
514 """Format file on stdin. Return True if changed.
516 If `write_back` is True, write reformatted code back to stdout.
517 `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
518 :func:`format_file_contents`.
520 then = datetime.utcnow()
521 src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
524 dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
527 except NothingChanged:
531 f = io.TextIOWrapper(
532 sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
534 if write_back == WriteBack.YES:
536 elif write_back == WriteBack.DIFF:
537 now = datetime.utcnow()
538 src_name = f"STDIN\t{then} +0000"
539 dst_name = f"STDOUT\t{now} +0000"
540 f.write(diff(src, dst, src_name, dst_name))
544 def format_file_contents(
549 mode: FileMode = FileMode.AUTO_DETECT,
551 """Reformat contents a file and return new contents.
553 If `fast` is False, additionally confirm that the reformatted code is
554 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
555 `line_length` is passed to :func:`format_str`.
557 if src_contents.strip() == "":
560 dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
561 if src_contents == dst_contents:
565 assert_equivalent(src_contents, dst_contents)
566 assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
571 src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
573 """Reformat a string and return new contents.
575 `line_length` determines how many characters per line are allowed.
577 src_node = lib2to3_parse(src_contents)
579 future_imports = get_future_imports(src_node)
580 is_pyi = bool(mode & FileMode.PYI)
581 py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
582 normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
583 lines = LineGenerator(
584 remove_u_prefix=py36 or "unicode_literals" in future_imports,
586 normalize_strings=normalize_strings,
588 elt = EmptyLineTracker(is_pyi=is_pyi)
591 for current_line in lines.visit(src_node):
592 for _ in range(after):
593 dst_contents += str(empty_line)
594 before, after = elt.maybe_empty_lines(current_line)
595 for _ in range(before):
596 dst_contents += str(empty_line)
597 for line in split_line(current_line, line_length=line_length, py36=py36):
598 dst_contents += str(line)
602 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
603 """Return a tuple of (decoded_contents, encoding, newline).
605 `newline` is either CRLF or LF but `decoded_contents` is decoded with
606 universal newlines (i.e. only contains LF).
608 srcbuf = io.BytesIO(src)
609 encoding, lines = tokenize.detect_encoding(srcbuf.readline)
611 return "", encoding, "\n"
613 newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
615 with io.TextIOWrapper(srcbuf, encoding) as tiow:
616 return tiow.read(), encoding, newline
620 pygram.python_grammar_no_print_statement_no_exec_statement,
621 pygram.python_grammar_no_print_statement,
622 pygram.python_grammar,
626 def lib2to3_parse(src_txt: str) -> Node:
627 """Given a string with source, return the lib2to3 Node."""
628 grammar = pygram.python_grammar_no_print_statement
629 if src_txt[-1:] != "\n":
631 for grammar in GRAMMARS:
632 drv = driver.Driver(grammar, pytree.convert)
634 result = drv.parse_string(src_txt, True)
637 except ParseError as pe:
638 lineno, column = pe.context[1]
639 lines = src_txt.splitlines()
641 faulty_line = lines[lineno - 1]
643 faulty_line = "<line number missing in source>"
644 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
648 if isinstance(result, Leaf):
649 result = Node(syms.file_input, [result])
653 def lib2to3_unparse(node: Node) -> str:
654 """Given a lib2to3 node, return its string representation."""
662 class Visitor(Generic[T]):
663 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
665 def visit(self, node: LN) -> Iterator[T]:
666 """Main method to visit `node` and its children.
668 It tries to find a `visit_*()` method for the given `node.type`, like
669 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
670 If no dedicated `visit_*()` method is found, chooses `visit_default()`
673 Then yields objects of type `T` from the selected visitor.
676 name = token.tok_name[node.type]
678 name = type_repr(node.type)
679 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
681 def visit_default(self, node: LN) -> Iterator[T]:
682 """Default `visit_*()` implementation. Recurses to children of `node`."""
683 if isinstance(node, Node):
684 for child in node.children:
685 yield from self.visit(child)
689 class DebugVisitor(Visitor[T]):
692 def visit_default(self, node: LN) -> Iterator[T]:
693 indent = " " * (2 * self.tree_depth)
694 if isinstance(node, Node):
695 _type = type_repr(node.type)
696 out(f"{indent}{_type}", fg="yellow")
698 for child in node.children:
699 yield from self.visit(child)
702 out(f"{indent}/{_type}", fg="yellow", bold=False)
704 _type = token.tok_name.get(node.type, str(node.type))
705 out(f"{indent}{_type}", fg="blue", nl=False)
707 # We don't have to handle prefixes for `Node` objects since
708 # that delegates to the first child anyway.
709 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
710 out(f" {node.value!r}", fg="blue", bold=False)
713 def show(cls, code: str) -> None:
714 """Pretty-print the lib2to3 AST of a given string of `code`.
716 Convenience method for debugging.
718 v: DebugVisitor[None] = DebugVisitor()
719 list(v.visit(lib2to3_parse(code)))
722 KEYWORDS = set(keyword.kwlist)
723 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
724 FLOW_CONTROL = {"return", "raise", "break", "continue"}
735 STANDALONE_COMMENT = 153
736 LOGIC_OPERATORS = {"and", "or"}
761 STARS = {token.STAR, token.DOUBLESTAR}
764 syms.argument, # double star in arglist
765 syms.trailer, # single argument to call
767 syms.varargslist, # lambdas
769 UNPACKING_PARENTS = {
770 syms.atom, # single element of a list or set literal
774 syms.testlist_star_expr,
809 COMPREHENSION_PRIORITY = 20
811 TERNARY_PRIORITY = 16
814 COMPARATOR_PRIORITY = 10
825 token.DOUBLESLASH: 4,
835 class BracketTracker:
836 """Keeps track of brackets on a line."""
839 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
840 delimiters: Dict[LeafID, Priority] = Factory(dict)
841 previous: Optional[Leaf] = None
842 _for_loop_variable: int = 0
843 _lambda_arguments: int = 0
845 def mark(self, leaf: Leaf) -> None:
846 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
848 All leaves receive an int `bracket_depth` field that stores how deep
849 within brackets a given leaf is. 0 means there are no enclosing brackets
850 that started on this line.
852 If a leaf is itself a closing bracket, it receives an `opening_bracket`
853 field that it forms a pair with. This is a one-directional link to
854 avoid reference cycles.
856 If a leaf is a delimiter (a token on which Black can split the line if
857 needed) and it's on depth 0, its `id()` is stored in the tracker's
860 if leaf.type == token.COMMENT:
863 self.maybe_decrement_after_for_loop_variable(leaf)
864 self.maybe_decrement_after_lambda_arguments(leaf)
865 if leaf.type in CLOSING_BRACKETS:
867 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
868 leaf.opening_bracket = opening_bracket
869 leaf.bracket_depth = self.depth
871 delim = is_split_before_delimiter(leaf, self.previous)
872 if delim and self.previous is not None:
873 self.delimiters[id(self.previous)] = delim
875 delim = is_split_after_delimiter(leaf, self.previous)
877 self.delimiters[id(leaf)] = delim
878 if leaf.type in OPENING_BRACKETS:
879 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
882 self.maybe_increment_lambda_arguments(leaf)
883 self.maybe_increment_for_loop_variable(leaf)
885 def any_open_brackets(self) -> bool:
886 """Return True if there is an yet unmatched open bracket on the line."""
887 return bool(self.bracket_match)
889 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
890 """Return the highest priority of a delimiter found on the line.
892 Values are consistent with what `is_split_*_delimiter()` return.
893 Raises ValueError on no delimiters.
895 return max(v for k, v in self.delimiters.items() if k not in exclude)
897 def delimiter_count_with_priority(self, priority: int = 0) -> int:
898 """Return the number of delimiters with the given `priority`.
900 If no `priority` is passed, defaults to max priority on the line.
902 if not self.delimiters:
905 priority = priority or self.max_delimiter_priority()
906 return sum(1 for p in self.delimiters.values() if p == priority)
908 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
909 """In a for loop, or comprehension, the variables are often unpacks.
911 To avoid splitting on the comma in this situation, increase the depth of
912 tokens between `for` and `in`.
914 if leaf.type == token.NAME and leaf.value == "for":
916 self._for_loop_variable += 1
921 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
922 """See `maybe_increment_for_loop_variable` above for explanation."""
923 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
925 self._for_loop_variable -= 1
930 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
931 """In a lambda expression, there might be more than one argument.
933 To avoid splitting on the comma in this situation, increase the depth of
934 tokens between `lambda` and `:`.
936 if leaf.type == token.NAME and leaf.value == "lambda":
938 self._lambda_arguments += 1
943 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
944 """See `maybe_increment_lambda_arguments` above for explanation."""
945 if self._lambda_arguments and leaf.type == token.COLON:
947 self._lambda_arguments -= 1
952 def get_open_lsqb(self) -> Optional[Leaf]:
953 """Return the most recent opening square bracket (if any)."""
954 return self.bracket_match.get((self.depth - 1, token.RSQB))
959 """Holds leaves and comments. Can be printed with `str(line)`."""
962 leaves: List[Leaf] = Factory(list)
963 comments: List[Tuple[Index, Leaf]] = Factory(list)
964 bracket_tracker: BracketTracker = Factory(BracketTracker)
965 inside_brackets: bool = False
966 should_explode: bool = False
968 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
969 """Add a new `leaf` to the end of the line.
971 Unless `preformatted` is True, the `leaf` will receive a new consistent
972 whitespace prefix and metadata applied by :class:`BracketTracker`.
973 Trailing commas are maybe removed, unpacked for loop variables are
974 demoted from being delimiters.
976 Inline comments are put aside.
978 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
982 if token.COLON == leaf.type and self.is_class_paren_empty:
984 if self.leaves and not preformatted:
985 # Note: at this point leaf.prefix should be empty except for
986 # imports, for which we only preserve newlines.
987 leaf.prefix += whitespace(
988 leaf, complex_subscript=self.is_complex_subscript(leaf)
990 if self.inside_brackets or not preformatted:
991 self.bracket_tracker.mark(leaf)
992 self.maybe_remove_trailing_comma(leaf)
993 if not self.append_comment(leaf):
994 self.leaves.append(leaf)
996 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
997 """Like :func:`append()` but disallow invalid standalone comment structure.
999 Raises ValueError when any `leaf` is appended after a standalone comment
1000 or when a standalone comment is not the first leaf on the line.
1002 if self.bracket_tracker.depth == 0:
1004 raise ValueError("cannot append to standalone comments")
1006 if self.leaves and leaf.type == STANDALONE_COMMENT:
1008 "cannot append standalone comments to a populated line"
1011 self.append(leaf, preformatted=preformatted)
1014 def is_comment(self) -> bool:
1015 """Is this line a standalone comment?"""
1016 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1019 def is_decorator(self) -> bool:
1020 """Is this line a decorator?"""
1021 return bool(self) and self.leaves[0].type == token.AT
1024 def is_import(self) -> bool:
1025 """Is this an import line?"""
1026 return bool(self) and is_import(self.leaves[0])
1029 def is_class(self) -> bool:
1030 """Is this line a class definition?"""
1033 and self.leaves[0].type == token.NAME
1034 and self.leaves[0].value == "class"
1038 def is_stub_class(self) -> bool:
1039 """Is this line a class definition with a body consisting only of "..."?"""
1040 return self.is_class and self.leaves[-3:] == [
1041 Leaf(token.DOT, ".") for _ in range(3)
1045 def is_def(self) -> bool:
1046 """Is this a function definition? (Also returns True for async defs.)"""
1048 first_leaf = self.leaves[0]
1053 second_leaf: Optional[Leaf] = self.leaves[1]
1056 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1057 first_leaf.type == token.ASYNC
1058 and second_leaf is not None
1059 and second_leaf.type == token.NAME
1060 and second_leaf.value == "def"
1064 def is_class_paren_empty(self) -> bool:
1065 """Is this a class with no base classes but using parentheses?
1067 Those are unnecessary and should be removed.
1071 and len(self.leaves) == 4
1073 and self.leaves[2].type == token.LPAR
1074 and self.leaves[2].value == "("
1075 and self.leaves[3].type == token.RPAR
1076 and self.leaves[3].value == ")"
1080 def is_triple_quoted_string(self) -> bool:
1081 """Is the line a triple quoted string?"""
1084 and self.leaves[0].type == token.STRING
1085 and self.leaves[0].value.startswith(('"""', "'''"))
1088 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1089 """If so, needs to be split before emitting."""
1090 for leaf in self.leaves:
1091 if leaf.type == STANDALONE_COMMENT:
1092 if leaf.bracket_depth <= depth_limit:
1097 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1098 """Remove trailing comma if there is one and it's safe."""
1101 and self.leaves[-1].type == token.COMMA
1102 and closing.type in CLOSING_BRACKETS
1106 if closing.type == token.RBRACE:
1107 self.remove_trailing_comma()
1110 if closing.type == token.RSQB:
1111 comma = self.leaves[-1]
1112 if comma.parent and comma.parent.type == syms.listmaker:
1113 self.remove_trailing_comma()
1116 # For parens let's check if it's safe to remove the comma.
1117 # Imports are always safe.
1119 self.remove_trailing_comma()
1122 # Otheriwsse, if the trailing one is the only one, we might mistakenly
1123 # change a tuple into a different type by removing the comma.
1124 depth = closing.bracket_depth + 1
1126 opening = closing.opening_bracket
1127 for _opening_index, leaf in enumerate(self.leaves):
1134 for leaf in self.leaves[_opening_index + 1 :]:
1138 bracket_depth = leaf.bracket_depth
1139 if bracket_depth == depth and leaf.type == token.COMMA:
1141 if leaf.parent and leaf.parent.type == syms.arglist:
1146 self.remove_trailing_comma()
1151 def append_comment(self, comment: Leaf) -> bool:
1152 """Add an inline or standalone comment to the line."""
1154 comment.type == STANDALONE_COMMENT
1155 and self.bracket_tracker.any_open_brackets()
1160 if comment.type != token.COMMENT:
1163 after = len(self.leaves) - 1
1165 comment.type = STANDALONE_COMMENT
1170 self.comments.append((after, comment))
1173 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1174 """Generate comments that should appear directly after `leaf`.
1176 Provide a non-negative leaf `_index` to speed up the function.
1179 for _index, _leaf in enumerate(self.leaves):
1186 for index, comment_after in self.comments:
1190 def remove_trailing_comma(self) -> None:
1191 """Remove the trailing comma and moves the comments attached to it."""
1192 comma_index = len(self.leaves) - 1
1193 for i in range(len(self.comments)):
1194 comment_index, comment = self.comments[i]
1195 if comment_index == comma_index:
1196 self.comments[i] = (comma_index - 1, comment)
1199 def is_complex_subscript(self, leaf: Leaf) -> bool:
1200 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1202 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
1204 if open_lsqb is None:
1207 subscript_start = open_lsqb.next_sibling
1209 isinstance(subscript_start, Node)
1210 and subscript_start.type == syms.subscriptlist
1212 subscript_start = child_towards(subscript_start, leaf)
1213 return subscript_start is not None and any(
1214 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1217 def __str__(self) -> str:
1218 """Render the line."""
1222 indent = " " * self.depth
1223 leaves = iter(self.leaves)
1224 first = next(leaves)
1225 res = f"{first.prefix}{indent}{first.value}"
1228 for _, comment in self.comments:
1232 def __bool__(self) -> bool:
1233 """Return True if the line has leaves or comments."""
1234 return bool(self.leaves or self.comments)
1237 class UnformattedLines(Line):
1238 """Just like :class:`Line` but stores lines which aren't reformatted."""
1240 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1241 """Just add a new `leaf` to the end of the lines.
1243 The `preformatted` argument is ignored.
1245 Keeps track of indentation `depth`, which is useful when the user
1246 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
1249 list(generate_comments(leaf))
1250 except FormatOn as f_on:
1251 self.leaves.append(f_on.leaf_from_consumed(leaf))
1254 self.leaves.append(leaf)
1255 if leaf.type == token.INDENT:
1257 elif leaf.type == token.DEDENT:
1260 def __str__(self) -> str:
1261 """Render unformatted lines from leaves which were added with `append()`.
1263 `depth` is not used for indentation in this case.
1269 for leaf in self.leaves:
1273 def append_comment(self, comment: Leaf) -> bool:
1274 """Not implemented in this class. Raises `NotImplementedError`."""
1275 raise NotImplementedError("Unformatted lines don't store comments separately.")
1277 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1278 """Does nothing and returns False."""
1281 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1282 """Does nothing and returns False."""
1287 class EmptyLineTracker:
1288 """Provides a stateful method that returns the number of potential extra
1289 empty lines needed before and after the currently processed line.
1291 Note: this tracker works on lines that haven't been split yet. It assumes
1292 the prefix of the first leaf consists of optional newlines. Those newlines
1293 are consumed by `maybe_empty_lines()` and included in the computation.
1296 is_pyi: bool = False
1297 previous_line: Optional[Line] = None
1298 previous_after: int = 0
1299 previous_defs: List[int] = Factory(list)
1301 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1302 """Return the number of extra empty lines before and after the `current_line`.
1304 This is for separating `def`, `async def` and `class` with extra empty
1305 lines (two on module-level).
1307 if isinstance(current_line, UnformattedLines):
1310 before, after = self._maybe_empty_lines(current_line)
1311 before -= self.previous_after
1312 self.previous_after = after
1313 self.previous_line = current_line
1314 return before, after
1316 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1318 if current_line.depth == 0:
1319 max_allowed = 1 if self.is_pyi else 2
1320 if current_line.leaves:
1321 # Consume the first leaf's extra newlines.
1322 first_leaf = current_line.leaves[0]
1323 before = first_leaf.prefix.count("\n")
1324 before = min(before, max_allowed)
1325 first_leaf.prefix = ""
1328 depth = current_line.depth
1329 while self.previous_defs and self.previous_defs[-1] >= depth:
1330 self.previous_defs.pop()
1332 before = 0 if depth else 1
1334 before = 1 if depth else 2
1335 is_decorator = current_line.is_decorator
1336 if is_decorator or current_line.is_def or current_line.is_class:
1337 if not is_decorator:
1338 self.previous_defs.append(depth)
1339 if self.previous_line is None:
1340 # Don't insert empty lines before the first line in the file.
1343 if self.previous_line.is_decorator:
1346 if self.previous_line.depth < current_line.depth and (
1347 self.previous_line.is_class or self.previous_line.is_def
1352 self.previous_line.is_comment
1353 and self.previous_line.depth == current_line.depth
1359 if self.previous_line.depth > current_line.depth:
1361 elif current_line.is_class or self.previous_line.is_class:
1362 if current_line.is_stub_class and self.previous_line.is_stub_class:
1370 if current_line.depth and newlines:
1376 and self.previous_line.is_import
1377 and not current_line.is_import
1378 and depth == self.previous_line.depth
1380 return (before or 1), 0
1384 and self.previous_line.is_class
1385 and current_line.is_triple_quoted_string
# NOTE(review): numbered source listing; gaps in the embedded line numbers show
# interior lines of this class are missing (e.g. 1395, 1398-1399, 1406-1407) —
# not runnable as-is. Per its own docstring: a Visitor producing reformatted
# Line objects while mutating leaf prefixes of the tree it visits.
1393 class LineGenerator(Visitor[Line]):
1394 """Generates reformatted Line objects. Empty lines are not emitted.
1396 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1397 in ways that will no longer stringify to valid Python code on the tree.
# Configuration flags consumed by the visit_* methods below.
1400 is_pyi: bool = False
1401 normalize_strings: bool = True
1402 current_line: Line = Factory(Line)
1403 remove_u_prefix: bool = False
1405 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1408 If the line is empty, only emit if it makes sense.
1409 If the line is too long, split it first and then generate.
1411 If any lines were generated, set up a new current_line.
1413 if not self.current_line:
1414 if self.current_line.__class__ == type:
1415 self.current_line.depth += indent
1417 self.current_line = type(depth=self.current_line.depth + indent)
1418 return # Line is empty, don't emit. Creating a new one unnecessary.
1420 complete_line = self.current_line
1421 self.current_line = type(depth=complete_line.depth + indent)
1424 def visit(self, node: LN) -> Iterator[Line]:
1425 """Main method to visit `node` and its children.
1427 Yields :class:`Line` objects.
1429 if isinstance(self.current_line, UnformattedLines):
1430 # File contained `# fmt: off`
1431 yield from self.visit_unformatted(node)
1434 yield from super().visit(node)
1436 def visit_default(self, node: LN) -> Iterator[Line]:
1437 """Default `visit_*()` implementation. Recurses to children of `node`."""
1438 if isinstance(node, Leaf):
1439 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1441 for comment in generate_comments(node):
1442 if any_open_brackets:
1443 # any comment within brackets is subject to splitting
1444 self.current_line.append(comment)
1445 elif comment.type == token.COMMENT:
1446 # regular trailing comment
1447 self.current_line.append(comment)
1448 yield from self.line()
1451 # regular standalone comment
1452 yield from self.line()
1454 self.current_line.append(comment)
1455 yield from self.line()
# FormatOff/FormatOn below are raised by generate_comments (see lines
# 2026/2030 further down in this listing) when fmt control comments appear.
1457 except FormatOff as f_off:
1458 f_off.trim_prefix(node)
1459 yield from self.line(type=UnformattedLines)
1460 yield from self.visit(node)
1462 except FormatOn as f_on:
1463 # This only happens here if somebody says "fmt: on" multiple
1465 f_on.trim_prefix(node)
1466 yield from self.visit_default(node)
1469 normalize_prefix(node, inside_brackets=any_open_brackets)
1470 if self.normalize_strings and node.type == token.STRING:
1471 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1472 normalize_string_quotes(node)
1473 if node.type not in WHITESPACE:
1474 self.current_line.append(node)
1475 yield from super().visit_default(node)
1477 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1478 """Increase indentation level, maybe yield a line."""
1479 # In blib2to3 INDENT never holds comments.
1480 yield from self.line(+1)
1481 yield from self.visit_default(node)
1483 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1484 """Decrease indentation level, maybe yield a line."""
1485 # The current line might still wait for trailing comments. At DEDENT time
1486 # there won't be any (they would be prefixes on the preceding NEWLINE).
1487 # Emit the line then.
1488 yield from self.line()
1490 # While DEDENT has no value, its prefix may contain standalone comments
1491 # that belong to the current indentation level. Get 'em.
1492 yield from self.visit_default(node)
1494 # Finally, emit the dedent.
1495 yield from self.line(-1)
# The `def visit_stmt(` header (original line ~1497) is among the missing
# lines; what follows is its parameter list and body.
1498 self, node: Node, keywords: Set[str], parens: Set[str]
1499 ) -> Iterator[Line]:
1500 """Visit a statement.
1502 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1503 `def`, `with`, `class`, `assert` and assignments.
1505 The relevant Python language `keywords` for a given statement will be
1506 NAME leaves within it. This methods puts those on a separate line.
1508 `parens` holds a set of string leaf values immediately after which
1509 invisible parens should be put.
1511 normalize_invisible_parens(node, parens_after=parens)
1512 for child in node.children:
1513 if child.type == token.NAME and child.value in keywords: # type: ignore
1514 yield from self.line()
1516 yield from self.visit(child)
1518 def visit_suite(self, node: Node) -> Iterator[Line]:
1519 """Visit a suite."""
1520 if self.is_pyi and is_stub_suite(node):
1521 yield from self.visit(node.children[2])
1523 yield from self.visit_default(node)
1525 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1526 """Visit a statement without nested statements."""
1527 is_suite_like = node.parent and node.parent.type in STATEMENT
1529 if self.is_pyi and is_stub_body(node):
1530 yield from self.visit_default(node)
1532 yield from self.line(+1)
1533 yield from self.visit_default(node)
1534 yield from self.line(-1)
1537 if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1538 yield from self.line()
1539 yield from self.visit_default(node)
1541 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1542 """Visit `async def`, `async for`, `async with`."""
1543 yield from self.line()
1545 children = iter(node.children)
1546 for child in children:
1547 yield from self.visit(child)
1549 if child.type == token.ASYNC:
1552 internal_stmt = next(children)
1553 for child in internal_stmt.children:
1554 yield from self.visit(child)
1556 def visit_decorators(self, node: Node) -> Iterator[Line]:
1557 """Visit decorators."""
1558 for child in node.children:
1559 yield from self.line()
1560 yield from self.visit(child)
1562 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1563 """Remove a semicolon and put the other statement on a separate line."""
1564 yield from self.line()
1566 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1567 """End of file. Process outstanding comments and end with a newline."""
1568 yield from self.visit_default(leaf)
1569 yield from self.line()
1571 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1572 """Used when file contained a `# fmt: off`."""
1573 if isinstance(node, Node):
1574 for child in node.children:
1575 yield from self.visit(child)
1579 self.current_line.append(node)
1580 except FormatOn as f_on:
1581 f_on.trim_prefix(node)
1582 yield from self.line()
1583 yield from self.visit(node)
1585 if node.type == token.ENDMARKER:
1586 # somebody decided not to put a final `# fmt: on`
1587 yield from self.line()
# attrs post-init hook: binds per-grammar-node visit_* methods as partials of
# a shared statement visitor (missing line ~1592 presumably assigns
# `v = self.visit_stmt` — TODO confirm).
1589 def __attrs_post_init__(self) -> None:
1590 """You are in a twisty little maze of passages."""
1593 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1594 self.visit_if_stmt = partial(
1595 v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1597 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1598 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1599 self.visit_try_stmt = partial(
1600 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1602 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1603 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1604 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1605 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1606 self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1607 self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1608 self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1609 self.visit_async_funcdef = self.visit_async_stmt
1610 self.visit_decorated = self.visit_decorators
# NOTE(review): numbered source listing fragment. Module-level lookup tables:
# BRACKET maps each opening bracket token to its closing counterpart; the
# derived sets are used by the whitespace/split logic below.
1613 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1614 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1615 OPENING_BRACKETS = set(BRACKET.keys())
1616 CLOSING_BRACKETS = set(BRACKET.values())
1617 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1618 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
# NOTE(review): numbered source listing; gaps in the embedded numbering show
# many interior lines (returns, brace contents) are missing — not runnable.
# Per its docstring: computes the whitespace prefix (SPACE/NO/etc.) that should
# precede `leaf`, by dispatching on the leaf's token type, its parent grammar
# node, and the preceding leaf.
1621 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1622 """Return whitespace prefix if needed for the given `leaf`.
1624 `complex_subscript` signals whether the given leaf is part of a subscription
1625 which has non-trivial arguments, like arithmetic expressions or function calls.
# `t` and `p` are bound on missing lines; presumably t = leaf.type and
# p = leaf.parent — TODO confirm against the full file.
1633 if t in ALWAYS_NO_SPACE:
1636 if t == token.COMMENT:
1639 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1640 if t == token.COLON and p.type not in {
1647 prev = leaf.prev_sibling
1649 prevp = preceding_leaf(p)
1650 if not prevp or prevp.type in OPENING_BRACKETS:
1653 if t == token.COLON:
1654 if prevp.type == token.COLON:
1657 elif prevp.type != token.COMMA and not complex_subscript:
1662 if prevp.type == token.EQUAL:
1664 if prevp.parent.type in {
1672 elif prevp.parent.type == syms.typedargslist:
1673 # A bit hacky: if the equal sign has whitespace, it means we
1674 # previously found it's a typed argument. So, we're using
1678 elif prevp.type in STARS:
1679 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1682 elif prevp.type == token.COLON:
1683 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1684 return SPACE if complex_subscript else NO
1688 and prevp.parent.type == syms.factor
1689 and prevp.type in MATH_OPERATORS
1694 prevp.type == token.RIGHTSHIFT
1696 and prevp.parent.type == syms.shift_expr
1697 and prevp.prev_sibling
1698 and prevp.prev_sibling.type == token.NAME
1699 and prevp.prev_sibling.value == "print" # type: ignore
1701 # Python 2 print chevron
1704 elif prev.type in OPENING_BRACKETS:
1707 if p.type in {syms.parameters, syms.arglist}:
1708 # untyped function signatures or calls
1709 if not prev or prev.type != token.COMMA:
1712 elif p.type == syms.varargslist:
1714 if prev and prev.type != token.COMMA:
1717 elif p.type == syms.typedargslist:
1718 # typed function signatures
1722 if t == token.EQUAL:
1723 if prev.type != syms.tname:
1726 elif prev.type == token.EQUAL:
1727 # A bit hacky: if the equal sign has whitespace, it means we
1728 # previously found it's a typed argument. So, we're using that, too.
1731 elif prev.type != token.COMMA:
1734 elif p.type == syms.tname:
1737 prevp = preceding_leaf(p)
1738 if not prevp or prevp.type != token.COMMA:
1741 elif p.type == syms.trailer:
1742 # attributes and calls
1743 if t == token.LPAR or t == token.RPAR:
1748 prevp = preceding_leaf(p)
1749 if not prevp or prevp.type != token.NUMBER:
1752 elif t == token.LSQB:
1755 elif prev.type != token.COMMA:
1758 elif p.type == syms.argument:
1760 if t == token.EQUAL:
1764 prevp = preceding_leaf(p)
1765 if not prevp or prevp.type == token.LPAR:
1768 elif prev.type in {token.EQUAL} | STARS:
1771 elif p.type == syms.decorator:
1775 elif p.type == syms.dotted_name:
1779 prevp = preceding_leaf(p)
1780 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1783 elif p.type == syms.classdef:
1787 if prev and prev.type == token.LPAR:
1790 elif p.type in {syms.subscript, syms.sliceop}:
1793 assert p.parent is not None, "subscripts are always parented"
1794 if p.parent.type == syms.subscriptlist:
1799 elif not complex_subscript:
1802 elif p.type == syms.atom:
1803 if prev and t == token.DOT:
1804 # dots, but not the first one.
1807 elif p.type == syms.dictsetmaker:
1809 if prev and prev.type == token.DOUBLESTAR:
1812 elif p.type in {syms.factor, syms.star_expr}:
1815 prevp = preceding_leaf(p)
1816 if not prevp or prevp.type in OPENING_BRACKETS:
1819 prevp_parent = prevp.parent
1820 assert prevp_parent is not None
1821 if prevp.type == token.COLON and prevp_parent.type in {
1827 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1830 elif t == token.NAME or t == token.NUMBER:
1833 elif p.type == syms.import_from:
1835 if prev and prev.type == token.DOT:
1838 elif t == token.NAME:
1842 if prev and prev.type == token.DOT:
1845 elif p.type == syms.sliceop:
# NOTE(review): numbered source listing; the loop structure and final return
# of this function (lines 1853, 1855, 1857-1859, 1861+) are missing.
1851 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1852 """Return the first leaf that precedes `node`, if any."""
1854 res = node.prev_sibling
1856 if isinstance(res, Leaf):
# For a Node sibling, the last leaf of that subtree is the preceding leaf.
1860 return list(res.leaves())[-1]
# NOTE(review): numbered source listing; the loop body and return (lines
# 1873-1876, presumably walking node = node.parent) are missing.
1869 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1870 """Return the child of `ancestor` that contains `descendant`."""
1871 node: Optional[LN] = descendant
1872 while node and node.parent != ancestor:
# NOTE(review): numbered source listing; the fallthrough return (lines
# 1887-1889, presumably `return 0`) is missing.
1877 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1878 """Return the priority of the `leaf` delimiter, given a line break after it.
1880 The delimiter priorities returned here are from those delimiters that would
1881 cause a line break after themselves.
1883 Higher numbers are higher priority.
# Only commas split after themselves here; everything else splits before
# (see is_split_before_delimiter below).
1885 if leaf.type == token.COMMA:
1886 return COMMA_PRIORITY
# NOTE(review): numbered source listing; several interior lines (returns,
# `if (` headers, closing parens) are missing — not runnable as-is.
# Maps a leaf to the priority of a line break placed BEFORE it.
1891 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1892 """Return the priority of the `leaf` delimiter, given a line before after it.
1894 The delimiter priorities returned here are from those delimiters that would
1895 cause a line break before themselves.
1897 Higher numbers are higher priority.
1899 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1900 # * and ** might also be MATH_OPERATORS but in this case they are not.
1901 # Don't treat them as a delimiter.
1905 leaf.type == token.DOT
1907 and leaf.parent.type not in {syms.import_from, syms.dotted_name}
1908 and (previous is None or previous.type in CLOSING_BRACKETS)
1913 leaf.type in MATH_OPERATORS
1915 and leaf.parent.type not in {syms.factor, syms.star_expr}
1917 return MATH_PRIORITIES[leaf.type]
1919 if leaf.type in COMPARATORS:
1920 return COMPARATOR_PRIORITY
# Adjacent string literals (implicit concatenation) split between the strings.
1923 leaf.type == token.STRING
1924 and previous is not None
1925 and previous.type == token.STRING
1927 return STRING_PRIORITY
1929 if leaf.type != token.NAME:
1935 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1937 return COMPREHENSION_PRIORITY
1942 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1944 return COMPREHENSION_PRIORITY
1946 if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
1947 return TERNARY_PRIORITY
1949 if leaf.value == "is":
1950 return COMPARATOR_PRIORITY
1955 and leaf.parent.type in {syms.comp_op, syms.comparison}
1957 previous is not None
1958 and previous.type == token.NAME
1959 and previous.value == "not"
1962 return COMPARATOR_PRIORITY
1967 and leaf.parent.type == syms.comp_op
1969 previous is not None
1970 and previous.type == token.NAME
1971 and previous.value == "is"
1974 return COMPARATOR_PRIORITY
1976 if leaf.value in LOGIC_OPERATORS and leaf.parent:
1977 return LOGIC_PRIORITY
# NOTE(review): numbered source listing; setup lines (2000-2009: p, consumed,
# nlines initialization) and some control flow are missing — not runnable.
# Parses comments out of a leaf's whitespace prefix, yielding Leaf objects,
# and raises FormatOn/FormatOff when fmt control comments are found.
1982 def generate_comments(leaf: LN) -> Iterator[Leaf]:
1983 """Clean the prefix of the `leaf` and generate comments from it, if any.
1985 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1986 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1987 move because it does away with modifying the grammar to include all the
1988 possible places in which comments can be placed.
1990 The sad consequence for us though is that comments don't "belong" anywhere.
1991 This is why this function generates simple parentless Leaf objects for
1992 comments. We simply don't know what the correct parent should be.
1994 No matter though, we can live without this. We really only need to
1995 differentiate between inline and standalone comments. The latter don't
1996 share the line with any code.
1998 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1999 are emitted with a fake STANDALONE_COMMENT token identifier.
2010 for index, line in enumerate(p.split("\n")):
2011 consumed += len(line) + 1 # adding the length of the split '\n'
2012 line = line.lstrip()
2015 if not line.startswith("#"):
2018 if index == 0 and leaf.type != token.ENDMARKER:
2019 comment_type = token.COMMENT # simple trailing comment
2021 comment_type = STANDALONE_COMMENT
2022 comment = make_comment(line)
2023 yield Leaf(comment_type, comment, prefix="\n" * nlines)
# fmt/yapf control comments abort normal generation via exceptions; the
# `consumed` byte count lets the caller trim the prefix (see FormatError).
2025 if comment in {"# fmt: on", "# yapf: enable"}:
2026 raise FormatOn(consumed)
2028 if comment in {"# fmt: off", "# yapf: disable"}:
2029 if comment_type == STANDALONE_COMMENT:
2030 raise FormatOff(consumed)
2032 prev = preceding_leaf(leaf)
2033 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
2034 raise FormatOff(consumed)
# NOTE(review): numbered source listing; lines 2048-2050 (presumably the
# empty-content early return and a "#!" shebang special case) are missing.
2039 def make_comment(content: str) -> str:
2040 """Return a consistently formatted comment from the given `content` string.
2042 All comments (except for "##", "#!", "#:") should have a single space between
2043 the hash sign and the content.
2045 If `content` didn't start with a hash sign, one is provided.
2047 content = content.rstrip()
2051 if content[0] == "#":
2052 content = content[1:]
2053 if content and content[0] not in " !:#":
2054 content = " " + content
2055 return "#" + content
# NOTE(review): numbered source listing; the `def split_line(` header itself
# (original line 2058) is among the missing lines — what follows is its
# parameter list and body. The recursive call at line 2114 grounds the name.
2059 line: Line, line_length: int, inner: bool = False, py36: bool = False
2060 ) -> Iterator[Line]:
2061 """Split a `line` into potentially many lines.
2063 They should fit in the allotted `line_length` but might not be able to.
2064 `inner` signifies that there were a pair of brackets somewhere around the
2065 current `line`, possibly transitively. This means we can fallback to splitting
2066 by delimiters if the LHS/RHS don't yield any results.
2068 If `py36` is True, splitting may generate syntax that is only compatible
2069 with Python 3.6 and later.
2071 if isinstance(line, UnformattedLines) or line.is_comment:
2075 line_str = str(line).strip("\n")
2076 if not line.should_explode and is_line_short_enough(
2077 line, line_length=line_length, line_str=line_str
2082 split_funcs: List[SplitFunc]
2084 split_funcs = [left_hand_split]
# Nested helper: try right_hand_split with progressively more trailers
# omitted; fall back to a no-omit best-effort split.
2087 def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
2088 for omit in generate_trailers_to_omit(line, line_length):
2089 lines = list(right_hand_split(line, line_length, py36, omit=omit))
2090 if is_line_short_enough(lines[0], line_length=line_length):
2094 # All splits failed, best effort split with no omits.
2095 # This mostly happens to multiline strings that are by definition
2096 # reported as not fitting a single line.
2097 yield from right_hand_split(line, py36)
2099 if line.inside_brackets:
2100 split_funcs = [delimiter_split, standalone_comment_split, rhs]
2103 for split_func in split_funcs:
2104 # We are accumulating lines in `result` because we might want to abort
2105 # mission and return the original line in the end, or attempt a different
2107 result: List[Line] = []
2109 for l in split_func(line, py36):
2110 if str(l).strip("\n") == line_str:
2111 raise CannotSplit("Split function returned an unchanged result")
# Recursive descent: each produced line may itself need splitting.
2114 split_line(l, line_length=line_length, inner=True, py36=py36)
2116 except CannotSplit as cs:
# NOTE(review): numbered source listing; a few interior lines (e.g. 2143 `if (`,
# 2155, 2159, 2165+) are missing — not runnable as-is.
# Splits at the FIRST bracket pair: head up to the opening bracket, body
# inside, tail from the closing bracket on.
2127 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
2128 """Split line into many lines, starting with the first matching bracket pair.
2130 Note: this usually looks weird, only use this for function definitions.
2131 Prefer RHS otherwise. This is why this function is not symmetrical with
2132 :func:`right_hand_split` which also handles optional parentheses.
2134 head = Line(depth=line.depth)
2135 body = Line(depth=line.depth + 1, inside_brackets=True)
2136 tail = Line(depth=line.depth)
2137 tail_leaves: List[Leaf] = []
2138 body_leaves: List[Leaf] = []
2139 head_leaves: List[Leaf] = []
2140 current_leaves = head_leaves
2141 matching_bracket = None
2142 for leaf in line.leaves:
2144 current_leaves is body_leaves
2145 and leaf.type in CLOSING_BRACKETS
2146 and leaf.opening_bracket is matching_bracket
2148 current_leaves = tail_leaves if body_leaves else head_leaves
2149 current_leaves.append(leaf)
2150 if current_leaves is head_leaves:
2151 if leaf.type in OPENING_BRACKETS:
2152 matching_bracket = leaf
2153 current_leaves = body_leaves
2154 # Since body is a new indent level, remove spurious leading whitespace.
2156 normalize_prefix(body_leaves[0], inside_brackets=True)
2157 # Build the new lines.
2158 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2160 result.append(leaf, preformatted=True)
2161 for comment_after in line.comments_after(leaf):
2162 result.append(comment_after, preformatted=True)
2163 bracket_split_succeeded_or_raise(head, body, tail)
2164 for result in (head, body, tail):
# NOTE(review): numbered source listing; several interior lines are missing —
# not runnable as-is. Mirror of left_hand_split: scans leaves in REVERSE to
# split at the LAST bracket pair, and recurses without optional parentheses
# when they can be omitted.
2169 def right_hand_split(
2170 line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
2171 ) -> Iterator[Line]:
2172 """Split line into many lines, starting with the last matching bracket pair.
2174 If the split was by optional parentheses, attempt splitting without them, too.
2175 `omit` is a collection of closing bracket IDs that shouldn't be considered for
2178 Note: running this function modifies `bracket_depth` on the leaves of `line`.
2180 head = Line(depth=line.depth)
2181 body = Line(depth=line.depth + 1, inside_brackets=True)
2182 tail = Line(depth=line.depth)
2183 tail_leaves: List[Leaf] = []
2184 body_leaves: List[Leaf] = []
2185 head_leaves: List[Leaf] = []
2186 current_leaves = tail_leaves
2187 opening_bracket = None
2188 closing_bracket = None
2189 for leaf in reversed(line.leaves):
2190 if current_leaves is body_leaves:
2191 if leaf is opening_bracket:
2192 current_leaves = head_leaves if body_leaves else tail_leaves
2193 current_leaves.append(leaf)
2194 if current_leaves is tail_leaves:
2195 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2196 opening_bracket = leaf.opening_bracket
2197 closing_bracket = leaf
2198 current_leaves = body_leaves
# Leaves were collected back-to-front; restore source order.
2199 tail_leaves.reverse()
2200 body_leaves.reverse()
2201 head_leaves.reverse()
2202 # Since body is a new indent level, remove spurious leading whitespace.
2204 normalize_prefix(body_leaves[0], inside_brackets=True)
2206 # No `head` means the split failed. Either `tail` has all content or
2207 # the matching `opening_bracket` wasn't available on `line` anymore.
2208 raise CannotSplit("No brackets found")
2210 # Build the new lines.
2211 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2213 result.append(leaf, preformatted=True)
2214 for comment_after in line.comments_after(leaf):
2215 result.append(comment_after, preformatted=True)
2216 bracket_split_succeeded_or_raise(head, body, tail)
2217 assert opening_bracket and closing_bracket
2219 # the opening bracket is an optional paren
2220 opening_bracket.type == token.LPAR
2221 and not opening_bracket.value
2222 # the closing bracket is an optional paren
2223 and closing_bracket.type == token.RPAR
2224 and not closing_bracket.value
2225 # there are no standalone comments in the body
2226 and not line.contains_standalone_comments(0)
2227 # and it's not an import (optional parens are the only thing we can split
2228 # on in this case; attempting a split without them is a waste of time)
2229 and not line.is_import
2231 omit = {id(closing_bracket), *omit}
2232 if can_omit_invisible_parens(body, line_length):
2234 yield from right_hand_split(line, line_length, py36=py36, omit=omit)
2239 ensure_visible(opening_bracket)
2240 ensure_visible(closing_bracket)
2241 body.should_explode = should_explode(body, opening_bracket)
2242 for result in (head, body, tail):
# NOTE(review): numbered source listing; the comparison lines around 2262-2267
# (what tail_len is compared against) are missing — not runnable as-is.
2247 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2248 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2250 Do nothing otherwise.
2252 A left- or right-hand split is based on a pair of brackets. Content before
2253 (and including) the opening bracket is left on one line, content inside the
2254 brackets is put on a separate line, and finally content starting with and
2255 following the closing bracket is put on a separate line.
2257 Those are called `head`, `body`, and `tail`, respectively. If the split
2258 produced the same line (all content in `head`) or ended up with an empty `body`
2259 and the `tail` is just the closing bracket, then it's considered failed.
2261 tail_len = len(str(tail).strip())
2264 raise CannotSplit("Splitting brackets produced the same line")
2268 f"Splitting brackets on an empty body to save "
2269 f"{tail_len} characters is not worth it"
# NOTE(review): numbered source listing; lines 2277-2279 (docstring close and
# presumably a @wraps(split_func) decorator) and 2283-2284 (the yield) are
# missing — not runnable as-is.
2273 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2274 """Normalize prefix of the first leaf in every line returned by `split_func`.
2276 This is a decorator over relevant split functions.
2280 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
2281 for l in split_func(line, py36):
2282 normalize_prefix(l.leaves[0], inside_brackets=True)
2285 return split_wrapper
# NOTE(review): numbered source listing; interior lines (try/except headers,
# yields, the final `if` header at ~2342-2343) are missing — not runnable.
# Splits a line at every delimiter of the highest priority found by the
# bracket tracker, optionally appending a trailing comma (py36 only for
# signatures containing * / **).
2288 @dont_increase_indentation
2289 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
2290 """Split according to delimiters of the highest priority.
2292 If `py36` is True, the split will add trailing commas also in function
2293 signatures that contain `*` and `**`.
2296 last_leaf = line.leaves[-1]
2298 raise CannotSplit("Line empty")
2300 bt = line.bracket_tracker
2302 delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2304 raise CannotSplit("No delimiters found")
2306 if delimiter_priority == DOT_PRIORITY:
2307 if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2308 raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2310 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2311 lowest_depth = sys.maxsize
2312 trailing_comma_safe = True
2314 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2315 """Append `leaf` to current line or to new line if appending impossible."""
2316 nonlocal current_line
2318 current_line.append_safe(leaf, preformatted=True)
2319 except ValueError as ve:
2322 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2323 current_line.append(leaf)
2325 for index, leaf in enumerate(line.leaves):
2326 yield from append_to_line(leaf)
2328 for comment_after in line.comments_after(leaf, index):
2329 yield from append_to_line(comment_after)
2331 lowest_depth = min(lowest_depth, leaf.bracket_depth)
2332 if leaf.bracket_depth == lowest_depth and is_vararg(
2333 leaf, within=VARARGS_PARENTS
2335 trailing_comma_safe = trailing_comma_safe and py36
2336 leaf_priority = bt.delimiters.get(id(leaf))
2337 if leaf_priority == delimiter_priority:
2340 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2344 and delimiter_priority == COMMA_PRIORITY
2345 and current_line.leaves[-1].type != token.COMMA
2346 and current_line.leaves[-1].type != STANDALONE_COMMENT
2348 current_line.append(Leaf(token.COMMA, ","))
# NOTE(review): numbered source listing; yields and the final emit (lines
# 2376-2378) are missing — not runnable as-is. Same accumulate-and-flush
# structure as delimiter_split, but flushing around standalone comments.
2352 @dont_increase_indentation
2353 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2354 """Split standalone comments from the rest of the line."""
2355 if not line.contains_standalone_comments(0):
2356 raise CannotSplit("Line does not have any standalone comments")
2358 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2360 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2361 """Append `leaf` to current line or to new line if appending impossible."""
2362 nonlocal current_line
2364 current_line.append_safe(leaf, preformatted=True)
2365 except ValueError as ve:
2368 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2369 current_line.append(leaf)
2371 for index, leaf in enumerate(line.leaves):
2372 yield from append_to_line(leaf)
2374 for comment_after in line.comments_after(leaf, index):
2375 yield from append_to_line(comment_after)
# NOTE(review): numbered source listing; the bindings of `v` and `p` (lines
# 2383-2388, presumably leaf.value / leaf.parent) and `return bool(` are
# missing — not runnable as-is.
2381 def is_import(leaf: Leaf) -> bool:
2382 """Return True if the given leaf starts an import statement."""
2389 (v == "import" and p and p.type == syms.import_name)
2390 or (v == "from" and p and p.type == syms.import_from)
# NOTE(review): numbered source listing; lines 2397-2398 (docstring
# continuation) and 2405-2406 are missing — not runnable as-is.
2395 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2396 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2399 Note: don't use backslashes for formatting or you'll lose your voting rights.
2401 if not inside_brackets:
2402 spl = leaf.prefix.split("#")
# Backslash continuations disable newline preservation (see the note above).
2403 if "\\" not in spl[0]:
2404 nl_count = spl[-1].count("\n")
2407 leaf.prefix = "\n" * nl_count
# NOTE(review): numbered source listing; lines 2415, 2417, 2419, 2424 are
# missing (blank/docstring lines and the `if remove_u_prefix:` header).
2413 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2414 """Make all string prefixes lowercase.
2416 If remove_u_prefix is given, also removes any u prefix from the string.
2418 Note: Mutates its argument.
2420 match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2421 assert match is not None, f"failed to match string {leaf.value!r}"
2422 orig_prefix = match.group(1)
2423 new_prefix = orig_prefix.lower()
2425 new_prefix = new_prefix.replace("u", "")
2426 leaf.value = f"{new_prefix}{match.group(2)}"
# NOTE(review): numbered source listing; the orig_quote/new_quote assignments
# inside the branch at 2438-2448 and a few returns are missing — not runnable.
# Rewrites a string literal to prefer double quotes unless that would add
# escapes; raw strings are never re-escaped.
2429 def normalize_string_quotes(leaf: Leaf) -> None:
2430 """Prefer double quotes but only if it doesn't cause more escaping.
2432 Adds or removes backslashes as appropriate. Doesn't parse and fix
2433 strings nested in f-strings (yet).
2435 Note: Mutates its argument.
2437 value = leaf.value.lstrip("furbFURB")
2438 if value[:3] == '"""':
2441 elif value[:3] == "'''":
2444 elif value[0] == '"':
2450 first_quote_pos = leaf.value.find(orig_quote)
2451 if first_quote_pos == -1:
2452 return # There's an internal error
2454 prefix = leaf.value[:first_quote_pos]
2455 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2456 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2457 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2458 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2459 if "r" in prefix.casefold():
2460 if unescaped_new_quote.search(body):
2461 # There's at least one unescaped new_quote in this raw string
2462 # so converting is impossible
2465 # Do not introduce or remove backslashes in raw strings
2468 # remove unnecessary quotes
2469 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2470 if body != new_body:
2471 # Consider the string without unnecessary quotes as the original
2473 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2474 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2475 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2476 if new_quote == '"""' and new_body[-1] == '"':
# Edge case: a triple-quoted body ending in '"' would produce '""""'.
2478 new_body = new_body[:-1] + '\\"'
2479 orig_escape_count = body.count("\\")
2480 new_escape_count = new_body.count("\\")
2481 if new_escape_count > orig_escape_count:
2482 return # Do not introduce more escaping
2484 if new_escape_count == orig_escape_count and orig_quote == '"':
2485 return # Prefer double quotes
2487 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
# NOTE(review): numbered source listing; try/except headers around the
# generate_comments probe (2499-2501) and several branch lines are missing.
# Rewrites optional parentheses after the keywords in `parens_after` to be
# invisible (empty-valued LPAR/RPAR leaves) or inserts them where absent.
2490 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2491 """Make existing optional parentheses invisible or create new ones.
2493 `parens_after` is a set of string leaf values immeditely after which parens
2496 Standardizes on visible parentheses for single-element tuples, and keeps
2497 existing visible parentheses for other tuples and generator expressions.
# Probe for fmt control comments; FormatOn/FormatOff abort normalization.
2500 list(generate_comments(node))
2502 return # This `node` has a prefix with `# fmt: off`, don't mess with parens.
2505 for index, child in enumerate(list(node.children)):
2507 if child.type == syms.atom:
2508 maybe_make_parens_invisible_in_atom(child)
2509 elif is_one_tuple(child):
2510 # wrap child in visible parentheses
2511 lpar = Leaf(token.LPAR, "(")
2512 rpar = Leaf(token.RPAR, ")")
2514 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2515 elif node.type == syms.import_from:
2516 # "import from" nodes store parentheses directly as part of
2518 if child.type == token.LPAR:
2519 # make parentheses invisible
2520 child.value = "" # type: ignore
2521 node.children[-1].value = "" # type: ignore
2522 elif child.type != token.STAR:
2523 # insert invisible parentheses
2524 node.insert_child(index, Leaf(token.LPAR, ""))
2525 node.append_child(Leaf(token.RPAR, ""))
2528 elif not (isinstance(child, Leaf) and is_multiline_string(child)):
2529 # wrap child in invisible parentheses
2530 lpar = Leaf(token.LPAR, "")
2531 rpar = Leaf(token.RPAR, "")
2532 index = child.remove() or 0
2533 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2535 check_lpar = isinstance(child, Leaf) and child.value in parens_after
# NOTE(review): numbered source listing; lines 2540 (`if (`), 2544, and
# 2546-2548 (`):`/`return False`) plus the final return are missing.
2538 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
2539 """If it's safe, make the parens in the atom `node` invisible, recusively."""
2541 node.type != syms.atom
2542 or is_empty_tuple(node)
2543 or is_one_tuple(node)
2545 or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2549 first = node.children[0]
2550 last = node.children[-1]
2551 if first.type == token.LPAR and last.type == token.RPAR:
2552 # make parentheses invisible
2553 first.value = "" # type: ignore
2554 last.value = "" # type: ignore
# Recurse into the wrapped expression to strip nested redundant parens.
2555 if len(node.children) > 1:
2556 maybe_make_parens_invisible_in_atom(node.children[1])
# NOTE(review): numbered source listing; lines 2564 and 2569 (the `return (`
# wrapper around this condition) are missing.
2562 def is_empty_tuple(node: LN) -> bool:
2563 """Return True if `node` holds an empty tuple."""
# An empty tuple parses as an atom with exactly LPAR + RPAR children.
2565 node.type == syms.atom
2566 and len(node.children) == 2
2567 and node.children[0].type == token.LPAR
2568 and node.children[1].type == token.RPAR
# NOTE(review): numbered source listing; early returns (2576-2577, 2583-2585)
# and the final `return (` wrapper (2588, 2592) are missing.
2572 def is_one_tuple(node: LN) -> bool:
2573 """Return True if `node` holds a tuple with one element, with or without parens."""
# Parenthesized form: atom of (LPAR, testlist_gexp, RPAR) whose gexp is
# exactly one element followed by a COMMA.
2574 if node.type == syms.atom:
2575 if len(node.children) != 3:
2578 lpar, gexp, rpar = node.children
2580 lpar.type == token.LPAR
2581 and gexp.type == syms.testlist_gexp
2582 and rpar.type == token.RPAR
2586 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
# Unparenthesized form: an implicit tuple node with one element and a COMMA.
2589 node.type in IMPLICIT_TUPLE
2590 and len(node.children) == 2
2591 and node.children[1].type == token.COMMA
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression."""
    if node.type == syms.yield_expr:
        return True

    if node.type == token.NAME and node.value == "yield":  # type: ignore
        return True

    if node.type != syms.atom:
        return False

    if len(node.children) != 3:
        return False

    # A parenthesized yield: recurse into the wrapped expression.
    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        return is_yield(expr)

    return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    # Strip any combination of string prefixes (f/u/r/b, any case) before
    # checking for triple quotes.
    value = leaf.value.lstrip("furbFURB")
    return value[:3] in {'"""', "'''"} and "\n" in value
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body."""
    # A stub suite looks like: NEWLINE INDENT <simple_stmt> DEDENT.
    if (
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT
    ):
        return False

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    child = node.children[0]

    # `...` parses as an atom made of three DOT leaves.
    return (
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
    )
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    """
    if node.type != syms.atom:
        return 0

    first = node.children[0]
    last = node.children[-1]
    if not (first.type == token.LPAR and last.type == token.RPAR):
        return 0

    # Feed everything between the parentheses to a fresh tracker.
    bt = BracketTracker()
    for c in node.children[1:-1]:
        if isinstance(c, Leaf):
            bt.mark(c)
        else:
            for leaf in c.leaves():
                bt.mark(leaf)
    try:
        return bt.max_delimiter_priority()
    except ValueError:
        # No delimiters were recorded.
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
    # Only explode collection literals / import parens, not arbitrary brackets.
    if not (
        opening_bracket.parent
        and opening_bracket.parent.type in {syms.atom, syms.import_from}
        and opening_bracket.value in "[{("
    ):
        return False

    try:
        last_leaf = line.leaves[-1]
        # A trailing comma shouldn't count as a delimiter here.
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        # Empty line or no delimiters at all.
        return False

    return max_priority == COMMA_PRIORITY
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # Check the string prefix for an f-string marker (possibly raw).
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # Trailing comma in a signature/call: only 3.6+ allows it after
            # * or ** arguments.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty.

    NOTE(review): reconstructed from a gappy listing — verify control flow
    against upstream black 18.5b1.
    """
    omit: Set[LeafID] = set()
    yield omit

    length = 4 * line.depth
    opening_bracket = None
    closing_bracket = None
    optional_brackets: Set[LeafID] = set()
    inner_brackets: Set[LeafID] = set()
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # An inline comment glued to the leaf makes joining unsafe.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        optional_brackets.discard(id(leaf))
        if opening_bracket:
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                # Invisible bracket: remember it as merely optional.
                optional_brackets.add(id(opening_bracket))
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit
            closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file."""
    imports = set()
    for child in node.children:
        # Only leading simple statements can hold __future__ imports.
        if child.type != syms.simple_stmt:
            break
        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue
            else:
                break
        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break
            # Names start after `from __future__ import`.
            for import_from_child in first_child.children[3:]:
                if isinstance(import_from_child, Leaf):
                    if import_from_child.type == token.NAME:
                        imports.add(import_from_child.value)
                else:
                    assert import_from_child.type == syms.import_as_names
                    for leaf in import_from_child.children:
                        if isinstance(leaf, Leaf) and leaf.type == token.NAME:
                            imports.add(leaf.value)
        else:
            break
    return imports
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        # Match against a root-relative POSIX path with a leading slash.
        normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        if child.is_dir():
            normalized_path += "/"
        exclude_match = exclude.search(normalized_path)
        if exclude_match and exclude_match.group(0):
            report.path_ignored(child, f"matches --exclude={exclude.pattern}")
            continue

        if child.is_dir():
            yield from gen_python_files_in_dir(child, root, include, exclude, report)

        elif child.is_file():
            include_match = include.search(normalized_path)
            if include_match:
                yield child
def find_project_root(srcs: List[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.
    """
    if not srcs:
        return Path("/").resolve()

    # NOTE(review): `min` picks the lexicographically smallest resolved path,
    # which is treated as the common base — confirm against upstream intent.
    common_base = min(Path(src).resolve() for src in srcs)
    if common_base.is_dir():
        # Append a fake file so `parents` below returns `common_base_dir`, too.
        common_base /= "fake-file"
    for directory in common_base.parents:
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    return directory
2914 """Provides a reformatting counter. Can be rendered with `str(report)`."""
# NOTE(review): fragment of the Report dataclass body; the `class Report:`
# header and several fields (check, quiet, same_count) fall outside this view.
# verbose: print per-file messages even for unchanged/ignored paths.
2918 verbose: bool = False
# change_count: number of files that were (or would be) reformatted.
2919 change_count: int = 0
# failure_count: number of files that failed to reformat.
2921 failure_count: int = 0
2923 def done(self, src: Path, changed: Changed) -> None:
2924 """Increment the counter for successful reformatting. Write out a message."""
2925 if changed is Changed.YES:
2926 reformatted = "would reformat" if self.check else "reformatted"
2927 if self.verbose or not self.quiet:
2928 out(f"{reformatted} {src}")
2929 self.change_count += 1
2932 if changed is Changed.NO:
2933 msg = f"{src} already well formatted, good job."
2935 msg = f"{src} wasn't modified on disk since last run."
2936 out(msg, bold=False)
2937 self.same_count += 1
2939 def failed(self, src: Path, message: str) -> None:
2940 """Increment the counter for failed reformatting. Write out a message."""
2941 err(f"error: cannot format {src}: {message}")
2942 self.failure_count += 1
2944 def path_ignored(self, path: Path, message: str) -> None:
2946 out(f"{path} ignored: {message}", bold=False)
2949 def return_code(self) -> int:
2950 """Return the exit code that the app should use.
2952 This considers the current state of changed files and failures:
2953 - if there were any failures, return 123;
2954 - if any files were changed and --check is being used, return 1;
2955 - otherwise return 0.
2957 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2958 # 126 we have special returncodes reserved by the shell.
2959 if self.failure_count:
2962 elif self.change_count and self.check:
2967 def __str__(self) -> str:
2968 """Render a color report of the current state.
2970 Use `click.unstyle` to remove colors.
2973 reformatted = "would be reformatted"
2974 unchanged = "would be left unchanged"
2975 failed = "would fail to reformat"
2977 reformatted = "reformatted"
2978 unchanged = "left unchanged"
2979 failed = "failed to reformat"
2981 if self.change_count:
2982 s = "s" if self.change_count > 1 else ""
2984 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2987 s = "s" if self.same_count > 1 else ""
2988 report.append(f"{self.same_count} file{s} {unchanged}")
2989 if self.failure_count:
2990 s = "s" if self.failure_count > 1 else ""
2992 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2994 return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent."""

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{'  ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{'  ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{'  ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if `dst` reformats differently the second time."""
    newdst = format_str(dst, line_length=line_length, mode=mode)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            # Ensure each chunk ends with a newline so sections don't run together.
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`."""
    import difflib

    # Re-add the newline each split() dropped so difflib sees proper lines.
    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    NOTE(review): `asyncio.Task.all_tasks` and the `loop=` argument to
    `gather` were removed in Python 3.10+; this matches the 3.6-era API.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
        )
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    return regex.sub(replacement, regex.sub(replacement, original))
def enumerate_reversed(sequence: "Sequence[T]") -> "Iterator[Tuple[Index, T]]":
    """Like `reversed(enumerate(sequence))` if that were possible."""
    # Annotations are quoted so the function is importable on its own
    # (T and Index are module-level aliases defined elsewhere in this file).
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        index -= 1
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Stops prematurely on multiline strings and standalone comments.
    """
    op = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in op(line.leaves):
        length = len(leaf.prefix) + len(leaf.value)
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comment: Optional[Leaf]
        for comment in line.comments_after(leaf, index):
            # Trailing comments count toward the rendered length of the leaf.
            length += len(comment.value)

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    if not line_str:
        line_str = str(line).strip("\n")
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.

    NOTE(review): reconstructed from a gappy listing — verify the loop/else
    details against upstream black 18.5b1.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        length = 4 * line.depth
        _index = -1
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                break

            length += leaf_length
            if length > line_length:
                break

            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                break

        else:
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the cache file path for the given formatting configuration."""
    pyi = bool(mode & FileMode.PYI)
    py36 = bool(mode & FileMode.PYTHON36)
    # Cache files are keyed by line length and mode flags so different
    # configurations don't poison each other.
    return (
        CACHE_DIR
        / f"cache.{line_length}{'.pyi' if pyi else ''}{'.py36' if py36 else ''}.pickle"
    )
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except pickle.UnpicklingError:
            # Corrupt cache: treat as empty; write_cache will rewrite it.
            return {}

    return cache
def get_cache_info(path: Path) -> "CacheInfo":
    """Return the information used to check if a file is already formatted or not."""
    # Annotation quoted so the function is importable on its own
    # (CacheInfo is a module-level alias defined elsewhere in this file).
    stat = path.stat()
    return stat.st_mtime, stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that modified on disk or are not in the
    cache. The other contains paths to non-modified files.
    """
    todo, done = set(), set()
    for src in sources:
        src = src.resolve()
        # A missing or stale (mtime, size) entry means the file needs work.
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file."""
    cache_file = get_cache_file(line_length, mode)
    try:
        if not CACHE_DIR.exists():
            CACHE_DIR.mkdir(parents=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Caching is best-effort; failure to write must not break formatting.
        pass
3339 if __name__ == "__main__":