All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you could read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
5 from enum import Enum, Flag
6 from functools import partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
37 from appdirs import user_cache_dir
38 from attr import dataclass, Factory
42 from blib2to3.pytree import Node, Leaf, type_repr
43 from blib2to3 import pygram, pytree
44 from blib2to3.pgen2 import driver, token
45 from blib2to3.pgen2.parse import ParseError
48 __version__ = "18.5b1"
49 DEFAULT_LINE_LENGTH = 88
51 r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
53 DEFAULT_INCLUDES = r"\.pyi?$"
54 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
65 LN = Union[Leaf, Node]
66 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
69 CacheInfo = Tuple[Timestamp, FileSize]
70 Cache = Dict[Path, CacheInfo]
71 out = partial(click.secho, bold=True, err=True)
72 err = partial(click.secho, fg="red", err=True)
74 pygram.initialize(CACHE_DIR)
75 syms = pygram.python_symbols
class NothingChanged(UserWarning):
    """Raised by :func:`format_file` when reformatted code is the same as source."""
class CannotSplit(Exception):
    """A readable split that fits the allotted line length is impossible.

    Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
    :func:`delimiter_split`.
    """
class FormatError(Exception):
    """Base exception for `# fmt: on` and `# fmt: off` handling.

    It holds the number of bytes of the prefix consumed before the format
    control comment appeared.
    """

    def __init__(self, consumed: int) -> None:
        super().__init__(consumed)
        # Number of bytes of the leaf prefix read before the control comment.
        self.consumed = consumed

    def trim_prefix(self, leaf: Leaf) -> None:
        """Drop the already-consumed part of `leaf`'s prefix in place."""
        leaf.prefix = leaf.prefix[self.consumed :]

    def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
        """Returns a new Leaf from the consumed part of the prefix."""
        unformatted_prefix = leaf.prefix[: self.consumed]
        return Leaf(token.NEWLINE, unformatted_prefix)
class FormatOn(FormatError):
    """Found a comment like `# fmt: on` in the file."""
class FormatOff(FormatError):
    """Found a comment like `# fmt: off` in the file."""
118 class WriteBack(Enum):
124 def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
125 if check and not diff:
128 return cls.DIFF if diff else cls.YES
137 class FileMode(Flag):
141 NO_STRING_NORMALIZATION = 4
144 def from_configuration(
145 cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
147 mode = cls.AUTO_DETECT
152 if skip_string_normalization:
153 mode |= cls.NO_STRING_NORMALIZATION
162 default=DEFAULT_LINE_LENGTH,
163 help="How many character per line to allow.",
170 "Allow using Python 3.6-only syntax on all input files. This will put "
171 "trailing commas in function signatures and calls also after *args and "
172 "**kwargs. [default: per-file auto-detection]"
179 "Format all input files like typing stubs regardless of file extension "
180 "(useful when piping source on standard input)."
185 "--skip-string-normalization",
187 help="Don't normalize string quotes or prefixes.",
193 "Don't write the files back, just return the status. Return code 0 "
194 "means nothing would change. Return code 1 means some files would be "
195 "reformatted. Return code 123 means there was an internal error."
201 help="Don't write the files back, just output a diff for each file on stdout.",
206 help="If --fast given, skip temporary sanity checks. [default: --safe]",
211 default=DEFAULT_INCLUDES,
213 "A regular expression that matches files and directories that should be "
214 "included on recursive searches. An empty value means all files are "
215 "included regardless of the name. Use forward slashes for directories on "
216 "all platforms (Windows, too). Exclusions are calculated first, inclusions "
224 default=DEFAULT_EXCLUDES,
226 "A regular expression that matches files and directories that should be "
227 "excluded on recursive searches. An empty value means no paths are excluded. "
228 "Use forward slashes for directories on all platforms (Windows, too). "
229 "Exclusions are calculated first, inclusions later."
238 "Don't emit non-error messages to stderr. Errors are still emitted, "
239 "silence those with 2>/dev/null."
247 "Also emit messages to stderr about files that were not changed or were "
248 "ignored due to --exclude=."
251 @click.version_option(version=__version__)
256 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
268 skip_string_normalization: bool,
275 """The uncompromising code formatter."""
276 write_back = WriteBack.from_configuration(check=check, diff=diff)
277 mode = FileMode.from_configuration(
278 py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
280 report = Report(check=check, quiet=quiet, verbose=verbose)
281 sources: List[Path] = []
283 include_regex = re.compile(include)
285 err(f"Invalid regular expression for include given: {include!r}")
288 exclude_regex = re.compile(exclude)
290 err(f"Invalid regular expression for exclude given: {exclude!r}")
292 root = find_project_root(src)
297 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
299 elif p.is_file() or s == "-":
300 # if a file was explicitly given, we don't care about its extension
303 err(f"invalid path: {s}")
304 if len(sources) == 0:
305 out("No paths given. Nothing to do 😴")
309 elif len(sources) == 1:
312 line_length=line_length,
314 write_back=write_back,
319 loop = asyncio.get_event_loop()
320 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
322 loop.run_until_complete(
325 line_length=line_length,
327 write_back=write_back,
337 out("All done! ✨ 🍰 ✨")
338 click.echo(str(report))
339 ctx.exit(report.return_code)
346 write_back: WriteBack,
350 """Reformat a single file under `src` without spawning child processes.
352 If `quiet` is True, non-error messages are not output. `line_length`,
353 `write_back`, `fast` and `pyi` options are passed to
354 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
358 if not src.is_file() and str(src) == "-":
359 if format_stdin_to_stdout(
360 line_length=line_length, fast=fast, write_back=write_back, mode=mode
362 changed = Changed.YES
365 if write_back != WriteBack.DIFF:
366 cache = read_cache(line_length, mode)
367 res_src = src.resolve()
368 if res_src in cache and cache[res_src] == get_cache_info(res_src):
369 changed = Changed.CACHED
370 if changed is not Changed.CACHED and format_file_in_place(
372 line_length=line_length,
374 write_back=write_back,
377 changed = Changed.YES
378 if write_back == WriteBack.YES and changed is not Changed.NO:
379 write_cache(cache, [src], line_length, mode)
380 report.done(src, changed)
381 except Exception as exc:
382 report.failed(src, str(exc))
385 async def schedule_formatting(
389 write_back: WriteBack,
395 """Run formatting of `sources` in parallel using the provided `executor`.
397 (Use ProcessPoolExecutors for actual parallelism.)
399 `line_length`, `write_back`, `fast`, and `pyi` options are passed to
400 :func:`format_file_in_place`.
403 if write_back != WriteBack.DIFF:
404 cache = read_cache(line_length, mode)
405 sources, cached = filter_cached(cache, sources)
407 report.done(src, Changed.CACHED)
412 if write_back == WriteBack.DIFF:
413 # For diff output, we need locks to ensure we don't interleave output
414 # from different processes.
416 lock = manager.Lock()
418 loop.run_in_executor(
420 format_file_in_place,
428 for src in sorted(sources)
430 pending: Iterable[asyncio.Task] = tasks.keys()
432 loop.add_signal_handler(signal.SIGINT, cancel, pending)
433 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
434 except NotImplementedError:
435 # There are no good alternatives for these on Windows
438 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
440 src = tasks.pop(task)
442 cancelled.append(task)
443 elif task.exception():
444 report.failed(src, str(task.exception()))
446 formatted.append(src)
447 report.done(src, Changed.YES if task.result() else Changed.NO)
449 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
450 if write_back == WriteBack.YES and formatted:
451 write_cache(cache, formatted, line_length, mode)
454 def format_file_in_place(
458 write_back: WriteBack = WriteBack.NO,
459 mode: FileMode = FileMode.AUTO_DETECT,
460 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
462 """Format file under `src` path. Return True if changed.
464 If `write_back` is True, write reformatted code back to stdout.
465 `line_length` and `fast` options are passed to :func:`format_file_contents`.
467 if src.suffix == ".pyi":
470 with open(src, "rb") as buf:
471 newline, encoding, src_contents = prepare_input(buf.read())
473 dst_contents = format_file_contents(
474 src_contents, line_length=line_length, fast=fast, mode=mode
476 except NothingChanged:
479 if write_back == write_back.YES:
480 with open(src, "w", encoding=encoding, newline=newline) as f:
481 f.write(dst_contents)
482 elif write_back == write_back.DIFF:
483 src_name = f"{src} (original)"
484 dst_name = f"{src} (formatted)"
485 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
489 f = io.TextIOWrapper(
495 f.write(diff_contents)
503 def format_stdin_to_stdout(
506 write_back: WriteBack = WriteBack.NO,
507 mode: FileMode = FileMode.AUTO_DETECT,
509 """Format file on stdin. Return True if changed.
511 If `write_back` is True, write reformatted code back to stdout.
512 `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
513 :func:`format_file_contents`.
515 newline, encoding, src = prepare_input(sys.stdin.buffer.read())
518 dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
521 except NothingChanged:
525 if write_back == WriteBack.YES:
526 f = io.TextIOWrapper(
534 elif write_back == WriteBack.DIFF:
535 src_name = "<stdin> (original)"
536 dst_name = "<stdin> (formatted)"
537 f = io.TextIOWrapper(
543 f.write(diff(src, dst, src_name, dst_name))
547 def format_file_contents(
552 mode: FileMode = FileMode.AUTO_DETECT,
554 """Reformat contents a file and return new contents.
556 If `fast` is False, additionally confirm that the reformatted code is
557 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
558 `line_length` is passed to :func:`format_str`.
560 if src_contents.strip() == "":
563 dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
564 if src_contents == dst_contents:
568 assert_equivalent(src_contents, dst_contents)
569 assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
574 src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
576 """Reformat a string and return new contents.
578 `line_length` determines how many characters per line are allowed.
580 src_node = lib2to3_parse(src_contents)
582 future_imports = get_future_imports(src_node)
583 is_pyi = bool(mode & FileMode.PYI)
584 py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
585 normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
586 lines = LineGenerator(
587 remove_u_prefix=py36 or "unicode_literals" in future_imports,
589 normalize_strings=normalize_strings,
591 elt = EmptyLineTracker(is_pyi=is_pyi)
594 for current_line in lines.visit(src_node):
595 for _ in range(after):
596 dst_contents += str(empty_line)
597 before, after = elt.maybe_empty_lines(current_line)
598 for _ in range(before):
599 dst_contents += str(empty_line)
600 for line in split_line(current_line, line_length=line_length, py36=py36):
601 dst_contents += str(line)
def prepare_input(src: bytes) -> Tuple[str, str, str]:
    """Analyze `src` and return a tuple of (newline, encoding, decoded_contents).

    `newline` is either CRLF or LF (taken from the first physical line),
    `encoding` is the PEP 263 source encoding reported by
    :func:`tokenize.detect_encoding`, and `decoded_contents` is decoded with
    universal newlines (i.e. only LF).
    """
    srcbuf = io.BytesIO(src)
    encoding, lines = tokenize.detect_encoding(srcbuf.readline)
    # `lines` is empty for empty input; guard against IndexError and
    # default to LF in that case.
    newline = "\r\n" if lines and lines[0][-2:] == b"\r\n" else "\n"
    # detect_encoding consumed up to two lines from the buffer; rewind so the
    # decoded contents include the whole input.
    srcbuf.seek(0)
    return newline, encoding, io.TextIOWrapper(srcbuf, encoding).read()
619 pygram.python_grammar_no_print_statement_no_exec_statement,
620 pygram.python_grammar_no_print_statement,
621 pygram.python_grammar,
625 def lib2to3_parse(src_txt: str) -> Node:
626 """Given a string with source, return the lib2to3 Node."""
627 grammar = pygram.python_grammar_no_print_statement
628 if src_txt[-1] != "\n":
630 for grammar in GRAMMARS:
631 drv = driver.Driver(grammar, pytree.convert)
633 result = drv.parse_string(src_txt, True)
636 except ParseError as pe:
637 lineno, column = pe.context[1]
638 lines = src_txt.splitlines()
640 faulty_line = lines[lineno - 1]
642 faulty_line = "<line number missing in source>"
643 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
647 if isinstance(result, Leaf):
648 result = Node(syms.file_input, [result])
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    # lib2to3 nodes stringify to the exact source text they were parsed from
    # (prefix + value, recursively).
    return str(node)
661 class Visitor(Generic[T]):
662 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
664 def visit(self, node: LN) -> Iterator[T]:
665 """Main method to visit `node` and its children.
667 It tries to find a `visit_*()` method for the given `node.type`, like
668 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
669 If no dedicated `visit_*()` method is found, chooses `visit_default()`
672 Then yields objects of type `T` from the selected visitor.
675 name = token.tok_name[node.type]
677 name = type_repr(node.type)
678 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
680 def visit_default(self, node: LN) -> Iterator[T]:
681 """Default `visit_*()` implementation. Recurses to children of `node`."""
682 if isinstance(node, Node):
683 for child in node.children:
684 yield from self.visit(child)
688 class DebugVisitor(Visitor[T]):
691 def visit_default(self, node: LN) -> Iterator[T]:
692 indent = " " * (2 * self.tree_depth)
693 if isinstance(node, Node):
694 _type = type_repr(node.type)
695 out(f"{indent}{_type}", fg="yellow")
697 for child in node.children:
698 yield from self.visit(child)
701 out(f"{indent}/{_type}", fg="yellow", bold=False)
703 _type = token.tok_name.get(node.type, str(node.type))
704 out(f"{indent}{_type}", fg="blue", nl=False)
706 # We don't have to handle prefixes for `Node` objects since
707 # that delegates to the first child anyway.
708 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
709 out(f" {node.value!r}", fg="blue", bold=False)
712 def show(cls, code: str) -> None:
713 """Pretty-print the lib2to3 AST of a given string of `code`.
715 Convenience method for debugging.
717 v: DebugVisitor[None] = DebugVisitor()
718 list(v.visit(lib2to3_parse(code)))
721 KEYWORDS = set(keyword.kwlist)
722 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
723 FLOW_CONTROL = {"return", "raise", "break", "continue"}
734 STANDALONE_COMMENT = 153
735 LOGIC_OPERATORS = {"and", "or"}
760 STARS = {token.STAR, token.DOUBLESTAR}
763 syms.argument, # double star in arglist
764 syms.trailer, # single argument to call
766 syms.varargslist, # lambdas
768 UNPACKING_PARENTS = {
769 syms.atom, # single element of a list or set literal
807 COMPREHENSION_PRIORITY = 20
809 TERNARY_PRIORITY = 16
812 COMPARATOR_PRIORITY = 10
823 token.DOUBLESLASH: 4,
833 class BracketTracker:
834 """Keeps track of brackets on a line."""
837 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
838 delimiters: Dict[LeafID, Priority] = Factory(dict)
839 previous: Optional[Leaf] = None
840 _for_loop_variable: int = 0
841 _lambda_arguments: int = 0
843 def mark(self, leaf: Leaf) -> None:
844 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
846 All leaves receive an int `bracket_depth` field that stores how deep
847 within brackets a given leaf is. 0 means there are no enclosing brackets
848 that started on this line.
850 If a leaf is itself a closing bracket, it receives an `opening_bracket`
851 field that it forms a pair with. This is a one-directional link to
852 avoid reference cycles.
854 If a leaf is a delimiter (a token on which Black can split the line if
855 needed) and it's on depth 0, its `id()` is stored in the tracker's
858 if leaf.type == token.COMMENT:
861 self.maybe_decrement_after_for_loop_variable(leaf)
862 self.maybe_decrement_after_lambda_arguments(leaf)
863 if leaf.type in CLOSING_BRACKETS:
865 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
866 leaf.opening_bracket = opening_bracket
867 leaf.bracket_depth = self.depth
869 delim = is_split_before_delimiter(leaf, self.previous)
870 if delim and self.previous is not None:
871 self.delimiters[id(self.previous)] = delim
873 delim = is_split_after_delimiter(leaf, self.previous)
875 self.delimiters[id(leaf)] = delim
876 if leaf.type in OPENING_BRACKETS:
877 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
880 self.maybe_increment_lambda_arguments(leaf)
881 self.maybe_increment_for_loop_variable(leaf)
    def any_open_brackets(self) -> bool:
        """Return True if there is a yet-unmatched open bracket on the line."""
        return bool(self.bracket_match)
887 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
888 """Return the highest priority of a delimiter found on the line.
890 Values are consistent with what `is_split_*_delimiter()` return.
891 Raises ValueError on no delimiters.
893 return max(v for k, v in self.delimiters.items() if k not in exclude)
895 def delimiter_count_with_priority(self, priority: int = 0) -> int:
896 """Return the number of delimiters with the given `priority`.
898 If no `priority` is passed, defaults to max priority on the line.
900 if not self.delimiters:
903 priority = priority or self.max_delimiter_priority()
904 return sum(1 for p in self.delimiters.values() if p == priority)
906 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
907 """In a for loop, or comprehension, the variables are often unpacks.
909 To avoid splitting on the comma in this situation, increase the depth of
910 tokens between `for` and `in`.
912 if leaf.type == token.NAME and leaf.value == "for":
914 self._for_loop_variable += 1
919 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
920 """See `maybe_increment_for_loop_variable` above for explanation."""
921 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
923 self._for_loop_variable -= 1
928 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
929 """In a lambda expression, there might be more than one argument.
931 To avoid splitting on the comma in this situation, increase the depth of
932 tokens between `lambda` and `:`.
934 if leaf.type == token.NAME and leaf.value == "lambda":
936 self._lambda_arguments += 1
941 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
942 """See `maybe_increment_lambda_arguments` above for explanation."""
943 if self._lambda_arguments and leaf.type == token.COLON:
945 self._lambda_arguments -= 1
950 def get_open_lsqb(self) -> Optional[Leaf]:
951 """Return the most recent opening square bracket (if any)."""
952 return self.bracket_match.get((self.depth - 1, token.RSQB))
957 """Holds leaves and comments. Can be printed with `str(line)`."""
960 leaves: List[Leaf] = Factory(list)
961 comments: List[Tuple[Index, Leaf]] = Factory(list)
962 bracket_tracker: BracketTracker = Factory(BracketTracker)
963 inside_brackets: bool = False
964 should_explode: bool = False
966 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
967 """Add a new `leaf` to the end of the line.
969 Unless `preformatted` is True, the `leaf` will receive a new consistent
970 whitespace prefix and metadata applied by :class:`BracketTracker`.
971 Trailing commas are maybe removed, unpacked for loop variables are
972 demoted from being delimiters.
974 Inline comments are put aside.
976 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
980 if token.COLON == leaf.type and self.is_class_paren_empty:
982 if self.leaves and not preformatted:
983 # Note: at this point leaf.prefix should be empty except for
984 # imports, for which we only preserve newlines.
985 leaf.prefix += whitespace(
986 leaf, complex_subscript=self.is_complex_subscript(leaf)
988 if self.inside_brackets or not preformatted:
989 self.bracket_tracker.mark(leaf)
990 self.maybe_remove_trailing_comma(leaf)
991 if not self.append_comment(leaf):
992 self.leaves.append(leaf)
994 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
995 """Like :func:`append()` but disallow invalid standalone comment structure.
997 Raises ValueError when any `leaf` is appended after a standalone comment
998 or when a standalone comment is not the first leaf on the line.
1000 if self.bracket_tracker.depth == 0:
1002 raise ValueError("cannot append to standalone comments")
1004 if self.leaves and leaf.type == STANDALONE_COMMENT:
1006 "cannot append standalone comments to a populated line"
1009 self.append(leaf, preformatted=preformatted)
1012 def is_comment(self) -> bool:
1013 """Is this line a standalone comment?"""
1014 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1017 def is_decorator(self) -> bool:
1018 """Is this line a decorator?"""
1019 return bool(self) and self.leaves[0].type == token.AT
1022 def is_import(self) -> bool:
1023 """Is this an import line?"""
1024 return bool(self) and is_import(self.leaves[0])
1027 def is_class(self) -> bool:
1028 """Is this line a class definition?"""
1031 and self.leaves[0].type == token.NAME
1032 and self.leaves[0].value == "class"
1036 def is_stub_class(self) -> bool:
1037 """Is this line a class definition with a body consisting only of "..."?"""
1038 return self.is_class and self.leaves[-3:] == [
1039 Leaf(token.DOT, ".") for _ in range(3)
1043 def is_def(self) -> bool:
1044 """Is this a function definition? (Also returns True for async defs.)"""
1046 first_leaf = self.leaves[0]
1051 second_leaf: Optional[Leaf] = self.leaves[1]
1054 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1055 first_leaf.type == token.ASYNC
1056 and second_leaf is not None
1057 and second_leaf.type == token.NAME
1058 and second_leaf.value == "def"
1062 def is_class_paren_empty(self) -> bool:
1063 """Is this a class with no base classes but using parentheses?
1065 Those are unnecessary and should be removed.
1069 and len(self.leaves) == 4
1071 and self.leaves[2].type == token.LPAR
1072 and self.leaves[2].value == "("
1073 and self.leaves[3].type == token.RPAR
1074 and self.leaves[3].value == ")"
1078 def is_triple_quoted_string(self) -> bool:
1079 """Is the line a triple quoted string?"""
1082 and self.leaves[0].type == token.STRING
1083 and self.leaves[0].value.startswith(('"""', "'''"))
1086 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1087 """If so, needs to be split before emitting."""
1088 for leaf in self.leaves:
1089 if leaf.type == STANDALONE_COMMENT:
1090 if leaf.bracket_depth <= depth_limit:
1095 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1096 """Remove trailing comma if there is one and it's safe."""
1099 and self.leaves[-1].type == token.COMMA
1100 and closing.type in CLOSING_BRACKETS
1104 if closing.type == token.RBRACE:
1105 self.remove_trailing_comma()
1108 if closing.type == token.RSQB:
1109 comma = self.leaves[-1]
1110 if comma.parent and comma.parent.type == syms.listmaker:
1111 self.remove_trailing_comma()
1114 # For parens let's check if it's safe to remove the comma.
1115 # Imports are always safe.
1117 self.remove_trailing_comma()
1120         # Otherwise, if the trailing one is the only one, we might mistakenly
1121 # change a tuple into a different type by removing the comma.
1122 depth = closing.bracket_depth + 1
1124 opening = closing.opening_bracket
1125 for _opening_index, leaf in enumerate(self.leaves):
1132 for leaf in self.leaves[_opening_index + 1 :]:
1136 bracket_depth = leaf.bracket_depth
1137 if bracket_depth == depth and leaf.type == token.COMMA:
1139 if leaf.parent and leaf.parent.type == syms.arglist:
1144 self.remove_trailing_comma()
1149 def append_comment(self, comment: Leaf) -> bool:
1150 """Add an inline or standalone comment to the line."""
1152 comment.type == STANDALONE_COMMENT
1153 and self.bracket_tracker.any_open_brackets()
1158 if comment.type != token.COMMENT:
1161 after = len(self.leaves) - 1
1163 comment.type = STANDALONE_COMMENT
1168 self.comments.append((after, comment))
1171 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1172 """Generate comments that should appear directly after `leaf`.
1174 Provide a non-negative leaf `_index` to speed up the function.
1177 for _index, _leaf in enumerate(self.leaves):
1184 for index, comment_after in self.comments:
1188 def remove_trailing_comma(self) -> None:
1189 """Remove the trailing comma and moves the comments attached to it."""
1190 comma_index = len(self.leaves) - 1
1191 for i in range(len(self.comments)):
1192 comment_index, comment = self.comments[i]
1193 if comment_index == comma_index:
1194 self.comments[i] = (comma_index - 1, comment)
1197 def is_complex_subscript(self, leaf: Leaf) -> bool:
1198 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1200 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
1202 if open_lsqb is None:
1205 subscript_start = open_lsqb.next_sibling
1207 isinstance(subscript_start, Node)
1208 and subscript_start.type == syms.subscriptlist
1210 subscript_start = child_towards(subscript_start, leaf)
1211 return subscript_start is not None and any(
1212 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1215 def __str__(self) -> str:
1216 """Render the line."""
1220 indent = " " * self.depth
1221 leaves = iter(self.leaves)
1222 first = next(leaves)
1223 res = f"{first.prefix}{indent}{first.value}"
1226 for _, comment in self.comments:
1230 def __bool__(self) -> bool:
1231 """Return True if the line has leaves or comments."""
1232 return bool(self.leaves or self.comments)
1235 class UnformattedLines(Line):
1236 """Just like :class:`Line` but stores lines which aren't reformatted."""
1238 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1239 """Just add a new `leaf` to the end of the lines.
1241 The `preformatted` argument is ignored.
1243 Keeps track of indentation `depth`, which is useful when the user
1244 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
1247 list(generate_comments(leaf))
1248 except FormatOn as f_on:
1249 self.leaves.append(f_on.leaf_from_consumed(leaf))
1252 self.leaves.append(leaf)
1253 if leaf.type == token.INDENT:
1255 elif leaf.type == token.DEDENT:
1258 def __str__(self) -> str:
1259 """Render unformatted lines from leaves which were added with `append()`.
1261 `depth` is not used for indentation in this case.
1267 for leaf in self.leaves:
    def append_comment(self, comment: Leaf) -> bool:
        """Not implemented in this class. Raises `NotImplementedError`."""
        # Unformatted lines keep comments inline in their leaves instead.
        raise NotImplementedError("Unformatted lines don't store comments separately.")
1275 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1276 """Does nothing and returns False."""
1279 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1280 """Does nothing and returns False."""
1285 class EmptyLineTracker:
1286 """Provides a stateful method that returns the number of potential extra
1287 empty lines needed before and after the currently processed line.
1289 Note: this tracker works on lines that haven't been split yet. It assumes
1290 the prefix of the first leaf consists of optional newlines. Those newlines
1291 are consumed by `maybe_empty_lines()` and included in the computation.
1294 is_pyi: bool = False
1295 previous_line: Optional[Line] = None
1296 previous_after: int = 0
1297 previous_defs: List[int] = Factory(list)
1299 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1300 """Return the number of extra empty lines before and after the `current_line`.
1302 This is for separating `def`, `async def` and `class` with extra empty
1303 lines (two on module-level).
1305 if isinstance(current_line, UnformattedLines):
1308 before, after = self._maybe_empty_lines(current_line)
1309 before -= self.previous_after
1310 self.previous_after = after
1311 self.previous_line = current_line
1312 return before, after
1314 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1316 if current_line.depth == 0:
1317 max_allowed = 1 if self.is_pyi else 2
1318 if current_line.leaves:
1319 # Consume the first leaf's extra newlines.
1320 first_leaf = current_line.leaves[0]
1321 before = first_leaf.prefix.count("\n")
1322 before = min(before, max_allowed)
1323 first_leaf.prefix = ""
1326 depth = current_line.depth
1327 while self.previous_defs and self.previous_defs[-1] >= depth:
1328 self.previous_defs.pop()
1330 before = 0 if depth else 1
1332 before = 1 if depth else 2
1333 is_decorator = current_line.is_decorator
1334 if is_decorator or current_line.is_def or current_line.is_class:
1335 if not is_decorator:
1336 self.previous_defs.append(depth)
1337 if self.previous_line is None:
1338 # Don't insert empty lines before the first line in the file.
1341 if self.previous_line.is_decorator:
1344 if self.previous_line.depth < current_line.depth and (
1345 self.previous_line.is_class or self.previous_line.is_def
1350 self.previous_line.is_comment
1351 and self.previous_line.depth == current_line.depth
1357 if self.previous_line.depth > current_line.depth:
1359 elif current_line.is_class or self.previous_line.is_class:
1360 if current_line.is_stub_class and self.previous_line.is_stub_class:
1368 if current_line.depth and newlines:
1374 and self.previous_line.is_import
1375 and not current_line.is_import
1376 and depth == self.previous_line.depth
1378 return (before or 1), 0
1382 and self.previous_line.is_class
1383 and current_line.is_triple_quoted_string
1391 class LineGenerator(Visitor[Line]):
1392 """Generates reformatted Line objects. Empty lines are not emitted.
1394 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1395 in ways that will no longer stringify to valid Python code on the tree.
1398 is_pyi: bool = False
1399 normalize_strings: bool = True
1400 current_line: Line = Factory(Line)
1401 remove_u_prefix: bool = False
1403 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1406 If the line is empty, only emit if it makes sense.
1407 If the line is too long, split it first and then generate.
1409 If any lines were generated, set up a new current_line.
1411 if not self.current_line:
1412 if self.current_line.__class__ == type:
1413 self.current_line.depth += indent
1415 self.current_line = type(depth=self.current_line.depth + indent)
1416 return # Line is empty, don't emit. Creating a new one unnecessary.
1418 complete_line = self.current_line
1419 self.current_line = type(depth=complete_line.depth + indent)
1422 def visit(self, node: LN) -> Iterator[Line]:
1423 """Main method to visit `node` and its children.
1425 Yields :class:`Line` objects.
1427 if isinstance(self.current_line, UnformattedLines):
1428 # File contained `# fmt: off`
1429 yield from self.visit_unformatted(node)
1432 yield from super().visit(node)
1434 def visit_default(self, node: LN) -> Iterator[Line]:
1435 """Default `visit_*()` implementation. Recurses to children of `node`."""
1436 if isinstance(node, Leaf):
1437 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1439 for comment in generate_comments(node):
1440 if any_open_brackets:
1441 # any comment within brackets is subject to splitting
1442 self.current_line.append(comment)
1443 elif comment.type == token.COMMENT:
1444 # regular trailing comment
1445 self.current_line.append(comment)
1446 yield from self.line()
1449 # regular standalone comment
1450 yield from self.line()
1452 self.current_line.append(comment)
1453 yield from self.line()
1455 except FormatOff as f_off:
1456 f_off.trim_prefix(node)
1457 yield from self.line(type=UnformattedLines)
1458 yield from self.visit(node)
1460 except FormatOn as f_on:
1461 # This only happens here if somebody says "fmt: on" multiple
1463 f_on.trim_prefix(node)
1464 yield from self.visit_default(node)
1467 normalize_prefix(node, inside_brackets=any_open_brackets)
1468 if self.normalize_strings and node.type == token.STRING:
1469 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1470 normalize_string_quotes(node)
1471 if node.type not in WHITESPACE:
1472 self.current_line.append(node)
1473 yield from super().visit_default(node)
1475 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1476 """Increase indentation level, maybe yield a line."""
1477 # In blib2to3 INDENT never holds comments.
1478 yield from self.line(+1)
1479 yield from self.visit_default(node)
1481 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1482 """Decrease indentation level, maybe yield a line."""
1483 # The current line might still wait for trailing comments. At DEDENT time
1484 # there won't be any (they would be prefixes on the preceding NEWLINE).
1485 # Emit the line then.
1486 yield from self.line()
1488 # While DEDENT has no value, its prefix may contain standalone comments
1489 # that belong to the current indentation level. Get 'em.
1490 yield from self.visit_default(node)
1492 # Finally, emit the dedent.
1493 yield from self.line(-1)
1496 self, node: Node, keywords: Set[str], parens: Set[str]
1497 ) -> Iterator[Line]:
1498 """Visit a statement.
1500 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1501 `def`, `with`, `class`, `assert` and assignments.
1503 The relevant Python language `keywords` for a given statement will be
1504 NAME leaves within it. This methods puts those on a separate line.
1506 `parens` holds a set of string leaf values immediately after which
1507 invisible parens should be put.
1509 normalize_invisible_parens(node, parens_after=parens)
1510 for child in node.children:
1511 if child.type == token.NAME and child.value in keywords: # type: ignore
1512 yield from self.line()
1514 yield from self.visit(child)
1516 def visit_suite(self, node: Node) -> Iterator[Line]:
1517 """Visit a suite."""
1518 if self.is_pyi and is_stub_suite(node):
1519 yield from self.visit(node.children[2])
1521 yield from self.visit_default(node)
1523 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1524 """Visit a statement without nested statements."""
1525 is_suite_like = node.parent and node.parent.type in STATEMENT
1527 if self.is_pyi and is_stub_body(node):
1528 yield from self.visit_default(node)
1530 yield from self.line(+1)
1531 yield from self.visit_default(node)
1532 yield from self.line(-1)
1535 if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1536 yield from self.line()
1537 yield from self.visit_default(node)
1539 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1540 """Visit `async def`, `async for`, `async with`."""
1541 yield from self.line()
1543 children = iter(node.children)
1544 for child in children:
1545 yield from self.visit(child)
1547 if child.type == token.ASYNC:
1550 internal_stmt = next(children)
1551 for child in internal_stmt.children:
1552 yield from self.visit(child)
1554 def visit_decorators(self, node: Node) -> Iterator[Line]:
1555 """Visit decorators."""
1556 for child in node.children:
1557 yield from self.line()
1558 yield from self.visit(child)
1560 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1561 """Remove a semicolon and put the other statement on a separate line."""
1562 yield from self.line()
1564 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1565 """End of file. Process outstanding comments and end with a newline."""
1566 yield from self.visit_default(leaf)
1567 yield from self.line()
1569 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1570 """Used when file contained a `# fmt: off`."""
1571 if isinstance(node, Node):
1572 for child in node.children:
1573 yield from self.visit(child)
1577 self.current_line.append(node)
1578 except FormatOn as f_on:
1579 f_on.trim_prefix(node)
1580 yield from self.line()
1581 yield from self.visit(node)
1583 if node.type == token.ENDMARKER:
1584 # somebody decided not to put a final `# fmt: on`
1585 yield from self.line()
1587 def __attrs_post_init__(self) -> None:
1588 """You are in a twisty little maze of passages."""
1591 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1592 self.visit_if_stmt = partial(
1593 v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1595 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1596 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1597 self.visit_try_stmt = partial(
1598 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1600 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1601 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1602 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1603 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1604 self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1605 self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1606 self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1607 self.visit_async_funcdef = self.visit_async_stmt
1608 self.visit_decorated = self.visit_decorators
1611 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1612 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1613 OPENING_BRACKETS = set(BRACKET.keys())
1614 CLOSING_BRACKETS = set(BRACKET.values())
1615 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1616 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1619 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1620 """Return whitespace prefix if needed for the given `leaf`.
1622 `complex_subscript` signals whether the given leaf is part of a subscription
1623 which has non-trivial arguments, like arithmetic expressions or function calls.
1631 if t in ALWAYS_NO_SPACE:
1634 if t == token.COMMENT:
1637 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1638 if t == token.COLON and p.type not in {
1645 prev = leaf.prev_sibling
1647 prevp = preceding_leaf(p)
1648 if not prevp or prevp.type in OPENING_BRACKETS:
1651 if t == token.COLON:
1652 if prevp.type == token.COLON:
1655 elif prevp.type != token.COMMA and not complex_subscript:
1660 if prevp.type == token.EQUAL:
1662 if prevp.parent.type in {
1670 elif prevp.parent.type == syms.typedargslist:
1671 # A bit hacky: if the equal sign has whitespace, it means we
1672 # previously found it's a typed argument. So, we're using
1676 elif prevp.type in STARS:
1677 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1680 elif prevp.type == token.COLON:
1681 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1682 return SPACE if complex_subscript else NO
1686 and prevp.parent.type == syms.factor
1687 and prevp.type in MATH_OPERATORS
1692 prevp.type == token.RIGHTSHIFT
1694 and prevp.parent.type == syms.shift_expr
1695 and prevp.prev_sibling
1696 and prevp.prev_sibling.type == token.NAME
1697 and prevp.prev_sibling.value == "print" # type: ignore
1699 # Python 2 print chevron
1702 elif prev.type in OPENING_BRACKETS:
1705 if p.type in {syms.parameters, syms.arglist}:
1706 # untyped function signatures or calls
1707 if not prev or prev.type != token.COMMA:
1710 elif p.type == syms.varargslist:
1712 if prev and prev.type != token.COMMA:
1715 elif p.type == syms.typedargslist:
1716 # typed function signatures
1720 if t == token.EQUAL:
1721 if prev.type != syms.tname:
1724 elif prev.type == token.EQUAL:
1725 # A bit hacky: if the equal sign has whitespace, it means we
1726 # previously found it's a typed argument. So, we're using that, too.
1729 elif prev.type != token.COMMA:
1732 elif p.type == syms.tname:
1735 prevp = preceding_leaf(p)
1736 if not prevp or prevp.type != token.COMMA:
1739 elif p.type == syms.trailer:
1740 # attributes and calls
1741 if t == token.LPAR or t == token.RPAR:
1746 prevp = preceding_leaf(p)
1747 if not prevp or prevp.type != token.NUMBER:
1750 elif t == token.LSQB:
1753 elif prev.type != token.COMMA:
1756 elif p.type == syms.argument:
1758 if t == token.EQUAL:
1762 prevp = preceding_leaf(p)
1763 if not prevp or prevp.type == token.LPAR:
1766 elif prev.type in {token.EQUAL} | STARS:
1769 elif p.type == syms.decorator:
1773 elif p.type == syms.dotted_name:
1777 prevp = preceding_leaf(p)
1778 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1781 elif p.type == syms.classdef:
1785 if prev and prev.type == token.LPAR:
1788 elif p.type in {syms.subscript, syms.sliceop}:
1791 assert p.parent is not None, "subscripts are always parented"
1792 if p.parent.type == syms.subscriptlist:
1797 elif not complex_subscript:
1800 elif p.type == syms.atom:
1801 if prev and t == token.DOT:
1802 # dots, but not the first one.
1805 elif p.type == syms.dictsetmaker:
1807 if prev and prev.type == token.DOUBLESTAR:
1810 elif p.type in {syms.factor, syms.star_expr}:
1813 prevp = preceding_leaf(p)
1814 if not prevp or prevp.type in OPENING_BRACKETS:
1817 prevp_parent = prevp.parent
1818 assert prevp_parent is not None
1819 if prevp.type == token.COLON and prevp_parent.type in {
1825 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1828 elif t == token.NAME or t == token.NUMBER:
1831 elif p.type == syms.import_from:
1833 if prev and prev.type == token.DOT:
1836 elif t == token.NAME:
1840 if prev and prev.type == token.DOT:
1843 elif p.type == syms.sliceop:
1849 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1850 """Return the first leaf that precedes `node`, if any."""
1852 res = node.prev_sibling
1854 if isinstance(res, Leaf):
1858 return list(res.leaves())[-1]
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Walks up the parent chain from `descendant` until the next step up would
    be `ancestor` itself; returns None if `ancestor` is never reached.
    """
    node: Optional[LN] = descendant
    while node and node.parent != ancestor:
        # Step upward; without this the loop would never terminate.
        node = node.parent
    return node
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    # Not a split-after delimiter.  Return an explicit 0 so the function
    # always honors its `int` return annotation instead of falling through
    # and returning None.
    return 0
1889 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1890 """Return the priority of the `leaf` delimiter, given a line before after it.
1892 The delimiter priorities returned here are from those delimiters that would
1893 cause a line break before themselves.
1895 Higher numbers are higher priority.
1897 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1898 # * and ** might also be MATH_OPERATORS but in this case they are not.
1899 # Don't treat them as a delimiter.
1903 leaf.type == token.DOT
1905 and leaf.parent.type not in {syms.import_from, syms.dotted_name}
1906 and (previous is None or previous.type in CLOSING_BRACKETS)
1911 leaf.type in MATH_OPERATORS
1913 and leaf.parent.type not in {syms.factor, syms.star_expr}
1915 return MATH_PRIORITIES[leaf.type]
1917 if leaf.type in COMPARATORS:
1918 return COMPARATOR_PRIORITY
1921 leaf.type == token.STRING
1922 and previous is not None
1923 and previous.type == token.STRING
1925 return STRING_PRIORITY
1927 if leaf.type != token.NAME:
1933 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1935 return COMPREHENSION_PRIORITY
1940 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1942 return COMPREHENSION_PRIORITY
1944 if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
1945 return TERNARY_PRIORITY
1947 if leaf.value == "is":
1948 return COMPARATOR_PRIORITY
1953 and leaf.parent.type in {syms.comp_op, syms.comparison}
1955 previous is not None
1956 and previous.type == token.NAME
1957 and previous.value == "not"
1960 return COMPARATOR_PRIORITY
1965 and leaf.parent.type == syms.comp_op
1967 previous is not None
1968 and previous.type == token.NAME
1969 and previous.value == "is"
1972 return COMPARATOR_PRIORITY
1974 if leaf.value in LOGIC_OPERATORS and leaf.parent:
1975 return LOGIC_PRIORITY
1980 def generate_comments(leaf: LN) -> Iterator[Leaf]:
1981 """Clean the prefix of the `leaf` and generate comments from it, if any.
1983 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1984 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1985 move because it does away with modifying the grammar to include all the
1986 possible places in which comments can be placed.
1988 The sad consequence for us though is that comments don't "belong" anywhere.
1989 This is why this function generates simple parentless Leaf objects for
1990 comments. We simply don't know what the correct parent should be.
1992 No matter though, we can live without this. We really only need to
1993 differentiate between inline and standalone comments. The latter don't
1994 share the line with any code.
1996 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1997 are emitted with a fake STANDALONE_COMMENT token identifier.
2008 for index, line in enumerate(p.split("\n")):
2009 consumed += len(line) + 1 # adding the length of the split '\n'
2010 line = line.lstrip()
2013 if not line.startswith("#"):
2016 if index == 0 and leaf.type != token.ENDMARKER:
2017 comment_type = token.COMMENT # simple trailing comment
2019 comment_type = STANDALONE_COMMENT
2020 comment = make_comment(line)
2021 yield Leaf(comment_type, comment, prefix="\n" * nlines)
2023 if comment in {"# fmt: on", "# yapf: enable"}:
2024 raise FormatOn(consumed)
2026 if comment in {"# fmt: off", "# yapf: disable"}:
2027 if comment_type == STANDALONE_COMMENT:
2028 raise FormatOff(consumed)
2030 prev = preceding_leaf(leaf)
2031 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
2032 raise FormatOff(consumed)
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.
    """
    content = content.rstrip()
    # Guard against empty (or whitespace-only) input; `content[0]` below
    # would raise IndexError otherwise.
    if not content:
        return "#"

    if content[0] == "#":
        # Strip the existing hash sign; a single one is re-added uniformly
        # at the end.
        content = content[1:]
    if content and content[0] not in " !:#":
        # Regular comments get exactly one space between "#" and the text.
        # "#!", "#:", and "##" style markers are left untouched.
        content = " " + content
    return "#" + content
2057 line: Line, line_length: int, inner: bool = False, py36: bool = False
2058 ) -> Iterator[Line]:
2059 """Split a `line` into potentially many lines.
2061 They should fit in the allotted `line_length` but might not be able to.
2062 `inner` signifies that there were a pair of brackets somewhere around the
2063 current `line`, possibly transitively. This means we can fallback to splitting
2064 by delimiters if the LHS/RHS don't yield any results.
2066 If `py36` is True, splitting may generate syntax that is only compatible
2067 with Python 3.6 and later.
2069 if isinstance(line, UnformattedLines) or line.is_comment:
2073 line_str = str(line).strip("\n")
2074 if not line.should_explode and is_line_short_enough(
2075 line, line_length=line_length, line_str=line_str
2080 split_funcs: List[SplitFunc]
2082 split_funcs = [left_hand_split]
2085 def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
2086 for omit in generate_trailers_to_omit(line, line_length):
2087 lines = list(right_hand_split(line, line_length, py36, omit=omit))
2088 if is_line_short_enough(lines[0], line_length=line_length):
2092 # All splits failed, best effort split with no omits.
2093 # This mostly happens to multiline strings that are by definition
2094 # reported as not fitting a single line.
2095 yield from right_hand_split(line, py36)
2097 if line.inside_brackets:
2098 split_funcs = [delimiter_split, standalone_comment_split, rhs]
2101 for split_func in split_funcs:
2102 # We are accumulating lines in `result` because we might want to abort
2103 # mission and return the original line in the end, or attempt a different
2105 result: List[Line] = []
2107 for l in split_func(line, py36):
2108 if str(l).strip("\n") == line_str:
2109 raise CannotSplit("Split function returned an unchanged result")
2112 split_line(l, line_length=line_length, inner=True, py36=py36)
2114 except CannotSplit as cs:
2125 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
2126 """Split line into many lines, starting with the first matching bracket pair.
2128 Note: this usually looks weird, only use this for function definitions.
2129 Prefer RHS otherwise. This is why this function is not symmetrical with
2130 :func:`right_hand_split` which also handles optional parentheses.
2132 head = Line(depth=line.depth)
2133 body = Line(depth=line.depth + 1, inside_brackets=True)
2134 tail = Line(depth=line.depth)
2135 tail_leaves: List[Leaf] = []
2136 body_leaves: List[Leaf] = []
2137 head_leaves: List[Leaf] = []
2138 current_leaves = head_leaves
2139 matching_bracket = None
2140 for leaf in line.leaves:
2142 current_leaves is body_leaves
2143 and leaf.type in CLOSING_BRACKETS
2144 and leaf.opening_bracket is matching_bracket
2146 current_leaves = tail_leaves if body_leaves else head_leaves
2147 current_leaves.append(leaf)
2148 if current_leaves is head_leaves:
2149 if leaf.type in OPENING_BRACKETS:
2150 matching_bracket = leaf
2151 current_leaves = body_leaves
2152 # Since body is a new indent level, remove spurious leading whitespace.
2154 normalize_prefix(body_leaves[0], inside_brackets=True)
2155 # Build the new lines.
2156 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2158 result.append(leaf, preformatted=True)
2159 for comment_after in line.comments_after(leaf):
2160 result.append(comment_after, preformatted=True)
2161 bracket_split_succeeded_or_raise(head, body, tail)
2162 for result in (head, body, tail):
2167 def right_hand_split(
2168 line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
2169 ) -> Iterator[Line]:
2170 """Split line into many lines, starting with the last matching bracket pair.
2172 If the split was by optional parentheses, attempt splitting without them, too.
2173 `omit` is a collection of closing bracket IDs that shouldn't be considered for
2176 Note: running this function modifies `bracket_depth` on the leaves of `line`.
2178 head = Line(depth=line.depth)
2179 body = Line(depth=line.depth + 1, inside_brackets=True)
2180 tail = Line(depth=line.depth)
2181 tail_leaves: List[Leaf] = []
2182 body_leaves: List[Leaf] = []
2183 head_leaves: List[Leaf] = []
2184 current_leaves = tail_leaves
2185 opening_bracket = None
2186 closing_bracket = None
2187 for leaf in reversed(line.leaves):
2188 if current_leaves is body_leaves:
2189 if leaf is opening_bracket:
2190 current_leaves = head_leaves if body_leaves else tail_leaves
2191 current_leaves.append(leaf)
2192 if current_leaves is tail_leaves:
2193 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2194 opening_bracket = leaf.opening_bracket
2195 closing_bracket = leaf
2196 current_leaves = body_leaves
2197 tail_leaves.reverse()
2198 body_leaves.reverse()
2199 head_leaves.reverse()
2200 # Since body is a new indent level, remove spurious leading whitespace.
2202 normalize_prefix(body_leaves[0], inside_brackets=True)
2204 # No `head` means the split failed. Either `tail` has all content or
2205 # the matching `opening_bracket` wasn't available on `line` anymore.
2206 raise CannotSplit("No brackets found")
2208 # Build the new lines.
2209 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2211 result.append(leaf, preformatted=True)
2212 for comment_after in line.comments_after(leaf):
2213 result.append(comment_after, preformatted=True)
2214 bracket_split_succeeded_or_raise(head, body, tail)
2215 assert opening_bracket and closing_bracket
2217 # the opening bracket is an optional paren
2218 opening_bracket.type == token.LPAR
2219 and not opening_bracket.value
2220 # the closing bracket is an optional paren
2221 and closing_bracket.type == token.RPAR
2222 and not closing_bracket.value
2223 # there are no standalone comments in the body
2224 and not line.contains_standalone_comments(0)
2225 # and it's not an import (optional parens are the only thing we can split
2226 # on in this case; attempting a split without them is a waste of time)
2227 and not line.is_import
2229 omit = {id(closing_bracket), *omit}
2230 if can_omit_invisible_parens(body, line_length):
2232 yield from right_hand_split(line, line_length, py36=py36, omit=omit)
2237 ensure_visible(opening_bracket)
2238 ensure_visible(closing_bracket)
2239 body.should_explode = should_explode(body, opening_bracket)
2240 for result in (head, body, tail):
2245 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2246 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2248 Do nothing otherwise.
2250 A left- or right-hand split is based on a pair of brackets. Content before
2251 (and including) the opening bracket is left on one line, content inside the
2252 brackets is put on a separate line, and finally content starting with and
2253 following the closing bracket is put on a separate line.
2255 Those are called `head`, `body`, and `tail`, respectively. If the split
2256 produced the same line (all content in `head`) or ended up with an empty `body`
2257 and the `tail` is just the closing bracket, then it's considered failed.
2259 tail_len = len(str(tail).strip())
2262 raise CannotSplit("Splitting brackets produced the same line")
2266 f"Splitting brackets on an empty body to save "
2267 f"{tail_len} characters is not worth it"
2271 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2272 """Normalize prefix of the first leaf in every line returned by `split_func`.
2274 This is a decorator over relevant split functions.
2278 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
2279 for l in split_func(line, py36):
2280 normalize_prefix(l.leaves[0], inside_brackets=True)
2283 return split_wrapper
2286 @dont_increase_indentation
2287 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
2288 """Split according to delimiters of the highest priority.
2290 If `py36` is True, the split will add trailing commas also in function
2291 signatures that contain `*` and `**`.
2294 last_leaf = line.leaves[-1]
2296 raise CannotSplit("Line empty")
2298 bt = line.bracket_tracker
2300 delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2302 raise CannotSplit("No delimiters found")
2304 if delimiter_priority == DOT_PRIORITY:
2305 if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2306 raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2308 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2309 lowest_depth = sys.maxsize
2310 trailing_comma_safe = True
2312 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2313 """Append `leaf` to current line or to new line if appending impossible."""
2314 nonlocal current_line
2316 current_line.append_safe(leaf, preformatted=True)
2317 except ValueError as ve:
2320 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2321 current_line.append(leaf)
2323 for index, leaf in enumerate(line.leaves):
2324 yield from append_to_line(leaf)
2326 for comment_after in line.comments_after(leaf, index):
2327 yield from append_to_line(comment_after)
2329 lowest_depth = min(lowest_depth, leaf.bracket_depth)
2330 if leaf.bracket_depth == lowest_depth and is_vararg(
2331 leaf, within=VARARGS_PARENTS
2333 trailing_comma_safe = trailing_comma_safe and py36
2334 leaf_priority = bt.delimiters.get(id(leaf))
2335 if leaf_priority == delimiter_priority:
2338 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2342 and delimiter_priority == COMMA_PRIORITY
2343 and current_line.leaves[-1].type != token.COMMA
2344 and current_line.leaves[-1].type != STANDALONE_COMMENT
2346 current_line.append(Leaf(token.COMMA, ","))
2350 @dont_increase_indentation
2351 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2352 """Split standalone comments from the rest of the line."""
2353 if not line.contains_standalone_comments(0):
2354 raise CannotSplit("Line does not have any standalone comments")
2356 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2358 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2359 """Append `leaf` to current line or to new line if appending impossible."""
2360 nonlocal current_line
2362 current_line.append_safe(leaf, preformatted=True)
2363 except ValueError as ve:
2366 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2367 current_line.append(leaf)
2369 for index, leaf in enumerate(line.leaves):
2370 yield from append_to_line(leaf)
2372 for comment_after in line.comments_after(leaf, index):
2373 yield from append_to_line(comment_after)
2379 def is_import(leaf: Leaf) -> bool:
2380 """Return True if the given leaf starts an import statement."""
2387 (v == "import" and p and p.type == syms.import_name)
2388 or (v == "from" and p and p.type == syms.import_from)
2393 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2394 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2397 Note: don't use backslashes for formatting or you'll lose your voting rights.
2399 if not inside_brackets:
2400 spl = leaf.prefix.split("#")
2401 if "\\" not in spl[0]:
2402 nl_count = spl[-1].count("\n")
2405 leaf.prefix = "\n" * nl_count
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Make all string prefixes lowercase.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    orig_prefix = match.group(1)
    new_prefix = orig_prefix.lower()
    # Only strip the legacy `u` prefix when the caller explicitly asked for
    # it; stripping unconditionally would violate this function's contract.
    if remove_u_prefix:
        new_prefix = new_prefix.replace("u", "")
    leaf.value = f"{new_prefix}{match.group(2)}"
2427 def normalize_string_quotes(leaf: Leaf) -> None:
2428 """Prefer double quotes but only if it doesn't cause more escaping.
2430 Adds or removes backslashes as appropriate. Doesn't parse and fix
2431 strings nested in f-strings (yet).
2433 Note: Mutates its argument.
2435 value = leaf.value.lstrip("furbFURB")
2436 if value[:3] == '"""':
2439 elif value[:3] == "'''":
2442 elif value[0] == '"':
2448 first_quote_pos = leaf.value.find(orig_quote)
2449 if first_quote_pos == -1:
2450 return # There's an internal error
2452 prefix = leaf.value[:first_quote_pos]
2453 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2454 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2455 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2456 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2457 if "r" in prefix.casefold():
2458 if unescaped_new_quote.search(body):
2459 # There's at least one unescaped new_quote in this raw string
2460 # so converting is impossible
2463 # Do not introduce or remove backslashes in raw strings
2466 # remove unnecessary quotes
2467 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2468 if body != new_body:
2469 # Consider the string without unnecessary quotes as the original
2471 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2472 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2473 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2474 if new_quote == '"""' and new_body[-1] == '"':
2476 new_body = new_body[:-1] + '\\"'
2477 orig_escape_count = body.count("\\")
2478 new_escape_count = new_body.count("\\")
2479 if new_escape_count > orig_escape_count:
2480 return # Do not introduce more escaping
2482 if new_escape_count == orig_escape_count and orig_quote == '"':
2483 return # Prefer double quotes
2485 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2488 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2489 """Make existing optional parentheses invisible or create new ones.
2491 `parens_after` is a set of string leaf values immeditely after which parens
2494 Standardizes on visible parentheses for single-element tuples, and keeps
2495 existing visible parentheses for other tuples and generator expressions.
2498 list(generate_comments(node))
2500 return # This `node` has a prefix with `# fmt: off`, don't mess with parens.
2503 for index, child in enumerate(list(node.children)):
2505 if child.type == syms.atom:
2506 maybe_make_parens_invisible_in_atom(child)
2507 elif is_one_tuple(child):
2508 # wrap child in visible parentheses
2509 lpar = Leaf(token.LPAR, "(")
2510 rpar = Leaf(token.RPAR, ")")
2512 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2513 elif node.type == syms.import_from:
2514 # "import from" nodes store parentheses directly as part of
2516 if child.type == token.LPAR:
2517 # make parentheses invisible
2518 child.value = "" # type: ignore
2519 node.children[-1].value = "" # type: ignore
2520 elif child.type != token.STAR:
2521 # insert invisible parentheses
2522 node.insert_child(index, Leaf(token.LPAR, ""))
2523 node.append_child(Leaf(token.RPAR, ""))
2526 elif not (isinstance(child, Leaf) and is_multiline_string(child)):
2527 # wrap child in invisible parentheses
2528 lpar = Leaf(token.LPAR, "")
2529 rpar = Leaf(token.RPAR, "")
2530 index = child.remove() or 0
2531 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2533 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2536 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
2537 """If it's safe, make the parens in the atom `node` invisible, recusively."""
2539 node.type != syms.atom
2540 or is_empty_tuple(node)
2541 or is_one_tuple(node)
2543 or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2547 first = node.children[0]
2548 last = node.children[-1]
2549 if first.type == token.LPAR and last.type == token.RPAR:
2550 # make parentheses invisible
2551 first.value = "" # type: ignore
2552 last.value = "" # type: ignore
2553 if len(node.children) > 1:
2554 maybe_make_parens_invisible_in_atom(node.children[1])
2560 def is_empty_tuple(node: LN) -> bool:
2561 """Return True if `node` holds an empty tuple."""
2563 node.type == syms.atom
2564 and len(node.children) == 2
2565 and node.children[0].type == token.LPAR
2566 and node.children[1].type == token.RPAR
2570 def is_one_tuple(node: LN) -> bool:
2571 """Return True if `node` holds a tuple with one element, with or without parens."""
2572 if node.type == syms.atom:
2573 if len(node.children) != 3:
2576 lpar, gexp, rpar = node.children
2578 lpar.type == token.LPAR
2579 and gexp.type == syms.testlist_gexp
2580 and rpar.type == token.RPAR
2584 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2587 node.type in IMPLICIT_TUPLE
2588 and len(node.children) == 2
2589 and node.children[1].type == token.COMMA
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression."""
    if node.type == syms.yield_expr:
        return True

    if node.type == token.NAME and node.value == "yield":  # type: ignore
        return True

    # Otherwise only a parenthesized atom `( <yield ...> )` can qualify;
    # recurse into its interior expression.
    if node.type != syms.atom or len(node.children) != 3:
        return False

    lpar, interior, rpar = node.children
    if lpar.type != token.LPAR or rpar.type != token.RPAR:
        return False

    return is_yield(interior)
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS:
        return False

    parent = leaf.parent
    if parent is None:
        return False

    if parent.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if parent.parent is None:
            return False

        parent = parent.parent

    return parent.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    # Drop any string prefix characters (f/u/r/b, either case) so the quote
    # style is in a fixed position.
    body = leaf.value.lstrip("furbFURB")
    if "\n" not in body:
        return False

    return body.startswith(('"""', "'''"))
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body."""
    # A stub suite parses as exactly: NEWLINE INDENT <simple_stmt> DEDENT.
    if len(node.children) != 4:
        return False

    newline, indent, body, dedent = node.children
    if newline.type != token.NEWLINE or indent.type != token.INDENT:
        return False

    if dedent.type != token.DEDENT:
        return False

    return is_stub_body(body)
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    child = node.children[0]
    if child.type != syms.atom or len(child.children) != 3:
        return False

    # `...` parses as an atom of three DOT leaves.
    dot = Leaf(token.DOT, ".")
    return all(leaf == dot for leaf in child.children)
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    """
    if node.type != syms.atom:
        return 0

    first, last = node.children[0], node.children[-1]
    if first.type != token.LPAR or last.type != token.RPAR:
        return 0

    bt = BracketTracker()
    for child in node.children[1:-1]:
        # Feed every leaf between the parentheses to the tracker.
        if isinstance(child, Leaf):
            bt.mark(child)
        else:
            for leaf in child.leaves():
                bt.mark(leaf)

    try:
        return bt.max_delimiter_priority()

    except ValueError:
        # No delimiters were tracked between the parentheses.
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    # Invisible parens are LPAR/RPAR leaves with an empty value; restore the
    # actual bracket character so the leaf renders again.
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
    # Only consider brackets that open an atom or an import-from body.
    parent = opening_bracket.parent
    if parent is None or parent.type not in {syms.atom, syms.import_from}:
        return False

    if opening_bracket.value not in "[{(":
        return False

    try:
        last_leaf = line.leaves[-1]
        # A trailing comma shouldn't count as a delimiter of its own.
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        # Empty line or no delimiters at all.
        return False

    return max_priority == COMMA_PRIORITY
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # The first two characters of a string token (prefix + opening
            # quote) are enough to recognize an f-string.
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # A trailing comma is only 3.6+ when the argument list also
            # contains * or ** (directly or inside an argument node).
            if any(ch.type in STARS for ch in n.children):
                return True

            for ch in n.children:
                if ch.type == syms.argument and any(
                    argch.type in STARS for argch in ch.children
                ):
                    return True

    return False
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set yielded is empty, meaning "omit nothing".
    """
    omit: Set[LeafID] = set()

    # 4 * depth approximates the width of the line's leading indentation.
    length = 4 * line.depth
    opening_bracket = None
    closing_bracket = None
    optional_brackets: Set[LeafID] = set()
    inner_brackets: Set[LeafID] = set()
    # Walk the leaves from the end of the line towards its start, keeping a
    # running total of the rendered width seen so far.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
        # leaf_length exceeding value+prefix means an inline comment rides
        # along with this leaf.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
        # This leaf is no longer a candidate for optional omission.
        optional_brackets.discard(id(leaf))
        if leaf is opening_bracket:
            opening_bracket = None
        elif leaf.type in CLOSING_BRACKETS:
            inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            optional_brackets.add(id(opening_bracket))
        if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
            # Empty brackets would fail a split so treat them as "inner"
            # brackets (e.g. only add them to the `omit` set if another
            # pair of brackets was good enough.
            inner_brackets.add(id(leaf))
        # Remember this pair; any earlier closing bracket found later in the
        # scan will flush the accumulated inner brackets into `omit`.
        opening_bracket = leaf.opening_bracket
        omit.add(id(closing_bracket))
        omit.update(inner_brackets)
        inner_brackets.clear()
        closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file."""
    imports: Set[str] = set()
    for child in node.children:
        # __future__ imports may only appear in the initial run of simple
        # statements at the top of the module.
        if child.type != syms.simple_stmt:
            break

        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            is_docstring = (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            )
            if not is_docstring:
                break

        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break

            # Collect names from both the bare `import a, b` form and the
            # parenthesized/aliased `import (a as b, c)` form.
            for import_from_child in first_child.children[3:]:
                if isinstance(import_from_child, Leaf):
                    if import_from_child.type == token.NAME:
                        imports.add(import_from_child.value)
                else:
                    assert import_from_child.type == syms.import_as_names
                    for leaf in import_from_child.children:
                        if isinstance(leaf, Leaf) and leaf.type == token.NAME:
                            imports.add(leaf.value)
        else:
            break

    return imports
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        is_directory = child.is_dir()
        # Normalize to a root-relative POSIX path; directories get a trailing
        # slash so directory-only exclude patterns can match them.
        normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        if is_directory:
            normalized_path += "/"

        match = exclude.search(normalized_path)
        if match and match.group(0):
            report.path_ignored(child, f"matches --exclude={exclude.pattern}")
            continue

        if is_directory:
            yield from gen_python_files_in_dir(child, root, include, exclude, report)
        elif child.is_file() and include.search(normalized_path):
            yield child
def find_project_root(srcs: List[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.
    """
    if not srcs:
        return Path("/").resolve()

    common_base = min(Path(src).resolve() for src in srcs)
    if common_base.is_dir():
        # Append a fake file so `parents` below returns `common_base_dir`, too.
        common_base /= "fake-file"
    for directory in common_base.parents:
        has_marker = (
            (directory / ".git").is_dir()
            or (directory / ".hg").is_dir()
            or (directory / "pyproject.toml").is_file()
        )
        if has_marker:
            return directory

    # `parents` ends at the filesystem root, so that's what we fall back to.
    return directory
    """Provides a reformatting counter. Can be rendered with `str(report)`."""
    # When True, report every processed file, not just the changed ones.
    verbose: bool = False
    # Number of files that were (or, under --check, would be) reformatted.
    change_count: int = 0
    # Number of files that could not be formatted.
    failure_count: int = 0
2921 def done(self, src: Path, changed: Changed) -> None:
2922 """Increment the counter for successful reformatting. Write out a message."""
2923 if changed is Changed.YES:
2924 reformatted = "would reformat" if self.check else "reformatted"
2925 if self.verbose or not self.quiet:
2926 out(f"{reformatted} {src}")
2927 self.change_count += 1
2930 if changed is Changed.NO:
2931 msg = f"{src} already well formatted, good job."
2933 msg = f"{src} wasn't modified on disk since last run."
2934 out(msg, bold=False)
2935 self.same_count += 1
2937 def failed(self, src: Path, message: str) -> None:
2938 """Increment the counter for failed reformatting. Write out a message."""
2939 err(f"error: cannot format {src}: {message}")
2940 self.failure_count += 1
2942 def path_ignored(self, path: Path, message: str) -> None:
2944 out(f"{path} ignored: {message}", bold=False)
2947 def return_code(self) -> int:
2948 """Return the exit code that the app should use.
2950 This considers the current state of changed files and failures:
2951 - if there were any failures, return 123;
2952 - if any files were changed and --check is being used, return 1;
2953 - otherwise return 0.
2955 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2956 # 126 we have special returncodes reserved by the shell.
2957 if self.failure_count:
2960 elif self.change_count and self.check:
2965 def __str__(self) -> str:
2966 """Render a color report of the current state.
2968 Use `click.unstyle` to remove colors.
2971 reformatted = "would be reformatted"
2972 unchanged = "would be left unchanged"
2973 failed = "would fail to reformat"
2975 reformatted = "reformatted"
2976 unchanged = "left unchanged"
2977 failed = "failed to reformat"
2979 if self.change_count:
2980 s = "s" if self.change_count > 1 else ""
2982 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2985 s = "s" if self.same_count > 1 else ""
2986 report.append(f"{self.same_count} file{s} {unchanged}")
2987 if self.failure_count:
2988 s = "s" if self.failure_count > 1 else ""
2990 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2992 return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent."""

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        # Fields are visited in sorted order so the dump is deterministic.
        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    # If the *source* doesn't parse with Python's own AST, --safe can't be
    # used at all; the user must pass --fast or fix their syntax.
    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    # If the *output* doesn't parse, that is a bug in Black itself.
    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    # Finally compare canonical text dumps of both trees.
    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if `dst` reformats differently the second time."""
    # Formatting must be idempotent: running Black over its own output has to
    # produce the same text again.
    newdst = format_str(dst, line_length=line_length, mode=mode)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as log_file:
        for lines in output:
            log_file.write(lines)
            # Terminate every chunk with a newline unless it already has one.
            if lines and not lines.endswith("\n"):
                log_file.write("\n")
    return log_file.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`."""
    import difflib

    # Re-attach newlines after splitting so difflib sees complete lines.
    left = [f"{ln}\n" for ln in a.split("\n")]
    right = [f"{ln}\n" for ln in b.split("\n")]
    return "".join(
        difflib.unified_diff(left, right, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for pending_task in tasks:
        pending_task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        pending = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
        if not pending:
            return

        for task in pending:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*pending, loop=loop, return_exceptions=True)
        )
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    once = regex.sub(replacement, original)
    return regex.sub(replacement, once)
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible."""
    # Pair each element of the reversed sequence with its original index,
    # counting down from len - 1 to 0.
    yield from zip(range(len(sequence) - 1, -1, -1), reversed(sequence))
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Stops prematurely on multiline strings and standalone comments.
    """
    enumerator = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in enumerator(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        # Width of the leaf itself plus all comments attached after it.
        length = len(leaf.prefix) + len(leaf.value)
        length += sum(
            len(comment.value) for comment in line.comments_after(leaf, index)
        )

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    rendered = line_str or str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    # A stranded delimiter implies at least two leaves on the line.
    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # 4 * depth approximates the width of the line's leading indentation.
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
            length += leaf_length
            if length > line_length:
            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

        last.type == token.RPAR
        or last.type == token.RBRACE
        # don't use indexing for omitting optional parentheses;
        last.type == token.RSQB
        and last.parent.type != syms.trailer
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            length = 4 * line.depth
            seen_other_brackets = False
            for _index, leaf, leaf_length in enumerate_with_length(line):
                length += leaf_length
                if leaf is last.opening_bracket:
                    if seen_other_brackets or length <= line_length:
                elif leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    seen_other_brackets = True
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the cache file for the given formatting options."""
    # The cache is keyed by every option that can change formatting output,
    # so differently-configured runs never poison each other.
    suffix = ""
    if mode & FileMode.PYI:
        suffix += ".pyi"
    if mode & FileMode.PYTHON36:
        suffix += ".py36"
    return CACHE_DIR / f"cache.{line_length}{suffix}.pickle"
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    try:
        with cache_file.open("rb") as fp:
            # The cache is written by this program, so unpickling it is safe
            # as long as CACHE_DIR is trusted.
            cache: Cache = pickle.load(fp)
    except pickle.UnpicklingError:
        # Corrupt cache; treat it as empty and let write_cache replace it.
        return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not."""
    # Modification time plus size is a cheap change detector.
    st = path.stat()
    return st.st_mtime, st.st_size
def filter_cached(
    cache: Cache, sources: Iterable[Path]
) -> Tuple[List[Path], List[Path]]:
    """Split a list of paths into two.

    The first list contains paths of files that modified on disk or are not in the
    cache. The other list contains paths to non-modified files.
    """
    todo: List[Path] = []
    done: List[Path] = []
    for src in sources:
        src = src.resolve()
        # A missing cache entry and a stale one are handled identically:
        # the file has to be re-checked.
        if cache.get(src) != get_cache_info(src):
            todo.append(src)
        else:
            done.append(src)
    return todo, done
def write_cache(
    cache: Cache, sources: List[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file."""
    cache_file = get_cache_file(line_length, mode)
    try:
        if not CACHE_DIR.exists():
            CACHE_DIR.mkdir(parents=True)
        # Newly formatted files override any stale entries for the same path.
        fresh_entries = {src.resolve(): get_cache_info(src) for src in sources}
        new_cache = {**cache, **fresh_entries}
        with cache_file.open("wb") as fp:
            pickle.dump(new_cache, fp, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Caching is best-effort; failure to persist must not break formatting.
        pass
# Entry point when the module is executed directly.
if __name__ == "__main__":