All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 from asyncio.base_events import BaseEventLoop
3 from concurrent.futures import Executor, ProcessPoolExecutor
4 from datetime import datetime
6 from functools import lru_cache, partial, wraps
10 from multiprocessing import Manager, freeze_support
12 from pathlib import Path
40 from appdirs import user_cache_dir
41 from attr import dataclass, evolve, Factory
44 from typed_ast import ast3, ast27
47 from blib2to3.pytree import Node, Leaf, type_repr
48 from blib2to3 import pygram, pytree
49 from blib2to3.pgen2 import driver, token
50 from blib2to3.pgen2.grammar import Grammar
51 from blib2to3.pgen2.parse import ParseError
54 __version__ = "19.3b0"
55 DEFAULT_LINE_LENGTH = 88
57 r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
59 DEFAULT_INCLUDES = r"\.pyi?$"
60 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
72 LN = Union[Leaf, Node]
73 SplitFunc = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
76 CacheInfo = Tuple[Timestamp, FileSize]
77 Cache = Dict[Path, CacheInfo]
78 out = partial(click.secho, bold=True, err=True)
79 err = partial(click.secho, fg="red", err=True)
81 pygram.initialize(CACHE_DIR)
82 syms = pygram.python_symbols
class NothingChanged(UserWarning):
    """Signals that reformatting produced code identical to the source."""
class CannotSplit(Exception):
    """No readable split exists that fits the allotted line length."""
class InvalidInput(ValueError):
    """Signals that the input source code failed every parse attempt."""
97 class WriteBack(Enum):
104 def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
105 if check and not diff:
108 return cls.DIFF if diff else cls.YES
117 class TargetVersion(Enum):
def is_python2(self) -> bool:
    """Return True only when this member is `TargetVersion.PY27`."""
    py27 = TargetVersion.PY27
    return self is py27
130 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
134 # All string literals are unicode
137 NUMERIC_UNDERSCORES = 3
138 TRAILING_COMMA_IN_CALL = 4
139 TRAILING_COMMA_IN_DEF = 5
140 # The following two feature-flags are mutually exclusive, and exactly one should be
141 # set for every version of python.
142 ASYNC_IDENTIFIERS = 6
146 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
147 TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
148 TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
149 TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
150 TargetVersion.PY35: {
151 Feature.UNICODE_LITERALS,
152 Feature.TRAILING_COMMA_IN_CALL,
153 Feature.ASYNC_IDENTIFIERS,
155 TargetVersion.PY36: {
156 Feature.UNICODE_LITERALS,
158 Feature.NUMERIC_UNDERSCORES,
159 Feature.TRAILING_COMMA_IN_CALL,
160 Feature.TRAILING_COMMA_IN_DEF,
161 Feature.ASYNC_IDENTIFIERS,
163 TargetVersion.PY37: {
164 Feature.UNICODE_LITERALS,
166 Feature.NUMERIC_UNDERSCORES,
167 Feature.TRAILING_COMMA_IN_CALL,
168 Feature.TRAILING_COMMA_IN_DEF,
169 Feature.ASYNC_KEYWORDS,
171 TargetVersion.PY38: {
172 Feature.UNICODE_LITERALS,
174 Feature.NUMERIC_UNDERSCORES,
175 Feature.TRAILING_COMMA_IN_CALL,
176 Feature.TRAILING_COMMA_IN_DEF,
177 Feature.ASYNC_KEYWORDS,
184 target_versions: Set[TargetVersion] = Factory(set)
185 line_length: int = DEFAULT_LINE_LENGTH
186 string_normalization: bool = True
189 def get_cache_key(self) -> str:
190 if self.target_versions:
191 version_str = ",".join(
193 for version in sorted(self.target_versions, key=lambda v: v.value)
199 str(self.line_length),
200 str(int(self.string_normalization)),
201 str(int(self.is_pyi)),
203 return ".".join(parts)
def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
    """Tell whether `feature` is available in every one of `target_versions`.

    Each version is looked up in `VERSION_TO_FEATURES`. An empty
    `target_versions` yields True, since no version is missing the feature.
    """
    for version in target_versions:
        if feature not in VERSION_TO_FEATURES[version]:
            return False
    return True
210 def read_pyproject_toml(
211 ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
213 """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
215 Returns the path to a successfully found and read configuration file, None
218 assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
220 root = find_project_root(ctx.params.get("src", ()))
221 path = root / "pyproject.toml"
228 pyproject_toml = toml.load(value)
229 config = pyproject_toml.get("tool", {}).get("black", {})
230 except (toml.TomlDecodeError, OSError) as e:
231 raise click.FileError(
232 filename=value, hint=f"Error reading configuration file: {e}"
238 if ctx.default_map is None:
240 ctx.default_map.update( # type: ignore # bad types in .pyi
241 {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
246 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
247 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
252 default=DEFAULT_LINE_LENGTH,
253 help="How many characters per line to allow.",
259 type=click.Choice([v.name.lower() for v in TargetVersion]),
260 callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v],
263 "Python versions that should be supported by Black's output. [default: "
264 "per-file auto-detection]"
271 "Allow using Python 3.6-only syntax on all input files. This will put "
272 "trailing commas in function signatures and calls also after *args and "
273 "**kwargs. Deprecated; use --target-version instead. "
274 "[default: per-file auto-detection]"
281 "Format all input files like typing stubs regardless of file extension "
282 "(useful when piping source on standard input)."
287 "--skip-string-normalization",
289 help="Don't normalize string quotes or prefixes.",
295 "Don't write the files back, just return the status. Return code 0 "
296 "means nothing would change. Return code 1 means some files would be "
297 "reformatted. Return code 123 means there was an internal error."
303 help="Don't write the files back, just output a diff for each file on stdout.",
308 help="If --fast given, skip temporary sanity checks. [default: --safe]",
313 default=DEFAULT_INCLUDES,
315 "A regular expression that matches files and directories that should be "
316 "included on recursive searches. An empty value means all files are "
317 "included regardless of the name. Use forward slashes for directories on "
318 "all platforms (Windows, too). Exclusions are calculated first, inclusions "
326 default=DEFAULT_EXCLUDES,
328 "A regular expression that matches files and directories that should be "
329 "excluded on recursive searches. An empty value means no paths are excluded. "
330 "Use forward slashes for directories on all platforms (Windows, too). "
331 "Exclusions are calculated first, inclusions later."
340 "Don't emit non-error messages to stderr. Errors are still emitted; "
341 "silence those with 2>/dev/null."
349 "Also emit messages to stderr about files that were not changed or were "
350 "ignored due to --exclude=."
353 @click.version_option(version=__version__)
358 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
365 exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
368 callback=read_pyproject_toml,
369 help="Read configuration from PATH.",
376 target_version: List[TargetVersion],
382 skip_string_normalization: bool,
388 config: Optional[str],
390 """The uncompromising code formatter."""
391 write_back = WriteBack.from_configuration(check=check, diff=diff)
394 err(f"Cannot use both --target-version and --py36")
397 versions = set(target_version)
400 "--py36 is deprecated and will be removed in a future version. "
401 "Use --target-version py36 instead."
403 versions = PY36_VERSIONS
405 # We'll autodetect later.
408 target_versions=versions,
409 line_length=line_length,
411 string_normalization=not skip_string_normalization,
413 if config and verbose:
414 out(f"Using configuration from {config}.", bold=False, fg="blue")
416 print(format_str(code, mode=mode))
419 include_regex = re_compile_maybe_verbose(include)
421 err(f"Invalid regular expression for include given: {include!r}")
424 exclude_regex = re_compile_maybe_verbose(exclude)
426 err(f"Invalid regular expression for exclude given: {exclude!r}")
428 report = Report(check=check, quiet=quiet, verbose=verbose)
429 root = find_project_root(src)
430 sources: Set[Path] = set()
435 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
437 elif p.is_file() or s == "-":
438 # if a file was explicitly given, we don't care about its extension
441 err(f"invalid path: {s}")
442 if len(sources) == 0:
443 if verbose or not quiet:
444 out("No paths given. Nothing to do 😴")
447 if len(sources) == 1:
451 write_back=write_back,
457 sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
460 if verbose or not quiet:
461 out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
462 click.secho(str(report), err=True)
463 ctx.exit(report.return_code)
467 src: Path, fast: bool, write_back: WriteBack, mode: FileMode, report: "Report"
469 """Reformat a single file under `src` without spawning child processes.
471 `fast`, `write_back`, and `mode` options are passed to
472 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
476 if not src.is_file() and str(src) == "-":
477 if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
478 changed = Changed.YES
481 if write_back != WriteBack.DIFF:
482 cache = read_cache(mode)
483 res_src = src.resolve()
484 if res_src in cache and cache[res_src] == get_cache_info(res_src):
485 changed = Changed.CACHED
486 if changed is not Changed.CACHED and format_file_in_place(
487 src, fast=fast, write_back=write_back, mode=mode
489 changed = Changed.YES
490 if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
491 write_back is WriteBack.CHECK and changed is Changed.NO
493 write_cache(cache, [src], mode)
494 report.done(src, changed)
495 except Exception as exc:
496 report.failed(src, str(exc))
502 write_back: WriteBack,
506 """Reformat multiple files using a ProcessPoolExecutor."""
507 loop = asyncio.get_event_loop()
508 worker_count = os.cpu_count()
509 if sys.platform == "win32":
510 # Work around https://bugs.python.org/issue26903
511 worker_count = min(worker_count, 61)
512 executor = ProcessPoolExecutor(max_workers=worker_count)
514 loop.run_until_complete(
518 write_back=write_back,
529 async def schedule_formatting(
532 write_back: WriteBack,
538 """Run formatting of `sources` in parallel using the provided `executor`.
540 (Use ProcessPoolExecutors for actual parallelism.)
542 `line_length`, `write_back`, `fast`, and `pyi` options are passed to
543 :func:`format_file_in_place`.
546 if write_back != WriteBack.DIFF:
547 cache = read_cache(mode)
548 sources, cached = filter_cached(cache, sources)
549 for src in sorted(cached):
550 report.done(src, Changed.CACHED)
555 sources_to_cache = []
557 if write_back == WriteBack.DIFF:
558 # For diff output, we need locks to ensure we don't interleave output
559 # from different processes.
561 lock = manager.Lock()
563 asyncio.ensure_future(
564 loop.run_in_executor(
565 executor, format_file_in_place, src, fast, mode, write_back, lock
568 for src in sorted(sources)
570 pending: Iterable[asyncio.Future] = tasks.keys()
572 loop.add_signal_handler(signal.SIGINT, cancel, pending)
573 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
574 except NotImplementedError:
575 # There are no good alternatives for these on Windows.
578 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
580 src = tasks.pop(task)
582 cancelled.append(task)
583 elif task.exception():
584 report.failed(src, str(task.exception()))
586 changed = Changed.YES if task.result() else Changed.NO
587 # If the file was written back or was successfully checked as
588 # well-formatted, store this information in the cache.
589 if write_back is WriteBack.YES or (
590 write_back is WriteBack.CHECK and changed is Changed.NO
592 sources_to_cache.append(src)
593 report.done(src, changed)
595 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
597 write_cache(cache, sources_to_cache, mode)
600 def format_file_in_place(
604 write_back: WriteBack = WriteBack.NO,
605 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
607 """Format file under `src` path. Return True if changed.
609 If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
611 `mode` and `fast` options are passed to :func:`format_file_contents`.
613 if src.suffix == ".pyi":
614 mode = evolve(mode, is_pyi=True)
616 then = datetime.utcfromtimestamp(src.stat().st_mtime)
617 with open(src, "rb") as buf:
618 src_contents, encoding, newline = decode_bytes(buf.read())
620 dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
621 except NothingChanged:
624 if write_back == write_back.YES:
625 with open(src, "w", encoding=encoding, newline=newline) as f:
626 f.write(dst_contents)
627 elif write_back == write_back.DIFF:
628 now = datetime.utcnow()
629 src_name = f"{src}\t{then} +0000"
630 dst_name = f"{src}\t{now} +0000"
631 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
635 f = io.TextIOWrapper(
641 f.write(diff_contents)
649 def format_stdin_to_stdout(
650 fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: FileMode
652 """Format file on stdin. Return True if changed.
654 If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
655 write a diff to stdout. The `mode` argument is passed to
656 :func:`format_file_contents`.
658 then = datetime.utcnow()
659 src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
662 dst = format_file_contents(src, fast=fast, mode=mode)
665 except NothingChanged:
669 f = io.TextIOWrapper(
670 sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
672 if write_back == WriteBack.YES:
674 elif write_back == WriteBack.DIFF:
675 now = datetime.utcnow()
676 src_name = f"STDIN\t{then} +0000"
677 dst_name = f"STDOUT\t{now} +0000"
678 f.write(diff(src, dst, src_name, dst_name))
682 def format_file_contents(
683 src_contents: str, *, fast: bool, mode: FileMode
685 """Reformat contents a file and return new contents.
687 If `fast` is False, additionally confirm that the reformatted code is
688 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
689 `mode` is passed to :func:`format_str`.
691 if src_contents.strip() == "":
694 dst_contents = format_str(src_contents, mode=mode)
695 if src_contents == dst_contents:
699 assert_equivalent(src_contents, dst_contents)
700 assert_stable(src_contents, dst_contents, mode=mode)
704 def format_str(src_contents: str, *, mode: FileMode) -> FileContent:
705 """Reformat a string and return new contents.
707 `mode` determines formatting options, such as how many characters per line are
710 src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
712 future_imports = get_future_imports(src_node)
713 if mode.target_versions:
714 versions = mode.target_versions
716 versions = detect_target_versions(src_node)
717 normalize_fmt_off(src_node)
718 lines = LineGenerator(
719 remove_u_prefix="unicode_literals" in future_imports
720 or supports_feature(versions, Feature.UNICODE_LITERALS),
722 normalize_strings=mode.string_normalization,
724 elt = EmptyLineTracker(is_pyi=mode.is_pyi)
727 split_line_features = {
729 for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
730 if supports_feature(versions, feature)
732 for current_line in lines.visit(src_node):
733 for _ in range(after):
734 dst_contents.append(str(empty_line))
735 before, after = elt.maybe_empty_lines(current_line)
736 for _ in range(before):
737 dst_contents.append(str(empty_line))
738 for line in split_line(
739 current_line, line_length=mode.line_length, features=split_line_features
741 dst_contents.append(str(line))
742 return "".join(dst_contents)
745 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
746 """Return a tuple of (decoded_contents, encoding, newline).
748 `newline` is either CRLF or LF but `decoded_contents` is decoded with
749 universal newlines (i.e. only contains LF).
751 srcbuf = io.BytesIO(src)
752 encoding, lines = tokenize.detect_encoding(srcbuf.readline)
754 return "", encoding, "\n"
756 newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
758 with io.TextIOWrapper(srcbuf, encoding) as tiow:
759 return tiow.read(), encoding, newline
762 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
763 if not target_versions:
764 # No target_version specified, so try all grammars.
767 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
769 pygram.python_grammar_no_print_statement_no_exec_statement,
770 # Python 2.7 with future print_function import
771 pygram.python_grammar_no_print_statement,
773 pygram.python_grammar,
775 elif all(version.is_python2() for version in target_versions):
776 # Python 2-only code, so try Python 2 grammars.
778 # Python 2.7 with future print_function import
779 pygram.python_grammar_no_print_statement,
781 pygram.python_grammar,
784 # Python 3-compatible code, so only try Python 3 grammar.
786 # If we have to parse both, try to parse async as a keyword first
787 if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
790 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords # noqa: B950
792 if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
794 grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
795 # At least one of the above branches must have been taken, because every Python
796 # version has exactly one of the two 'ASYNC_*' flags
800 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
801 """Given a string with source, return the lib2to3 Node."""
802 if src_txt[-1:] != "\n":
805 for grammar in get_grammars(set(target_versions)):
806 drv = driver.Driver(grammar, pytree.convert)
808 result = drv.parse_string(src_txt, True)
811 except ParseError as pe:
812 lineno, column = pe.context[1]
813 lines = src_txt.splitlines()
815 faulty_line = lines[lineno - 1]
817 faulty_line = "<line number missing in source>"
818 exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
822 if isinstance(result, Leaf):
823 result = Node(syms.file_input, [result])
827 def lib2to3_unparse(node: Node) -> str:
828 """Given a lib2to3 node, return its string representation."""
836 class Visitor(Generic[T]):
837 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
839 def visit(self, node: LN) -> Iterator[T]:
840 """Main method to visit `node` and its children.
842 It tries to find a `visit_*()` method for the given `node.type`, like
843 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
844 If no dedicated `visit_*()` method is found, chooses `visit_default()`
847 Then yields objects of type `T` from the selected visitor.
850 name = token.tok_name[node.type]
852 name = type_repr(node.type)
853 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Fallback `visit_*()` handler: visit every child of `node` in order.

    Only `Node` objects are descended into; for anything else nothing is
    yielded.
    """
    if not isinstance(node, Node):
        return
    for child in node.children:
        yield from self.visit(child)
863 class DebugVisitor(Visitor[T]):
866 def visit_default(self, node: LN) -> Iterator[T]:
867 indent = " " * (2 * self.tree_depth)
868 if isinstance(node, Node):
869 _type = type_repr(node.type)
870 out(f"{indent}{_type}", fg="yellow")
872 for child in node.children:
873 yield from self.visit(child)
876 out(f"{indent}/{_type}", fg="yellow", bold=False)
878 _type = token.tok_name.get(node.type, str(node.type))
879 out(f"{indent}{_type}", fg="blue", nl=False)
881 # We don't have to handle prefixes for `Node` objects since
882 # that delegates to the first child anyway.
883 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
884 out(f" {node.value!r}", fg="blue", bold=False)
887 def show(cls, code: Union[str, Leaf, Node]) -> None:
888 """Pretty-print the lib2to3 AST of a given string of `code`.
890 Convenience method for debugging.
892 v: DebugVisitor[None] = DebugVisitor()
893 if isinstance(code, str):
894 code = lib2to3_parse(code)
898 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
909 STANDALONE_COMMENT = 153
910 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
911 LOGIC_OPERATORS = {"and", "or"}
936 STARS = {token.STAR, token.DOUBLESTAR}
939 syms.argument, # double star in arglist
940 syms.trailer, # single argument to call
942 syms.varargslist, # lambdas
944 UNPACKING_PARENTS = {
945 syms.atom, # single element of a list or set literal
949 syms.testlist_star_expr,
984 COMPREHENSION_PRIORITY = 20
986 TERNARY_PRIORITY = 16
989 COMPARATOR_PRIORITY = 10
1000 token.DOUBLESLASH: 4,
1004 token.DOUBLESTAR: 2,
1010 class BracketTracker:
1011 """Keeps track of brackets on a line."""
1014 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
1015 delimiters: Dict[LeafID, Priority] = Factory(dict)
1016 previous: Optional[Leaf] = None
1017 _for_loop_depths: List[int] = Factory(list)
1018 _lambda_argument_depths: List[int] = Factory(list)
1020 def mark(self, leaf: Leaf) -> None:
1021 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1023 All leaves receive an int `bracket_depth` field that stores how deep
1024 within brackets a given leaf is. 0 means there are no enclosing brackets
1025 that started on this line.
1027 If a leaf is itself a closing bracket, it receives an `opening_bracket`
1028 field that it forms a pair with. This is a one-directional link to
1029 avoid reference cycles.
1031 If a leaf is a delimiter (a token on which Black can split the line if
1032 needed) and it's on depth 0, its `id()` is stored in the tracker's
1035 if leaf.type == token.COMMENT:
1038 self.maybe_decrement_after_for_loop_variable(leaf)
1039 self.maybe_decrement_after_lambda_arguments(leaf)
1040 if leaf.type in CLOSING_BRACKETS:
1042 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1043 leaf.opening_bracket = opening_bracket
1044 leaf.bracket_depth = self.depth
1046 delim = is_split_before_delimiter(leaf, self.previous)
1047 if delim and self.previous is not None:
1048 self.delimiters[id(self.previous)] = delim
1050 delim = is_split_after_delimiter(leaf, self.previous)
1052 self.delimiters[id(leaf)] = delim
1053 if leaf.type in OPENING_BRACKETS:
1054 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1056 self.previous = leaf
1057 self.maybe_increment_lambda_arguments(leaf)
1058 self.maybe_increment_for_loop_variable(leaf)
def any_open_brackets(self) -> bool:
    """Return True if the line still has an opening bracket awaiting its match."""
    return True if self.bracket_match else False
1064 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1065 """Return the highest priority of a delimiter found on the line.
1067 Values are consistent with what `is_split_*_delimiter()` return.
1068 Raises ValueError on no delimiters.
1070 return max(v for k, v in self.delimiters.items() if k not in exclude)
1072 def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1073 """Return the number of delimiters with the given `priority`.
1075 If no `priority` is passed, defaults to max priority on the line.
1077 if not self.delimiters:
1080 priority = priority or self.max_delimiter_priority()
1081 return sum(1 for p in self.delimiters.values() if p == priority)
1083 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1084 """In a for loop, or comprehension, the variables are often unpacks.
1086 To avoid splitting on the comma in this situation, increase the depth of
1087 tokens between `for` and `in`.
1089 if leaf.type == token.NAME and leaf.value == "for":
1091 self._for_loop_depths.append(self.depth)
1096 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1097 """See `maybe_increment_for_loop_variable` above for explanation."""
1099 self._for_loop_depths
1100 and self._for_loop_depths[-1] == self.depth
1101 and leaf.type == token.NAME
1102 and leaf.value == "in"
1105 self._for_loop_depths.pop()
1110 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1111 """In a lambda expression, there might be more than one argument.
1113 To avoid splitting on the comma in this situation, increase the depth of
1114 tokens between `lambda` and `:`.
1116 if leaf.type == token.NAME and leaf.value == "lambda":
1118 self._lambda_argument_depths.append(self.depth)
1123 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1124 """See `maybe_increment_lambda_arguments` above for explanation."""
1126 self._lambda_argument_depths
1127 and self._lambda_argument_depths[-1] == self.depth
1128 and leaf.type == token.COLON
1131 self._lambda_argument_depths.pop()
def get_open_lsqb(self) -> Optional[Leaf]:
    """Look up the bracket tracked for a closing `]` one level above the current depth.

    Returns the stored leaf, or None when no such bracket is being tracked.
    """
    key = (self.depth - 1, token.RSQB)
    return self.bracket_match.get(key)
1143 """Holds leaves and comments. Can be printed with `str(line)`."""
1146 leaves: List[Leaf] = Factory(list)
1147 comments: Dict[LeafID, List[Leaf]] = Factory(dict) # keys ordered like `leaves`
1148 bracket_tracker: BracketTracker = Factory(BracketTracker)
1149 inside_brackets: bool = False
1150 should_explode: bool = False
1152 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1153 """Add a new `leaf` to the end of the line.
1155 Unless `preformatted` is True, the `leaf` will receive a new consistent
1156 whitespace prefix and metadata applied by :class:`BracketTracker`.
1157 Trailing commas are maybe removed, unpacked for loop variables are
1158 demoted from being delimiters.
1160 Inline comments are put aside.
1162 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1166 if token.COLON == leaf.type and self.is_class_paren_empty:
1167 del self.leaves[-2:]
1168 if self.leaves and not preformatted:
1169 # Note: at this point leaf.prefix should be empty except for
1170 # imports, for which we only preserve newlines.
1171 leaf.prefix += whitespace(
1172 leaf, complex_subscript=self.is_complex_subscript(leaf)
1174 if self.inside_brackets or not preformatted:
1175 self.bracket_tracker.mark(leaf)
1176 self.maybe_remove_trailing_comma(leaf)
1177 if not self.append_comment(leaf):
1178 self.leaves.append(leaf)
1180 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1181 """Like :func:`append()` but disallow invalid standalone comment structure.
1183 Raises ValueError when any `leaf` is appended after a standalone comment
1184 or when a standalone comment is not the first leaf on the line.
1186 if self.bracket_tracker.depth == 0:
1188 raise ValueError("cannot append to standalone comments")
1190 if self.leaves and leaf.type == STANDALONE_COMMENT:
1192 "cannot append standalone comments to a populated line"
1195 self.append(leaf, preformatted=preformatted)
def is_comment(self) -> bool:
    """Tell whether the line consists of exactly one leaf, a standalone comment."""
    if len(self.leaves) != 1:
        return False
    return self.leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """Tell whether the line is truthy and its first leaf is an `@` token."""
    if not self:
        return False
    return self.leaves[0].type == token.AT
def is_import(self) -> bool:
    """Tell whether the line is truthy and its first leaf passes `is_import()`."""
    if not self:
        return False
    return is_import(self.leaves[0])
1213 def is_class(self) -> bool:
1214 """Is this line a class definition?"""
1217 and self.leaves[0].type == token.NAME
1218 and self.leaves[0].value == "class"
1222 def is_stub_class(self) -> bool:
1223 """Is this line a class definition with a body consisting only of "..."?"""
1224 return self.is_class and self.leaves[-3:] == [
1225 Leaf(token.DOT, ".") for _ in range(3)
1229 def is_def(self) -> bool:
1230 """Is this a function definition? (Also returns True for async defs.)"""
1232 first_leaf = self.leaves[0]
1237 second_leaf: Optional[Leaf] = self.leaves[1]
1240 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1241 first_leaf.type == token.ASYNC
1242 and second_leaf is not None
1243 and second_leaf.type == token.NAME
1244 and second_leaf.value == "def"
1248 def is_class_paren_empty(self) -> bool:
1249 """Is this a class with no base classes but using parentheses?
1251 Those are unnecessary and should be removed.
1255 and len(self.leaves) == 4
1257 and self.leaves[2].type == token.LPAR
1258 and self.leaves[2].value == "("
1259 and self.leaves[3].type == token.RPAR
1260 and self.leaves[3].value == ")"
1264 def is_triple_quoted_string(self) -> bool:
1265 """Is the line a triple quoted string?"""
1268 and self.leaves[0].type == token.STRING
1269 and self.leaves[0].value.startswith(('"""', "'''"))
1272 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1273 """If so, needs to be split before emitting."""
1274 for leaf in self.leaves:
1275 if leaf.type == STANDALONE_COMMENT:
1276 if leaf.bracket_depth <= depth_limit:
1280 def contains_inner_type_comments(self) -> bool:
1283 last_leaf = self.leaves[-1]
1284 ignored_ids.add(id(last_leaf))
1285 if last_leaf.type == token.COMMA:
1286 # When trailing commas are inserted by Black for consistency, comments
1287 # after the previous last element are not moved (they don't have to,
1288 # rendering will still be correct). So we ignore trailing commas.
1289 last_leaf = self.leaves[-2]
1290 ignored_ids.add(id(last_leaf))
1294 for leaf_id, comments in self.comments.items():
1295 if leaf_id in ignored_ids:
1298 for comment in comments:
1299 if is_type_comment(comment):
1304 def contains_multiline_strings(self) -> bool:
1305 for leaf in self.leaves:
1306 if is_multiline_string(leaf):
1311 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1312 """Remove trailing comma if there is one and it's safe."""
1315 and self.leaves[-1].type == token.COMMA
1316 and closing.type in CLOSING_BRACKETS
1320 if closing.type == token.RBRACE:
1321 self.remove_trailing_comma()
1324 if closing.type == token.RSQB:
1325 comma = self.leaves[-1]
1326 if comma.parent and comma.parent.type == syms.listmaker:
1327 self.remove_trailing_comma()
1330 # For parens let's check if it's safe to remove the comma.
1331 # Imports are always safe.
1333 self.remove_trailing_comma()
1336 # Otherwise, if the trailing one is the only one, we might mistakenly
1337 # change a tuple into a different type by removing the comma.
1338 depth = closing.bracket_depth + 1
1340 opening = closing.opening_bracket
1341 for _opening_index, leaf in enumerate(self.leaves):
1348 for leaf in self.leaves[_opening_index + 1 :]:
1352 bracket_depth = leaf.bracket_depth
1353 if bracket_depth == depth and leaf.type == token.COMMA:
1355 if leaf.parent and leaf.parent.type == syms.arglist:
1360 self.remove_trailing_comma()
1365 def append_comment(self, comment: Leaf) -> bool:
1366 """Add an inline or standalone comment to the line."""
1368 comment.type == STANDALONE_COMMENT
1369 and self.bracket_tracker.any_open_brackets()
1374 if comment.type != token.COMMENT:
1378 comment.type = STANDALONE_COMMENT
1382 self.comments.setdefault(id(self.leaves[-1]), []).append(comment)
def comments_after(self, leaf: Leaf) -> List[Leaf]:
    """Return the comments stored for `leaf`, keyed by its `id()`.

    An empty list is returned when nothing is stored for the leaf.
    """
    leaf_key = id(leaf)
    return self.comments.get(leaf_key, [])
1389 def remove_trailing_comma(self) -> None:
1390 """Remove the trailing comma and moves the comments attached to it."""
1391 trailing_comma = self.leaves.pop()
1392 trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1393 self.comments.setdefault(id(self.leaves[-1]), []).extend(
1394 trailing_comma_comments
def is_complex_subscript(self, leaf: Leaf) -> bool:
    """Return True iff `leaf` is part of a slice with non-trivial exprs.

    The check starts from the node that follows the currently open `[` as
    reported by the bracket tracker.  It is False when no `[` is open or when
    that node is a `listmaker`.  For a `subscriptlist`, only the child that
    contains `leaf` is inspected.
    """
    open_lsqb = self.bracket_tracker.get_open_lsqb()
    if open_lsqb is None:
        return False

    subscript_start = open_lsqb.next_sibling

    if isinstance(subscript_start, Node):
        if subscript_start.type == syms.listmaker:
            return False

        if subscript_start.type == syms.subscriptlist:
            # Narrow the search down to the subscript that holds `leaf`.
            subscript_start = child_towards(subscript_start, leaf)
    return subscript_start is not None and any(
        n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
    )
def __str__(self) -> str:
    """Render the line.

    The result is the first leaf's prefix, the indentation for `self.depth`
    and the first leaf's value, followed by every remaining leaf and every
    attached comment, terminated by a newline.  A falsy line renders as a
    bare newline.
    """
    if not self:
        return "\n"

    # NOTE(review): the indent unit is written as four spaces.  The listing
    # this was restored from showed a single space, which looks like collapsed
    # whitespace (inline comments lost their double space too) -- confirm.
    indent = "    " * self.depth
    leaves = iter(self.leaves)
    first = next(leaves)
    # Only the first leaf is rendered by hand so that the indentation lands
    # between its prefix and its value.
    res = f"{first.prefix}{indent}{first.value}"
    for leaf in leaves:
        res += str(leaf)
    for comment in itertools.chain.from_iterable(self.comments.values()):
        res += str(comment)
    return res + "\n"
def __bool__(self) -> bool:
    """Return True if the line has leaves or comments."""
    return bool(self.leaves or self.comments)
@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    # Stub files (.pyi) use tighter blank-line limits than regular sources.
    is_pyi: bool = False
    # The line passed to the previous `maybe_empty_lines()` call, if any.
    previous_line: Optional[Line] = None
    # The "after" count returned by the previous `maybe_empty_lines()` call.
    previous_after: int = 0
    # Depths of the `def`/`class` lines that are currently open.
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).

        The "after" count handed out for the previous line is subtracted from
        this line's "before" count so the same blank lines are not emitted twice.
        """
        before, after = self._maybe_empty_lines(current_line)
        before -= self.previous_after
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) counts for `current_line`.

        Consumes the newlines stored in the prefix of the line's first leaf.
        """
        max_allowed = 1
        if current_line.depth == 0:
            max_allowed = 1 if self.is_pyi else 2
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = first_leaf.prefix.count("\n")
            before = min(before, max_allowed)
            first_leaf.prefix = ""
        else:
            before = 0
        depth = current_line.depth
        # Leaving the body of one or more definitions: forget them and force
        # the separation that follows a definition at this depth.
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2
        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        if (
            self.previous_line
            and self.previous_line.is_import
            and not current_line.is_import
            and depth == self.previous_line.depth
        ):
            # At least one empty line between imports and whatever follows.
            return (before or 1), 0

        if (
            self.previous_line
            and self.previous_line.is_class
            and current_line.is_triple_quoted_string
        ):
            # Class docstring: keep one empty line after it.
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute the (before, after) counts for a decorator, `def` or `class` line.

        `before` is the number of empty lines found in the source in front of
        `current_line`; it only matters when the previous line is a comment.
        """
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)
        if self.previous_line is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if self.previous_line.is_decorator:
            return 0, 0

        if self.previous_line.depth < current_line.depth and (
            self.previous_line.is_class or self.previous_line.is_def
        ):
            # First statement in the body of a class or function.
            return 0, 0

        if (
            self.previous_line.is_comment
            and self.previous_line.depth == current_line.depth
            and before == 0
        ):
            # A comment directly above the definition stays attached to it.
            return 0, 0

        if self.is_pyi:
            if self.previous_line.depth > current_line.depth:
                newlines = 1
            elif current_line.is_class or self.previous_line.is_class:
                if current_line.is_stub_class and self.previous_line.is_stub_class:
                    # No blank line between classes with an empty body
                    newlines = 0
                else:
                    newlines = 1
            elif current_line.is_def and not self.previous_line.is_def:
                # Blank line between a block of functions and a block of non-functions
                newlines = 1
            else:
                newlines = 0
        else:
            newlines = 2
        if current_line.depth and newlines:
            # Nested definitions get one empty line fewer than module-level ones.
            newlines -= 1
        return newlines, 0
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    is_pyi: bool = False
    normalize_strings: bool = True
    # The line currently being filled with leaves.
    current_line: Line = Factory(Line)
    remove_u_prefix: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, only emit if it makes sense.
        If the line is too long, split it first and then generate.

        If any lines were generated, set up a new current_line.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_atom(self, node: Node) -> Iterator[Line]:
        """Visit an atom, hiding redundant parentheses around a single leaf."""
        # Always make parentheses invisible around a single node, because it should
        # not be needed (except in the case of yield, where removing the parentheses
        # produces a SyntaxError).
        if (
            len(node.children) == 3
            and isinstance(node.children[0], Leaf)
            and node.children[0].type == token.LPAR
            and isinstance(node.children[2], Leaf)
            and node.children[2].type == token.RPAR
            and isinstance(node.children[1], Leaf)
            and not (
                node.children[1].type == token.NAME
                and node.children[1].value == "yield"
            )
        ):
            node.children[0].value = ""
            node.children[2].value = ""
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This methods puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.is_pyi and is_stub_suite(node):
            # Only the third child is visited for a stub suite.
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                # The statement shares a line with its compound statement in
                # the source; give it an indented line of its own.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The node after ASYNC is the wrapped statement.  Its children are
        # visited directly, bypassing that statement's own `visit_*()` method.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment, starting a new line unless inside brackets."""
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def __attrs_post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        # Every statement visitor below is `visit_stmt` with the keywords and
        # paren anchors of that statement bound in advance.
        v = self.visit_stmt
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token to its closing counterpart.
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Token types tested first by `whitespace()`, before any context is examined.
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
# NOTE(review): this listing of `whitespace()` is incomplete.  The numbers fused
# to the start of each line are original line numbers; the gaps between them
# (for example 1762-1768) are lines that are absent here.  Those gaps hold the
# end of the docstring, the definitions of `t`, `p`, `NO` and `SPACE`, the
# members of several set literals and the result of most branches.
# Presumably `t` is the type of `leaf` and `p` its parent (see the assert
# message below) -- confirm against a complete copy before editing.
1757 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901
1758 """Return whitespace prefix if needed for the given `leaf`.
1760 `complex_subscript` signals whether the given leaf is part of a subscription
1761 which has non-trivial arguments, like arithmetic expressions or function calls.
# Token types in ALWAYS_NO_SPACE and plain comments are decided first, from
# the token type alone.
1769 if t in ALWAYS_NO_SPACE:
1772 if t == token.COMMENT:
1775 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1776 if t == token.COLON and p.type not in {
# From here on the decision depends on what precedes `leaf`: its previous
# sibling `prev`, or, where `preceding_leaf(p)` is called, the leaf `prevp`
# in front of the parent.
1783 prev = leaf.prev_sibling
1785 prevp = preceding_leaf(p)
1786 if not prevp or prevp.type in OPENING_BRACKETS:
1789 if t == token.COLON:
1790 if prevp.type == token.COLON:
1793 elif prevp.type != token.COMMA and not complex_subscript:
1798 if prevp.type == token.EQUAL:
1800 if prevp.parent.type in {
1808 elif prevp.parent.type == syms.typedargslist:
1809 # A bit hacky: if the equal sign has whitespace, it means we
1810 # previously found it's a typed argument. So, we're using
1814 elif prevp.type in STARS:
1815 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1818 elif prevp.type == token.COLON:
1819 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1820 return SPACE if complex_subscript else NO
1824 and prevp.parent.type == syms.factor
1825 and prevp.type in MATH_OPERATORS
1830 prevp.type == token.RIGHTSHIFT
1832 and prevp.parent.type == syms.shift_expr
1833 and prevp.prev_sibling
1834 and prevp.prev_sibling.type == token.NAME
1835 and prevp.prev_sibling.value == "print" # type: ignore
1837 # Python 2 print chevron
1840 elif prev.type in OPENING_BRACKETS:
# The remaining branches dispatch on the type of the parent node `p`.
1843 if p.type in {syms.parameters, syms.arglist}:
1844 # untyped function signatures or calls
1845 if not prev or prev.type != token.COMMA:
1848 elif p.type == syms.varargslist:
1850 if prev and prev.type != token.COMMA:
1853 elif p.type == syms.typedargslist:
1854 # typed function signatures
1858 if t == token.EQUAL:
1859 if prev.type != syms.tname:
1862 elif prev.type == token.EQUAL:
1863 # A bit hacky: if the equal sign has whitespace, it means we
1864 # previously found it's a typed argument. So, we're using that, too.
1867 elif prev.type != token.COMMA:
1870 elif p.type == syms.tname:
1873 prevp = preceding_leaf(p)
1874 if not prevp or prevp.type != token.COMMA:
1877 elif p.type == syms.trailer:
1878 # attributes and calls
1879 if t == token.LPAR or t == token.RPAR:
1884 prevp = preceding_leaf(p)
1885 if not prevp or prevp.type != token.NUMBER:
1888 elif t == token.LSQB:
1891 elif prev.type != token.COMMA:
1894 elif p.type == syms.argument:
1896 if t == token.EQUAL:
1900 prevp = preceding_leaf(p)
1901 if not prevp or prevp.type == token.LPAR:
1904 elif prev.type in {token.EQUAL} | STARS:
1907 elif p.type == syms.decorator:
1911 elif p.type == syms.dotted_name:
1915 prevp = preceding_leaf(p)
1916 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1919 elif p.type == syms.classdef:
1923 if prev and prev.type == token.LPAR:
1926 elif p.type in {syms.subscript, syms.sliceop}:
1929 assert p.parent is not None, "subscripts are always parented"
1930 if p.parent.type == syms.subscriptlist:
1935 elif not complex_subscript:
1938 elif p.type == syms.atom:
1939 if prev and t == token.DOT:
1940 # dots, but not the first one.
1943 elif p.type == syms.dictsetmaker:
1945 if prev and prev.type == token.DOUBLESTAR:
1948 elif p.type in {syms.factor, syms.star_expr}:
1951 prevp = preceding_leaf(p)
1952 if not prevp or prevp.type in OPENING_BRACKETS:
1955 prevp_parent = prevp.parent
1956 assert prevp_parent is not None
1957 if prevp.type == token.COLON and prevp_parent.type in {
1963 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1966 elif t in {token.NAME, token.NUMBER, token.STRING}:
1969 elif p.type == syms.import_from:
1971 if prev and prev.type == token.DOT:
1974 elif t == token.NAME:
1978 if prev and prev.type == token.DOT:
1981 elif p.type == syms.sliceop:
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling, then returns that sibling itself if it is a leaf, or its last
    leaf otherwise.  Returns None when no ancestor has a previous sibling or
    when that sibling contains no leaves.
    """
    while node:
        res = node.prev_sibling
        if res:
            if isinstance(res, Leaf):
                return res

            try:
                return list(res.leaves())[-1]

            except IndexError:
                # The sibling is a node without any leaves.
                return None

        node = node.parent
    return None
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Follows the `parent` links from `descendant` upwards and stops at the node
    whose parent is `ancestor`.  Returns None when `ancestor` is not found on
    that path.
    """
    node: Optional[LN] = descendant
    while node and node.parent != ancestor:
        node = node.parent
    return node
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or one of its ancestors that is the topmost container of it.

    By "container" we mean a node where `leaf` is the very first child.

    The walk upwards stops at the root, at a parent whose first child has a
    different prefix than `leaf`, at a `file_input` parent, and at a parent
    that directly follows a bracket.
    """
    same_prefix = leaf.prefix
    container: LN = leaf
    while container:
        parent = container.parent
        if parent is None:
            break

        if parent.children[0].prefix != same_prefix:
            break

        if parent.type == syms.file_input:
            break

        if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
            break

        container = parent
    return container
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.  Only commas qualify; every other
    leaf gets priority 0.  `previous` is accepted but not used here.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    Higher numbers are higher priority.  `previous` is the leaf that comes
    right before `leaf` on the line, if any.  A result of 0 means `leaf` is
    not a delimiter to split before.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        # Two adjacent string literals.
        return STRING_PRIORITY

    # Everything below concerns keywords, which are NAME or ASYNC leaves.
    if leaf.type not in {token.NAME, token.ASYNC}:
        return 0

    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
        or leaf.type == token.ASYNC
    ):
        # In `async for`, split before `async` only, not before the `for`
        # that follows it.
        if (
            not isinstance(leaf.prev_sibling, Leaf)
            or leaf.prev_sibling.value != "async"
        ):
            return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        and not (
            # `not in` is split before its `not`.
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        and not (
            # `is not` is split before its `is`.
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
# Comment texts that act as formatter on/off markers.
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
        # The new leaf's prefix carries the newlines counted before the comment.
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    `prefix` is split into lines; every line that starts with `#` once leading
    whitespace is removed becomes one comment.  A comment on the first line
    that counts is an inline (trailing) comment unless `is_endmarker` is set;
    every other comment is a standalone comment.

    Results are cached, so callers must not mutate the returned list.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(prefix.split("\n")):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        # Blank lines are counted per comment.
        nlines = 0
    return result
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:", "#'", "#%%") should have a single
    space between the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.  Trailing
    whitespace is removed; empty or whitespace-only `content` yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to index into below.
        return "#"

    if content[0] == "#":
        content = content[1:]
    # A leading space or one of the special markers is kept as is; anything
    # else gets a single space of separation from the hash sign.
    if content and content[0] not in " !:#'%":
        content = " " + content
    return "#" + content
# NOTE(review): the function name and the first two parameters were missing
# from the listing this was restored from; they are taken from the visible
# recursive call (`l, line_length=line_length, inner=True, ...`) and from how
# `line` and `line_length` are used in the body -- confirm.
def split_line(
    line: Line,
    line_length: int,
    inner: bool = False,
    features: Collection[Feature] = (),
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    `features` are syntactical features that may be used in the output.

    If no split function produces a usable result, `line` is yielded unchanged.
    """
    if line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")

    if (
        not line.contains_inner_type_comments()
        and not line.should_explode
        and is_line_short_enough(line, line_length=line_length, line_str=line_str)
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    else:

        def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
            # Try omitting progressively more trailers until the first
            # resulting line fits.
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, line_length, features, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            yield from right_hand_split(line, line_length, features=features)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for sub_line in split_func(line, features):
                if str(sub_line).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(
                        sub_line, line_length=line_length, inner=True, features=features
                    )
                )
        except CannotSplit:
            continue

        else:
            yield from result
            break

    else:
        yield line
def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.

    Yields the non-empty ones of head, body and tail, in that order.  Raises
    :exc:`CannotSplit` when the line has no opening bracket or the split is
    rejected by :func:`bracket_split_succeeded_or_raise`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        ):
            # The bracket pair is closed.  If nothing was collected between
            # the brackets, keep going in the head.
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        if current_leaves is head_leaves:
            if leaf.type in OPENING_BRACKETS:
                matching_bracket = leaf
                current_leaves = body_leaves
    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.

    Yields the non-empty ones of head, body and tail, in that order.  Raises
    :exc:`CannotSplit` when no usable bracket pair is found or the split is
    rejected.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Leaves are collected back to front and reversed afterwards.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit:
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm because the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    # The split is on this bracket pair: give the brackets a value in case
    # they were optional (empty) parentheses.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        elif tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save "
                f"{tail_len} characters is not worth it"
            )
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Return a new line with given `leaves` and respective comments from `original`.

    If `is_body` is True, the result line is one-indented inside brackets and as such
    has its first leaf's prefix normalized and a trailing comma added when expected.

    Note: for the body of an import, `leaves` itself may get a comma inserted.
    """
    result = Line(depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1
        if leaves:
            # Since body is a new indent level, remove spurious leading whitespace.
            normalize_prefix(leaves[0], inside_brackets=True)
            # Ensure a trailing comma for imports, but be careful not to add one after
            # any comments.
            if original.is_import:
                # Look backwards for the last leaf that isn't a standalone comment.
                for i in range(len(leaves) - 1, -1, -1):
                    if leaves[i].type == STANDALONE_COMMENT:
                        continue
                    elif leaves[i].type == token.COMMA:
                        break
                    else:
                        leaves.insert(i + 1, Leaf(token.COMMA, ","))
                        break
    # Populate the line
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for comment_after in original.comments_after(leaf):
            result.append(comment_after, preformatted=True)
    if is_body:
        result.should_explode = should_explode(result, opening_bracket)
    return result
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for split_result in split_func(line, features):
            normalize_prefix(split_result.leaves[0], inside_brackets=True)
            yield split_result

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Raises :exc:`CannotSplit` when the line is empty, has no delimiters, or
    the only delimiter is a single attribute-access dot.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    bt = line.bracket_tracker
    try:
        # The last leaf is excluded from the search for the highest priority.
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth:
            # A `*` or `**` at the outermost depth only allows a trailing
            # comma when the matching feature is among `features`.
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            # Break the line after every delimiter of the chosen priority.
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Break `line` apart wherever a standalone comment prevents joining leaves.

    Raises `CannotSplit` if `line.contains_standalone_comments(0)` is false.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Return an empty line with the same depth/bracket state as `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Put `leaf` on the current line, or flush it and begin a new one."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)
        for trailing_comment in line.comments_after(leaf):
            yield from append_to_line(trailing_comment)

    if current_line:
        yield current_line
def is_import(leaf: Leaf) -> bool:
    """Tell whether `leaf` is the keyword that opens an import statement.

    True for a NAME leaf `import` whose parent is an `import_name` node, or a
    NAME leaf `from` whose parent is an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    keyword = leaf.value
    if keyword == "import":
        return parent.type == syms.import_name

    if keyword == "from":
        return parent.type == syms.import_from

    return False
def is_type_comment(leaf: Leaf) -> bool:
    """Return True if the given leaf is a special comment.

    Only returns true for type comments for now: the leaf must be either a
    regular comment or a standalone comment, and its value must start with
    `# type:`.
    """
    # The leaf type is compared against both comment token kinds directly.
    # (A boolean expression inside the set would only ever match token
    # numbers 0 or 1, so standalone type comments would go undetected.)
    return leaf.type in {token.COMMENT, STANDALONE_COMMENT} and leaf.value.startswith(
        "# type:"
    )
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce `leaf.prefix` to newlines only, or to nothing at all.

    Outside of brackets, the newlines found after the last `#` in the prefix
    are kept (minus one when the prefix contains a `#`), provided there is no
    backslash before the first `#`.  In every other case the prefix is emptied.

    Note: mutates `leaf` in place.
    """
    if not inside_brackets:
        head, *rest = leaf.prefix.split("#")
        if "\\" not in head:
            if rest:
                # One newline belongs to the comment line itself.
                newline_count = rest[-1].count("\n") - 1
            else:
                newline_count = head.count("\n")
            leaf.prefix = "\n" * newline_count
            return

    leaf.prefix = ""
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the prefix letters of the string literal held in `leaf.value`.

    When `remove_u_prefix` is true, any `u` in the (lowercased) prefix is
    dropped as well.

    Note: mutates `leaf` in place.
    """
    parsed = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert parsed is not None, f"failed to match string {leaf.value!r}"
    raw_prefix, remainder = parsed.group(1), parsed.group(2)
    lowered = raw_prefix.lower()
    final_prefix = lowered.replace("u", "") if remove_u_prefix else lowered
    leaf.value = f"{final_prefix}{remainder}"
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        # Already the preferred triple-quote style.
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # A `new_quote` preceded by an even number of backslashes (i.e. not escaped).
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # A `new_quote` / `orig_quote` preceded by an odd number of backslashes.
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        matches = re.findall(
            r"""
            (?:[^{]|^)\{ # start of the string or a non-{ followed by a single {
            ([^{].*?) # contents of the brackets except if begins with {{
            \}(?:[^}]|$) # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case:
        new_body = new_body[:-1] + '\\"'
    # Compare how many backslashes each spelling needs.
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_numeric_literal(leaf: Leaf) -> None:
    """Rewrite the numeric literal in `leaf.value` into a canonical spelling.

    Everything is lowercased first.  Octal and binary literals are then left
    as they are, hex digits are uppercased, an explicit `+` in an exponent is
    dropped, an `l` suffix becomes `L`, and float parts go through
    `format_float_or_int_string`.

    Note: mutates `leaf` in place.
    """
    text = leaf.value.lower()
    if text.startswith(("0o", "0b")):
        # Octal and binary literals need nothing beyond lowercasing.
        pass
    elif text.startswith("0x"):
        # Hex digits are written in upper case, the "0x" marker in lower case.
        text = f"{text[:2]}{text[2:].upper()}"
    elif "e" in text:
        mantissa, exponent = text.split("e")
        sign = ""
        if exponent.startswith("-"):
            sign = "-"
            exponent = exponent[1:]
        elif exponent.startswith("+"):
            exponent = exponent[1:]
        mantissa = format_float_or_int_string(mantissa)
        text = f"{mantissa}e{sign}{exponent}"
    elif text.endswith(("j", "l")):
        number, suffix = text[:-1], text[-1]
        # Capitalize in "2L" because "l" looks too similar to "1".
        if suffix == "l":
            suffix = "L"
        text = f"{format_float_or_int_string(number)}{suffix}"
    else:
        text = format_float_or_int_string(text)
    leaf.value = text
def format_float_or_int_string(text: str) -> str:
    """Make sure a literal with a dot has digits on both sides of it.

    Text without a dot is returned unchanged; otherwise an empty integer or
    fractional part is replaced by `0` (`"1."` -> `"1.0"`, `".5"` -> `"0.5"`).
    """
    if "." in text:
        integral, fractional = text.split(".")
        return f"{integral or 0}.{fractional or 0}"

    return text
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    Note: mutates `node` (children are removed, wrapped and re-inserted).
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    # True when the child being visited directly follows a leaf listed in
    # `parens_after` (or is a leading tuple-unpacking target, see below).
    check_lpar = False
    # Iterate over a copy: the loop body replaces children of `node`.
    for index, child in enumerate(list(node.children)):
        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(child, parent=node):
                    # The helper asked for an extra pair of invisible parens.
                    lpar = Leaf(token.LPAR, "")
                    rpar = Leaf(token.RPAR, "")
                    index = child.remove() or 0
                    node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                index = child.remove() or 0
                # Move the child's prefix onto the new wrapping atom.
                prefix = child.prefix
                child.prefix = ""
                new_child = Node(syms.atom, [lpar, child, rpar])
                new_child.prefix = prefix
                node.insert_child(index, new_child)

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into a standalone comment.

    Calls `convert_one_fmt_off_pair` repeatedly until it reports that nothing
    was converted.
    """
    while convert_one_fmt_off_pair(node):
        # Each successful conversion changes the tree, so scan it again.
        pass
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # `consumed` value of the previous comment in this prefix; used to
        # keep the part of the prefix that precedes the `# fmt: off` comment.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Drop everything up to and including the `# fmt: off` comment
                # from the first node's prefix before stringifying it.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Index of the first removed node; the replacement goes there.
                first_idx = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                return True

            previous_consumed = comment.consumed

    return False
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Yield the container of `leaf` and its following siblings up to `# fmt: on`.

    Iteration ends when a node's prefix holds a comment listed in `FMT_ON`,
    when the ENDMARKER is reached, or when there are no more siblings.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        prefix_comments = list_comments(container.prefix, is_endmarker=False)
        if any(comment.value in FMT_ON for comment in prefix_comments):
            return

        yield container

        container = container.next_sibling
def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.

    Returns whether the node should itself be wrapped in invisible parentheses.

    """
    if (
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or (is_yield(node) and parent.type != syms.expr_stmt)
        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
    ):
        # Leave `node` alone in all of these cases.
        return False

    first = node.children[0]
    last = node.children[-1]
    if first.type == token.LPAR and last.type == token.RPAR:
        # make parentheses invisible
        first.value = ""  # type: ignore
        last.value = ""  # type: ignore
        if len(node.children) > 1:
            # Recurse into the content; the result of the nested call is
            # deliberately not used.
            maybe_make_parens_invisible_in_atom(node.children[1], parent=parent)
        return False

    # An atom that isn't delimited by parentheses.
    return True
def is_empty_tuple(node: LN) -> bool:
    """Tell whether `node` is an atom made of just `(` and `)`."""
    if node.type != syms.atom:
        return False

    children = node.children
    if len(children) != 2:
        return False

    return children[0].type == token.LPAR and children[1].type == token.RPAR
def is_one_tuple(node: LN) -> bool:
    """Tell whether `node` is a one-element tuple, parenthesized or not.

    For an atom this means `(`, a `testlist_gexp` of the form `item ,`, and
    `)`.  For any other node, its type must be in `IMPLICIT_TUPLE` and it must
    consist of exactly one item followed by a comma.
    """
    if node.type != syms.atom:
        return (
            node.type in IMPLICIT_TUPLE
            and len(node.children) == 2
            and node.children[1].type == token.COMMA
        )

    if len(node.children) != 3:
        return False

    opening, content, closing = node.children
    if opening.type != token.LPAR:
        return False

    if content.type != syms.testlist_gexp:
        return False

    if closing.type != token.RPAR:
        return False

    return len(content.children) == 2 and content.children[1].type == token.COMMA
def is_yield(node: LN) -> bool:
    """Tell whether `node` is a `yield` expression, possibly inside parentheses.

    True for a `yield_expr` node or a bare NAME leaf `yield`; a three-child
    atom delimited by `(` and `)` is unwrapped and its content examined the
    same way.
    """
    current = node
    while True:
        if current.type == syms.yield_expr:
            return True

        if current.type == token.NAME and current.value == "yield":  # type: ignore
            return True

        if current.type != syms.atom or len(current.children) != 3:
            return False

        opening, inner, closing = current.children
        if not (opening.type == token.LPAR and closing.type == token.RPAR):
            return False

        # Parenthesized atom: look at what's inside.
        current = inner
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Tell whether `leaf` is a `*` or `**` whose parent type is in `within`.

    `leaf` must have a type listed in `STARS` and must have a parent.  When
    the parent is a `star_expr` node, the check is made against the
    grandparent instead (False if there is none).
    """
    if leaf.type not in STARS:
        return False

    owner = leaf.parent
    if not owner:
        return False

    if owner.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132), so judge by the enclosing node.
        owner = owner.parent
        if not owner:
            return False

    return owner.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Tell whether `leaf.value` is a triple-quoted string containing a newline.

    Any leading prefix letters (`f`, `u`, `r`, `b` in either case) are ignored.
    """
    unprefixed = leaf.value.lstrip("furbFURB")
    return unprefixed.startswith(('"""', "'''")) and "\n" in unprefixed
def is_stub_suite(node: Node) -> bool:
    """Tell whether `node` is NEWLINE, INDENT, a stub body, DEDENT - and nothing else."""
    children = node.children
    if len(children) != 4:
        return False

    newline, indent, body, dedent = children
    well_formed = (
        newline.type == token.NEWLINE
        and indent.type == token.INDENT
        and dedent.type == token.DEDENT
    )
    if not well_formed:
        return False

    return is_stub_body(body)
def is_stub_body(node: LN) -> bool:
    """Tell whether `node` is a simple statement whose first child is `...`.

    The statement must have exactly two children, and the first must be an
    atom consisting of three DOT leaves.
    """
    if not isinstance(node, Node):
        return False

    if node.type != syms.simple_stmt or len(node.children) != 2:
        return False

    candidate = node.children[0]
    if candidate.type != syms.atom or len(candidate.children) != 3:
        return False

    return all(part == Leaf(token.DOT, ".") for part in candidate.children)
def max_delimiter_priority_in_atom(node: LN) -> Priority:
    """Compute the highest delimiter priority found between the parens of `node`.

    Returns 0 when `node` is not an atom, when it is not delimited by `(` and
    `)`, or when the bracket tracker raises `ValueError` while computing the
    maximum priority.
    """
    if node.type != syms.atom:
        return 0

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for child in node.children[1:-1]:
        # Feed every leaf of the content to the tracker, in order.
        content_leaves = [child] if isinstance(child, Leaf) else child.leaves()
        for content_leaf in content_leaves:
            tracker.mark(content_leaf)

    try:
        return tracker.max_delimiter_priority()

    except ValueError:
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible text back.

    An LPAR leaf gets the value `(` and an RPAR leaf gets `)`; leaves of any
    other type are left untouched.  Parentheses could have been made invisible
    as part of some statements (see :func:`normalize_invisible_parens` and
    :func:`visit_import_from`).
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_text is not None:
        leaf.value = visible_text
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an `import_from`
    node and the highest delimiter priority in `line` (not counting a trailing
    comma) is `COMMA_PRIORITY`.
    """

    if not (
        opening_bracket.parent
        and opening_bracket.parent.type in {syms.atom, syms.import_from}
        # NOTE(review): this is a substring test, so an empty `value` passes
        # as well - presumably intended for invisible parentheses; confirm.
        and opening_bracket.value in "[{("
    ):
        return False

    try:
        last_leaf = line.leaves[-1]
        # A trailing comma must not count as a delimiter to split on.
        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        # IndexError: `line` has no leaves.  ValueError comes from the
        # `max_delimiter_priority` call.
        return False

    return max_priority == COMMA_PRIORITY
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals; and
    - trailing commas after * or ** in function signatures and calls.
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            value = n.value  # type: ignore
            # Everything before the opening quote is the string prefix.  Look
            # for "f" in it case-insensitively so that mixed-case raw
            # f-strings (`Rf"..."`, `fR"..."`, ...) are detected, too.
            prefix = value[: len(value) - len(value.lstrip("furbFURB"))]
            if "f" in prefix.lower():
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list that ends with a trailing comma: it only counts
            # as a feature when the list also contains `*` or `**`.
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    # Stars can also sit one level down, inside an `argument`.
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)

    return features
def detect_target_versions(node: Node) -> Set[TargetVersion]:
    """Collect every target version whose feature set covers what `node` uses.

    A version qualifies when the features reported by `get_features_used` are
    a subset of `VERSION_TO_FEATURES[version]`.
    """
    used = get_features_used(node)
    compatible: Set[TargetVersion] = set()
    for version in TargetVersion:
        if used <= VERSION_TO_FEATURES[version]:
            compatible.add(version)
    return compatible
3194 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
3195 """Generate sets of closing bracket IDs that should be omitted in a RHS.
3197 Brackets can be omitted if the entire trailer up to and including
3198 a preceding closing bracket fits in one line.
3200 Yielded sets are cumulative (contain results of previous yields, too). First
3204 omit: Set[LeafID] = set()
3207 length = 4 * line.depth
3208 opening_bracket = None
3209 closing_bracket = None
3210 inner_brackets: Set[LeafID] = set()
3211 for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
3212 length += leaf_length
3213 if length > line_length:
3216 has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
3217 if leaf.type == STANDALONE_COMMENT or has_inline_comment:
3221 if leaf is opening_bracket:
3222 opening_bracket = None
3223 elif leaf.type in CLOSING_BRACKETS:
3224 inner_brackets.add(id(leaf))
3225 elif leaf.type in CLOSING_BRACKETS:
3226 if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
3227 # Empty brackets would fail a split so treat them as "inner"
3228 # brackets (e.g. only add them to the `omit` set if another
3229 # pair of brackets was good enough.
3230 inner_brackets.add(id(leaf))
3234 omit.add(id(closing_bracket))
3235 omit.update(inner_brackets)
3236 inner_brackets.clear()
3240 opening_bracket = leaf.opening_bracket
3241 closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file.

    Only the leading statements of `node` are examined: scanning stops at the
    first child that is neither a docstring-like string statement nor a
    `from __future__ import ...` statement.
    """
    imports: Set[str] = set()

    def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
        # Yield the imported names; for `x as y` the original name `x`.
        for child in children:
            if isinstance(child, Leaf):
                # Leaves that are not names are passed over.
                if child.type == token.NAME:
                    yield child.value
            elif child.type == syms.import_as_name:
                orig_name = child.children[0]
                assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
                assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
                yield orig_name.value
            elif child.type == syms.import_as_names:
                yield from get_imports_from_children(child.children)
            else:
                raise AssertionError("Invalid syntax parsing imports")

    for child in node.children:
        if child.type != syms.simple_stmt:
            break
        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue
            else:
                break
        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break
            # Children 0-2 are `from`, the module name and `import`; the
            # imported names start at index 3.
            imports |= set(get_imports_from_children(first_child.children[3:]))
        else:
            break
    return imports
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Generate all files under `path` whose paths are not excluded by the
    `exclude` regex, but are included by the `include` regex.

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for child in path.iterdir():
        try:
            # Both regexes are matched against the resolved path relative to
            # `root`, in POSIX form and with a leading slash.
            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
        except ValueError:
            # `relative_to` failed: the resolved path is not under `root`.
            if child.is_symlink():
                report.path_ignored(
                    child, f"is a symbolic link that points outside {root}"
                )
                continue

            raise

        if child.is_dir():
            # Directories get a trailing slash before matching.
            normalized_path += "/"
        exclude_match = exclude.search(normalized_path)
        # An empty match does not count as an exclusion.
        if exclude_match and exclude_match.group(0):
            report.path_ignored(child, f"matches the --exclude regular expression")
            continue

        if child.is_dir():
            yield from gen_python_files_in_dir(child, root, include, exclude, report)

        elif child.is_file():
            include_match = include.search(normalized_path)
            if include_match:
                yield child
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.  The same happens
    when `srcs` is empty or the sources have no ancestor in common.
    """
    resolved = [Path(src).resolve() for src in srcs]
    if not resolved:
        return Path("/").resolve()

    # For every source collect all directories that contain it (a directory
    # source counts as containing itself).  The project root has to be one of
    # the directories shared by *all* sources; starting the search from any
    # single source could pick up a marker in a directory that does not
    # contain the other ones.
    containing_dirs = [
        set(path.parents) | ({path} if path.is_dir() else set()) for path in resolved
    ]
    shared = set.intersection(*containing_dirs)
    if not shared:
        return Path("/").resolve()

    # All shared directories lie on one ancestor chain; the deepest one is the
    # common base of the sources.
    common_base = max(shared, key=lambda path: len(path.parts))
    for directory in (common_base, *common_base.parents):
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker found: `directory` is now the topmost ancestor.
    return directory
@dataclass
class Report:
    """Counts reformatting outcomes and renders them via `str(report)`."""

    check: bool = False
    quiet: bool = False
    verbose: bool = False
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record that `src` was processed without error and print a message.

        `Changed.YES` bumps `change_count`; anything else bumps `same_count`.
        """
        if changed is not Changed.YES:
            if self.verbose:
                if changed is Changed.NO:
                    note = f"{src} already well formatted, good job."
                else:
                    note = f"{src} wasn't modified on disk since last run."
                out(note, bold=False)
            self.same_count += 1
            return

        verb = "would reformat" if self.check else "reformatted"
        if self.verbose or not self.quiet:
            out(f"{verb} {src}")
        self.change_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record that `src` could not be formatted and print an error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """Print why `path` was skipped; silent unless `verbose` is set."""
        if not self.verbose:
            return

        out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Exit code the application should finish with.

        - 123 if there were any failures;
        - 1 if any files were changed and `check` is set;
        - 0 otherwise.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special return codes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """

        def counted(count: int, outcome: str) -> str:
            """Return e.g. `3 files reformatted` with the plural chosen by `count`."""
            plural = "s" if count > 1 else ""
            return f"{count} file{plural} {outcome}"

        if self.check:
            reformatted, unchanged, failed = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            reformatted, unchanged, failed = (
                "reformatted",
                "left unchanged",
                "failed to reformat",
            )

        parts = []
        if self.change_count:
            parts.append(click.style(counted(self.change_count, reformatted), bold=True))
        if self.same_count:
            parts.append(counted(self.same_count, unchanged))
        if self.failure_count:
            parts.append(click.style(counted(self.failure_count, failed), fg="red"))
        return ", ".join(parts) + "."
def parse_ast(src: str) -> Union[ast3.AST, ast27.AST]:
    """Parse `src` into a typed_ast tree.

    Tries `ast3` with feature version 7, then 6; if both raise `SyntaxError`,
    the result of `ast27.parse` is returned (and its errors propagate).
    """
    for minor_version in (7, 6):
        try:
            tree = ast3.parse(src, feature_version=minor_version)
        except SyntaxError:
            pass
        else:
            return tree

    return ast27.parse(src)
3455 def assert_equivalent(src: str, dst: str) -> None:
3456 """Raise AssertionError if `src` and `dst` aren't equivalent."""
3458 def _v(node: Union[ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]:
3459 """Simple visitor generating strings to compare ASTs by content."""
3460 yield f"{' ' * depth}{node.__class__.__name__}("
3462 for field in sorted(node._fields):
3463 # TypeIgnore has only one field 'lineno' which breaks this comparison
3464 if isinstance(node, (ast3.TypeIgnore, ast27.TypeIgnore)):
3467 # Ignore str kind which is case sensitive / and ignores unicode_literals
3468 if isinstance(node, (ast3.Str, ast27.Str, ast3.Bytes)) and field == "kind":
3472 value = getattr(node, field)
3473 except AttributeError:
3476 yield f"{' ' * (depth+1)}{field}="
3478 if isinstance(value, list):
3480 # Ignore nested tuples within del statements, because we may insert
3481 # parentheses and they change the AST.
3484 and isinstance(node, (ast3.Delete, ast27.Delete))
3485 and isinstance(item, (ast3.Tuple, ast27.Tuple))
3487 for item in item.elts:
3488 yield from _v(item, depth + 2)
3489 elif isinstance(item, (ast3.AST, ast27.AST)):
3490 yield from _v(item, depth + 2)
3492 elif isinstance(value, (ast3.AST, ast27.AST)):
3493 yield from _v(value, depth + 2)
3496 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
3498 yield f"{' ' * depth}) # /{node.__class__.__name__}"
3501 src_ast = parse_ast(src)
3502 except Exception as exc:
3503 raise AssertionError(
3504 f"cannot use --safe with this file; failed to parse source file. "
3505 f"AST error message: {exc}"
3509 dst_ast = parse_ast(dst)
3510 except Exception as exc:
3511 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
3512 raise AssertionError(
3513 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
3514 f"Please report a bug on https://github.com/python/black/issues. "
3515 f"This invalid output might be helpful: {log}"
3518 src_ast_str = "\n".join(_v(src_ast))
3519 dst_ast_str = "\n".join(_v(dst_ast))
3520 if src_ast_str != dst_ast_str:
3521 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
3522 raise AssertionError(
3523 f"INTERNAL ERROR: Black produced code that is not equivalent to "
3525 f"Please report a bug on https://github.com/python/black/issues. "
3526 f"This diff might be helpful: {log}"
3530 def assert_stable(src: str, dst: str, mode: FileMode) -> None:
3531 """Raise AssertionError if `dst` reformats differently the second time."""
3532 newdst = format_str(dst, mode=mode)
3535 diff(src, dst, "source", "first pass"),
3536 diff(dst, newdst, "first pass", "second pass"),
3538 raise AssertionError(
3539 f"INTERNAL ERROR: Black produced different code on the second pass "
3540 f"of the formatter. "
3541 f"Please report a bug on https://github.com/python/black/issues. "
3542 f"This diff might be helpful: {log}"
def dump_to_file(*output: str) -> str:
    """Write every string in `output` to a new temporary file and return its path.

    Each non-empty chunk that doesn't already end with a newline gets one
    appended.  The file is named `blk_*.log` and is not deleted on close.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Build a unified diff (5 lines of context) turning `a` into `b`.

    `a_name` and `b_name` are used as the file names in the diff header.
    """
    import difflib

    def as_lines(text: str) -> List[str]:
        """Split on newlines and terminate every piece with a newline."""
        return [f"{piece}\n" for piece in text.split("\n")]

    delta = difflib.unified_diff(
        as_lines(a), as_lines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(delta)
3569 def cancel(tasks: Iterable[asyncio.Task]) -> None:
3570 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed, and the `concurrent.futures` logger is silenced, even
    if waiting for the cancelled tasks raises.
    """
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` here: `gather()` picks the loop up from the
        # tasks themselves, and the parameter no longer exists on
        # Python 3.10+ (passing it raises TypeError there).
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original` two times in a row.

    A single pass cannot replace matches that overlap each other; string
    normalization relies on the second pass to pick those up.
    """
    result = original
    for _ in range(2):
        result = regex.sub(replacement, result)
    return result
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile `regex`, switching to verbose mode for multi-line patterns.

    A pattern containing a newline is prefixed with `(?x)` before compiling.
    """
    pattern = f"(?x){regex}" if "\n" in regex else regex
    return re.compile(pattern)
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Yield `(index, element)` pairs of `sequence`, last element first.

    Indexes are the elements' real positions, i.e. they count down to 0.
    """
    descending_indexes = range(len(sequence) - 1, -1, -1)
    yield from zip(descending_indexes, reversed(sequence))
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of all
    comments attached after it.  Leaves are visited back to front when
    `reversed` is true.  Iteration stops at the first leaf whose value
    contains a newline.
    """
    walk = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in walk(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        own_length = len(leaf.prefix) + len(leaf.value)
        comments_length = sum(
            len(comment.value) for comment in line.comments_after(leaf)
        )
        yield index, leaf, own_length + comments_length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Tell whether `line` fits within `line_length` characters.

    `line_str` is used as the rendering of `line` when given; otherwise
    `str(line)` with surrounding newlines stripped.  A rendering that spans
    several lines, or a line with standalone comments, never fits.
    """
    rendered = line_str if line_str else str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:
        # multiline strings
        return False

    return not line.contains_standalone_comments()
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    # Only lines that start with `"string".` get the detailed inspection below.
    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        call_count = 0
        dot_count = 0
        # NOTE(review): `next` is bound to the last leaf here and is not
        # reassigned inside the loop, so every comparison below is made
        # against the last leaf - confirm this is intended.
        next = leaves[-1]
        # Walk the remaining leaves from right to left.
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False

                call_count += 1
            elif leaf.type == token.DOT:
                dot_count += 1
            elif leaf.type == token.NAME:
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False

            elif leaf.type not in CLOSING_BRACKETS:
                return False

            if dot_count > 1 and call_count > 1:
                return False

    return True
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # `remainder` is True while we are past the bracket pair opened by
        # `first`; only that part of the line is measured.
        remainder = False
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # checked the entire string and line length wasn't exceeded
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        # Measure the line up to the bracket that `last` closes.
        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
def get_cache_file(mode: FileMode) -> Path:
    """Return the path of the cache file that belongs to `mode`.

    The file lives directly in `CACHE_DIR`; its name embeds the value of
    `mode.get_cache_key()`.
    """
    file_name = f"cache.{mode.get_cache_key()}.pickle"
    return CACHE_DIR / file_name
def read_cache(mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.

    Returns an empty dict when the cache file for `mode` does not exist or its
    contents cannot be unpickled.
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (pickle.UnpicklingError, EOFError, ValueError, IndexError):
            # Besides UnpicklingError, a truncated or empty file raises EOFError
            # and an unsupported protocol raises ValueError; all of them mean
            # "not well formed", so fall back to an empty cache.
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not.

    The result is the pair `(st_mtime, st_size)` taken from `path.stat()`.
    """
    file_stat = path.stat()
    return file_stat.st_mtime, file_stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files.

    Every path is resolved before it is looked up in `cache`, and the returned
    sets hold the resolved paths.
    """
    todo, done = set(), set()
    for src in sources:
        src = src.resolve()
        # A missing entry yields None, which never equals a (mtime, size) pair.
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh information for every path in
    `sources` (keyed by the resolved path) and the result is pickled to the
    cache file for `mode`. An OSError raised along the way is ignored.
    """
    cache_file = get_cache_file(mode)
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        # Dump into a temporary file next to the target, then move it into
        # place with os.replace so the target is swapped in a single step.
        with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
            pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(f.name, cache_file)
    except OSError:
        pass
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    Each Click module is imported on its own and skipped when unavailable, so a
    Click release that lacks one of them does not make this function fail.
    """
    patchable = []
    # `from package import submodule` raises plain ImportError (not its subclass
    # ModuleNotFoundError) when the package exists but the submodule does not,
    # so ImportError is the exception to guard against here.
    try:
        from click import core
    except ImportError:
        pass
    else:
        patchable.append(core)

    try:
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        patchable.append(_unicodefun)

    for module in patchable:
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
3869 def patched_main() -> None:
3875 if __name__ == "__main__":