import asyncio
import io
import itertools
import os
import signal
import sys
import tokenize
import traceback
from abc import ABC, abstractmethod
from collections import defaultdict
from concurrent.futures import Executor, ThreadPoolExecutor, ProcessPoolExecutor
from contextlib import contextmanager, nullcontext
from datetime import datetime
from enum import Enum
from functools import lru_cache, partial, wraps
from multiprocessing import Manager, freeze_support
from pathlib import Path
from typing import (
    Any,
    Callable,
    Collection,
    Dict,
    Generic,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    Sized,
    Tuple,
    TypeVar,
    Union,
)

import click
import toml
from mypy_extensions import mypyc_attr
from appdirs import user_cache_dir
from dataclasses import dataclass, field, replace
from typed_ast import ast3, ast27
from pathspec import PathSpec

from blib2to3.pytree import Node, Leaf, type_repr
from blib2to3 import pygram, pytree
from blib2to3.pgen2 import driver, token
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError

from _black_version import version as __version__

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

import colorama  # noqa: F401

DEFAULT_LINE_LENGTH = 88
DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
DEFAULT_INCLUDES = r"\.pyi?$"
CACHE_DIR = Path(user_cache_dir("black", version=__version__))
STDIN_PLACEHOLDER = "__BLACK_STDIN_FILENAME__"

STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.

# types
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
LeafID = int
Priority = int
Index = int
Timestamp = float
FileSize = int
LN = Union[Leaf, Node]
Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

pygram.initialize(CACHE_DIR)
syms = pygram.python_symbols


class NothingChanged(UserWarning):
    """Raised when reformatted code is the same as source."""


class CannotTransform(Exception):
    """Base class for errors raised by Transformers."""


class CannotSplit(CannotTransform):
    """A readable split that fits the allotted line length is impossible."""


class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""


class BracketMatchError(KeyError):
    """Raised when an opening bracket cannot be matched to a closing bracket."""


T = TypeVar("T")
E = TypeVar("E", bound=Exception)


class Ok(Generic[T]):
    def __init__(self, value: T) -> None:
        self._value = value

    def ok(self) -> T:
        return self._value


class Err(Generic[E]):
    def __init__(self, e: E) -> None:
        self._e = e

    def err(self) -> E:
        return self._e


# The 'Result' return type is used to implement an error-handling model heavily
# influenced by that used by the Rust programming language
# (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
Result = Union[Ok[T], Err[E]]
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[Index]
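

# Illustrative sketch (added for exposition; not part of the original file):
# how code typically produces and consumes the Result types defined above.
def _example_half(n: int) -> TResult[int]:
    """Return Ok(n // 2) for even input, Err(CannotTransform) otherwise."""
    if n % 2:
        return Err(CannotTransform(f"{n} is odd"))
    return Ok(n // 2)


# _example_half(4) -> Ok(2); _example_half(3) -> Err(CannotTransform("3 is odd"))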


class WriteBack(Enum):
    NO = 0
    YES = 1
    DIFF = 2
    CHECK = 3
    COLOR_DIFF = 4

    @classmethod
    def from_configuration(
        cls, *, check: bool, diff: bool, color: bool = False
    ) -> "WriteBack":
        if check and not diff:
            return cls.CHECK

        if diff and color:
            return cls.COLOR_DIFF

        return cls.DIFF if diff else cls.YES
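

# Illustrative mapping (not in the original file): from_configuration picks
#   check=True,  diff=False           -> WriteBack.CHECK
#   diff=True,   color=True           -> WriteBack.COLOR_DIFF
#   diff=True,   color=False          -> WriteBack.DIFF
#   neither check nor diff            -> WriteBack.YES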


class TargetVersion(Enum):
    PY27 = 2
    PY33 = 3
    PY34 = 4
    PY35 = 5
    PY36 = 6
    PY37 = 7
    PY38 = 8
    PY39 = 9

    def is_python2(self) -> bool:
        return self is TargetVersion.PY27


class Feature(Enum):
    # All string literals are unicode
    UNICODE_LITERALS = 1
    F_STRINGS = 2
    NUMERIC_UNDERSCORES = 3
    TRAILING_COMMA_IN_CALL = 4
    TRAILING_COMMA_IN_DEF = 5
    # The following two feature-flags are mutually exclusive, and exactly one should be
    # set for every version of python.
    ASYNC_IDENTIFIERS = 6
    ASYNC_KEYWORDS = 7
    ASSIGNMENT_EXPRESSIONS = 8
    POS_ONLY_ARGUMENTS = 9
    RELAXED_DECORATORS = 10
    FORCE_OPTIONAL_PARENTHESES = 50


VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY35: {
        Feature.UNICODE_LITERALS,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.ASYNC_IDENTIFIERS,
    },
    TargetVersion.PY36: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_IDENTIFIERS,
    },
    TargetVersion.PY37: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
    },
    TargetVersion.PY38: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
        Feature.ASSIGNMENT_EXPRESSIONS,
        Feature.POS_ONLY_ARGUMENTS,
    },
    TargetVersion.PY39: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
        Feature.ASSIGNMENT_EXPRESSIONS,
        Feature.RELAXED_DECORATORS,
        Feature.POS_ONLY_ARGUMENTS,
    },
}


@dataclass
class Mode:
    target_versions: Set[TargetVersion] = field(default_factory=set)
    line_length: int = DEFAULT_LINE_LENGTH
    string_normalization: bool = True
    experimental_string_processing: bool = False
    is_pyi: bool = False

    def get_cache_key(self) -> str:
        if self.target_versions:
            version_str = ",".join(
                str(version.value)
                for version in sorted(self.target_versions, key=lambda v: v.value)
            )
        else:
            version_str = "-"
        parts = [
            version_str,
            str(self.line_length),
            str(int(self.string_normalization)),
            str(int(self.is_pyi)),
        ]
        return ".".join(parts)


# Legacy name, left for integrations.
FileMode = Mode


def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
    return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
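

# Illustrative check (not in the original file): a feature is only usable when
# every targeted version supports it, per the table above.
# supports_feature({TargetVersion.PY38, TargetVersion.PY39},
#                  Feature.ASSIGNMENT_EXPRESSIONS)   -> True
# supports_feature({TargetVersion.PY36, TargetVersion.PY38},
#                  Feature.ASSIGNMENT_EXPRESSIONS)   -> False (PY36 lacks it)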


def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
    """Find the absolute filepath to a pyproject.toml if it exists"""
    path_project_root = find_project_root(path_search_start)
    path_pyproject_toml = path_project_root / "pyproject.toml"
    return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None


def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
    """Parse a pyproject toml file, pulling out relevant parts for Black.

    If parsing fails, will raise a toml.TomlDecodeError.
    """
    pyproject_toml = toml.load(path_config)
    config = pyproject_toml.get("tool", {}).get("black", {})
    return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
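

# Illustrative result (not in the original file): keys arrive in command-line
# style and are normalized to Python identifiers, e.g. a [tool.black] table of
#   {"line-length": 100, "target-version": ["py38"]}
# becomes
#   {"line_length": 100, "target_version": ["py38"]}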


def read_pyproject_toml(
    ctx: click.Context, param: click.Parameter, value: Optional[str]
) -> Optional[str]:
    """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.

    Returns the path to a successfully found and read configuration file, None
    otherwise.
    """
    if not value:
        value = find_pyproject_toml(ctx.params.get("src", ()))
        if value is None:
            return None

    try:
        config = parse_pyproject_toml(value)
    except (toml.TomlDecodeError, OSError) as e:
        raise click.FileError(
            filename=value, hint=f"Error reading configuration file: {e}"
        )

    if not config:
        return None
    else:
        # Sanitize the values to be Click friendly. For more information please see:
        # https://github.com/psf/black/issues/1458
        # https://github.com/pallets/click/issues/1567
        config = {
            k: str(v) if not isinstance(v, (list, dict)) else v
            for k, v in config.items()
        }

    target_version = config.get("target_version")
    if target_version is not None and not isinstance(target_version, list):
        raise click.BadOptionUsage(
            "target-version", "Config key target-version must be a list"
        )

    default_map: Dict[str, Any] = {}
    if ctx.default_map:
        default_map.update(ctx.default_map)
    default_map.update(config)

    ctx.default_map = default_map
    return value


def target_version_option_callback(
    c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
) -> List[TargetVersion]:
    """Compute the target versions from a --target-version flag.

    This is its own function because mypy couldn't infer the type correctly
    when it was a lambda, causing mypyc trouble.
    """
    return [TargetVersion[val.upper()] for val in v]
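

# Illustrative call (not in the original file):
#   target_version_option_callback(ctx, param, ("py36", "py38"))
#   -> [TargetVersion.PY36, TargetVersion.PY38]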


@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
@click.option(
    "-l",
    "--line-length",
    type=int,
    default=DEFAULT_LINE_LENGTH,
    help="How many characters per line to allow.",
    show_default=True,
)
@click.option(
    "-t",
    "--target-version",
    type=click.Choice([v.name.lower() for v in TargetVersion]),
    callback=target_version_option_callback,
    multiple=True,
    help=(
        "Python versions that should be supported by Black's output. [default: per-file"
        " auto-detection]"
    ),
)
@click.option(
    "--pyi",
    is_flag=True,
    help=(
        "Format all input files like typing stubs regardless of file extension (useful"
        " when piping source on standard input)."
    ),
)
@click.option(
    "-S",
    "--skip-string-normalization",
    is_flag=True,
    help="Don't normalize string quotes or prefixes.",
)
@click.option(
    "--experimental-string-processing",
    is_flag=True,
    help=(
        "Experimental option that performs more normalization on string literals."
        " Currently disabled because it leads to some crashes."
    ),
)
@click.option(
    "--check",
    is_flag=True,
    help=(
        "Don't write the files back, just return the status. Return code 0 means"
        " nothing would change. Return code 1 means some files would be reformatted."
        " Return code 123 means there was an internal error."
    ),
)
@click.option(
    "--diff",
    is_flag=True,
    help="Don't write the files back, just output a diff for each file on stdout.",
)
@click.option(
    "--color/--no-color",
    help="Show colored diff. Only applies when `--diff` is given.",
)
@click.option(
    "--fast/--safe",
    is_flag=True,
    help="If --fast given, skip temporary sanity checks. [default: --safe]",
)
@click.option(
    "--include",
    type=str,
    default=DEFAULT_INCLUDES,
    help=(
        "A regular expression that matches files and directories that should be"
        " included on recursive searches. An empty value means all files are included"
        " regardless of the name. Use forward slashes for directories on all platforms"
        " (Windows, too). Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "--exclude",
    type=str,
    default=DEFAULT_EXCLUDES,
    help=(
        "A regular expression that matches files and directories that should be"
        " excluded on recursive searches. An empty value means no paths are excluded."
        " Use forward slashes for directories on all platforms (Windows, too)."
        " Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "--force-exclude",
    type=str,
    help=(
        "Like --exclude, but files and directories matching this regex will be "
        "excluded even when they are passed explicitly as arguments."
    ),
)
@click.option(
    "--stdin-filename",
    type=str,
    help=(
        "The name of the file when passing it through stdin. Useful to make "
        "sure Black will respect --force-exclude option on some "
        "editors that rely on using stdin."
    ),
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    help=(
        "Don't emit non-error messages to stderr. Errors are still emitted; silence"
        " those with 2>/dev/null."
    ),
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help=(
        "Also emit messages to stderr about files that were not changed or were ignored"
        " due to --exclude=."
    ),
)
@click.version_option(version=__version__)
@click.argument(
    "src",
    nargs=-1,
    type=click.Path(
        exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
    ),
    is_eager=True,
)
@click.option(
    "--config",
    type=click.Path(
        exists=True, file_okay=True, dir_okay=False, readable=True, allow_dash=False
    ),
    is_eager=True,
    callback=read_pyproject_toml,
    help="Read configuration from FILE path.",
)
@click.pass_context
def main(
    ctx: click.Context,
    code: Optional[str],
    line_length: int,
    target_version: List[TargetVersion],
    check: bool,
    diff: bool,
    color: bool,
    fast: bool,
    pyi: bool,
    skip_string_normalization: bool,
    experimental_string_processing: bool,
    quiet: bool,
    verbose: bool,
    include: str,
    exclude: str,
    force_exclude: Optional[str],
    stdin_filename: Optional[str],
    src: Tuple[str, ...],
    config: Optional[str],
) -> None:
    """The uncompromising code formatter."""
    write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
    if target_version:
        versions = set(target_version)
    else:
        # We'll autodetect later.
        versions = set()
    mode = Mode(
        target_versions=versions,
        line_length=line_length,
        is_pyi=pyi,
        string_normalization=not skip_string_normalization,
        experimental_string_processing=experimental_string_processing,
    )
    if config and verbose:
        out(f"Using configuration from {config}.", bold=False, fg="blue")
    if code is not None:
        print(format_str(code, mode=mode))
        ctx.exit(0)
    report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
    sources = get_sources(
        ctx=ctx,
        src=src,
        quiet=quiet,
        verbose=verbose,
        include=include,
        exclude=exclude,
        force_exclude=force_exclude,
        report=report,
        stdin_filename=stdin_filename,
    )

    path_empty(
        sources,
        "No Python files are present to be formatted. Nothing to do 😴",
        quiet,
        verbose,
        ctx,
    )

    if len(sources) == 1:
        reformat_one(
            src=sources.pop(),
            fast=fast,
            write_back=write_back,
            mode=mode,
            report=report,
        )
    else:
        reformat_many(
            sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
        )

    if verbose or not quiet:
        out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
        click.secho(str(report), err=True)
    ctx.exit(report.return_code)


def get_sources(
    *,
    ctx: click.Context,
    src: Tuple[str, ...],
    quiet: bool,
    verbose: bool,
    include: str,
    exclude: str,
    force_exclude: Optional[str],
    report: "Report",
    stdin_filename: Optional[str],
) -> Set[Path]:
    """Compute the set of files to be formatted."""
    try:
        include_regex = re_compile_maybe_verbose(include)
    except re.error:
        err(f"Invalid regular expression for include given: {include!r}")
        ctx.exit(2)
    try:
        exclude_regex = re_compile_maybe_verbose(exclude)
    except re.error:
        err(f"Invalid regular expression for exclude given: {exclude!r}")
        ctx.exit(2)
    try:
        force_exclude_regex = (
            re_compile_maybe_verbose(force_exclude) if force_exclude else None
        )
    except re.error:
        err(f"Invalid regular expression for force_exclude given: {force_exclude!r}")
        ctx.exit(2)

    root = find_project_root(src)
    sources: Set[Path] = set()
    path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
    gitignore = get_gitignore(root)

    for s in src:
        if s == "-" and stdin_filename:
            p = Path(stdin_filename)
            is_stdin = True
        else:
            p = Path(s)
            is_stdin = False

        if is_stdin or p.is_file():
            normalized_path = normalize_path_maybe_ignore(p, root, report)
            if normalized_path is None:
                continue

            normalized_path = "/" + normalized_path
            # Hard-exclude any files that match the `--force-exclude` regex.
            if force_exclude_regex:
                force_exclude_match = force_exclude_regex.search(normalized_path)
            else:
                force_exclude_match = None
            if force_exclude_match and force_exclude_match.group(0):
                report.path_ignored(p, "matches the --force-exclude regular expression")
                continue

            if is_stdin:
                p = Path(f"{STDIN_PLACEHOLDER}{str(p)}")

            sources.add(p)
        elif p.is_dir():
            sources.update(
                gen_python_files(
                    p.iterdir(),
                    root,
                    include_regex,
                    exclude_regex,
                    force_exclude_regex,
                    report,
                    gitignore,
                )
            )
        elif s == "-":
            sources.add(p)
        else:
            err(f"invalid path: {s}")
    return sources


def path_empty(
    src: Sized, msg: str, quiet: bool, verbose: bool, ctx: click.Context
) -> None:
    """
    Exit if there is no `src` provided for formatting.
    """
    if not src and (verbose or not quiet):
        out(msg)
        ctx.exit(0)


def reformat_one(
    src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
) -> None:
    """Reformat a single file under `src` without spawning child processes.

    `fast`, `write_back`, and `mode` options are passed to
    :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
    """
    try:
        changed = Changed.NO

        if str(src) == "-":
            is_stdin = True
        elif str(src).startswith(STDIN_PLACEHOLDER):
            is_stdin = True
            # Use the original name again in case we want to print something
            # to the user
            src = Path(str(src)[len(STDIN_PLACEHOLDER) :])
        else:
            is_stdin = False

        if is_stdin:
            if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
                changed = Changed.YES
        else:
            cache: Cache = {}
            if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
                cache = read_cache(mode)
                res_src = src.resolve()
                if res_src in cache and cache[res_src] == get_cache_info(res_src):
                    changed = Changed.CACHED
            if changed is not Changed.CACHED and format_file_in_place(
                src, fast=fast, write_back=write_back, mode=mode
            ):
                changed = Changed.YES
            if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
                write_back is WriteBack.CHECK and changed is Changed.NO
            ):
                write_cache(cache, [src], mode)
        report.done(src, changed)
    except Exception as exc:
        if report.verbose:
            traceback.print_exc()
        report.failed(src, str(exc))


def reformat_many(
    sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
) -> None:
    """Reformat multiple files using a ProcessPoolExecutor."""
    executor: Executor
    loop = asyncio.get_event_loop()
    worker_count = os.cpu_count()
    if sys.platform == "win32":
        # Work around https://bugs.python.org/issue26903
        worker_count = min(worker_count, 60)
    try:
        executor = ProcessPoolExecutor(max_workers=worker_count)
    except (ImportError, OSError):
        # we arrive here if the underlying system does not support multi-processing
        # like in AWS Lambda or Termux, in which case we gracefully fallback to
        # a ThreadPoolExecutor with just a single worker (more workers would not do us
        # any good due to the Global Interpreter Lock)
        executor = ThreadPoolExecutor(max_workers=1)

    try:
        loop.run_until_complete(
            schedule_formatting(
                sources=sources,
                fast=fast,
                write_back=write_back,
                mode=mode,
                report=report,
                loop=loop,
                executor=executor,
            )
        )
    finally:
        shutdown(loop)
        if executor is not None:
            executor.shutdown()


async def schedule_formatting(
    sources: Set[Path],
    fast: bool,
    write_back: WriteBack,
    mode: Mode,
    report: "Report",
    loop: asyncio.AbstractEventLoop,
    executor: Executor,
) -> None:
    """Run formatting of `sources` in parallel using the provided `executor`.

    (Use ProcessPoolExecutors for actual parallelism.)

    `write_back`, `fast`, and `mode` options are passed to
    :func:`format_file_in_place`.
    """
    cache: Cache = {}
    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
        cache = read_cache(mode)
        sources, cached = filter_cached(cache, sources)
        for src in sorted(cached):
            report.done(src, Changed.CACHED)
    if not sources:
        return

    cancelled = []
    sources_to_cache = []
    lock = None
    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
        # For diff output, we need locks to ensure we don't interleave output
        # from different processes.
        manager = Manager()
        lock = manager.Lock()
    tasks = {
        asyncio.ensure_future(
            loop.run_in_executor(
                executor, format_file_in_place, src, fast, mode, write_back, lock
            )
        ): src
        for src in sorted(sources)
    }
    pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
    try:
        loop.add_signal_handler(signal.SIGINT, cancel, pending)
        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
    except NotImplementedError:
        # There are no good alternatives for these on Windows.
        pass
    while pending:
        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            src = tasks.pop(task)
            if task.cancelled():
                cancelled.append(task)
            elif task.exception():
                report.failed(src, str(task.exception()))
            else:
                changed = Changed.YES if task.result() else Changed.NO
                # If the file was written back or was successfully checked as
                # well-formatted, store this information in the cache.
                if write_back is WriteBack.YES or (
                    write_back is WriteBack.CHECK and changed is Changed.NO
                ):
                    sources_to_cache.append(src)
                report.done(src, changed)
    if cancelled:
        await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
    if sources_to_cache:
        write_cache(cache, sources_to_cache, mode)


def format_file_in_place(
    src: Path,
    fast: bool,
    mode: Mode,
    write_back: WriteBack = WriteBack.NO,
    lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
) -> bool:
    """Format file under `src` path. Return True if changed.

    If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
    code to the file.
    `mode` and `fast` options are passed to :func:`format_file_contents`.
    """
    if src.suffix == ".pyi":
        mode = replace(mode, is_pyi=True)

    then = datetime.utcfromtimestamp(src.stat().st_mtime)
    with open(src, "rb") as buf:
        src_contents, encoding, newline = decode_bytes(buf.read())
    try:
        dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
    except NothingChanged:
        return False

    if write_back == WriteBack.YES:
        with open(src, "w", encoding=encoding, newline=newline) as f:
            f.write(dst_contents)
    elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
        now = datetime.utcnow()
        src_name = f"{src}\t{then} +0000"
        dst_name = f"{src}\t{now} +0000"
        diff_contents = diff(src_contents, dst_contents, src_name, dst_name)

        if write_back == WriteBack.COLOR_DIFF:
            diff_contents = color_diff(diff_contents)

        with lock or nullcontext():
            f = io.TextIOWrapper(
                sys.stdout.buffer,
                encoding=encoding,
                newline=newline,
                write_through=True,
            )
            f = wrap_stream_for_windows(f)
            f.write(diff_contents)
            f.detach()

    return True


def color_diff(contents: str) -> str:
    """Inject the ANSI color codes to the diff."""
    lines = contents.split("\n")
    for i, line in enumerate(lines):
        if line.startswith("+++") or line.startswith("---"):
            line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
        elif line.startswith("@@"):
            line = "\033[36m" + line + "\033[0m"  # cyan, reset
        elif line.startswith("+"):
            line = "\033[32m" + line + "\033[0m"  # green, reset
        elif line.startswith("-"):
            line = "\033[31m" + line + "\033[0m"  # red, reset
        lines[i] = line
    return "\n".join(lines)
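

# Illustrative behavior (not in the original file): an added diff line comes
# back wrapped in the green escape sequence,
#   color_diff("+added") == "\033[32m+added\033[0m"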


def wrap_stream_for_windows(
    f: io.TextIOWrapper,
) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32"]:
    """
    Wrap stream with colorama's wrap_stream so colors are shown on Windows.

    If `colorama` is unavailable, the original stream is returned unmodified.
    Otherwise, the `wrap_stream()` function determines whether the stream needs
    to be wrapped for a Windows environment and will accordingly either return
    an `AnsiToWin32` wrapper or the original stream.
    """
    try:
        from colorama.initialise import wrap_stream
    except ImportError:
        return f
    else:
        # Set `strip=False` to avoid needing to modify test_express_diff_with_color.
        return wrap_stream(f, convert=None, strip=False, autoreset=False, wrap=True)


def format_stdin_to_stdout(
    fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
) -> bool:
    """Format file on stdin. Return True if changed.

    If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
    write a diff to stdout. The `mode` argument is passed to
    :func:`format_file_contents`.
    """
    then = datetime.utcnow()
    src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
    dst = src
    try:
        dst = format_file_contents(src, fast=fast, mode=mode)
        return True

    except NothingChanged:
        return False

    finally:
        f = io.TextIOWrapper(
            sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
        )
        if write_back == WriteBack.YES:
            f.write(dst)
        elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
            now = datetime.utcnow()
            src_name = f"STDIN\t{then} +0000"
            dst_name = f"STDOUT\t{now} +0000"
            d = diff(src, dst, src_name, dst_name)
            if write_back == WriteBack.COLOR_DIFF:
                d = color_diff(d)
                f = wrap_stream_for_windows(f)
            f.write(d)
        f.detach()


def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
    """Reformat contents of a file and return new contents.

    If `fast` is False, additionally confirm that the reformatted code is
    valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
    `mode` is passed to :func:`format_str`.
    """
    if not src_contents.strip():
        raise NothingChanged

    dst_contents = format_str(src_contents, mode=mode)
    if src_contents == dst_contents:
        raise NothingChanged

    if not fast:
        assert_equivalent(src_contents, dst_contents)
        assert_stable(src_contents, dst_contents, mode=mode)
    return dst_contents


def format_str(src_contents: str, *, mode: Mode) -> FileContent:
    """Reformat a string and return new contents.

    `mode` determines formatting options, such as how many characters per line are
    allowed.  Example:

    >>> import black
    >>> print(black.format_str("def f(arg:str='')->None:...", mode=black.Mode()))
    def f(arg: str = "") -> None:
        ...

    A more complex example:

    >>> print(
    ...     black.format_str(
    ...         "def f(arg:str='')->None: hey",
    ...         mode=black.Mode(
    ...             target_versions={black.TargetVersion.PY36},
    ...             string_normalization=False,
    ...         ),
    ...     ),
    ... )
    def f(arg: str = '') -> None:
        hey

    """
    src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
    dst_contents = []
    future_imports = get_future_imports(src_node)
    if mode.target_versions:
        versions = mode.target_versions
    else:
        versions = detect_target_versions(src_node)
    normalize_fmt_off(src_node)
    lines = LineGenerator(
        remove_u_prefix="unicode_literals" in future_imports
        or supports_feature(versions, Feature.UNICODE_LITERALS),
        is_pyi=mode.is_pyi,
        normalize_strings=mode.string_normalization,
    )
    elt = EmptyLineTracker(is_pyi=mode.is_pyi)
    empty_line = Line()
    after = 0
    split_line_features = {
        feature
        for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
        if supports_feature(versions, feature)
    }
    for current_line in lines.visit(src_node):
        dst_contents.append(str(empty_line) * after)
        before, after = elt.maybe_empty_lines(current_line)
        dst_contents.append(str(empty_line) * before)
        for line in transform_line(
            current_line, mode=mode, features=split_line_features
        ):
            dst_contents.append(str(line))
    return "".join(dst_contents)


def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
    """Return a tuple of (decoded_contents, encoding, newline).

    `newline` is either CRLF or LF but `decoded_contents` is decoded with
    universal newlines (i.e. only contains LF).
    """
    srcbuf = io.BytesIO(src)
    encoding, lines = tokenize.detect_encoding(srcbuf.readline)
    if not lines:
        return "", encoding, "\n"

    newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
    srcbuf.seek(0)
    with io.TextIOWrapper(srcbuf, encoding) as tiow:
        return tiow.read(), encoding, newline
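

# Illustrative call (not in the original file): CRLF input decodes to LF text
# while remembering the original newline style,
#   decode_bytes(b"x = 1\r\n") == ("x = 1\n", "utf-8", "\r\n")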


def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammar.
    grammars = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
        # Python 3.7+
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars


def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar, pytree.convert)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
    else:
        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
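

# Illustrative usage (not in the original file): parsing a simple module
# yields a blib2to3 tree rooted at file_input.
#   node = lib2to3_parse("x = 1\n")
#   node.type == syms.file_input  # True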


def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)


class Visitor(Generic[T]):
    """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""

    def visit(self, node: LN) -> Iterator[T]:
        """Main method to visit `node` and its children.

        It tries to find a `visit_*()` method for the given `node.type`, like
        `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
        If no dedicated `visit_*()` method is found, chooses `visit_default()`
        instead.

        Then yields objects of type `T` from the selected visitor.
        """
        if node.type < 256:
            name = token.tok_name[node.type]
        else:
            name = str(type_repr(node.type))
        # We explicitly branch on whether a visitor exists (instead of
        # using self.visit_default as the default arg to getattr) in order
        # to save needing to create a bound method object and so mypyc can
        # generate a native call to visit_default.
        visitf = getattr(self, f"visit_{name}", None)
        if visitf:
            yield from visitf(node)
        else:
            yield from self.visit_default(node)

    def visit_default(self, node: LN) -> Iterator[T]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)
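

# Illustrative sketch (added for exposition; not part of the original file):
# a minimal Visitor subclass that yields the value of every NAME leaf it
# encounters, relying on the `visit_NAME` dispatch described above.
class _NameCollectorExample(Visitor[str]):
    def visit_NAME(self, node: Leaf) -> Iterator[str]:
        yield node.value


# list(_NameCollectorExample().visit(lib2to3_parse("x = y\n"))) -> ["x", "y"]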


@dataclass
class DebugVisitor(Visitor[T]):
    tree_depth: int = 0

    def visit_default(self, node: LN) -> Iterator[T]:
        indent = " " * (2 * self.tree_depth)
        if isinstance(node, Node):
            _type = type_repr(node.type)
            out(f"{indent}{_type}", fg="yellow")
            self.tree_depth += 1
            for child in node.children:
                yield from self.visit(child)

            self.tree_depth -= 1
            out(f"{indent}/{_type}", fg="yellow", bold=False)
        else:
            _type = token.tok_name.get(node.type, str(node.type))
            out(f"{indent}{_type}", fg="blue", nl=False)
            if node.prefix:
                # We don't have to handle prefixes for `Node` objects since
                # that delegates to the first child anyway.
                out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
            out(f" {node.value!r}", fg="blue", bold=False)

    @classmethod
    def show(cls, code: Union[str, Leaf, Node]) -> None:
        """Pretty-print the lib2to3 AST of a given string of `code`.

        Convenience method for debugging.
        """
        v: DebugVisitor[None] = DebugVisitor()
        if isinstance(code, str):
            code = lib2to3_parse(code)
        list(v.visit(code))


WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
STATEMENT: Final = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
STANDALONE_COMMENT: Final = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
LOGIC_OPERATORS: Final = {"and", "or"}
COMPARATORS: Final = {
    token.GREATER,
    token.GREATEREQUAL,
    token.LESS,
    token.LESSEQUAL,
    token.NOTEQUAL,
    token.EQEQUAL,
}
MATH_OPERATORS: Final = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
STARS: Final = {token.STAR, token.DOUBLESTAR}
VARARGS_SPECIALS: Final = STARS | {token.SLASH}
VARARGS_PARENTS: Final = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
UNPACKING_PARENTS: Final = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
TEST_DESCENDANTS: Final = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
ASSIGNMENTS: Final = {
    "=",
    "*=",
    "/=",
    "//=",
    "+=",
    "-=",
    "%=",
    "@=",
    "**=",
    "^=",
    "|=",
    "&=",
    "<<=",
    ">>=",
}
COMPREHENSION_PRIORITY: Final = 20
COMMA_PRIORITY: Final = 18
TERNARY_PRIORITY: Final = 16
LOGIC_PRIORITY: Final = 14
STRING_PRIORITY: Final = 12
COMPARATOR_PRIORITY: Final = 10
MATH_PRIORITIES: Final = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 4,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY: Final = 1


@dataclass
class BracketTracker:
    """Keeps track of brackets on a line."""

    depth: int = 0
    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
    delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
    previous: Optional[Leaf] = None
    _for_loop_depths: List[int] = field(default_factory=list)
    _lambda_argument_depths: List[int] = field(default_factory=list)
    invisible: List[Leaf] = field(default_factory=list)

    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.

        All leaves receive an int `bracket_depth` field that stores how deep
        within brackets a given leaf is. 0 means there are no enclosing brackets
        that started on this line.

        If a leaf is itself a closing bracket, it receives an `opening_bracket`
        field that it forms a pair with. This is a one-directional link to
        avoid reference cycles.

        If a leaf is a delimiter (a token on which Black can split the line if
        needed) and it's on depth 0, its `id()` is stored in the tracker's
        `delimiters` field.
        """
        if leaf.type == token.COMMENT:
            return

        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            self.depth -= 1
            try:
                opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            except KeyError as e:
                raise BracketMatchError(
                    "Unable to match a closing bracket to the following opening"
                    f" bracket: {leaf}"
                ) from e
            leaf.opening_bracket = opening_bracket
            if not leaf.value:
                self.invisible.append(leaf)
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            delim = is_split_before_delimiter(leaf, self.previous)
            if delim and self.previous is not None:
                self.delimiters[id(self.previous)] = delim

            delim = is_split_after_delimiter(leaf, self.previous)
            if delim:
                self.delimiters[id(leaf)] = delim
        if leaf.type in OPENING_BRACKETS:
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
            if not leaf.value:
                self.invisible.append(leaf)
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)
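
    # Illustrative trace (not in the original file): running mark() over the
    # leaves of `foo(a)` assigns bracket_depth 0 to `foo`, `(` and `)`, and 1
    # to `a`; the `)` leaf also gets an `opening_bracket` link back to `(`.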

    def any_open_brackets(self) -> bool:
        """Return True if there is a yet-unmatched open bracket on the line."""
        return bool(self.bracket_match)

    def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
        """Return the highest priority of a delimiter found on the line.

        Values are consistent with what `is_split_*_delimiter()` return.
        Raises ValueError on no delimiters.
        """
        return max(v for k, v in self.delimiters.items() if k not in exclude)

    def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
        """Return the number of delimiters with the given `priority`.

        If no `priority` is passed, defaults to max priority on the line.
        """
        if not self.delimiters:
            return 0

        priority = priority or self.max_delimiter_priority()
        return sum(1 for p in self.delimiters.values() if p == priority)

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """In a for loop, or comprehension, the variables are often unpacks.

        To avoid splitting on the comma in this situation, increase the depth of
        tokens between `for` and `in`.
        """
        if leaf.type == token.NAME and leaf.value == "for":
            self.depth += 1
            self._for_loop_depths.append(self.depth)
            return True

        return False

    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
        """See `maybe_increment_for_loop_variable` above for explanation."""
        if (
            self._for_loop_depths
            and self._for_loop_depths[-1] == self.depth
            and leaf.type == token.NAME
            and leaf.value == "in"
        ):
            self.depth -= 1
            self._for_loop_depths.pop()
            return True

        return False

    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
        """In a lambda expression, there might be more than one argument.

        To avoid splitting on the comma in this situation, increase the depth of
        tokens between `lambda` and `:`.
        """
        if leaf.type == token.NAME and leaf.value == "lambda":
            self.depth += 1
            self._lambda_argument_depths.append(self.depth)
            return True

        return False

    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
        """See `maybe_increment_lambda_arguments` above for explanation."""
        if (
            self._lambda_argument_depths
            and self._lambda_argument_depths[-1] == self.depth
            and leaf.type == token.COLON
        ):
            self.depth -= 1
            self._lambda_argument_depths.pop()
            return True

        return False

    def get_open_lsqb(self) -> Optional[Leaf]:
        """Return the most recent opening square bracket (if any)."""
        return self.bracket_match.get((self.depth - 1, token.RSQB))
1465 """Holds leaves and comments. Can be printed with `str(line)`."""
1468 leaves: List[Leaf] = field(default_factory=list)
1469 # keys ordered like `leaves`
1470 comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
1471 bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
1472 inside_brackets: bool = False
1473 should_explode: bool = False
1475 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1476 """Add a new `leaf` to the end of the line.
1478 Unless `preformatted` is True, the `leaf` will receive a new consistent
1479 whitespace prefix and metadata applied by :class:`BracketTracker`.
1480 Trailing commas are maybe removed, unpacked for loop variables are
1481 demoted from being delimiters.
1483 Inline comments are put aside.
1485 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1489 if token.COLON == leaf.type and self.is_class_paren_empty:
1490 del self.leaves[-2:]
1491 if self.leaves and not preformatted:
1492 # Note: at this point leaf.prefix should be empty except for
1493 # imports, for which we only preserve newlines.
1494 leaf.prefix += whitespace(
1495 leaf, complex_subscript=self.is_complex_subscript(leaf)
1497 if self.inside_brackets or not preformatted:
1498 self.bracket_tracker.mark(leaf)
1499 if self.maybe_should_explode(leaf):
1500 self.should_explode = True
1501 if not self.append_comment(leaf):
1502 self.leaves.append(leaf)

    def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Like :func:`append()` but disallow invalid standalone comment structure.

        Raises ValueError when any `leaf` is appended after a standalone comment
        or when a standalone comment is not the first leaf on the line.
        """
        if self.bracket_tracker.depth == 0:
            if self.is_comment:
                raise ValueError("cannot append to standalone comments")

            if self.leaves and leaf.type == STANDALONE_COMMENT:
                raise ValueError(
                    "cannot append standalone comments to a populated line"
                )

        self.append(leaf, preformatted=preformatted)

    @property
    def is_comment(self) -> bool:
        """Is this line a standalone comment?"""
        return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT

    @property
    def is_decorator(self) -> bool:
        """Is this line a decorator?"""
        return bool(self) and self.leaves[0].type == token.AT

    @property
    def is_import(self) -> bool:
        """Is this an import line?"""
        return bool(self) and is_import(self.leaves[0])

    @property
    def is_class(self) -> bool:
        """Is this line a class definition?"""
        return (
            bool(self)
            and self.leaves[0].type == token.NAME
            and self.leaves[0].value == "class"
        )

    @property
    def is_stub_class(self) -> bool:
        """Is this line a class definition with a body consisting only of "..."?"""
        return self.is_class and self.leaves[-3:] == [
            Leaf(token.DOT, ".") for _ in range(3)
        ]

    @property
    def is_def(self) -> bool:
        """Is this a function definition? (Also returns True for async defs.)"""
        try:
            first_leaf = self.leaves[0]
        except IndexError:
            return False

        try:
            second_leaf: Optional[Leaf] = self.leaves[1]
        except IndexError:
            second_leaf = None
        return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
            first_leaf.type == token.ASYNC
            and second_leaf is not None
            and second_leaf.type == token.NAME
            and second_leaf.value == "def"
        )

    @property
    def is_class_paren_empty(self) -> bool:
        """Is this a class with no base classes but using parentheses?

        Those are unnecessary and should be removed.
        """
        return (
            bool(self)
            and len(self.leaves) == 4
            and self.is_class
            and self.leaves[2].type == token.LPAR
            and self.leaves[2].value == "("
            and self.leaves[3].type == token.RPAR
            and self.leaves[3].value == ")"
        )

    @property
    def is_triple_quoted_string(self) -> bool:
        """Is the line a triple quoted string?"""
        return (
            bool(self)
            and self.leaves[0].type == token.STRING
            and self.leaves[0].value.startswith(('"""', "'''"))
        )

    def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
        """If so, needs to be split before emitting."""
        for leaf in self.leaves:
            if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit:
                return True

        return False

    def contains_uncollapsable_type_comments(self) -> bool:
        ignored_ids = set()
        try:
            last_leaf = self.leaves[-1]
            ignored_ids.add(id(last_leaf))
            if last_leaf.type == token.COMMA or (
                last_leaf.type == token.RPAR and not last_leaf.value
            ):
                # When trailing commas or optional parens are inserted by Black for
                # consistency, comments after the previous last element are not moved
                # (they don't have to, rendering will still be correct). So we ignore
                # trailing commas and invisible parens.
                last_leaf = self.leaves[-2]
                ignored_ids.add(id(last_leaf))
        except IndexError:
            return False

        # A type comment is uncollapsable if it is attached to a leaf
        # that isn't at the end of the line (since that could cause it
        # to get associated to a different argument) or if there are
        # comments before it (since that could cause it to get hidden
        # behind a comment).
        comment_seen = False
        for leaf_id, comments in self.comments.items():
            for comment in comments:
                if is_type_comment(comment):
                    if comment_seen or (
                        not is_type_comment(comment, " ignore")
                        and leaf_id not in ignored_ids
                    ):
                        return True

                comment_seen = True

        return False

    def contains_unsplittable_type_ignore(self) -> bool:
        if not self.leaves:
            return False

        # If a 'type: ignore' is attached to the end of a line, we
        # can't split the line, because we can't know which of the
        # subexpressions the ignore was meant to apply to.
        #
        # We only want this to apply to actual physical lines from the
        # original source, though: we don't want the presence of a
        # 'type: ignore' at the end of a multiline expression to
        # justify pushing it all onto one line. Thus we
        # (unfortunately) need to check the actual source lines and
        # only report an unsplittable 'type: ignore' if this line was
        # one line in the original code.

        # Grab the first and last line numbers, skipping generated leaves
        first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
        last_line = next(
            (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
        )

        if first_line == last_line:
            # We look at the last two leaves since a comma or an
            # invisible paren could have been added at the end of the
            # line.
            for node in self.leaves[-2:]:
                for comment in self.comments.get(id(node), []):
                    if is_type_comment(comment, " ignore"):
                        return True

        return False

    def contains_multiline_strings(self) -> bool:
        return any(is_multiline_string(leaf) for leaf in self.leaves)

    def maybe_should_explode(self, closing: Leaf) -> bool:
        """Return True if this line should explode (always be split), that is when:
        - there's a trailing comma here; and
        - it's not a one-tuple.
        """
        if not (
            closing.type in CLOSING_BRACKETS
            and self.leaves
            and self.leaves[-1].type == token.COMMA
        ):
            return False

        if closing.type in {token.RBRACE, token.RSQB}:
            return True

        if self.is_import:
            return True

        if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
            return True

        return False

    def append_comment(self, comment: Leaf) -> bool:
        """Add an inline or standalone comment to the line."""
        if (
            comment.type == STANDALONE_COMMENT
            and self.bracket_tracker.any_open_brackets()
        ):
            comment.prefix = ""
            return False

        if comment.type != token.COMMENT:
            return False

        if not self.leaves:
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        last_leaf = self.leaves[-1]
        if (
            last_leaf.type == token.RPAR
            and not last_leaf.value
            and last_leaf.parent
            and len(list(last_leaf.parent.leaves())) <= 3
            and not is_type_comment(comment)
        ):
            # Comments on an optional parens wrapping a single leaf should belong to
            # the wrapped node except if it's a type comment. Pinning the comment like
            # this avoids unstable formatting caused by comment migration.
            if len(self.leaves) < 2:
                comment.type = STANDALONE_COMMENT
                comment.prefix = ""
                return False

            last_leaf = self.leaves[-2]
        self.comments.setdefault(id(last_leaf), []).append(comment)
        return True

    def comments_after(self, leaf: Leaf) -> List[Leaf]:
        """Generate comments that should appear directly after `leaf`."""
        return self.comments.get(id(leaf), [])

    def remove_trailing_comma(self) -> None:
        """Remove the trailing comma and move the comments attached to it."""
        trailing_comma = self.leaves.pop()
        trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
        self.comments.setdefault(id(self.leaves[-1]), []).extend(
            trailing_comma_comments
        )

    def is_complex_subscript(self, leaf: Leaf) -> bool:
        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
        open_lsqb = self.bracket_tracker.get_open_lsqb()
        if open_lsqb is None:
            return False

        subscript_start = open_lsqb.next_sibling
        if isinstance(subscript_start, Node):
            if subscript_start.type == syms.listmaker:
                return False

            if subscript_start.type == syms.subscriptlist:
                subscript_start = child_towards(subscript_start, leaf)
        return subscript_start is not None and any(
            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
        )

    def clone(self) -> "Line":
        return Line(
            depth=self.depth,
            inside_brackets=self.inside_brackets,
            should_explode=self.should_explode,
        )

    def __str__(self) -> str:
        """Render the line."""
        if not self:
            return "\n"

        indent = "    " * self.depth
        leaves = iter(self.leaves)
        first = next(leaves)
        res = f"{first.prefix}{indent}{first.value}"
        for leaf in leaves:
            res += str(leaf)
        for comment in itertools.chain.from_iterable(self.comments.values()):
            res += str(comment)
        return res + "\n"

    def __bool__(self) -> bool:
        """Return True if the line has leaves or comments."""
        return bool(self.leaves or self.comments)


@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet. It assumes
    the prefix of the first leaf consists of optional newlines. Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    previous_line: Optional[Line] = None
    previous_after: int = 0
    previous_defs: List[int] = field(default_factory=list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).
        """
        before, after = self._maybe_empty_lines(current_line)
        before = (
            # Black should not insert empty lines at the beginning
            # of the file or module.
            0
            if self.previous_line is None
            else before - self.previous_after
        )
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        max_allowed = 1
        if current_line.depth == 0:
            max_allowed = 1 if self.is_pyi else 2
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = first_leaf.prefix.count("\n")
            before = min(before, max_allowed)
            first_leaf.prefix = ""
        else:
            before = 0
        depth = current_line.depth
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2
        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        if (
            self.previous_line
            and self.previous_line.is_import
            and not current_line.is_import
            and depth == self.previous_line.depth
        ):
            return (before or 1), 0

        if (
            self.previous_line
            and self.previous_line.is_class
            and current_line.is_triple_quoted_string
        ):
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)
        if self.previous_line is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if self.previous_line.is_decorator:
            if self.is_pyi and current_line.is_stub_class:
                # Insert an empty line after a decorated stub class
                return 0, 1

            return 0, 0

        if self.previous_line.depth < current_line.depth and (
            self.previous_line.is_class or self.previous_line.is_def
        ):
            return 0, 0

        if (
            self.previous_line.is_comment
            and self.previous_line.depth == current_line.depth
            and before == 0
        ):
            return 0, 0

        if self.is_pyi:
            if self.previous_line.depth > current_line.depth:
                newlines = 1
            elif current_line.is_class or self.previous_line.is_class:
                if current_line.is_stub_class and self.previous_line.is_stub_class:
                    # No blank line between classes with an empty body
                    newlines = 0
                else:
                    newlines = 1
            elif (
                current_line.is_def or current_line.is_decorator
            ) and not self.previous_line.is_def:
                # Blank line between a block of functions (maybe with preceding
                # decorators) and a block of non-functions
                newlines = 1
            else:
                newlines = 0
        else:
            newlines = 2
        if current_line.depth and newlines:
            newlines -= 1
        return newlines, 0


@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects. Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    is_pyi: bool = False
    normalize_strings: bool = True
    current_line: Line = field(default_factory=Line)
    remove_u_prefix: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, only emit if it makes sense.
        If the line is too long, split it first and then generate.

        If any lines were generated, set up a new current_line.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments. At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level. Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.is_pyi and is_stub_suite(node):
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        if is_docstring(leaf) and "\\\n" not in leaf.value:
            # We're ignoring docstrings with backslash newline escapes because changing
            # indentation of those changes the AST representation of the code.
            prefix = get_string_prefix(leaf.value)
            lead_len = len(prefix) + 3
            tail_len = -3
            indent = " " * 4 * self.current_line.depth
            docstring = fix_docstring(leaf.value[lead_len:tail_len], indent)
            if docstring:
                if leaf.value[lead_len - 1] == docstring[0]:
                    docstring = " " + docstring
                if leaf.value[tail_len + 1] == docstring[-1]:
                    docstring = docstring + " "
            leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:]

        yield from self.visit_default(leaf)

    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        v = self.visit_stmt
        Ø: Final = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators


IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
2144 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901
2145 """Return whitespace prefix if needed for the given `leaf`.
2147 `complex_subscript` signals whether the given leaf is part of a subscription
2148 which has non-trivial arguments, like arithmetic expressions or function calls.
2156 if t in ALWAYS_NO_SPACE:
2159 if t == token.COMMENT:
2162 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
2163 if t == token.COLON and p.type not in {
2170 prev = leaf.prev_sibling
2172 prevp = preceding_leaf(p)
2173 if not prevp or prevp.type in OPENING_BRACKETS:
2176 if t == token.COLON:
2177 if prevp.type == token.COLON:
2180 elif prevp.type != token.COMMA and not complex_subscript:
2185 if prevp.type == token.EQUAL:
2187 if prevp.parent.type in {
2195 elif prevp.parent.type == syms.typedargslist:
2196 # A bit hacky: if the equal sign has whitespace, it means we
2197 # previously found it's a typed argument. So, we're using
2201 elif prevp.type in VARARGS_SPECIALS:
2202 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2205 elif prevp.type == token.COLON:
2206 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
2207 return SPACE if complex_subscript else NO
2211 and prevp.parent.type == syms.factor
2212 and prevp.type in MATH_OPERATORS
2217 prevp.type == token.RIGHTSHIFT
2219 and prevp.parent.type == syms.shift_expr
2220 and prevp.prev_sibling
2221 and prevp.prev_sibling.type == token.NAME
2222 and prevp.prev_sibling.value == "print" # type: ignore
2224 # Python 2 print chevron
2226 elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
2227 # no space in decorators
2230 elif prev.type in OPENING_BRACKETS:
2233 if p.type in {syms.parameters, syms.arglist}:
2234 # untyped function signatures or calls
2235 if not prev or prev.type != token.COMMA:
2238 elif p.type == syms.varargslist:
2240 if prev and prev.type != token.COMMA:
2243 elif p.type == syms.typedargslist:
2244 # typed function signatures
2248 if t == token.EQUAL:
2249 if prev.type != syms.tname:
2252 elif prev.type == token.EQUAL:
2253 # A bit hacky: if the equal sign has whitespace, it means we
2254 # previously found it's a typed argument. So, we're using that, too.
2257 elif prev.type != token.COMMA:
2260 elif p.type == syms.tname:
2263 prevp = preceding_leaf(p)
2264 if not prevp or prevp.type != token.COMMA:
2267 elif p.type == syms.trailer:
2268 # attributes and calls
2269 if t == token.LPAR or t == token.RPAR:
2274 prevp = preceding_leaf(p)
2275 if not prevp or prevp.type != token.NUMBER:
2278 elif t == token.LSQB:
2281 elif prev.type != token.COMMA:
2284 elif p.type == syms.argument:
2286 if t == token.EQUAL:
2290 prevp = preceding_leaf(p)
2291 if not prevp or prevp.type == token.LPAR:
2294 elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
2297 elif p.type == syms.decorator:
2301 elif p.type == syms.dotted_name:
2305 prevp = preceding_leaf(p)
2306 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
2309 elif p.type == syms.classdef:
2313 if prev and prev.type == token.LPAR:
2316 elif p.type in {syms.subscript, syms.sliceop}:
2319 assert p.parent is not None, "subscripts are always parented"
2320 if p.parent.type == syms.subscriptlist:
2325 elif not complex_subscript:
2328 elif p.type == syms.atom:
2329 if prev and t == token.DOT:
2330 # dots, but not the first one.
2333 elif p.type == syms.dictsetmaker:
2335 if prev and prev.type == token.DOUBLESTAR:
2338 elif p.type in {syms.factor, syms.star_expr}:
2341 prevp = preceding_leaf(p)
2342 if not prevp or prevp.type in OPENING_BRACKETS:
2345 prevp_parent = prevp.parent
2346 assert prevp_parent is not None
2347 if prevp.type == token.COLON and prevp_parent.type in {
2353 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
2356 elif t in {token.NAME, token.NUMBER, token.STRING}:
2359 elif p.type == syms.import_from:
2361 if prev and prev.type == token.DOT:
2364 elif t == token.NAME:
2368 if prev and prev.type == token.DOT:
2371 elif p.type == syms.sliceop:
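# Net effect of the rules above (an illustrative sketch of the spacing that
# whitespace() produces; added annotation, not part of the original file):
#
#     ham[1:9]            # simple subscript: no space around the colon
#     ham[lower + 1 :]    # complex subscript: spaces around the colon
#     @decorator          # no space after the '@' in decorators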
2377 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
2378 """Return the first leaf that precedes `node`, if any."""
2380 res = node.prev_sibling
2382 if isinstance(res, Leaf):
2386 return list(res.leaves())[-1]
2395 def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
2396 """Return if the `node` and its previous siblings match types against the provided
2397 list of tokens; the provided `node`has its type matched against the last element in
2398 the list. `None` can be used as the first element to declare that the start of the
2399 list is anchored at the start of its parent's children."""
2402 if tokens[-1] is None:
2406 if node.type != tokens[-1]:
2408 return prev_siblings_are(node.prev_sibling, tokens[:-1])
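# A hedged usage sketch: for the statement `x: int = 5`, calling
# prev_siblings_are(annassign_node, [None, token.NAME, syms.annassign])
# returns True (`annassign_node` is a hypothetical name for the annassign
# child): the NAME leaf `x` is anchored at the very start of its parent's
# children and is immediately followed by the annassign node.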
2411 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
2412 """Return the child of `ancestor` that contains `descendant`."""
2413 node: Optional[LN] = descendant
2414 while node and node.parent != ancestor:
2419 def container_of(leaf: Leaf) -> LN:
2420 """Return `leaf` or one of its ancestors that is the topmost container of it.
2422 By "container" we mean a node where `leaf` is the very first child.
2424 same_prefix = leaf.prefix
2425 container: LN = leaf
2427 parent = container.parent
2431 if parent.children[0].prefix != same_prefix:
2434 if parent.type == syms.file_input:
2437 if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
2444 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2445 """Return the priority of the `leaf` delimiter, given a line break after it.
2447 The delimiter priorities returned here are from those delimiters that would
2448 cause a line break after themselves.
2450 Higher numbers are higher priority.
2452 if leaf.type == token.COMMA:
2453 return COMMA_PRIORITY
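# Doctest-style sketch of the contract (assumes the elided fallthrough
# returns 0 for anything that is not a break-after delimiter):
#
#     >>> is_split_after_delimiter(Leaf(token.COMMA, ",")) == COMMA_PRIORITY
#     True
#     >>> is_split_after_delimiter(Leaf(token.NAME, "x"))
#     0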
2458 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2459 """Return the priority of the `leaf` delimiter, given a line break before it.
2461 The delimiter priorities returned here are from those delimiters that would
2462 cause a line break before themselves.
2464 Higher numbers are higher priority.
2466 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2467 # * and ** might also be MATH_OPERATORS but in this case they are not.
2468 # Don't treat them as a delimiter.
2472 leaf.type == token.DOT
2474 and leaf.parent.type not in {syms.import_from, syms.dotted_name}
2475 and (previous is None or previous.type in CLOSING_BRACKETS)
2480 leaf.type in MATH_OPERATORS
2482 and leaf.parent.type not in {syms.factor, syms.star_expr}
2484 return MATH_PRIORITIES[leaf.type]
2486 if leaf.type in COMPARATORS:
2487 return COMPARATOR_PRIORITY
2490 leaf.type == token.STRING
2491 and previous is not None
2492 and previous.type == token.STRING
2494 return STRING_PRIORITY
2496 if leaf.type not in {token.NAME, token.ASYNC}:
2502 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
2503 or leaf.type == token.ASYNC
2506 not isinstance(leaf.prev_sibling, Leaf)
2507 or leaf.prev_sibling.value != "async"
2509 return COMPREHENSION_PRIORITY
2514 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2516 return COMPREHENSION_PRIORITY
2518 if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2519 return TERNARY_PRIORITY
2521 if leaf.value == "is":
2522 return COMPARATOR_PRIORITY
2527 and leaf.parent.type in {syms.comp_op, syms.comparison}
2529 previous is not None
2530 and previous.type == token.NAME
2531 and previous.value == "not"
2534 return COMPARATOR_PRIORITY
2539 and leaf.parent.type == syms.comp_op
2541 previous is not None
2542 and previous.type == token.NAME
2543 and previous.value == "is"
2546 return COMPARATOR_PRIORITY
2548 if leaf.value in LOGIC_OPERATORS and leaf.parent:
2549 return LOGIC_PRIORITY
2554 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2555 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
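# Usage sketch: everything between the two markers below is left untouched
# by the formatter (illustrative example, not part of the original file):
#
#     # fmt: off
#     identity = [
#         1, 0, 0,
#         0, 1, 0,
#         0, 0, 1,
#     ]
#     # fmt: on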
2558 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2559 """Clean the prefix of the `leaf` and generate comments from it, if any.
2561 Comments in lib2to3 are shoved into the whitespace prefix. This happens
2562 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
2563 move because it does away with modifying the grammar to include all the
2564 possible places in which comments can be placed.
2566 The sad consequence for us though is that comments don't "belong" anywhere.
2567 This is why this function generates simple parentless Leaf objects for
2568 comments. We simply don't know what the correct parent should be.
2570 No matter though, we can live without this. We really only need to
2571 differentiate between inline and standalone comments. The latter don't
2572 share the line with any code.
2574 Inline comments are emitted as regular token.COMMENT leaves. Standalone
2575 comments are emitted with a fake STANDALONE_COMMENT token identifier.
2577 for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2578 yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2583 """Describes a piece of syntax that is a comment.
2585 It's not a :class:`blib2to3.pytree.Leaf` so that:
2587 * it can be cached (`Leaf` objects should not be reused more than once as
2588 they store their lineno, column, prefix, and parent information);
2589 * `newlines` and `consumed` fields are kept separate from the `value`. This
2590 simplifies handling of special marker comments like ``# fmt: off/on``.
2593 type: int # token.COMMENT or STANDALONE_COMMENT
2594 value: str # content of the comment
2595 newlines: int # how many newlines before the comment
2596 consumed: int # how many characters of the original leaf's prefix did we consume
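# For example, `list_comments` (below) parses the prefix "\n# hello\n" into a
# single standalone comment; an illustrative sketch of the resulting value:
#
#     ProtoComment(type=STANDALONE_COMMENT, value="# hello", newlines=1, consumed=9)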
2599 @lru_cache(maxsize=4096)
2600 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2601 """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2602 result: List[ProtoComment] = []
2603 if not prefix or "#" not in prefix:
2609 for index, line in enumerate(prefix.split("\n")):
2610 consumed += len(line) + 1 # adding the length of the split '\n'
2611 line = line.lstrip()
2614 if not line.startswith("#"):
2615 # Escaped newlines outside of a comment are not really newlines at
2616 # all. We treat a single-line comment following an escaped newline
2617 # as a simple trailing comment.
2618 if line.endswith("\\"):
2622 if index == ignored_lines and not is_endmarker:
2623 comment_type = token.COMMENT # simple trailing comment
2625 comment_type = STANDALONE_COMMENT
2626 comment = make_comment(line)
2629 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2636 def make_comment(content: str) -> str:
2637 """Return a consistently formatted comment from the given `content` string.
2639 All comments (except for "##", "#!", "#:", "#'", "#%%") should have a single
2640 space between the hash sign and the content.
2642 If `content` didn't start with a hash sign, one is provided.
2644 content = content.rstrip()
2648 if content[0] == "#":
2649 content = content[1:]
2650 if content and content[0] not in " !:#'%":
2651 content = " " + content
2652 return "#" + content
2656 line: Line, mode: Mode, features: Collection[Feature] = ()
2657 ) -> Iterator[Line]:
2658 """Transform a `line`, potentially splitting it into many lines.
2660 They should fit in the allotted `line_length` but might not be able to.
2662 `features` are syntactical features that may be used in the output.
2668 line_str = line_to_string(line)
2670 def init_st(ST: Type[StringTransformer]) -> StringTransformer:
2671 """Initialize StringTransformer"""
2672 return ST(mode.line_length, mode.string_normalization)
2674 string_merge = init_st(StringMerger)
2675 string_paren_strip = init_st(StringParenStripper)
2676 string_split = init_st(StringSplitter)
2677 string_paren_wrap = init_st(StringParenWrapper)
2679 transformers: List[Transformer]
2681 not line.contains_uncollapsable_type_comments()
2682 and not line.should_explode
2684 is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
2685 or line.contains_unsplittable_type_ignore()
2687 and not (line.inside_brackets and line.contains_standalone_comments())
2689 # Only apply basic string preprocessing, since lines shouldn't be split here.
2690 if mode.experimental_string_processing:
2691 transformers = [string_merge, string_paren_strip]
2695 transformers = [left_hand_split]
2698 def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2699 """Wraps calls to `right_hand_split`.
2701 The calls increasingly `omit` right-hand trailers (bracket pairs with
2702 content), meaning the trailers get glued together to split on another
2703 bracket pair instead.
2705 for omit in generate_trailers_to_omit(line, mode.line_length):
2707 right_hand_split(line, mode.line_length, features, omit=omit)
2709 # Note: this check is only able to figure out if the first line of the
2710 # *current* transformation fits in the line length. This is true only
2711 # for simple cases. All others require running more transforms via
2712 # `transform_line()`. This check doesn't know if those would succeed.
2713 if is_line_short_enough(lines[0], line_length=mode.line_length):
2717 # All splits failed, best effort split with no omits.
2718 # This mostly happens to multiline strings that are by definition
2719 # reported as not fitting a single line, as well as lines that contain
2720 # trailing commas (those have to be exploded).
2721 yield from right_hand_split(
2722 line, line_length=mode.line_length, features=features
2725 if mode.experimental_string_processing:
2726 if line.inside_brackets:
2732 standalone_comment_split,
2745 if line.inside_brackets:
2746 transformers = [delimiter_split, standalone_comment_split, rhs]
2748 transformers = [rhs]
2750 for transform in transformers:
2751 # We are accumulating lines in `result` because we might want to abort
2752 # mission and return the original line in the end, or attempt a different
2755 result = run_transformer(line, transform, mode, features, line_str=line_str)
2756 except CannotTransform:
2766 @dataclass # type: ignore
2767 class StringTransformer(ABC):
2769 An implementation of the Transformer protocol that relies on its
2770 subclasses overriding the template methods `do_match(...)` and
2771 `do_transform(...)`.
2773 This Transformer works exclusively on strings (for example, by merging
2776 The following sections can be found among the docstrings of each concrete
2777 StringTransformer subclass.
2780 Which requirements must the given Line meet for this
2781 StringTransformer to be applied?
2784 If the given Line meets all of the above requirements, which string
2785 transformations can you expect to be applied to it by this
2789 What contractual agreements does this StringTransformer have with other
2790 StringTransformers? Such collaborations should be eliminated/minimized
2791 as much as possible.
2795 normalize_strings: bool
2796 __name__ = "StringTransformer"
2799 def do_match(self, line: Line) -> TMatchResult:
2802 * Ok(string_idx) such that `line.leaves[string_idx]` is our target
2803 string, if a match was able to be made.
2805 * Err(CannotTransform), if a match was not able to be made.
2809 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2812 * Ok(new_line) where new_line is the new transformed line.
2814 * Err(CannotTransform) if the transformation failed for some reason. The
2815 `do_match(...)` template method should usually be used to reject
2816 the form of the given Line, but in some cases it is difficult to
2817 know whether or not a Line meets the StringTransformer's
2818 requirements until the transformation is already midway.
2821 This method should NOT mutate @line directly, but it MAY mutate the
2822 Line's underlying Node structure. (WARNING: If the underlying Node
2823 structure IS altered, then this method should NOT be allowed to
2824 yield a CannotTransform after that point.)
2827 def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
2829 StringTransformer instances have a call signature that mirrors that of
2830 the Transformer type.
2833 CannotTransform(...) if the concrete StringTransformer class is unable
2836 # Optimization to avoid calling `self.do_match(...)` when the line does
2837 # not contain any string.
2838 if not any(leaf.type == token.STRING for leaf in line.leaves):
2839 raise CannotTransform("There are no strings in this line.")
2841 match_result = self.do_match(line)
2843 if isinstance(match_result, Err):
2844 cant_transform = match_result.err()
2845 raise CannotTransform(
2846 f"The string transformer {self.__class__.__name__} does not recognize"
2847 " this line as one that it can transform."
2848 ) from cant_transform
2850 string_idx = match_result.ok()
2852 for line_result in self.do_transform(line, string_idx):
2853 if isinstance(line_result, Err):
2854 cant_transform = line_result.err()
2855 raise CannotTransform(
2856 "StringTransformer failed while attempting to transform string."
2857 ) from cant_transform
2858 line = line_result.ok()
2864 """A custom (i.e. manual) string split.
2866 A single CustomSplit instance represents a single substring.
2869 Consider the following string:
2876 This string will correspond to the following three CustomSplit instances:
2878 CustomSplit(False, 16)
2879 CustomSplit(False, 17)
2880 CustomSplit(True, 16)
2888 class CustomSplitMapMixin:
2890 This mixin class is used to map merged strings to a sequence of
2891 CustomSplits, which will then be used to re-split the strings iff none of
2892 the resultant substrings go over the configured max line length.
2895 _Key = Tuple[StringID, str]
2896 _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)
2899 def _get_key(string: str) -> "CustomSplitMapMixin._Key":
2902 A unique identifier that is used internally to map @string to a
2903 group of custom splits.
2905 return (id(string), string)
2907 def add_custom_splits(
2908 self, string: str, custom_splits: Iterable[CustomSplit]
2910 """Custom Split Map Setter Method
2913 Adds a mapping from @string to the custom splits @custom_splits.
2915 key = self._get_key(string)
2916 self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
2918 def pop_custom_splits(self, string: str) -> List[CustomSplit]:
2919 """Custom Split Map Getter Method
2922 * A list of the custom splits that are mapped to @string, if any
2928 Deletes the mapping between @string and its associated custom
2929 splits (which are returned to the caller).
2931 key = self._get_key(string)
2933 custom_splits = self._CUSTOM_SPLIT_MAP[key]
2934 del self._CUSTOM_SPLIT_MAP[key]
2936 return list(custom_splits)
2938 def has_custom_splits(self, string: str) -> bool:
2941 True iff @string is associated with a set of custom splits.
2943 key = self._get_key(string)
2944 return key in self._CUSTOM_SPLIT_MAP
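# A minimal round-trip sketch of the mixin contract (the `_Demo` subclass
# and the CustomSplit field names are assumptions based on the docstring
# above):
#
#     >>> class _Demo(CustomSplitMapMixin): ...
#     >>> demo = _Demo()
#     >>> merged = "'one two'"
#     >>> demo.add_custom_splits(merged, [CustomSplit(False, 4)])
#     >>> demo.has_custom_splits(merged)
#     True
#     >>> demo.pop_custom_splits(merged)
#     [CustomSplit(has_prefix=False, break_idx=4)]
#     >>> demo.has_custom_splits(merged)
#     False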
2947 class StringMerger(CustomSplitMapMixin, StringTransformer):
2948 """StringTransformer that merges strings together.
2951 (A) The line contains adjacent strings such that ALL of the validation checks
2952 listed in StringMerger.__validate_msg(...)'s docstring pass.
2954 (B) The line contains a string which uses line continuation backslashes.
2957 Depending on which of the two requirements above were met, either:
2959 (A) The string group associated with the target string is merged.
2961 (B) All line-continuation backslashes are removed from the target string.
2964 StringMerger provides custom split information to StringSplitter.
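Example (an illustrative before/after sketch, not from the original
docstring):

    (A)  x = "Hello " "World"  -->  x = "Hello World"
    (B)  a string whose value contains a backslash-newline has that
         line continuation removed from its value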
2967 def do_match(self, line: Line) -> TMatchResult:
2970 is_valid_index = is_valid_index_factory(LL)
2972 for (i, leaf) in enumerate(LL):
2974 leaf.type == token.STRING
2975 and is_valid_index(i + 1)
2976 and LL[i + 1].type == token.STRING
2980 if leaf.type == token.STRING and "\\\n" in leaf.value:
2983 return TErr("This line has no strings that need merging.")
2985 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2987 rblc_result = self.__remove_backslash_line_continuation_chars(
2988 new_line, string_idx
2990 if isinstance(rblc_result, Ok):
2991 new_line = rblc_result.ok()
2993 msg_result = self.__merge_string_group(new_line, string_idx)
2994 if isinstance(msg_result, Ok):
2995 new_line = msg_result.ok()
2997 if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
2998 msg_cant_transform = msg_result.err()
2999 rblc_cant_transform = rblc_result.err()
3000 cant_transform = CannotTransform(
3001 "StringMerger failed to merge any strings in this line."
3004 # Chain the errors together using `__cause__`.
3005 msg_cant_transform.__cause__ = rblc_cant_transform
3006 cant_transform.__cause__ = msg_cant_transform
3008 yield Err(cant_transform)
3013 def __remove_backslash_line_continuation_chars(
3014 line: Line, string_idx: int
3017 Merge strings that were split across multiple lines using
3018 line-continuation backslashes.
3021 Ok(new_line), if @line contains backslash line-continuation
3024 Err(CannotTransform), otherwise.
3028 string_leaf = LL[string_idx]
3030 string_leaf.type == token.STRING
3031 and "\\\n" in string_leaf.value
3032 and not has_triple_quotes(string_leaf.value)
3035 f"String leaf {string_leaf} does not contain any backslash line"
3036 " continuation characters."
3039 new_line = line.clone()
3040 new_line.comments = line.comments.copy()
3041 append_leaves(new_line, line, LL)
3043 new_string_leaf = new_line.leaves[string_idx]
3044 new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
3048 def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
3050 Merges the string group (i.e. set of adjacent strings) where the first
3051 string in the group is `line.leaves[string_idx]`.
3054 Ok(new_line), if ALL of the validation checks found in
3055 __validate_msg(...) pass.
3057 Err(CannotTransform), otherwise.
3061 is_valid_index = is_valid_index_factory(LL)
3063 vresult = self.__validate_msg(line, string_idx)
3064 if isinstance(vresult, Err):
3067 # If the string group is wrapped inside an Atom node, we must make sure
3068 # to later replace that Atom with our new (merged) string leaf.
3069 atom_node = LL[string_idx].parent
3071 # We will place BREAK_MARK in between every two substrings that we
3072 # merge. We will then later go through our final result and use the
3073 # various instances of BREAK_MARK we find to add the right values to
3074 # the custom split map.
3075 BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
3077 QUOTE = LL[string_idx].value[-1]
3079 def make_naked(string: str, string_prefix: str) -> str:
3080 """Strip @string (i.e. make it a "naked" string)
3083 * assert_is_leaf_string(@string)
3086 A string that is identical to @string except that
3087 @string_prefix has been stripped, the surrounding QUOTE
3088 characters have been removed, and any remaining QUOTE
3089 characters have been escaped.
3091 assert_is_leaf_string(string)
3093 RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
3094 naked_string = string[len(string_prefix) + 1 : -1]
3095 naked_string = re.sub(
3096 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
3100 # Holds the CustomSplit objects that will later be added to the custom
3104 # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
3107 # Sets the 'prefix' variable. This is the prefix that the final merged
3109 next_str_idx = string_idx
3113 and is_valid_index(next_str_idx)
3114 and LL[next_str_idx].type == token.STRING
3116 prefix = get_string_prefix(LL[next_str_idx].value)
3119 # The next loop merges the string group. The final string will be
3122 # The following convenience variables are used:
3127 # NSS: naked next string
3131 next_str_idx = string_idx
3132 while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
3135 SS = LL[next_str_idx].value
3136 next_prefix = get_string_prefix(SS)
3138 # If this is an f-string group but this substring is not prefixed
3140 if "f" in prefix and "f" not in next_prefix:
3141 # Then we must escape any braces contained in this substring.
3142 SS = re.subf(r"(\{|\})", "{1}{1}", SS)
3144 NSS = make_naked(SS, next_prefix)
3146 has_prefix = bool(next_prefix)
3147 prefix_tracker.append(has_prefix)
3149 S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
3150 NS = make_naked(S, prefix)
3154 S_leaf = Leaf(token.STRING, S)
3155 if self.normalize_strings:
3156 normalize_string_quotes(S_leaf)
3158 # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
3159 temp_string = S_leaf.value[len(prefix) + 1 : -1]
3160 for has_prefix in prefix_tracker:
3161 mark_idx = temp_string.find(BREAK_MARK)
3164 ), "Logic error while filling the custom string breakpoint cache."
3166 temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
3167 breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
3168 custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
3170 string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
3172 if atom_node is not None:
3173 replace_child(atom_node, string_leaf)
3175 # Build the final line ('new_line') that this method will later return.
3176 new_line = line.clone()
3177 for (i, leaf) in enumerate(LL):
3179 new_line.append(string_leaf)
3181 if string_idx <= i < string_idx + num_of_strings:
3182 for comment_leaf in line.comments_after(LL[i]):
3183 new_line.append(comment_leaf, preformatted=True)
3186 append_leaves(new_line, line, [leaf])
3188 self.add_custom_splits(string_leaf.value, custom_splits)
3192 def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
3193 """Validate (M)erge (S)tring (G)roup
3195 Transform-time string validation logic for __merge_string_group(...).
3198 * Ok(None), if ALL validation checks (listed below) pass.
3200 * Err(CannotTransform), if any of the following are true:
3201 - The target string group contains stand-alone comments.
3202 - The target string is not in a string group (i.e. it has no
3204 - The string group has more than one inline comment.
3205 - The string group has an inline comment that appears to be a pragma.
3206 - The set of all string prefixes in the string group is of
3207 length greater than one and is not equal to {"", "f"}.
3208 - The string group consists of raw strings.
3210 # We first check for "inner" stand-alone comments (i.e. stand-alone
3211 # comments that have a string leaf before them AND after them).
3214 found_sa_comment = False
3215 is_valid_index = is_valid_index_factory(line.leaves)
3216 while is_valid_index(i) and line.leaves[i].type in [
3220 if line.leaves[i].type == STANDALONE_COMMENT:
3221 found_sa_comment = True
3222 elif found_sa_comment:
3224 "StringMerger does NOT merge string groups which contain "
3225 "stand-alone comments."
3230 num_of_inline_string_comments = 0
3231 set_of_prefixes = set()
3233 for leaf in line.leaves[string_idx:]:
3234 if leaf.type != token.STRING:
3235 # If the string group is trailed by a comma, we count the
3236 # comments trailing the comma as one of the string group's
3238 if leaf.type == token.COMMA and id(leaf) in line.comments:
3239 num_of_inline_string_comments += 1
3242 if has_triple_quotes(leaf.value):
3243 return TErr("StringMerger does NOT merge multiline strings.")
3246 prefix = get_string_prefix(leaf.value)
3248 return TErr("StringMerger does NOT merge raw strings.")
3250 set_of_prefixes.add(prefix)
3252 if id(leaf) in line.comments:
3253 num_of_inline_string_comments += 1
3254 if contains_pragma_comment(line.comments[id(leaf)]):
3255 return TErr("Cannot merge strings which have pragma comments.")
3257 if num_of_strings < 2:
3259 f"Not enough strings to merge (num_of_strings={num_of_strings})."
3262 if num_of_inline_string_comments > 1:
3264 f"Too many inline string comments ({num_of_inline_string_comments})."
3267 if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
3268 return TErr(f"Too many different prefixes ({set_of_prefixes}).")
3273 class StringParenStripper(StringTransformer):
3274 """StringTransformer that strips surrounding parentheses from strings.
3277 The line contains a string which is surrounded by parentheses and:
3278 - The target string is NOT the only argument to a function call.
3279 - The target string is NOT a "pointless" string.
3280 - If the target string contains a PERCENT, the brackets are not
3281 preceded or followed by an operator with higher precedence than
3285 The parentheses mentioned in the 'Requirements' section are stripped.
3288 StringParenStripper has its own inherent usefulness, but it is also
3289 relied on to clean up the parentheses created by StringParenWrapper (in
3290 the event that they are no longer needed).
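Example (illustrative before/after sketch):

    value = ("example string")  -->  value = "example string"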
3293 def do_match(self, line: Line) -> TMatchResult:
3296 is_valid_index = is_valid_index_factory(LL)
3298 for (idx, leaf) in enumerate(LL):
3299 # Should be a string...
3300 if leaf.type != token.STRING:
3303 # If this is a "pointless" string...
3306 and leaf.parent.parent
3307 and leaf.parent.parent.type == syms.simple_stmt
3311 # Should be preceded by a non-empty LPAR...
3313 not is_valid_index(idx - 1)
3314 or LL[idx - 1].type != token.LPAR
3315 or is_empty_lpar(LL[idx - 1])
3319 # That LPAR should NOT be preceded by a function name or a closing
3320 # bracket (which could be a function which returns a function or a
3321 # list/dictionary that contains a function)...
3322 if is_valid_index(idx - 2) and (
3323 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
3329 # Skip the string trailer, if one exists.
3330 string_parser = StringParser()
3331 next_idx = string_parser.parse(LL, string_idx)
3333 # if the leaves in the parsed string include a PERCENT, we need to
3334 # make sure the initial LPAR is NOT preceded by an operator with
3335 # higher or equal precedence to PERCENT
3336 if is_valid_index(idx - 2):
3337 # mypy can't quite follow unless we name this
3338 before_lpar = LL[idx - 2]
3339 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
3356 # only unary PLUS/MINUS
3358 and before_lpar.parent.type == syms.factor
3359 and (before_lpar.type in {token.PLUS, token.MINUS})
3364 # Should be followed by a non-empty RPAR...
3366 is_valid_index(next_idx)
3367 and LL[next_idx].type == token.RPAR
3368 and not is_empty_rpar(LL[next_idx])
3370 # That RPAR should NOT be followed by anything with higher
3371 # precedence than PERCENT
3372 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
3380 return Ok(string_idx)
3382 return TErr("This line has no strings wrapped in parens.")
3384 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3387 string_parser = StringParser()
3388 rpar_idx = string_parser.parse(LL, string_idx)
3390 for leaf in (LL[string_idx - 1], LL[rpar_idx]):
3391 if line.comments_after(leaf):
3393 "Will not strip parentheses which have comments attached to them."
3397 new_line = line.clone()
3398 new_line.comments = line.comments.copy()
3400 append_leaves(new_line, line, LL[: string_idx - 1])
3401 except BracketMatchError:
3402 # HACK: I believe there is currently a bug somewhere in
3403 # right_hand_split() that is causing brackets to not be tracked
3404 # properly by a shared BracketTracker.
3405 append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
3407 string_leaf = Leaf(token.STRING, LL[string_idx].value)
3408 LL[string_idx - 1].remove()
3409 replace_child(LL[string_idx], string_leaf)
3410 new_line.append(string_leaf)
3413 new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
3416 LL[rpar_idx].remove()
3421 class BaseStringSplitter(StringTransformer):
3423 Abstract class for StringTransformers which transform a Line's strings by splitting
3424 them or placing them on their own lines where necessary to avoid going over
3425 the configured line length.
3428 * The target string value is responsible for the line going over the
3429 line length limit. It follows that after all of black's other line
3430 split methods have been exhausted, this line (or one of the resulting
3431 lines after all line splits are performed) would still be over the
3432 line_length limit unless we split this string.
3434 * The target string is NOT a "pointless" string (i.e. a string that has
3435 no parent or siblings).
3437 * The target string is not followed by an inline comment that appears
3440 * The target string is not a multiline (i.e. triple-quote) string.
3444 def do_splitter_match(self, line: Line) -> TMatchResult:
3446 BaseStringSplitter asks its clients to override this method instead of
3447 `StringTransformer.do_match(...)`.
3449 Follows the same protocol as `StringTransformer.do_match(...)`.
3451 Refer to `help(StringTransformer.do_match)` for more information.
3454 def do_match(self, line: Line) -> TMatchResult:
3455 match_result = self.do_splitter_match(line)
3456 if isinstance(match_result, Err):
3459 string_idx = match_result.ok()
3460 vresult = self.__validate(line, string_idx)
3461 if isinstance(vresult, Err):
3466 def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3468 Checks that @line meets all of the requirements listed in this class's
3469 docstring. Refer to `help(BaseStringSplitter)` for a detailed
3470 description of those requirements.
3473 * Ok(None), if ALL of the requirements are met.
3475 * Err(CannotTransform), if ANY of the requirements are NOT met.
3479 string_leaf = LL[string_idx]
3481 max_string_length = self.__get_max_string_length(line, string_idx)
3482 if len(string_leaf.value) <= max_string_length:
3484 "The string itself is not what is causing this line to be too long."
3487 if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3492 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3496 if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3497 line.comments[id(line.leaves[string_idx])]
3500 "Line appears to end with an inline pragma comment. Splitting the line"
3501 " could modify the pragma's behavior."
3504 if has_triple_quotes(string_leaf.value):
3505 return TErr("We cannot split multiline strings.")
3509 def __get_max_string_length(self, line: Line, string_idx: int) -> int:
3511 Calculates the max string length used when attempting to determine
3512 whether or not the target string is responsible for causing the line to
3513 go over the line length limit.
3515 WARNING: This method is tightly coupled to both StringSplitter and
3516 (especially) StringParenWrapper. There is probably a better way to
3517 accomplish what is being done here.
3520 max_string_length: such that `len(line.leaves[string_idx].value) >
3521 max_string_length` implies that the target string IS responsible
3522 for causing this line to exceed the line length limit.
3526 is_valid_index = is_valid_index_factory(LL)
3528 # We use the shorthand "WMA4" in comments to abbreviate "We must
3529 # account for". When giving examples, we use STRING to mean some/any
3532 # Finally, we use the following convenience variables:
3534 # P: The leaf that is before the target string leaf.
3535 # N: The leaf that is after the target string leaf.
3536 # NN: The leaf that is after N.
3538 # WMA4 the whitespace at the beginning of the line.
3539 offset = line.depth * 4
3541 if is_valid_index(string_idx - 1):
3542 p_idx = string_idx - 1
3544 LL[string_idx - 1].type == token.LPAR
3545 and LL[string_idx - 1].value == ""
3548 # If the previous leaf is an empty LPAR placeholder, we should skip it.
3552 if P.type == token.PLUS:
3553 # WMA4 a space and a '+' character (e.g. `+ STRING`).
3556 if P.type == token.COMMA:
3557 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
3560 if P.type in [token.COLON, token.EQUAL, token.NAME]:
3561 # This conditional branch is meant to handle dictionary keys,
3562 # variable assignments, 'return STRING' statement lines, and
3563 # 'else STRING' ternary expression lines.
3565 # WMA4 a single space.
3568 # WMA4 the lengths of any leaves that came before that space,
3569 # but after any closing bracket before that space.
3570 for leaf in reversed(LL[: p_idx + 1]):
3571 offset += len(str(leaf))
3572 if leaf.type in CLOSING_BRACKETS:
3575 if is_valid_index(string_idx + 1):
3576 N = LL[string_idx + 1]
3577 if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
3578 # If the next leaf is an empty RPAR placeholder, we should skip it.
3579 N = LL[string_idx + 2]
3581 if N.type == token.COMMA:
3582 # WMA4 a single comma at the end of the string (e.g `STRING,`).
3585 if is_valid_index(string_idx + 2):
3586 NN = LL[string_idx + 2]
3588 if N.type == token.DOT and NN.type == token.NAME:
3589 # This conditional branch is meant to handle method calls invoked
3590 # off of a string literal up to and including the LPAR character.
3592 # WMA4 the '.' character.
3596 is_valid_index(string_idx + 3)
3597 and LL[string_idx + 3].type == token.LPAR
3599 # WMA4 the left parenthesis character.
3602 # WMA4 the length of the method's name.
3603 offset += len(NN.value)
3605 has_comments = False
3606 for comment_leaf in line.comments_after(LL[string_idx]):
3607 if not has_comments:
3609 # WMA4 two spaces before the '#' character.
3612 # WMA4 the length of the inline comment.
3613 offset += len(comment_leaf.value)
3615 max_string_length = self.line_length - offset
3616 return max_string_length
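# Worked example of the arithmetic above (hedged; assumes the default line
# length of 88): for a string leaf at depth 2 with no neighboring leaves and
# no trailing comments, offset == 2 * 4 == 8, so max_string_length == 80.
# Any string value longer than 80 characters is then considered responsible
# for the line being too long.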
3619 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3621 StringTransformer that splits "atom" strings (i.e. strings which exist on
3622 lines by themselves).
3625 * The line consists ONLY of a single string (with the exception of a
3626 '+' symbol which MAY exist at the start of the line), MAYBE a string
3627 trailer, and MAYBE a trailing comma.
3629 * All of the requirements listed in BaseStringSplitter's docstring.
3632 The string mentioned in the 'Requirements' section is split into as
3633 many substrings as necessary to adhere to the configured line length.
3635 In the final set of substrings, no substring should be smaller than
3636 MIN_SUBSTR_SIZE characters.
3638 The string will ONLY be split on spaces (i.e. each new substring should
3639 start with a space). Note that the string will NOT be split on a space
3640 which is escaped with a backslash.
3642 If the string is an f-string, it will NOT be split in the middle of an
3643 f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3644 else bar()} is an f-expression).
3646 If the string that is being split has an associated set of custom split
3647 records and those custom splits will NOT result in any line going over
3648 the configured line length, those custom splits are used. Otherwise the
3649 string is split as late as possible (from left-to-right) while still
3650 adhering to the transformation rules listed above.
3653 StringSplitter relies on StringMerger to construct the appropriate
3654 CustomSplit objects and add them to the custom split map.
3658 # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
3660 (?<!\{) (?:\{\{)* \{ (?!\{)
3667 (?<!\}) \} (?:\}\})* (?!\})
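    # Doctest-style sketch of RE_FEXPR (note that this module appears to bind
    # the third-party `regex` package to the name `re` -- see the `re.subf`
    # call in StringMerger -- which supports the recursive pattern above):
    #
    #     >>> bool(re.search(StringSplitter.RE_FEXPR, "f-ish {var}", re.VERBOSE))
    #     True
    #     >>> bool(re.search(StringSplitter.RE_FEXPR, "escaped {{var}}", re.VERBOSE))
    #     False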
3670 def do_splitter_match(self, line: Line) -> TMatchResult:
3673 is_valid_index = is_valid_index_factory(LL)
3677 # The first leaf MAY be a '+' symbol...
3678 if is_valid_index(idx) and LL[idx].type == token.PLUS:
3681 # The next/first leaf MAY be an empty LPAR...
3682 if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3685 # The next/first leaf MUST be a string...
3686 if not is_valid_index(idx) or LL[idx].type != token.STRING:
3687 return TErr("Line does not start with a string.")
3691 # Skip the string trailer, if one exists.
3692 string_parser = StringParser()
3693 idx = string_parser.parse(LL, string_idx)
3695 # That string MAY be followed by an empty RPAR...
3696 if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3699 # That string / empty RPAR leaf MAY be followed by a comma...
3700 if is_valid_index(idx) and LL[idx].type == token.COMMA:
3703 # But no more leaves are allowed...
3704 if is_valid_index(idx):
3705 return TErr("This line does not end with a string.")
3707 return Ok(string_idx)
3709 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3712 QUOTE = LL[string_idx].value[-1]
3714 is_valid_index = is_valid_index_factory(LL)
3715 insert_str_child = insert_str_child_factory(LL[string_idx])
3717 prefix = get_string_prefix(LL[string_idx].value)
3719 # We MAY choose to drop the 'f' prefix from substrings that don't
3720 # contain any f-expressions, but ONLY if the original f-string
3721 # contains at least one f-expression. Otherwise, we will alter the AST
3723 drop_pointless_f_prefix = ("f" in prefix) and re.search(
3724 self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
3727 first_string_line = True
3728 starts_with_plus = LL[0].type == token.PLUS
3730 def line_needs_plus() -> bool:
3731 return first_string_line and starts_with_plus
3733 def maybe_append_plus(new_line: Line) -> None:
3736 If @line starts with a plus and this is the first line we are
3737 constructing, this function appends a PLUS leaf to @new_line
3738 and replaces the old PLUS leaf in the node structure. Otherwise
3739 this function does nothing.
3741 if line_needs_plus():
3742 plus_leaf = Leaf(token.PLUS, "+")
3743 replace_child(LL[0], plus_leaf)
3744 new_line.append(plus_leaf)
3747 is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
3750 def max_last_string() -> int:
3753 The max allowed length of the string value used for the last
3754 line we will construct.
3756 result = self.line_length
3757 result -= line.depth * 4
3758 result -= 1 if ends_with_comma else 0
3759 result -= 2 if line_needs_plus() else 0
3762 # --- Calculate Max Break Index (for string value)
3763 # We start with the line length limit
3764 max_break_idx = self.line_length
3765 # The last index of a string of length N is N-1.
3767 # Leading whitespace is not present in the string value (e.g. Leaf.value).
3768 max_break_idx -= line.depth * 4
3769 if max_break_idx < 0:
3771 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
3776 # Check if StringMerger registered any custom splits.
3777 custom_splits = self.pop_custom_splits(LL[string_idx].value)
3778 # We use them ONLY if none of them would produce lines that exceed the
3780 use_custom_breakpoints = bool(
3782 and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
3785 # Temporary storage for the remaining chunk of the string line that
3786 # can't fit onto the line currently being constructed.
3787 rest_value = LL[string_idx].value
3789 def more_splits_should_be_made() -> bool:
3792 True iff `rest_value` (the remaining string value from the last
3793 split) should be split again.
3795 if use_custom_breakpoints:
3796 return len(custom_splits) > 1
3798 return len(rest_value) > max_last_string()
3800 string_line_results: List[Ok[Line]] = []
3801 while more_splits_should_be_made():
3802 if use_custom_breakpoints:
3803 # Custom User Split (manual)
3804 csplit = custom_splits.pop(0)
3805 break_idx = csplit.break_idx
3807 # Algorithmic Split (automatic)
3808 max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
3809 maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
3810 if maybe_break_idx is None:
3811 # If we are unable to algorithmically determine a good split
3812 # and this string has custom splits registered to it, we
3813 # fall back to using them--which means we have to start
3814 # over from the beginning.
3816 rest_value = LL[string_idx].value
3817 string_line_results = []
3818 first_string_line = True
3819 use_custom_breakpoints = True
3822 # Otherwise, we stop splitting here.
3825 break_idx = maybe_break_idx
3827 # --- Construct `next_value`
3828 next_value = rest_value[:break_idx] + QUOTE
3830 # Are we allowed to try to drop a pointless 'f' prefix?
3831 drop_pointless_f_prefix
3832 # If we are, will we be successful?
3833 and next_value != self.__normalize_f_string(next_value, prefix)
3835 # If the current custom split did NOT originally use a prefix,
3836 # then `csplit.break_idx` will be off by one after removing
3840 if use_custom_breakpoints and not csplit.has_prefix
3843 next_value = rest_value[:break_idx] + QUOTE
3844 next_value = self.__normalize_f_string(next_value, prefix)
3846 # --- Construct `next_leaf`
3847 next_leaf = Leaf(token.STRING, next_value)
3848 insert_str_child(next_leaf)
3849 self.__maybe_normalize_string_quotes(next_leaf)
3851 # --- Construct `next_line`
3852 next_line = line.clone()
3853 maybe_append_plus(next_line)
3854 next_line.append(next_leaf)
3855 string_line_results.append(Ok(next_line))
3857 rest_value = prefix + QUOTE + rest_value[break_idx:]
3858 first_string_line = False
3860 yield from string_line_results
3862 if drop_pointless_f_prefix:
3863 rest_value = self.__normalize_f_string(rest_value, prefix)
3865 rest_leaf = Leaf(token.STRING, rest_value)
3866 insert_str_child(rest_leaf)
3868 # NOTE: I could not find a test case that verifies that the following
3869 # line is actually necessary, but it seems to be. Otherwise we risk
3870 # not normalizing the last substring, right?
3871 self.__maybe_normalize_string_quotes(rest_leaf)
3873 last_line = line.clone()
3874 maybe_append_plus(last_line)
3876 # If there are any leaves to the right of the target string...
3877 if is_valid_index(string_idx + 1):
3878 # We use `temp_value` here to determine how long the last line
3879 # would be if we were to append all the leaves to the right of the
3880 # target string to the last string line.
3881 temp_value = rest_value
3882 for leaf in LL[string_idx + 1 :]:
3883 temp_value += str(leaf)
3884 if leaf.type == token.LPAR:
3887 # Try to fit them all on the same line with the last substring...
3889 len(temp_value) <= max_last_string()
3890 or LL[string_idx + 1].type == token.COMMA
3892 last_line.append(rest_leaf)
3893 append_leaves(last_line, line, LL[string_idx + 1 :])
3895 # Otherwise, place the last substring on one line and everything
3896 # else on a line below that...
3898 last_line.append(rest_leaf)
3901 non_string_line = line.clone()
3902 append_leaves(non_string_line, line, LL[string_idx + 1 :])
3903 yield Ok(non_string_line)
3904 # Else the target string was the last leaf...
3906 last_line.append(rest_leaf)
3907 last_line.comments = line.comments.copy()
3910 def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3912 This method contains the algorithm that StringSplitter uses to
3913 determine which character to split each string at.
3916 @string: The substring that we are attempting to split.
3917 @max_break_idx: The ideal break index. We will return this value if it
3918 meets all the necessary conditions. In the likely event that it
3919 doesn't, we will try to find the closest index BELOW @max_break_idx
3920 that does. If that fails, we will expand our search by also
3921 considering all valid indices ABOVE @max_break_idx.
3924 * assert_is_leaf_string(@string)
3925 * 0 <= @max_break_idx < len(@string)
3928 break_idx, if an index is able to be found that meets all of the
3929 conditions listed in the 'Transformations' section of this class's
3934 is_valid_index = is_valid_index_factory(string)
3936 assert is_valid_index(max_break_idx)
3937 assert_is_leaf_string(string)
3939 _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3941 def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3944 All ranges of @string which, if @string were to be split there,
3945 would result in the splitting of an f-expression (which is NOT
3948 nonlocal _fexpr_slices
3950 if _fexpr_slices is None:
3952 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3953 _fexpr_slices.append(match.span())
3955 yield from _fexpr_slices
3957 is_fstring = "f" in get_string_prefix(string)
3959 def breaks_fstring_expression(i: Index) -> bool:
3962 True iff returning @i would result in the splitting of an
3963 f-expression (which is NOT allowed).
3968 for (start, end) in fexpr_slices():
3969 if start <= i < end:
3974 def passes_all_checks(i: Index) -> bool:
3977 True iff ALL of the conditions listed in the 'Transformations'
3978 section of this class's docstring would be met by returning @i.
3980 is_space = string[i] == " "
3982 is_not_escaped = True
3984 while is_valid_index(j) and string[j] == "\\":
3985 is_not_escaped = not is_not_escaped
3989 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3990 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3996 and not breaks_fstring_expression(i)
3999 # First, we check all indices BELOW @max_break_idx.
4000 break_idx = max_break_idx
4001 while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
4004 if not passes_all_checks(break_idx):
4005 # If that fails, we check all indices ABOVE @max_break_idx.
4007 # If we are able to find a valid index here, the next line is going
4008 # to be longer than the specified line length, but it's probably
4009 # better than doing nothing at all.
4010 break_idx = max_break_idx + 1
4011 while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
4014 if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
4019 def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
4020 if self.normalize_strings:
4021 normalize_string_quotes(leaf)
4023 def __normalize_f_string(self, string: str, prefix: str) -> str:
4026 * assert_is_leaf_string(@string)
4029 * If @string is an f-string that contains no f-expressions, we
4030 return a string identical to @string except that the 'f' prefix
4031 has been stripped and all double braces (i.e. '{{' or '}}') have
4032 been normalized (i.e. turned into '{' or '}').
4034 * Otherwise, we return @string.
4036 assert_is_leaf_string(string)
4038 if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
4039 new_prefix = prefix.replace("f", "")
4041 temp = string[len(prefix) :]
4042 temp = re.sub(r"\{\{", "{", temp)
4043 temp = re.sub(r"\}\}", "}", temp)
4046 return f"{new_prefix}{new_string}"
4051 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
4053 StringTransformer that splits non-"atom" strings (i.e. strings that do not
4054 exist on lines by themselves).
4057 All of the requirements listed in BaseStringSplitter's docstring in
4058 addition to the requirements listed below:
4060 * The line is a return/yield statement, which returns/yields a string.
4062 * The line is part of a ternary expression (e.g. `x = y if cond else
4063 z`) such that the line starts with `else <string>`, where <string> is
4066 * The line is an assert statement, which ends with a string.
4068 * The line is an assignment statement (e.g. `x = <string>` or `x +=
4069 <string>`) such that the variable is being assigned the value of some
4072 * The line is a dictionary key assignment where some valid key is being
4073 assigned the value of some string.
4076 The chosen string is wrapped in parentheses and then split at the LPAR.
4078 We then have one line which ends with an LPAR and another line that
4079 starts with the chosen string. The latter line is then split again at
4080 the RPAR. This results in the RPAR (and possibly a trailing comma)
4081 being placed on its own line.
4083 NOTE: If any leaves exist to the right of the chosen string (except
4084 for a trailing comma, which would be placed after the RPAR), those
4085 leaves are placed inside the parentheses. In effect, the chosen
4086 string is not necessarily being "wrapped" by parentheses. We can,
4087 however, count on the LPAR being placed directly before the chosen
4090 In other words, StringParenWrapper creates "atom" strings. These
4091 can then be split again by StringSplitter, if necessary.
4094 In the event that a string line split by StringParenWrapper is
4095 changed such that it no longer needs to be given its own line,
4096 StringParenWrapper relies on StringParenStripper to clean up the
4097 parentheses it created.
4100 def do_splitter_match(self, line: Line) -> TMatchResult:
4104 self._return_match(LL)
4105 or self._else_match(LL)
4106 or self._assert_match(LL)
4107 or self._assign_match(LL)
4108 or self._dict_match(LL)
4111 if string_idx is not None:
4112 string_value = line.leaves[string_idx].value
4113 # If the string has no spaces...
4114 if " " not in string_value:
4115 # And will still violate the line length limit when split...
4116 max_string_length = self.line_length - ((line.depth + 1) * 4)
4117 if len(string_value) > max_string_length:
4118 # And has no associated custom splits...
4119 if not self.has_custom_splits(string_value):
4120 # Then we should NOT put this string on its own line.
4122 "We do not wrap long strings in parentheses when the"
4123 " resultant line would still be over the specified line"
4124 " length and can't be split further by StringSplitter."
4126 return Ok(string_idx)
4128 return TErr("This line does not contain any non-atomic strings.")
4131 def _return_match(LL: List[Leaf]) -> Optional[int]:
4134 string_idx such that @LL[string_idx] is equal to our target (i.e.
4135 matched) string, if this line matches the return/yield statement
4136 requirements listed in the 'Requirements' section of this class's
4141 # If this line is part of a return/yield statement and the first leaf
4142 # contains either the "return" or "yield" keywords...
4143 if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
4145 ].value in ["return", "yield"]:
4146 is_valid_index = is_valid_index_factory(LL)
4148 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4149 # The next visible leaf MUST contain a string...
4150 if is_valid_index(idx) and LL[idx].type == token.STRING:
4156 def _else_match(LL: List[Leaf]) -> Optional[int]:
4159 string_idx such that @LL[string_idx] is equal to our target (i.e.
4160 matched) string, if this line matches the ternary expression
4161 requirements listed in the 'Requirements' section of this class's
4166 # If this line is part of a ternary expression and the first leaf
4167 # contains the "else" keyword...
4169 parent_type(LL[0]) == syms.test
4170 and LL[0].type == token.NAME
4171 and LL[0].value == "else"
4173 is_valid_index = is_valid_index_factory(LL)
4175 idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4176 # The next visible leaf MUST contain a string...
4177 if is_valid_index(idx) and LL[idx].type == token.STRING:
4183 def _assert_match(LL: List[Leaf]) -> Optional[int]:
4186 string_idx such that @LL[string_idx] is equal to our target (i.e.
4187 matched) string, if this line matches the assert statement
4188 requirements listed in the 'Requirements' section of this class's
4193 # If this line is part of an assert statement and the first leaf
4194 # contains the "assert" keyword...
4195 if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4196 is_valid_index = is_valid_index_factory(LL)
4198 for (i, leaf) in enumerate(LL):
4199 # We MUST find a comma...
4200 if leaf.type == token.COMMA:
4201 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4203 # That comma MUST be followed by a string...
4204 if is_valid_index(idx) and LL[idx].type == token.STRING:
4207 # Skip the string trailer, if one exists.
4208 string_parser = StringParser()
4209 idx = string_parser.parse(LL, string_idx)
4211 # But no more leaves are allowed...
4212 if not is_valid_index(idx):
4218 def _assign_match(LL: List[Leaf]) -> Optional[int]:
4221 string_idx such that @LL[string_idx] is equal to our target (i.e.
4222 matched) string, if this line matches the assignment statement
4223 requirements listed in the 'Requirements' section of this class's
4228 # If this line is part of an expression statement or is a function
4229 # argument AND the first leaf contains a variable name...
4231 parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4232 and LL[0].type == token.NAME
4234 is_valid_index = is_valid_index_factory(LL)
4236 for (i, leaf) in enumerate(LL):
4237 # We MUST find either an '=' or '+=' symbol...
4238 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4239 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4241 # That symbol MUST be followed by a string...
4242 if is_valid_index(idx) and LL[idx].type == token.STRING:
4245 # Skip the string trailer, if one exists.
4246 string_parser = StringParser()
4247 idx = string_parser.parse(LL, string_idx)
4249 # The next leaf MAY be a comma iff this line is part
4250 # of a function argument...
4252 parent_type(LL[0]) == syms.argument
4253 and is_valid_index(idx)
4254 and LL[idx].type == token.COMMA
4258 # But no more leaves are allowed...
4259 if not is_valid_index(idx):
4265 def _dict_match(LL: List[Leaf]) -> Optional[int]:
4268 string_idx such that @LL[string_idx] is equal to our target (i.e.
4269 matched) string, if this line matches the dictionary key assignment
4270 statement requirements listed in the 'Requirements' section of this
4275 # If this line is part of a dictionary key assignment...
4276 if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4277 is_valid_index = is_valid_index_factory(LL)
4279 for (i, leaf) in enumerate(LL):
4280 # We MUST find a colon...
4281 if leaf.type == token.COLON:
4282 idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4284 # That colon MUST be followed by a string...
4285 if is_valid_index(idx) and LL[idx].type == token.STRING:
4288 # Skip the string trailer, if one exists.
4289 string_parser = StringParser()
4290 idx = string_parser.parse(LL, string_idx)
4292 # That string MAY be followed by a comma...
4293 if is_valid_index(idx) and LL[idx].type == token.COMMA:
4296 # But no more leaves are allowed...
4297 if not is_valid_index(idx):
4302 def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
4305 is_valid_index = is_valid_index_factory(LL)
4306 insert_str_child = insert_str_child_factory(LL[string_idx])
4309 ends_with_comma = False
4310 if LL[comma_idx].type == token.COMMA:
4311 ends_with_comma = True
4313 leaves_to_steal_comments_from = [LL[string_idx]]
4315 leaves_to_steal_comments_from.append(LL[comma_idx])
4318 first_line = line.clone()
4319 left_leaves = LL[:string_idx]
4321 # We have to remember to account for (possibly invisible) LPAR and RPAR
4322 # leaves that already wrapped the target string. If these leaves do
4323 # exist, we will replace them with our own LPAR and RPAR leaves.
4324 old_parens_exist = False
4325 if left_leaves and left_leaves[-1].type == token.LPAR:
4326 old_parens_exist = True
4327 leaves_to_steal_comments_from.append(left_leaves[-1])
4330 append_leaves(first_line, line, left_leaves)
4332 lpar_leaf = Leaf(token.LPAR, "(")
4333 if old_parens_exist:
4334 replace_child(LL[string_idx - 1], lpar_leaf)
4336 insert_str_child(lpar_leaf)
4337 first_line.append(lpar_leaf)
4339 # We throw inline comments that were originally to the right of the
4340 # target string to the top line. They will now be shown to the right of
4342 for leaf in leaves_to_steal_comments_from:
4343 for comment_leaf in line.comments_after(leaf):
4344 first_line.append(comment_leaf, preformatted=True)
4346 yield Ok(first_line)
4348 # --- Middle (String) Line
4349 # We only need to yield one (possibly too long) string line, since the
4350 # `StringSplitter` will break it down further if necessary.
4351 string_value = LL[string_idx].value
4353 depth=line.depth + 1,
4354 inside_brackets=True,
4355 should_explode=line.should_explode,
4357 string_leaf = Leaf(token.STRING, string_value)
4358 insert_str_child(string_leaf)
4359 string_line.append(string_leaf)
4361 old_rpar_leaf = None
4362 if is_valid_index(string_idx + 1):
4363 right_leaves = LL[string_idx + 1 :]
4367 if old_parens_exist:
4369 right_leaves and right_leaves[-1].type == token.RPAR
4370 ), "Apparently, old parentheses do NOT exist?!"
4371 old_rpar_leaf = right_leaves.pop()
4373 append_leaves(string_line, line, right_leaves)
4375 yield Ok(string_line)
4378 last_line = line.clone()
4379 last_line.bracket_tracker = first_line.bracket_tracker
4381 new_rpar_leaf = Leaf(token.RPAR, ")")
4382 if old_rpar_leaf is not None:
4383 replace_child(old_rpar_leaf, new_rpar_leaf)
4385 insert_str_child(new_rpar_leaf)
4386 last_line.append(new_rpar_leaf)
4388 # If the target string ended with a comma, we place this comma to the
4389 # right of the RPAR on the last line.
4391 comma_leaf = Leaf(token.COMMA, ",")
4392 replace_child(LL[comma_idx], comma_leaf)
4393 last_line.append(comma_leaf)
4400 A state machine that aids in parsing a string's "trailer", which can be
4401 either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
4402 (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
4405 NOTE: A new StringParser object MUST be instantiated for each string
4406 trailer we need to parse.
4409 We shall assume that `line` equals the `Line` object that corresponds
4410 to the following line of Python code:
4412 x = "Some {}.".format("String") + some_other_string
4415 Furthermore, we will assume that `string_idx` is some index such that:
4417 assert line.leaves[string_idx].value == "Some {}."
4420 The following code snippet then holds:
4422 string_parser = StringParser()
4423 idx = string_parser.parse(line.leaves, string_idx)
4424 assert line.leaves[idx].type == token.PLUS
4430 # String Parser States
4440 # Lookup Table for Next State
4441 _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
4442 # A string trailer may start with '.' OR '%'.
4443 (START, token.DOT): DOT,
4444 (START, token.PERCENT): PERCENT,
4445 (START, DEFAULT_TOKEN): DONE,
4446 # A '.' MUST be followed by an attribute or method name.
4447 (DOT, token.NAME): NAME,
4448 # A method name MUST be followed by an '(', whereas an attribute name
4449 # is the last symbol in the string trailer.
4450 (NAME, token.LPAR): LPAR,
4451 (NAME, DEFAULT_TOKEN): DONE,
4452 # A '%' symbol can be followed by an '(' or a single argument (e.g. a
4453 # string or variable name).
4454 (PERCENT, token.LPAR): LPAR,
4455 (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
4456 # If a '%' symbol is followed by a single argument, that argument is
4457 # the last leaf in the string trailer.
4458 (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
4459 # If present, a ')' symbol is the last symbol in a string trailer.
4460 # (NOTE: LPARS and nested RPARS are not included in this lookup table,
4461 # since they are treated as a special case by the parsing logic in this
4462 # class's implementation.)
4463 (RPAR, DEFAULT_TOKEN): DONE,
4466 def __init__(self) -> None:
4467 self._state = self.START
4468 self._unmatched_lpars = 0
4470 def parse(self, leaves: List[Leaf], string_idx: int) -> int:
4473 * @leaves[@string_idx].type == token.STRING
4476 The index directly after the last leaf which is part of the string
4477 trailer, if a "trailer" exists.
4479 @string_idx + 1, if no string "trailer" exists.
4481 assert leaves[string_idx].type == token.STRING
4483 idx = string_idx + 1
4484 while idx < len(leaves) and self._next_state(leaves[idx]):
4488 def _next_state(self, leaf: Leaf) -> bool:
4491 * On the first call to this function, @leaf MUST be the leaf that
4492 was directly after the string leaf in question (e.g. if our target
4493 string is `line.leaves[i]` then the first call to this method must
4494 be `line.leaves[i + 1]`).
4495 * On the next call to this function, the leaf parameter passed in
4496 MUST be the leaf directly following @leaf.
4499 True iff @leaf is part of the string's trailer.
4501 # We ignore empty LPAR or RPAR leaves.
4502 if is_empty_par(leaf):
4505 next_token = leaf.type
4506 if next_token == token.LPAR:
4507 self._unmatched_lpars += 1
4509 current_state = self._state
4511 # The LPAR parser state is a special case. We will return True until we
4512 # find the matching RPAR token.
4513 if current_state == self.LPAR:
4514 if next_token == token.RPAR:
4515 self._unmatched_lpars -= 1
4516 if self._unmatched_lpars == 0:
4517 self._state = self.RPAR
4518 # Otherwise, we use a lookup table to determine the next state.
4520 # If the lookup table matches the current state to the next
4521 # token, we use the lookup table.
4522 if (current_state, next_token) in self._goto:
4523 self._state = self._goto[current_state, next_token]
4525 # Otherwise, we check if the current state was assigned a
4527 if (current_state, self.DEFAULT_TOKEN) in self._goto:
4528 self._state = self._goto[current_state, self.DEFAULT_TOKEN]
4529 # If no default has been assigned, then this parser has a logic
4532 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
4534 if self._state == self.DONE:
4540 def TErr(err_msg: str) -> Err[CannotTransform]:
4543 Convenience function used when working with the TResult type.
4545 cant_transform = CannotTransform(err_msg)
4546 return Err(cant_transform)
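# Usage sketch: `check_width` and `max_width` are hypothetical names, shown
# only to illustrate how a TResult-returning helper combines Ok and TErr.
#
#     def check_width(line_str: str, max_width: int = 88) -> TResult[int]:
#         if len(line_str) > max_width:
#             return TErr(f"Line is {len(line_str)} chars; limit is {max_width}.")
#         return Ok(len(line_str))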
4549 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
4552 True iff one of the comments in @comment_list is a pragma used by one
4553 of the more common static analysis tools for Python (e.g. mypy, flake8,
4556 for comment in comment_list:
4557 if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
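# For example, a comment leaf holding "# noqa: F401" or "# type: ignore"
# matches one of the prefixes above, while "# plain comment" does not.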
4563 def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
4565 Factory for a convenience function that is used to orphan @string_leaf
4566 and then insert multiple new leaves into the same part of the node
4567 structure that @string_leaf had originally occupied.
4570 Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
4571 string_leaf.parent`. Assume the node `N` has the following
4578 Leaf(STRING, '"foo"'),
4582 We then run the code snippet shown below.
4584 insert_str_child = insert_str_child_factory(string_leaf)
4586 lpar = Leaf(token.LPAR, '(')
4587 insert_str_child(lpar)
4589 bar = Leaf(token.STRING, '"bar"')
4590 insert_str_child(bar)
4592 rpar = Leaf(token.RPAR, ')')
4593 insert_str_child(rpar)
4596 After which point, it follows that `string_leaf.parent is None` and
4597 the node `N` now has the following structure:
4604 Leaf(STRING, '"bar"'),
4609 string_parent = string_leaf.parent
4610 string_child_idx = string_leaf.remove()
4612 def insert_str_child(child: LN) -> None:
4613 nonlocal string_child_idx
4615 assert string_parent is not None
4616 assert string_child_idx is not None
4618 string_parent.insert_child(string_child_idx, child)
4619 string_child_idx += 1
4621 return insert_str_child
4624 def has_triple_quotes(string: str) -> bool:
4627 True iff @string starts with three quotation characters.
4629 raw_string = string.lstrip(STRING_PREFIX_CHARS)
4630 return raw_string[:3] in {'"""', "'''"}
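# For example:
#     has_triple_quotes('"""docstring"""')  # True
#     has_triple_quotes("r'''raw'''")       # True (the prefix is stripped first)
#     has_triple_quotes('"short"')          # False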
4633 def parent_type(node: Optional[LN]) -> Optional[NodeType]:
4636 @node.parent.type, if @node is not None and has a parent.
4640 if node is None or node.parent is None:
4643 return node.parent.type
4646 def is_empty_par(leaf: Leaf) -> bool:
4647 return is_empty_lpar(leaf) or is_empty_rpar(leaf)
4650 def is_empty_lpar(leaf: Leaf) -> bool:
4651 return leaf.type == token.LPAR and leaf.value == ""
4654 def is_empty_rpar(leaf: Leaf) -> bool:
4655 return leaf.type == token.RPAR and leaf.value == ""
4658 def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
4664 is_valid_index = is_valid_index_factory(my_list)
4666 assert is_valid_index(0)
4667 assert is_valid_index(2)
4669 assert not is_valid_index(3)
4670 assert not is_valid_index(-1)
4674 def is_valid_index(idx: int) -> bool:
4677 True iff @idx is non-negative AND seq[@idx] does NOT raise an IndexError.
4680 return 0 <= idx < len(seq)
4682 return is_valid_index
4685 def line_to_string(line: Line) -> str:
4686 """Returns the string representation of @line.
4688 WARNING: This is known to be computationally expensive.
4690 return str(line).strip("\n")
4694 new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False
4697 Append leaves (taken from @old_line) to @new_line, making sure to fix the
4698 underlying Node structure where appropriate.
4700 All of the leaves in @leaves are duplicated. The duplicates are then
4701 appended to @new_line and used to replace their originals in the underlying
4702 Node structure. Any comments attached to the old leaves are reattached to
4706 set(@leaves) is a subset of set(@old_line.leaves).
4708 for old_leaf in leaves:
4709 new_leaf = Leaf(old_leaf.type, old_leaf.value)
4710 replace_child(old_leaf, new_leaf)
4711 new_line.append(new_leaf, preformatted=preformatted)
4713 for comment_leaf in old_line.comments_after(old_leaf):
4714 new_line.append(comment_leaf, preformatted=True)
4717 def replace_child(old_child: LN, new_child: LN) -> None:
4720 * If @old_child.parent is set, replace @old_child with @new_child in
4721 @old_child's underlying Node structure.
4723 * Otherwise, this function does nothing.
4725 parent = old_child.parent
4729 child_idx = old_child.remove()
4730 if child_idx is not None:
4731 parent.insert_child(child_idx, new_child)
4734 def get_string_prefix(string: str) -> str:
4737 * assert_is_leaf_string(@string)
4740 @string's prefix (e.g. '', 'r', 'f', or 'rf').
4742 assert_is_leaf_string(string)
4746 while string[prefix_idx] in STRING_PREFIX_CHARS:
4747 prefix += string[prefix_idx].lower()
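# For example:
#     get_string_prefix('Rb"..."')  # returns "rb" (lowercased)
#     get_string_prefix('"plain"')  # returns ""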
4753 def assert_is_leaf_string(string: str) -> None:
4755 Checks the pre-condition that @string has the format that you would expect
4756 of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
4757 token.STRING`. A more precise description of the pre-conditions that are
4758 checked is given below.
4761 * @string starts with either ', ", <prefix>', or <prefix>" where
4762 `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
4763 * @string ends with a quote character (' or ").
4766 AssertionError(...) if the pre-conditions listed above are not
4769 dquote_idx = string.find('"')
4770 squote_idx = string.find("'")
4771 if -1 in [dquote_idx, squote_idx]:
4772 quote_idx = max(dquote_idx, squote_idx)
4774 quote_idx = min(squote_idx, dquote_idx)
4777 0 <= quote_idx < len(string) - 1
4778 ), f"{string!r} is missing a starting quote character (' or \")."
4779 assert string[-1] in (
4782 ), f"{string!r} is missing an ending quote character (' or \")."
4783 assert set(string[:quote_idx]).issubset(
4784 set(STRING_PREFIX_CHARS)
4785 ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4788 def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
4789 """Split line into many lines, starting with the first matching bracket pair.
4791 Note: this usually looks weird; only use it for function definitions.
4792 Prefer RHS otherwise. This is why this function is not symmetrical with
4793 :func:`right_hand_split` which also handles optional parentheses.
4795 tail_leaves: List[Leaf] = []
4796 body_leaves: List[Leaf] = []
4797 head_leaves: List[Leaf] = []
4798 current_leaves = head_leaves
4799 matching_bracket: Optional[Leaf] = None
4800 for leaf in line.leaves:
4802 current_leaves is body_leaves
4803 and leaf.type in CLOSING_BRACKETS
4804 and leaf.opening_bracket is matching_bracket
4806 current_leaves = tail_leaves if body_leaves else head_leaves
4807 current_leaves.append(leaf)
4808 if current_leaves is head_leaves:
4809 if leaf.type in OPENING_BRACKETS:
4810 matching_bracket = leaf
4811 current_leaves = body_leaves
4812 if not matching_bracket:
4813 raise CannotSplit("No brackets found")
4815 head = bracket_split_build_line(head_leaves, line, matching_bracket)
4816 body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
4817 tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
4818 bracket_split_succeeded_or_raise(head, body, tail)
4819 for result in (head, body, tail):
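# Illustrative sketch: for `def f(a: int, b: int) -> int:` the first bracket
# pair drives the split, producing head `def f(`, body `a: int, b: int`, and
# tail `) -> int:`.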
4824 def right_hand_split(
4827 features: Collection[Feature] = (),
4828 omit: Collection[LeafID] = (),
4829 ) -> Iterator[Line]:
4830 """Split line into many lines, starting with the last matching bracket pair.
4832 If the split was by optional parentheses, attempt splitting without them, too.
4833 `omit` is a collection of closing bracket IDs that shouldn't be considered for
4836 Note: running this function modifies `bracket_depth` on the leaves of `line`.
4838 tail_leaves: List[Leaf] = []
4839 body_leaves: List[Leaf] = []
4840 head_leaves: List[Leaf] = []
4841 current_leaves = tail_leaves
4842 opening_bracket: Optional[Leaf] = None
4843 closing_bracket: Optional[Leaf] = None
4844 for leaf in reversed(line.leaves):
4845 if current_leaves is body_leaves:
4846 if leaf is opening_bracket:
4847 current_leaves = head_leaves if body_leaves else tail_leaves
4848 current_leaves.append(leaf)
4849 if current_leaves is tail_leaves:
4850 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
4851 opening_bracket = leaf.opening_bracket
4852 closing_bracket = leaf
4853 current_leaves = body_leaves
4854 if not (opening_bracket and closing_bracket and head_leaves):
4855 # If there is no opening or closing bracket, the split failed and
4856 # all content is in the tail. Otherwise, if `head_leaves` is empty, it means
4857 # the matching `opening_bracket` wasn't available on `line` anymore.
4858 raise CannotSplit("No brackets found")
4860 tail_leaves.reverse()
4861 body_leaves.reverse()
4862 head_leaves.reverse()
4863 head = bracket_split_build_line(head_leaves, line, opening_bracket)
4864 body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
4865 tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
4866 bracket_split_succeeded_or_raise(head, body, tail)
4868 Feature.FORCE_OPTIONAL_PARENTHESES not in features
4869 # the opening bracket is an optional paren
4870 and opening_bracket.type == token.LPAR
4871 and not opening_bracket.value
4872 # the closing bracket is an optional paren
4873 and closing_bracket.type == token.RPAR
4874 and not closing_bracket.value
4875 # it's not an import (optional parens are the only thing we can split on
4876 # in this case; attempting a split without them is a waste of time)
4877 and not line.is_import
4878 # there are no standalone comments in the body
4879 and not body.contains_standalone_comments(0)
4880 # and we can actually remove the parens
4881 and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
4883 omit = {id(closing_bracket), *omit}
4885 yield from right_hand_split(line, line_length, features=features, omit=omit)
4891 or is_line_short_enough(body, line_length=line_length)
4894 "Splitting failed, body is still too long and can't be split."
4897 elif head.contains_multiline_strings() or tail.contains_multiline_strings():
4899 "The current optional pair of parentheses is bound to fail to"
4900 " satisfy the splitting algorithm because the head or the tail"
4901 " contains multiline strings which by definition never fit one"
4905 ensure_visible(opening_bracket)
4906 ensure_visible(closing_bracket)
4907 for result in (head, body, tail):
4912 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
4913 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
4915 Do nothing otherwise.
4917 A left- or right-hand split is based on a pair of brackets. Content before
4918 (and including) the opening bracket is left on one line, content inside the
4919 brackets is put on a separate line, and finally content starting with and
4920 following the closing bracket is put on a separate line.
4922 Those are called `head`, `body`, and `tail`, respectively. If the split
4923 produced the same line (all content in `head`) or ended up with an empty `body`
4924 and the `tail` is just the closing bracket, then it's considered failed.
4926 tail_len = len(str(tail).strip())
4929 raise CannotSplit("Splitting brackets produced the same line")
4933 f"Splitting brackets on an empty body to save {tail_len} characters is"
4938 def bracket_split_build_line(
4939 leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
4941 """Return a new line with given `leaves` and respective comments from `original`.
4943 If `is_body` is True, the result line is one-indented inside brackets and as such
4944 has its first leaf's prefix normalized and a trailing comma added when expected.
4946 result = Line(depth=original.depth)
4948 result.inside_brackets = True
4951 # Since body is a new indent level, remove spurious leading whitespace.
4952 normalize_prefix(leaves[0], inside_brackets=True)
4953 # Ensure a trailing comma for imports and standalone function arguments, but
4954 # be careful not to add one after any comments or within type annotations.
4957 and opening_bracket.value == "("
4958 and not any(leaf.type == token.COMMA for leaf in leaves)
4961 if original.is_import or no_commas:
4962 for i in range(len(leaves) - 1, -1, -1):
4963 if leaves[i].type == STANDALONE_COMMENT:
4966 if leaves[i].type != token.COMMA:
4967 new_comma = Leaf(token.COMMA, ",")
4968 leaves.insert(i + 1, new_comma)
4973 result.append(leaf, preformatted=True)
4974 for comment_after in original.comments_after(leaf):
4975 result.append(comment_after, preformatted=True)
4976 if is_body and should_split_body_explode(result, opening_bracket):
4977 result.should_explode = True
4981 def dont_increase_indentation(split_func: Transformer) -> Transformer:
4982 """Normalize prefix of the first leaf in every line returned by `split_func`.
4984 This is a decorator over relevant split functions.
4988 def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4989 for line in split_func(line, features):
4990 normalize_prefix(line.leaves[0], inside_brackets=True)
4993 return split_wrapper
4996 @dont_increase_indentation
4997 def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4998 """Split according to delimiters of the highest priority.
5000 If the appropriate Features are given, the split will add trailing commas
5001 also in function signatures and calls that contain `*` and `**`.
5004 last_leaf = line.leaves[-1]
5006 raise CannotSplit("Line empty")
5008 bt = line.bracket_tracker
5010 delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
5012 raise CannotSplit("No delimiters found")
5014 if delimiter_priority == DOT_PRIORITY:
5015 if bt.delimiter_count_with_priority(delimiter_priority) == 1:
5016 raise CannotSplit("Splitting a single attribute from its owner looks wrong")
5018 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5019 lowest_depth = sys.maxsize
5020 trailing_comma_safe = True
5022 def append_to_line(leaf: Leaf) -> Iterator[Line]:
5023 """Append `leaf` to current line or to new line if appending impossible."""
5024 nonlocal current_line
5026 current_line.append_safe(leaf, preformatted=True)
5030 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5031 current_line.append(leaf)
5033 for leaf in line.leaves:
5034 yield from append_to_line(leaf)
5036 for comment_after in line.comments_after(leaf):
5037 yield from append_to_line(comment_after)
5039 lowest_depth = min(lowest_depth, leaf.bracket_depth)
5040 if leaf.bracket_depth == lowest_depth:
5041 if is_vararg(leaf, within={syms.typedargslist}):
5042 trailing_comma_safe = (
5043 trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
5045 elif is_vararg(leaf, within={syms.arglist, syms.argument}):
5046 trailing_comma_safe = (
5047 trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
5050 leaf_priority = bt.delimiters.get(id(leaf))
5051 if leaf_priority == delimiter_priority:
5054 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5058 and delimiter_priority == COMMA_PRIORITY
5059 and current_line.leaves[-1].type != token.COMMA
5060 and current_line.leaves[-1].type != STANDALONE_COMMENT
5062 new_comma = Leaf(token.COMMA, ",")
5063 current_line.append(new_comma)
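# Illustrative sketch: splitting the bracketed body `a, fn(b), c` at comma
# priority yields the lines `a,`, `fn(b),`, and `c,` (the final comma is
# added only when a trailing comma is safe for the target versions).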
5067 @dont_increase_indentation
5068 def standalone_comment_split(
5069 line: Line, features: Collection[Feature] = ()
5070 ) -> Iterator[Line]:
5071 """Split standalone comments from the rest of the line."""
5072 if not line.contains_standalone_comments(0):
5073 raise CannotSplit("Line does not have any standalone comments")
5075 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5077 def append_to_line(leaf: Leaf) -> Iterator[Line]:
5078 """Append `leaf` to current line or to new line if appending impossible."""
5079 nonlocal current_line
5081 current_line.append_safe(leaf, preformatted=True)
5085 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5086 current_line.append(leaf)
5088 for leaf in line.leaves:
5089 yield from append_to_line(leaf)
5091 for comment_after in line.comments_after(leaf):
5092 yield from append_to_line(comment_after)
5098 def is_import(leaf: Leaf) -> bool:
5099 """Return True if the given leaf starts an import statement."""
5106 (v == "import" and p and p.type == syms.import_name)
5107 or (v == "from" and p and p.type == syms.import_from)
5112 def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
5113 """Return True if the given leaf is a special comment.
5114 Only returns true for type comments for now."""
5117 return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
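# For example, is_type_comment(leaf) is True for a leaf holding
# "# type: List[int]", while is_type_comment(leaf, " ignore") matches
# "# type: ignore" comments only.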
5120 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
5121 """Leave existing extra newlines if not `inside_brackets`. Remove everything
5124 Note: don't use backslashes for formatting or you'll lose your voting rights.
5126 if not inside_brackets:
5127 spl = leaf.prefix.split("#")
5128 if "\\" not in spl[0]:
5129 nl_count = spl[-1].count("\n")
5132 leaf.prefix = "\n" * nl_count
5138 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
5139 """Make all string prefixes lowercase.
5141 If remove_u_prefix is given, also removes any u prefix from the string.
5143 Note: Mutates its argument.
5145 match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
5146 assert match is not None, f"failed to match string {leaf.value!r}"
5147 orig_prefix = match.group(1)
5148 new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
5150 new_prefix = new_prefix.replace("u", "")
5151 leaf.value = f"{new_prefix}{match.group(2)}"
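# For example, a leaf value of 'F"hi"' becomes 'f"hi"', and with
# remove_u_prefix=True a value of 'u"hi"' becomes '"hi"'.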
5154 def normalize_string_quotes(leaf: Leaf) -> None:
5155 """Prefer double quotes but only if it doesn't cause more escaping.
5157 Adds or removes backslashes as appropriate. Doesn't parse and fix
5158 strings nested in f-strings (yet).
5160 Note: Mutates its argument.
5162 value = leaf.value.lstrip(STRING_PREFIX_CHARS)
5163 if value[:3] == '"""':
5166 elif value[:3] == "'''":
5169 elif value[0] == '"':
5175 first_quote_pos = leaf.value.find(orig_quote)
5176 if first_quote_pos == -1:
5177 return # There's an internal error
5179 prefix = leaf.value[:first_quote_pos]
5180 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
5181 escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
5182 escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
5183 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
5184 if "r" in prefix.casefold():
5185 if unescaped_new_quote.search(body):
5186 # There's at least one unescaped new_quote in this raw string
5187 # so converting is impossible
5190 # Do not introduce or remove backslashes in raw strings
5193 # remove unnecessary escapes
5194 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
5195 if body != new_body:
5196 # Consider the string without unnecessary escapes as the original
5198 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
5199 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
5200 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
5201 if "f" in prefix.casefold():
5202 matches = re.findall(
5204 (?:[^{]|^)\{ # start of the string or a non-{ followed by a single {
5205 ([^{].*?) # contents of the brackets except if it begins with {{
5206 \}(?:[^}]|$) # A } followed by end of the string or a non-}
5213 # Do not introduce backslashes in interpolated expressions
5216 if new_quote == '"""' and new_body[-1:] == '"':
5218 new_body = new_body[:-1] + '\\"'
5219 orig_escape_count = body.count("\\")
5220 new_escape_count = new_body.count("\\")
5221 if new_escape_count > orig_escape_count:
5222 return # Do not introduce more escaping
5224 if new_escape_count == orig_escape_count and orig_quote == '"':
5225 return # Prefer double quotes
5227 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
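# Illustrative sketch of the net effect on leaf values:
#     'hello'     ->  "hello"      (prefer double quotes)
#     'it\'s'     ->  "it's"       (switching removes an escape)
#     '"quoted"'  ->  '"quoted"'   (unchanged: switching would add escapes)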
5230 def normalize_numeric_literal(leaf: Leaf) -> None:
5231 """Normalizes numeric (float, int, and complex) literals.
5233 All letters used in the representation are normalized to lowercase (except
5234 in Python 2 long literals).
5236 text = leaf.value.lower()
5237 if text.startswith(("0o", "0b")):
5238 # Leave octal and binary literals alone.
5240 elif text.startswith("0x"):
5241 text = format_hex(text)
5243 text = format_scientific_notation(text)
5244 elif text.endswith(("j", "l")):
5245 text = format_long_or_complex_number(text)
5247 text = format_float_or_int_string(text)
5251 def format_hex(text: str) -> str:
5253 Formats a hexadecimal string like "0x12b3".
5255 Uses lowercase because of similarity between "B" and "8", which
5256 can cause security issues.
5257 see: https://github.com/psf/black/issues/1692
5260 before, after = text[:2], text[2:]
5261 return f"{before}{after.lower()}"
5264 def format_scientific_notation(text: str) -> str:
5265 """Formats a numeric string utilizing scentific notation"""
5266 before, after = text.split("e")
5268 if after.startswith("-"):
5271 elif after.startswith("+"):
5273 before = format_float_or_int_string(before)
5274 return f"{before}e{sign}{after}"
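# For example, "1E+5" arrives here lowercased as "1e+5" and becomes "1e5",
# while "1.e-3" becomes "1.0e-3".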
5277 def format_long_or_complex_number(text: str) -> str:
5278 """Formats a long or complex string like `10L` or `10j`"""
5281 # Capitalize in "2L" because "l" looks too similar to "1".
5284 return f"{format_float_or_int_string(number)}{suffix}"
5287 def format_float_or_int_string(text: str) -> str:
5288 """Formats a float string like "1.0"."""
5292 before, after = text.split(".")
5293 return f"{before or 0}.{after or 0}"
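# For example, ".5" becomes "0.5" and "1." becomes "1.0", while "7" contains
# no dot and is returned unchanged.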
5296 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
5297 """Make existing optional parentheses invisible or create new ones.
5299 `parens_after` is a set of string leaf values immediately after which parens
5302 Standardizes on visible parentheses for single-element tuples, and keeps
5303 existing visible parentheses for other tuples and generator expressions.
5305 for pc in list_comments(node.prefix, is_endmarker=False):
5306 if pc.value in FMT_OFF:
5307 # This `node` has a prefix with `# fmt: off`, don't mess with parens.
5310 for index, child in enumerate(list(node.children)):
5311 # Fixes a bug where invisible parens are not properly stripped from
5312 # assignment statements that contain type annotations.
5313 if isinstance(child, Node) and child.type == syms.annassign:
5314 normalize_invisible_parens(child, parens_after=parens_after)
5316 # Add parentheses around long tuple unpacking in assignments.
5319 and isinstance(child, Node)
5320 and child.type == syms.testlist_star_expr
5325 if is_walrus_assignment(child):
5328 elif child.type == syms.atom:
5329 if maybe_make_parens_invisible_in_atom(child, parent=node):
5330 wrap_in_parentheses(node, child, visible=False)
5331 elif is_one_tuple(child):
5332 wrap_in_parentheses(node, child, visible=True)
5333 elif node.type == syms.import_from:
5334 # "import from" nodes store parentheses directly as part of
5336 if child.type == token.LPAR:
5337 # make parentheses invisible
5338 child.value = "" # type: ignore
5339 node.children[-1].value = "" # type: ignore
5340 elif child.type != token.STAR:
5341 # insert invisible parentheses
5342 node.insert_child(index, Leaf(token.LPAR, ""))
5343 node.append_child(Leaf(token.RPAR, ""))
5346 elif not (isinstance(child, Leaf) and is_multiline_string(child)):
5347 wrap_in_parentheses(node, child, visible=False)
5349 check_lpar = isinstance(child, Leaf) and child.value in parens_after
5352 def normalize_fmt_off(node: Node) -> None:
5353 """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
5356 try_again = convert_one_fmt_off_pair(node)
5359 def convert_one_fmt_off_pair(node: Node) -> bool:
5360 """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
5362 Returns True if a pair was converted.
5364 for leaf in node.leaves():
5365 previous_consumed = 0
5366 for comment in list_comments(leaf.prefix, is_endmarker=False):
5367 if comment.value in FMT_OFF:
5368 # We only want standalone comments. If there's no previous leaf or
5369 # the previous leaf is indentation, it's a standalone comment in
5371 if comment.type != STANDALONE_COMMENT:
5372 prev = preceding_leaf(leaf)
5373 if prev and prev.type not in WHITESPACE:
5376 ignored_nodes = list(generate_ignored_nodes(leaf))
5377 if not ignored_nodes:
5380 first = ignored_nodes[0] # Can be a container node with the `leaf`.
5381 parent = first.parent
5382 prefix = first.prefix
5383 first.prefix = prefix[comment.consumed :]
5385 comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
5387 if hidden_value.endswith("\n"):
5388 # That happens when one of the `ignored_nodes` ended with a NEWLINE
5389 # leaf (possibly followed by a DEDENT).
5390 hidden_value = hidden_value[:-1]
5391 first_idx: Optional[int] = None
5392 for ignored in ignored_nodes:
5393 index = ignored.remove()
5394 if first_idx is None:
5396 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
5397 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
5398 parent.insert_child(
5403 prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
5408 previous_consumed = comment.consumed
5413 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
5414 """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
5416 Stops at the end of the block.
5418 container: Optional[LN] = container_of(leaf)
5419 while container is not None and container.type != token.ENDMARKER:
5420 if is_fmt_on(container):
5423 # fix for fmt: on in children
5424 if contains_fmt_on_at_column(container, leaf.column):
5425 for child in container.children:
5426 if contains_fmt_on_at_column(child, leaf.column):
5431 container = container.next_sibling
5434 def is_fmt_on(container: LN) -> bool:
5435 """Determine whether formatting is switched on within a container.
5436 Determined by whether the last `# fmt:` comment is `on` or `off`.
5439 for comment in list_comments(container.prefix, is_endmarker=False):
5440 if comment.value in FMT_ON:
5442 elif comment.value in FMT_OFF:
5447 def contains_fmt_on_at_column(container: LN, column: int) -> bool:
5448 """Determine if children at a given column have formatting switched on."""
5449 for child in container.children:
5451 isinstance(child, Node)
5452 and first_leaf_column(child) == column
5453 or isinstance(child, Leaf)
5454 and child.column == column
5456 if is_fmt_on(child):
5462 def first_leaf_column(node: Node) -> Optional[int]:
5463 """Returns the column of the first leaf child of a node."""
5464 for child in node.children:
5465 if isinstance(child, Leaf):
5470 def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
5471 """If it's safe, make the parens in the atom `node` invisible, recursively.
5472 Additionally, remove repeated, adjacent invisible parens from the atom `node`
5473 as they are redundant.
5475 Returns whether the node should itself be wrapped in invisible parentheses.
5479 node.type != syms.atom
5480 or is_empty_tuple(node)
5481 or is_one_tuple(node)
5482 or (is_yield(node) and parent.type != syms.expr_stmt)
5483 or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
5487 first = node.children[0]
5488 last = node.children[-1]
5489 if first.type == token.LPAR and last.type == token.RPAR:
5490 middle = node.children[1]
5491 # make parentheses invisible
5492 first.value = "" # type: ignore
5493 last.value = "" # type: ignore
5494 maybe_make_parens_invisible_in_atom(middle, parent=parent)
5496 if is_atom_with_invisible_parens(middle):
5497 # Strip the invisible parens from `middle` by replacing
5498 # it with the child in-between the invisible parens
5499 middle.replace(middle.children[1])
5506 def is_atom_with_invisible_parens(node: LN) -> bool:
5507 """Given a `LN`, determines whether it's an atom `node` with invisible
5508 parens. Useful in dedupe-ing and normalizing parens.
5510 if isinstance(node, Leaf) or node.type != syms.atom:
5513 first, last = node.children[0], node.children[-1]
5515 isinstance(first, Leaf)
5516 and first.type == token.LPAR
5517 and first.value == ""
5518 and isinstance(last, Leaf)
5519 and last.type == token.RPAR
5520 and last.value == ""
5524 def is_empty_tuple(node: LN) -> bool:
5525 """Return True if `node` holds an empty tuple."""
5527 node.type == syms.atom
5528 and len(node.children) == 2
5529 and node.children[0].type == token.LPAR
5530 and node.children[1].type == token.RPAR
5534 def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
5535 """Returns `wrapped` if `node` is of the shape ( wrapped ).
5537 Parentheses can be optional. Returns None otherwise.
5538 if len(node.children) != 3:
5541 lpar, wrapped, rpar = node.children
5542 if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
5548 def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
5549 """Wrap `child` in parentheses.
5551 This replaces `child` with an atom holding the parentheses and the old
5552 child. That requires moving the prefix.
5554 If `visible` is False, the leaves will be valueless (and thus invisible).
5556 lpar = Leaf(token.LPAR, "(" if visible else "")
5557 rpar = Leaf(token.RPAR, ")" if visible else "")
5558 prefix = child.prefix
5560 index = child.remove() or 0
5561 new_child = Node(syms.atom, [lpar, child, rpar])
5562 new_child.prefix = prefix
5563 parent.insert_child(index, new_child)
5566 def is_one_tuple(node: LN) -> bool:
5567 """Return True if `node` holds a tuple with one element, with or without parens."""
5568 if node.type == syms.atom:
5569 gexp = unwrap_singleton_parenthesis(node)
5570 if gexp is None or gexp.type != syms.testlist_gexp:
5573 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
5576 node.type in IMPLICIT_TUPLE
5577 and len(node.children) == 2
5578 and node.children[1].type == token.COMMA
5582 def is_walrus_assignment(node: LN) -> bool:
5583 """Return True iff `node` is of the shape ( test := test )"""
5584 inner = unwrap_singleton_parenthesis(node)
5585 return inner is not None and inner.type == syms.namedexpr_test
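# For example, the atom parsed from "(n := 10)" unwraps to a namedexpr_test
# and matches, while "(n == 10)" unwraps to a comparison and does not.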
5588 def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
5589 """Return True iff `node` is a trailer valid in a simple decorator"""
5590 return node.type == syms.trailer and (
5592 len(node.children) == 2
5593 and node.children[0].type == token.DOT
5594 and node.children[1].type == token.NAME
5596 # last trailer can be arguments
5599 and len(node.children) == 3
5600 and node.children[0].type == token.LPAR
5601 # and node.children[1].type == syms.argument
5602 and node.children[2].type == token.RPAR
5607 def is_simple_decorator_expression(node: LN) -> bool:
5608 """Return True iff `node` could be a 'dotted name' decorator
5610 This function takes the node of the 'namedexpr_test' of the new decorator
5611 grammar and tests whether it would be valid under the old decorator grammar.
5613 The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
5614 The new grammar is:  decorator: @ namedexpr_test NEWLINE
5616 if node.type == token.NAME:
5618 if node.type == syms.power:
5621 node.children[0].type == token.NAME
5622 and all(map(is_simple_decorator_trailer, node.children[1:-1]))
5624 len(node.children) < 2
5625 or is_simple_decorator_trailer(node.children[-1], last=True)
5631 def is_yield(node: LN) -> bool:
5632 """Return True if `node` holds a `yield` or `yield from` expression."""
5633 if node.type == syms.yield_expr:
5636 if node.type == token.NAME and node.value == "yield": # type: ignore
5639 if node.type != syms.atom:
5642 if len(node.children) != 3:
5645 lpar, expr, rpar = node.children
5646 if lpar.type == token.LPAR and rpar.type == token.RPAR:
5647 return is_yield(expr)
5652 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
5653 """Return True if `leaf` is a star or double star in a vararg or kwarg.
5655 If `within` includes VARARGS_PARENTS, this applies to function signatures.
5656 If `within` includes UNPACKING_PARENTS, it applies to right hand-side
5657 extended iterable unpacking (PEP 3132) and additional unpacking
5658 generalizations (PEP 448).
5660 if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
5664 if p.type == syms.star_expr:
5665 # Star expressions are also used as assignment targets in extended
5666 # iterable unpacking (PEP 3132). See what its parent is instead.
5672 return p.type in within
5675 def is_multiline_string(leaf: Leaf) -> bool:
5676 """Return True if `leaf` is a multiline string that actually spans many lines."""
5677 return has_triple_quotes(leaf.value) and "\n" in leaf.value
5680 def is_stub_suite(node: Node) -> bool:
5681 """Return True if `node` is a suite with a stub body."""
5683 len(node.children) != 4
5684 or node.children[0].type != token.NEWLINE
5685 or node.children[1].type != token.INDENT
5686 or node.children[3].type != token.DEDENT
5690 return is_stub_body(node.children[2])
5693 def is_stub_body(node: LN) -> bool:
5694 """Return True if `node` is a simple statement containing an ellipsis."""
5695 if not isinstance(node, Node) or node.type != syms.simple_stmt:
5698 if len(node.children) != 2:
5701 child = node.children[0]
5703 child.type == syms.atom
5704 and len(child.children) == 3
5705 and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
5709 def max_delimiter_priority_in_atom(node: LN) -> Priority:
5710 """Return maximum delimiter priority inside `node`.
5712 This is specific to atoms with contents contained in a pair of parentheses.
5713 If `node` isn't an atom or there are no enclosing parentheses, returns 0.
5715 if node.type != syms.atom:
5718 first = node.children[0]
5719 last = node.children[-1]
5720 if not (first.type == token.LPAR and last.type == token.RPAR):
5723 bt = BracketTracker()
5724 for c in node.children[1:-1]:
5725 if isinstance(c, Leaf):
5728 for leaf in c.leaves():
5731 return bt.max_delimiter_priority()
5737 def ensure_visible(leaf: Leaf) -> None:
5738 """Make sure parentheses are visible.
5740 They could be invisible as part of some statements (see
5741 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
5743 if leaf.type == token.LPAR:
5745 elif leaf.type == token.RPAR:
5749 def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
5750 """Should `line` be immediately split with `delimiter_split()` after RHS?"""
5752 if not (opening_bracket.parent and opening_bracket.value in "[{("):
5755 # We're essentially checking whether the body is delimited by commas and
5756 # there's more than one of them (we exclude the trailing comma; if the
5757 # delimiter priority is still commas, there must be more).
5759 trailing_comma = False
5761 last_leaf = line.leaves[-1]
5762 if last_leaf.type == token.COMMA:
5763 trailing_comma = True
5764 exclude.add(id(last_leaf))
5765 max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
5766 except (IndexError, ValueError):
5769 return max_priority == COMMA_PRIORITY and (
5771 # always explode imports
5772 or opening_bracket.parent.type in {syms.atom, syms.import_from}
5776 def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
5777 """Return True if content between `opening` and `closing` looks like a one-tuple."""
5778 if opening.type != token.LPAR and closing.type != token.RPAR:
5781 depth = closing.bracket_depth + 1
5782 for _opening_index, leaf in enumerate(leaves):
5787 raise LookupError("Opening paren not found in `leaves`")
5791 for leaf in leaves[_opening_index:]:
5795 bracket_depth = leaf.bracket_depth
5796 if bracket_depth == depth and leaf.type == token.COMMA:
5798 if leaf.parent and leaf.parent.type in {
5808 def get_features_used(node: Node) -> Set[Feature]:
5809 """Return a set of (relatively) new Python features used in this file.
5811 Currently looking for:
5813 - underscores in numeric literals;
5814 - trailing commas after * or ** in function signatures and calls;
5815 - positional only arguments in function signatures and lambdas;
5816 - assignment expression;
5817 - relaxed decorator syntax;
5819 features: Set[Feature] = set()
5820 for n in node.pre_order():
5821 if n.type == token.STRING:
5822 value_head = n.value[:2] # type: ignore
5823 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
5824 features.add(Feature.F_STRINGS)
5826 elif n.type == token.NUMBER:
5827 if "_" in n.value: # type: ignore
5828 features.add(Feature.NUMERIC_UNDERSCORES)
5830 elif n.type == token.SLASH:
5831 if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
5832 features.add(Feature.POS_ONLY_ARGUMENTS)
5834 elif n.type == token.COLONEQUAL:
5835 features.add(Feature.ASSIGNMENT_EXPRESSIONS)
5837 elif n.type == syms.decorator:
5838 if len(n.children) > 1 and not is_simple_decorator_expression(
5841 features.add(Feature.RELAXED_DECORATORS)
5844 n.type in {syms.typedargslist, syms.arglist}
5846 and n.children[-1].type == token.COMMA
5848 if n.type == syms.typedargslist:
5849 feature = Feature.TRAILING_COMMA_IN_DEF
5851 feature = Feature.TRAILING_COMMA_IN_CALL
5853 for ch in n.children:
5854 if ch.type in STARS:
5855 features.add(feature)
5857 if ch.type == syms.argument:
5858 for argch in ch.children:
5859 if argch.type in STARS:
5860 features.add(feature)
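# Illustrative sketch: a module containing `f"{x}"` and `(n := 10)` reports
# {Feature.F_STRINGS, Feature.ASSIGNMENT_EXPRESSIONS}, which
# detect_target_versions() below narrows to the 3.8+ target versions.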
5865 def detect_target_versions(node: Node) -> Set[TargetVersion]:
5866 """Detect the version to target based on the nodes used."""
5867 features = get_features_used(node)
5869 version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
5873 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
5874 """Generate sets of closing bracket IDs that should be omitted in a RHS.
5876 Brackets can be omitted if the entire trailer up to and including
5877 a preceding closing bracket fits in one line.
5879 Yielded sets are cumulative (contain results of previous yields, too). First
5880 set is empty, unless the line should explode, in which case bracket pairs until
5881 the one that needs to explode are omitted.
5884 omit: Set[LeafID] = set()
5885 if not line.should_explode:
5888 length = 4 * line.depth
5889 opening_bracket: Optional[Leaf] = None
5890 closing_bracket: Optional[Leaf] = None
5891 inner_brackets: Set[LeafID] = set()
5892 for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
5893 length += leaf_length
5894 if length > line_length:
5897 has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
5898 if leaf.type == STANDALONE_COMMENT or has_inline_comment:
5902 if leaf is opening_bracket:
5903 opening_bracket = None
5904 elif leaf.type in CLOSING_BRACKETS:
5905 prev = line.leaves[index - 1] if index > 0 else None
5909 and prev.type == token.COMMA
5910 and not is_one_tuple_between(
5911 leaf.opening_bracket, leaf, line.leaves
5914 # Never omit bracket pairs with trailing commas.
5915 # We need to explode on those.
5918 inner_brackets.add(id(leaf))
5919 elif leaf.type in CLOSING_BRACKETS:
5920 prev = line.leaves[index - 1] if index > 0 else None
5921 if prev and prev.type in OPENING_BRACKETS:
5922 # Empty brackets would fail a split so treat them as "inner"
5923 # brackets (e.g. only add them to the `omit` set if another
5924 # pair of brackets was good enough).
5925 inner_brackets.add(id(leaf))
5929 omit.add(id(closing_bracket))
5930 omit.update(inner_brackets)
5931 inner_brackets.clear()
5937 and prev.type == token.COMMA
5938 and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
5940 # Never omit bracket pairs with trailing commas.
5941 # We need to explode on those.
5945 opening_bracket = leaf.opening_bracket
5946 closing_bracket = leaf
5949 def get_future_imports(node: Node) -> Set[str]:
5950 """Return a set of __future__ imports in the file."""
5951 imports: Set[str] = set()
5953 def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
5954 for child in children:
5955 if isinstance(child, Leaf):
5956 if child.type == token.NAME:
5959 elif child.type == syms.import_as_name:
5960 orig_name = child.children[0]
5961 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
5962 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
5963 yield orig_name.value
5965 elif child.type == syms.import_as_names:
5966 yield from get_imports_from_children(child.children)
5969 raise AssertionError("Invalid syntax parsing imports")
5971 for child in node.children:
5972 if child.type != syms.simple_stmt:
5975 first_child = child.children[0]
5976 if isinstance(first_child, Leaf):
5977 # Continue looking if we see a docstring; otherwise stop.
5979 len(child.children) == 2
5980 and first_child.type == token.STRING
5981 and child.children[1].type == token.NEWLINE
5987 elif first_child.type == syms.import_from:
5988 module_name = first_child.children[1]
5989 if not isinstance(module_name, Leaf) or module_name.value != "__future__":
5992 imports |= set(get_imports_from_children(first_child.children[3:]))
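# For example, a file whose docstring is followed by
# `from __future__ import annotations, division` yields
# {"annotations", "division"}; the docstring itself is skipped, not treated
# as code.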
6000 def get_gitignore(root: Path) -> PathSpec:
6001 """ Return a PathSpec matching gitignore content if present."""
6002 gitignore = root / ".gitignore"
6003 lines: List[str] = []
6004 if gitignore.is_file():
6005 with gitignore.open() as gf:
6006 lines = gf.readlines()
6007 return PathSpec.from_lines("gitwildmatch", lines)
6010 def normalize_path_maybe_ignore(
6011 path: Path, root: Path, report: "Report"
6013 """Normalize `path`. May return `None` if `path` was ignored.
6015 `report` is where "path ignored" output goes.
6018 abspath = path if path.is_absolute() else Path.cwd() / path
6019 normalized_path = abspath.resolve().relative_to(root).as_posix()
6020 except OSError as e:
6021 report.path_ignored(path, f"cannot be read because {e}")
6025 if path.is_symlink():
6026 report.path_ignored(path, f"is a symbolic link that points outside {root}")
6031 return normalized_path
6034 def gen_python_files(
6035 paths: Iterable[Path],
6037 include: Optional[Pattern[str]],
6038 exclude: Pattern[str],
6039 force_exclude: Optional[Pattern[str]],
6041 gitignore: PathSpec,
6042 ) -> Iterator[Path]:
6043 """Generate all files under `path` whose paths are not excluded by the
6044 `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
6046 Symbolic links pointing outside of the `root` directory are ignored.
6048 `report` is where output about exclusions goes.
6050 assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
6052 normalized_path = normalize_path_maybe_ignore(child, root, report)
6053 if normalized_path is None:
6056 # First ignore files matching .gitignore
6057 if gitignore.match_file(normalized_path):
6058 report.path_ignored(child, "matches the .gitignore file content")
6061 # Then ignore with `--exclude` and `--force-exclude` options.
6062 normalized_path = "/" + normalized_path
6064 normalized_path += "/"
6066 exclude_match = exclude.search(normalized_path) if exclude else None
6067 if exclude_match and exclude_match.group(0):
6068 report.path_ignored(child, "matches the --exclude regular expression")
6071 force_exclude_match = (
6072 force_exclude.search(normalized_path) if force_exclude else None
6074 if force_exclude_match and force_exclude_match.group(0):
6075 report.path_ignored(child, "matches the --force-exclude regular expression")
6079 yield from gen_python_files(
6089 elif child.is_file():
6090 include_match = include.search(normalized_path) if include else True
6096 def find_project_root(srcs: Iterable[str]) -> Path:
6097 """Return a directory containing .git, .hg, or pyproject.toml.
6099 That directory will be a common parent of all files and directories
6102 If no directory in the tree contains a marker that would specify it's the
6103 project root, the root of the file system is returned.
6106 return Path("/").resolve()
6108 path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
6110 # A list of lists of parents for each 'src'. 'src' is included as a
6111 # "parent" of itself if it is a directory
6113 list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs
6117 set.intersection(*(set(parents) for parents in src_parents)),
6118 key=lambda path: path.parts,
6121 for directory in (common_base, *common_base.parents):
6122 if (directory / ".git").exists():
6125 if (directory / ".hg").is_dir():
6128 if (directory / "pyproject.toml").is_file():
6136 """Provides a reformatting counter. Can be rendered with `str(report)`."""
6141 verbose: bool = False
6142 change_count: int = 0
6144 failure_count: int = 0
6146 def done(self, src: Path, changed: Changed) -> None:
6147 """Increment the counter for successful reformatting. Write out a message."""
6148 if changed is Changed.YES:
6149 reformatted = "would reformat" if self.check or self.diff else "reformatted"
6150 if self.verbose or not self.quiet:
6151 out(f"{reformatted} {src}")
6152 self.change_count += 1
6155 if changed is Changed.NO:
6156 msg = f"{src} already well formatted, good job."
6158 msg = f"{src} wasn't modified on disk since last run."
6159 out(msg, bold=False)
6160 self.same_count += 1
6162 def failed(self, src: Path, message: str) -> None:
6163 """Increment the counter for failed reformatting. Write out a message."""
6164 err(f"error: cannot format {src}: {message}")
6165 self.failure_count += 1
6167 def path_ignored(self, path: Path, message: str) -> None:
6169 out(f"{path} ignored: {message}", bold=False)
6172 def return_code(self) -> int:
6173 """Return the exit code that the app should use.
6175 This considers the current state of changed files and failures:
6176 - if there were any failures, return 123;
6177 - if any files were changed and --check is being used, return 1;
6178 - otherwise return 0.
6180 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
6181 # 126 we have special return codes reserved by the shell.
6182 if self.failure_count:
6185 elif self.change_count and self.check:
6190 def __str__(self) -> str:
6191 """Render a color report of the current state.
6193 Use `click.unstyle` to remove colors.
6195 if self.check or self.diff:
6196 reformatted = "would be reformatted"
6197 unchanged = "would be left unchanged"
6198 failed = "would fail to reformat"
6200 reformatted = "reformatted"
6201 unchanged = "left unchanged"
6202 failed = "failed to reformat"
6204 if self.change_count:
6205 s = "s" if self.change_count > 1 else ""
6207 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
6210 s = "s" if self.same_count > 1 else ""
6211 report.append(f"{self.same_count} file{s} {unchanged}")
6212 if self.failure_count:
6213 s = "s" if self.failure_count > 1 else ""
6215 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
6217 return ", ".join(report) + "."
6220 def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
6221 filename = "<unknown>"
6222 if sys.version_info >= (3, 8):
6223 # TODO: support Python 4+ ;)
6224 for minor_version in range(sys.version_info[1], 4, -1):
6226 return ast.parse(src, filename, feature_version=(3, minor_version))
6230 for feature_version in (7, 6):
6232 return ast3.parse(src, filename, feature_version=feature_version)
6236 return ast27.parse(src)
6239 def _fixup_ast_constants(
6240 node: Union[ast.AST, ast3.AST, ast27.AST]
6241 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
6242 """Map ast nodes deprecated in 3.8 to Constant."""
6243 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
6244 return ast.Constant(value=node.s)
6246 if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
6247 return ast.Constant(value=node.n)
6249 if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
6250 return ast.Constant(value=node.value)
6256 node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
6258 """Simple visitor generating strings to compare ASTs by content."""
6260 node = _fixup_ast_constants(node)
6262 yield f"{' ' * depth}{node.__class__.__name__}("
6264 for field in sorted(node._fields): # noqa: F402
6265 # TypeIgnore has only one field 'lineno' which breaks this comparison
6266 type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
6267 if sys.version_info >= (3, 8):
6268 type_ignore_classes += (ast.TypeIgnore,)
6269 if isinstance(node, type_ignore_classes):
6273 value = getattr(node, field)
6274 except AttributeError:
6277 yield f"{' ' * (depth+1)}{field}="
6279 if isinstance(value, list):
6281 # Ignore nested tuples within del statements, because we may insert
6282 # parentheses and they change the AST.
6285 and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
6286 and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
6288 for item in item.elts:
6289 yield from _stringify_ast(item, depth + 2)
6291 elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
6292 yield from _stringify_ast(item, depth + 2)
6294 elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
6295 yield from _stringify_ast(value, depth + 2)
6298 # Constant strings may be indented across newlines, if they are
6299 # docstrings; fold spaces after newlines when comparing. Similarly,
6300 # trailing and leading space may be removed.
6302 isinstance(node, ast.Constant)
6303 and field == "value"
6304 and isinstance(value, str)
6306 normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
6309 yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"
6311 yield f"{' ' * depth}) # /{node.__class__.__name__}"
6314 def assert_equivalent(src: str, dst: str) -> None:
6315 """Raise AssertionError if `src` and `dst` aren't equivalent."""
6317 src_ast = parse_ast(src)
6318 except Exception as exc:
6319 raise AssertionError(
6320 "cannot use --safe with this file; failed to parse source file. AST"
6321 f" error message: {exc}"
6325 dst_ast = parse_ast(dst)
6326 except Exception as exc:
6327 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
6328 raise AssertionError(
6329 f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
6330 " on https://github.com/psf/black/issues. This invalid output might be"
6334 src_ast_str = "\n".join(_stringify_ast(src_ast))
6335 dst_ast_str = "\n".join(_stringify_ast(dst_ast))
6336 if src_ast_str != dst_ast_str:
6337 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
6338 raise AssertionError(
6339 "INTERNAL ERROR: Black produced code that is not equivalent to the"
6340 " source. Please report a bug on https://github.com/psf/black/issues. "
6341 f" This diff might be helpful: {log}"
6345 def assert_stable(src: str, dst: str, mode: Mode) -> None:
6346 """Raise AssertionError if `dst` reformats differently the second time."""
6347 newdst = format_str(dst, mode=mode)
6351 diff(src, dst, "source", "first pass"),
6352 diff(dst, newdst, "first pass", "second pass"),
6354 raise AssertionError(
6355 "INTERNAL ERROR: Black produced different code on the second pass of the"
6356 " formatter. Please report a bug on https://github.com/psf/black/issues."
6357 f" This diff might be helpful: {log}"
6361 @mypyc_attr(patchable=True)
6362 def dump_to_file(*output: str) -> str:
6363 """Dump `output` to a temporary file. Return path to the file."""
6364 with tempfile.NamedTemporaryFile(
6365 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
6367 for lines in output:
6369 if lines and lines[-1] != "\n":
6375 def nullcontext() -> Iterator[None]:
6376 """Return an empty context manager.
6378 To be used like `nullcontext` in Python 3.7.
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`."""
    import difflib

    a_lines = [line + "\n" for line in a.splitlines()]
    b_lines = [line + "\n" for line in b.splitlines()]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
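# Illustrative example of the helper above:
#
#     >>> print(diff("a\nb\n", "a\nc\n", "before", "after"), end="")
#     --- before
#     +++ after
#     @@ -1,2 +1,2 @@
#      a
#     -b
#     +c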
def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
        )
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
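# A sketch of how `cancel` and `shutdown` fit together around an event loop
# (hypothetical driver code, not part of this module; `do_work` and
# `pending_tasks` are made-up names):
#
#     loop = asyncio.get_event_loop()
#     try:
#         loop.add_signal_handler(signal.SIGINT, cancel, pending_tasks)
#         loop.run_until_complete(do_work())
#     finally:
#         shutdown(loop)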
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    return regex.sub(replacement, regex.sub(replacement, original))
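# Why two passes: the regex engine resumes scanning *after* each match, so a
# match overlapping a previous match site is missed on the first pass.
# Illustrative example:
#
#     >>> pattern = re.compile("aa")
#     >>> pattern.sub("a", "aaa")  # one pass: the trailing "a" pairs with nothing
#     'aa'
#     >>> sub_twice(pattern, "a", "aaa")
#     'a'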
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile a regular expression string in `regex`.

    If it contains newlines, use verbose mode.
    """
    if "\n" in regex:
        regex = "(?x)" + regex
    compiled: Pattern[str] = re.compile(regex)
    return compiled
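# Illustrative examples: a single-line pattern (like DEFAULT_INCLUDES) compiles
# as-is; a pattern containing newlines gets "(?x)" prepended, so whitespace and
# "#" comments inside it are ignored:
#
#     >>> bool(re_compile_maybe_verbose(r"\.pyi?$").search("foo.py"))
#     True
#     >>> pattern = "(\n  \\.py   # plain sources\n| \\.pyi  # stubs\n)$"
#     >>> bool(re_compile_maybe_verbose(pattern).search("foo.pyi"))
#     True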
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible."""
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        index -= 1
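# Illustrative example:
#
#     >>> list(enumerate_reversed("abc"))
#     [(2, 'c'), (1, 'b'), (0, 'a')]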
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Return an enumeration of leaves with their length.

    Stops prematurely on multiline strings and standalone comments.
    """
    op = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in op(line.leaves):
        length = len(leaf.prefix) + len(leaf.value)
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        for comment in line.comments_after(leaf):
            length += len(comment.value)

        yield index, leaf, length
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    if not line_str:
        line_str = line_to_string(line)
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        call_count = 0
        dot_count = 0
        next = leaves[-1]
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False
                call_count += 1
            elif leaf.type == token.DOT:
                dot_count += 1
            elif leaf.type == token.NAME:
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False
            elif leaf.type not in CLOSING_BRACKETS:
                return False
            if dot_count > 1 and call_count > 1:
                return False
            next = leaf
    return True
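# The STRING-then-DOT case above targets trailer chains hanging off a string
# literal, e.g. this (illustrative) source line:
#
#     "{}: {}".format(key, value).encode()
#
# Once its calls and attribute accesses are exhausted there is no useful split
# point left, so reporting the line as unsplittable avoids wrapping it in
# pointless parentheses.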
def can_omit_invisible_parens(
    line: Line,
    line_length: int,
    omit_on_explode: Collection[LeafID] = (),
) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    first = line.leaves[0]
    second = line.leaves[1]
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        if _can_omit_opening_paren(line, first=first, line_length=line_length):
            return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket. If the
        # opening bracket doesn't match our rule, maybe the closing will.

    penultimate = line.leaves[-2]
    last = line.leaves[-1]
    if line.should_explode:
        try:
            penultimate, last = last_two_except(line.leaves, omit=omit_on_explode)
        except LookupError:
            # Turns out we'd omit everything. We cannot skip the optional parentheses.
            return False

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer  # last bracket wasn't a function call
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        if line.should_explode and penultimate.type == token.COMMA:
            # The rightmost non-omitted bracket pair is the one we want to explode on.
            return True

        if _can_omit_closing_paren(line, last=last, line_length=line_length):
            return True

    return False
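# Roughly, the shapes accepted above (illustrative expressions):
#
#     [1, 2, 3] + some_long_name      # leading bracket; _can_omit_opening_paren
#     some_long_name + f(a, b, c)     # trailing bracket; _can_omit_closing_paren
#
# In both cases the existing bracket pair already gives the formatter a place
# to split, so invisible parentheses around the whole expression add nothing.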
def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
    """See `can_omit_invisible_parens`."""
    remainder = False
    length = 4 * line.depth
    _index = -1
    for _index, leaf, leaf_length in enumerate_with_length(line):
        if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
            remainder = True
        if remainder:
            length += leaf_length
            if length > line_length:
                break

            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                remainder = False
    else:
        # checked the entire string and line length wasn't exceeded
        if len(line.leaves) == _index + 1:
            return True

    return False
def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
    """See `can_omit_invisible_parens`."""
    length = 4 * line.depth
    seen_other_brackets = False
    for _index, leaf, leaf_length in enumerate_with_length(line):
        length += leaf_length
        if leaf is last.opening_bracket:
            if seen_other_brackets or length <= line_length:
                return True

        elif leaf.type in OPENING_BRACKETS:
            # There are brackets we can further split on.
            seen_other_brackets = True

    return False
def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
    """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
    stop_after: Optional[Leaf] = None
    last: Optional[Leaf] = None
    for leaf in reversed(leaves):
        if stop_after:
            if leaf is stop_after:
                stop_after = None
            continue
        if last:
            return leaf, last
        if id(leaf) in omit:
            stop_after = leaf.opening_bracket
        else:
            last = leaf
    raise LookupError("Last two leaves were also skipped")
def run_transformer(
    line: Line,
    transform: Transformer,
    mode: Mode,
    features: Collection[Feature],
    *,
    line_str: str = "",
) -> List[Line]:
    if not line_str:
        line_str = line_to_string(line)
    result: List[Line] = []
    for transformed_line in transform(line, features):
        if str(transformed_line).strip("\n") == line_str:
            raise CannotTransform("Line transformer returned an unchanged result")

        result.extend(transform_line(transformed_line, mode=mode, features=features))

    if not (
        transform.__name__ == "rhs"
        and line.bracket_tracker.invisible
        and not any(bracket.value for bracket in line.bracket_tracker.invisible)
        and not line.contains_multiline_strings()
        and not result[0].contains_uncollapsable_type_comments()
        and not result[0].contains_unsplittable_type_ignore()
        and not is_line_short_enough(result[0], line_length=mode.line_length)
    ):
        return result

    line_copy = line.clone()
    append_leaves(line_copy, line, line.leaves)
    features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
    second_opinion = run_transformer(
        line_copy, transform, mode, features_fop, line_str=line_str
    )
    if all(
        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
    ):
        result = second_opinion
    return result
def get_cache_file(mode: Mode) -> Path:
    return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
def read_cache(mode: Mode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (pickle.UnpicklingError, ValueError):
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not."""
    stat = path.stat()
    return stat.st_mtime, stat.st_size
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files.
    """
    todo, done = set(), set()
    for src in sources:
        src = src.resolve()
        if cache.get(src) != get_cache_info(src):
            todo.add(src)
        else:
            done.add(src)
    return todo, done
def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
    """Update the cache file."""
    cache_file = get_cache_file(mode)
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
            pickle.dump(new_cache, f, protocol=4)
        os.replace(f.name, cache_file)
    except OSError:
        pass
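# A sketch of the cache round trip as used while formatting many files
# (hypothetical driver code; names are illustrative):
#
#     cache = read_cache(mode)
#     sources, already_formatted = filter_cached(cache, sources)
#     # ... reformat everything left in `sources` ...
#     write_cache(cache, successfully_formatted, mode)
#
# Entries are keyed by resolved path and compared by (st_mtime, st_size), so
# files untouched since the last run are skipped entirely.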
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application. Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code. Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.
    """
    try:
        from click import core
        from click import _unicodefun  # type: ignore
    except ModuleNotFoundError:
        return

    for module in (core, _unicodefun):
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
def patched_main() -> None:
    freeze_support()
    patch_click()
    main()
def is_docstring(leaf: Leaf) -> bool:
    if not is_multiline_string(leaf):
        # For the purposes of docstring re-indentation, we don't need to do anything
        # with single-line docstrings.
        return False

    if prev_siblings_are(
        leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
    ):
        return True

    # Multiline docstring on the same line as the `def`.
    if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
        # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
        # grammar. We're safe to return True without further checks.
        return True

    return False
def lines_with_leading_tabs_expanded(s: str) -> List[str]:
    """
    Splits string into lines and expands only leading tabs (following the normal
    Python rules)
    """
    lines = []
    for line in s.splitlines():
        # Find the index of the first non-whitespace character after a string of
        # whitespace that includes at least one tab
        match = re.match(r"\s*\t+\s*(\S)", line)
        if match:
            first_non_whitespace_idx = match.start(1)
            lines.append(
                line[:first_non_whitespace_idx].expandtabs()
                + line[first_non_whitespace_idx:]
            )
        else:
            lines.append(line)
    return lines
def fix_docstring(docstring: str, prefix: str) -> str:
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return docstring

    lines = lines_with_leading_tabs_expanded(docstring)
    # Determine minimum indentation (first line doesn't count):
    indent = sys.maxsize
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indent = min(indent, len(line) - len(stripped))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent < sys.maxsize:
        last_line_idx = len(lines) - 2
        for i, line in enumerate(lines[1:]):
            stripped_line = line[indent:].rstrip()
            if stripped_line or i == last_line_idx:
                trimmed.append(prefix + stripped_line)
            else:
                trimmed.append("")
    return "\n".join(trimmed)
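# Illustrative example of the PEP 257 trimming above: the common indentation of
# the continuation lines is stripped, then `prefix` is re-applied:
#
#     >>> print(fix_docstring("First line.\n        Indented body.", "    "))
#     First line.
#         Indented body.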
if __name__ == "__main__":
    patched_main()