X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/b73ec93fa71ae8980d0743555a9f49f46528e6e1..07c046b7cd0e4ced8f1713c8b8fa9fb0190cf472:/black.py diff --git a/black.py b/black.py index 65545e9..e55e4fe 100644 --- a/black.py +++ b/black.py @@ -37,12 +37,15 @@ from typing import ( Union, cast, ) +from typing_extensions import Final +from mypy_extensions import mypyc_attr from appdirs import user_cache_dir -from attr import dataclass, evolve, Factory +from dataclasses import dataclass, field, replace import click import toml from typed_ast import ast3, ast27 +from pathspec import PathSpec # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr @@ -51,7 +54,7 @@ from blib2to3.pgen2 import driver, token from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError -from _version import version as __version__ +from _black_version import version as __version__ DEFAULT_LINE_LENGTH = 88 DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950 @@ -183,8 +186,8 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = { @dataclass -class FileMode: - target_versions: Set[TargetVersion] = Factory(set) +class Mode: + target_versions: Set[TargetVersion] = field(default_factory=set) line_length: int = DEFAULT_LINE_LENGTH string_normalization: bool = True is_pyi: bool = False @@ -206,30 +209,46 @@ class FileMode: return ".".join(parts) +# Legacy name, left for integrations. +FileMode = Mode + + def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool: return all(feature in VERSION_TO_FEATURES[version] for version in target_versions) +def find_pyproject_toml(path_search_start: str) -> Optional[str]: + """Find the absolute filepath to a pyproject.toml if it exists""" + path_project_root = find_project_root(path_search_start) + path_pyproject_toml = path_project_root / "pyproject.toml" + return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None + + +def parse_pyproject_toml(path_config: str) -> Dict[str, Any]: + """Parse a pyproject toml file, pulling out relevant parts for Black + + If parsing fails, will raise a toml.TomlDecodeError + """ + pyproject_toml = toml.load(path_config) + config = pyproject_toml.get("tool", {}).get("black", {}) + return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()} + + def read_pyproject_toml( - ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None] + ctx: click.Context, param: click.Parameter, value: Optional[str] ) -> Optional[str]: """Inject Black configuration from "pyproject.toml" into defaults in `ctx`. Returns the path to a successfully found and read configuration file, None otherwise. """ - assert not isinstance(value, (int, bool)), "Invalid parameter type passed" if not value: - root = find_project_root(ctx.params.get("src", ())) - path = root / "pyproject.toml" - if path.is_file(): - value = str(path) - else: + value = find_pyproject_toml(ctx.params.get("src", ())) + if value is None: return None try: - pyproject_toml = toml.load(value) - config = pyproject_toml.get("tool", {}).get("black", {}) + config = parse_pyproject_toml(value) except (toml.TomlDecodeError, OSError) as e: raise click.FileError( filename=value, hint=f"Error reading configuration file: {e}" @@ -238,14 +257,26 @@ def read_pyproject_toml( if not config: return None - if ctx.default_map is None: - ctx.default_map = {} - ctx.default_map.update( # type: ignore # bad types in .pyi - {k.replace("--", "").replace("-", "_"): v for k, v in config.items()} - ) + default_map: Dict[str, Any] = {} + if ctx.default_map: + default_map.update(ctx.default_map) + default_map.update(config) + + ctx.default_map = default_map return value +def target_version_option_callback( + c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...] +) -> List[TargetVersion]: + """Compute the target versions from a --target-version flag. + + This is its own function because mypy couldn't infer the type correctly + when it was a lambda, causing mypyc trouble. + """ + return [TargetVersion[val.upper()] for val in v] + + @click.command(context_settings=dict(help_option_names=["-h", "--help"])) @click.option("-c", "--code", type=str, help="Format the code passed in as a string.") @click.option( @@ -260,7 +291,7 @@ def read_pyproject_toml( "-t", "--target-version", type=click.Choice([v.name.lower() for v in TargetVersion]), - callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v], + callback=target_version_option_callback, multiple=True, help=( "Python versions that should be supported by Black's output. [default: " @@ -365,7 +396,12 @@ def read_pyproject_toml( @click.option( "--config", type=click.Path( - exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + allow_dash=False, + path_type=str, ), is_eager=True, callback=read_pyproject_toml, @@ -387,14 +423,14 @@ def main( verbose: bool, include: str, exclude: str, - src: Tuple[str], + src: Tuple[str, ...], config: Optional[str], ) -> None: """The uncompromising code formatter.""" write_back = WriteBack.from_configuration(check=check, diff=diff) if target_version: if py36: - err(f"Cannot use both --target-version and --py36") + err("Cannot use both --target-version and --py36") ctx.exit(2) else: versions = set(target_version) @@ -407,7 +443,7 @@ def main( else: # We'll autodetect later. versions = set() - mode = FileMode( + mode = Mode( target_versions=versions, line_length=line_length, is_pyi=pyi, @@ -428,7 +464,7 @@ def main( except re.error: err(f"Invalid regular expression for exclude given: {exclude!r}") ctx.exit(2) - report = Report(check=check, quiet=quiet, verbose=verbose) + report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose) root = find_project_root(src) sources: Set[Path] = set() path_empty(src, quiet, verbose, ctx) @@ -436,7 +472,9 @@ def main( p = Path(s) if p.is_dir(): sources.update( - gen_python_files_in_dir(p, root, include_regex, exclude_regex, report) + gen_python_files_in_dir( + p, root, include_regex, exclude_regex, report, get_gitignore(root) + ) ) elif p.is_file() or s == "-": # if a file was explicitly given, we don't care about its extension @@ -467,7 +505,9 @@ def main( ctx.exit(report.return_code) -def path_empty(src: Tuple[str], quiet: bool, verbose: bool, ctx: click.Context) -> None: +def path_empty( + src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context +) -> None: """ Exit if there is no `src` provided for formatting """ @@ -478,7 +518,7 @@ def path_empty(src: Tuple[str], quiet: bool, verbose: bool, ctx: click.Context) def reformat_one( - src: Path, fast: bool, write_back: WriteBack, mode: FileMode, report: "Report" + src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report" ) -> None: """Reformat a single file under `src` without spawning child processes. @@ -511,11 +551,7 @@ def reformat_one( def reformat_many( - sources: Set[Path], - fast: bool, - write_back: WriteBack, - mode: FileMode, - report: "Report", + sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report" ) -> None: """Reformat multiple files using a ProcessPoolExecutor.""" loop = asyncio.get_event_loop() @@ -545,7 +581,7 @@ async def schedule_formatting( sources: Set[Path], fast: bool, write_back: WriteBack, - mode: FileMode, + mode: Mode, report: "Report", loop: asyncio.AbstractEventLoop, executor: Executor, @@ -582,7 +618,7 @@ async def schedule_formatting( ): src for src in sorted(sources) } - pending: Iterable[asyncio.Future] = tasks.keys() + pending: Iterable["asyncio.Future[bool]"] = tasks.keys() try: loop.add_signal_handler(signal.SIGINT, cancel, pending) loop.add_signal_handler(signal.SIGTERM, cancel, pending) @@ -615,7 +651,7 @@ async def schedule_formatting( def format_file_in_place( src: Path, fast: bool, - mode: FileMode, + mode: Mode, write_back: WriteBack = WriteBack.NO, lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy ) -> bool: @@ -626,7 +662,7 @@ def format_file_in_place( `mode` and `fast` options are passed to :func:`format_file_contents`. """ if src.suffix == ".pyi": - mode = evolve(mode, is_pyi=True) + mode = replace(mode, is_pyi=True) then = datetime.utcfromtimestamp(src.stat().st_mtime) with open(src, "rb") as buf: @@ -636,10 +672,10 @@ def format_file_in_place( except NothingChanged: return False - if write_back == write_back.YES: + if write_back == WriteBack.YES: with open(src, "w", encoding=encoding, newline=newline) as f: f.write(dst_contents) - elif write_back == write_back.DIFF: + elif write_back == WriteBack.DIFF: now = datetime.utcnow() src_name = f"{src}\t{then} +0000" dst_name = f"{src}\t{now} +0000" @@ -659,7 +695,7 @@ def format_file_in_place( def format_stdin_to_stdout( - fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: FileMode + fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode ) -> bool: """Format file on stdin. Return True if changed. @@ -691,9 +727,7 @@ def format_stdin_to_stdout( f.detach() -def format_file_contents( - src_contents: str, *, fast: bool, mode: FileMode -) -> FileContent: +def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent: """Reformat contents a file and return new contents. If `fast` is False, additionally confirm that the reformatted code is @@ -713,11 +747,34 @@ def format_file_contents( return dst_contents -def format_str(src_contents: str, *, mode: FileMode) -> FileContent: +def format_str(src_contents: str, *, mode: Mode) -> FileContent: """Reformat a string and return new contents. `mode` determines formatting options, such as how many characters per line are - allowed. + allowed. Example: + + >>> import black + >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode())) + def f(arg: str = "") -> None: + ... + + A more complex example: + >>> print( + ... black.format_str( + ... "def f(arg:str='')->None: hey", + ... mode=black.Mode( + ... target_versions={black.TargetVersion.PY36}, + ... line_length=10, + ... string_normalization=False, + ... is_pyi=False, + ... ), + ... ), + ... ) + def f( + arg: str = '', + ) -> None: + hey + """ src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions) dst_contents = [] @@ -742,11 +799,9 @@ def format_str(src_contents: str, *, mode: FileMode) -> FileContent: if supports_feature(versions, feature) } for current_line in lines.visit(src_node): - for _ in range(after): - dst_contents.append(str(empty_line)) + dst_contents.append(str(empty_line) * after) before, after = elt.maybe_empty_lines(current_line) - for _ in range(before): - dst_contents.append(str(empty_line)) + dst_contents.append(str(empty_line) * before) for line in split_line( current_line, line_length=mode.line_length, features=split_line_features ): @@ -784,7 +839,8 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - elif all(version.is_python2() for version in target_versions): + + if all(version.is_python2() for version in target_versions): # Python 2-only code, so try Python 2 grammars. return [ # Python 2.7 with future print_function import @@ -792,21 +848,21 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - else: - # Python 3-compatible code, so only try Python 3 grammar. - grammars = [] - # If we have to parse both, try to parse async as a keyword first - if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): - # Python 3.7+ - grammars.append( - pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords # noqa: B950 - ) - if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): - # Python 3.0-3.6 - grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) - # At least one of the above branches must have been taken, because every Python - # version has exactly one of the two 'ASYNC_*' flags - return grammars + + # Python 3-compatible code, so only try Python 3 grammar. + grammars = [] + # If we have to parse both, try to parse async as a keyword first + if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): + # Python 3.7+ + grammars.append( + pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords + ) + if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): + # Python 3.0-3.6 + grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) + # At least one of the above branches must have been taken, because every Python + # version has exactly one of the two 'ASYNC_*' flags + return grammars def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node: @@ -861,8 +917,16 @@ class Visitor(Generic[T]): if node.type < 256: name = token.tok_name[node.type] else: - name = type_repr(node.type) - yield from getattr(self, f"visit_{name}", self.visit_default)(node) + name = str(type_repr(node.type)) + # We explicitly branch on whether a visitor exists (instead of + # using self.visit_default as the default arg to getattr) in order + # to save needing to create a bound method object and so mypyc can + # generate a native call to visit_default. + visitf = getattr(self, f"visit_{name}", None) + if visitf: + yield from visitf(node) + else: + yield from self.visit_default(node) def visit_default(self, node: LN) -> Iterator[T]: """Default `visit_*()` implementation. Recurses to children of `node`.""" @@ -907,8 +971,8 @@ class DebugVisitor(Visitor[T]): list(v.visit(code)) -WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE} -STATEMENT = { +WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE} +STATEMENT: Final = { syms.if_stmt, syms.while_stmt, syms.for_stmt, @@ -918,10 +982,10 @@ STATEMENT = { syms.funcdef, syms.classdef, } -STANDALONE_COMMENT = 153 +STANDALONE_COMMENT: Final = 153 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT" -LOGIC_OPERATORS = {"and", "or"} -COMPARATORS = { +LOGIC_OPERATORS: Final = {"and", "or"} +COMPARATORS: Final = { token.LESS, token.GREATER, token.EQEQUAL, @@ -929,7 +993,7 @@ COMPARATORS = { token.LESSEQUAL, token.GREATEREQUAL, } -MATH_OPERATORS = { +MATH_OPERATORS: Final = { token.VBAR, token.CIRCUMFLEX, token.AMPER, @@ -945,23 +1009,23 @@ MATH_OPERATORS = { token.TILDE, token.DOUBLESTAR, } -STARS = {token.STAR, token.DOUBLESTAR} -VARARGS_SPECIALS = STARS | {token.SLASH} -VARARGS_PARENTS = { +STARS: Final = {token.STAR, token.DOUBLESTAR} +VARARGS_SPECIALS: Final = STARS | {token.SLASH} +VARARGS_PARENTS: Final = { syms.arglist, syms.argument, # double star in arglist syms.trailer, # single argument to call syms.typedargslist, syms.varargslist, # lambdas } -UNPACKING_PARENTS = { +UNPACKING_PARENTS: Final = { syms.atom, # single element of a list or set literal syms.dictsetmaker, syms.listmaker, syms.testlist_gexp, syms.testlist_star_expr, } -TEST_DESCENDANTS = { +TEST_DESCENDANTS: Final = { syms.test, syms.lambdef, syms.or_test, @@ -978,7 +1042,7 @@ TEST_DESCENDANTS = { syms.term, syms.power, } -ASSIGNMENTS = { +ASSIGNMENTS: Final = { "=", "+=", "-=", @@ -994,13 +1058,13 @@ ASSIGNMENTS = { "**=", "//=", } -COMPREHENSION_PRIORITY = 20 -COMMA_PRIORITY = 18 -TERNARY_PRIORITY = 16 -LOGIC_PRIORITY = 14 -STRING_PRIORITY = 12 -COMPARATOR_PRIORITY = 10 -MATH_PRIORITIES = { +COMPREHENSION_PRIORITY: Final = 20 +COMMA_PRIORITY: Final = 18 +TERNARY_PRIORITY: Final = 16 +LOGIC_PRIORITY: Final = 14 +STRING_PRIORITY: Final = 12 +COMPARATOR_PRIORITY: Final = 10 +MATH_PRIORITIES: Final = { token.VBAR: 9, token.CIRCUMFLEX: 8, token.AMPER: 7, @@ -1016,7 +1080,7 @@ MATH_PRIORITIES = { token.TILDE: 3, token.DOUBLESTAR: 2, } -DOT_PRIORITY = 1 +DOT_PRIORITY: Final = 1 @dataclass @@ -1024,11 +1088,11 @@ class BracketTracker: """Keeps track of brackets on a line.""" depth: int = 0 - bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict) - delimiters: Dict[LeafID, Priority] = Factory(dict) + bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict) + delimiters: Dict[LeafID, Priority] = field(default_factory=dict) previous: Optional[Leaf] = None - _for_loop_depths: List[int] = Factory(list) - _lambda_argument_depths: List[int] = Factory(list) + _for_loop_depths: List[int] = field(default_factory=list) + _lambda_argument_depths: List[int] = field(default_factory=list) def mark(self, leaf: Leaf) -> None: """Mark `leaf` with bracket-related metadata. Keep track of delimiters. @@ -1156,9 +1220,10 @@ class Line: """Holds leaves and comments. Can be printed with `str(line)`.""" depth: int = 0 - leaves: List[Leaf] = Factory(list) - comments: Dict[LeafID, List[Leaf]] = Factory(dict) # keys ordered like `leaves` - bracket_tracker: BracketTracker = Factory(BracketTracker) + leaves: List[Leaf] = field(default_factory=list) + # keys ordered like `leaves` + comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict) + bracket_tracker: BracketTracker = field(default_factory=BracketTracker) inside_brackets: bool = False should_explode: bool = False @@ -1246,6 +1311,7 @@ class Line: """ if not self.leaves or len(self.leaves) < 4: return False + # Look for and address a trailing colon. if self.leaves[-1].type == token.COLON: closer = self.leaves[-2] @@ -1255,6 +1321,7 @@ class Line: close_index = -1 if closer.type not in CLOSING_BRACKETS or self.inside_brackets: return False + if closer.type == token.RPAR: # Tuples require an extra check, because if there's only # one element in the tuple removing the comma unmakes the @@ -1269,9 +1336,11 @@ class Line: for _open_index, leaf in enumerate(self.leaves): if leaf is opener: break + else: # Couldn't find the matching opening paren, play it safe. return False + commas = 0 comma_depth = self.leaves[close_index - 1].bracket_depth for leaf in self.leaves[_open_index + 1 : close_index]: @@ -1281,16 +1350,20 @@ class Line: # We haven't looked yet for the trailing comma because # we might also have caught noop parens. return self.leaves[close_index - 1].type == token.COMMA + elif commas == 1: return False # it's either a one-tuple or didn't have a trailing comma + if self.leaves[close_index - 1].type in CLOSING_BRACKETS: close_index -= 1 closer = self.leaves[close_index] if closer.type == token.RPAR: # TODO: this is a gut feeling. Will we ever see this? return False + if self.leaves[close_index - 1].type != token.COMMA: return False + return True @property @@ -1340,9 +1413,9 @@ class Line: def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: - if leaf.type == STANDALONE_COMMENT: - if leaf.bracket_depth <= depth_limit: - return True + if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit: + return True + return False def contains_uncollapsable_type_comments(self) -> bool: @@ -1371,7 +1444,10 @@ class Line: for leaf_id, comments in self.comments.items(): for comment in comments: if is_type_comment(comment): - if leaf_id not in ignored_ids or comment_seen: + if comment_seen or ( + not is_type_comment(comment, " ignore") + and leaf_id not in ignored_ids + ): return True comment_seen = True @@ -1410,16 +1486,13 @@ class Line: return False def contains_multiline_strings(self) -> bool: - for leaf in self.leaves: - if is_multiline_string(leaf): - return True - - return False + return any(is_multiline_string(leaf) for leaf in self.leaves) def maybe_remove_trailing_comma(self, closing: Leaf) -> bool: """Remove trailing comma if there is one and it's safe.""" if not (self.leaves and self.leaves[-1].type == token.COMMA): return False + # We remove trailing commas only in the case of importing a # single name from a module. if not ( @@ -1484,6 +1557,7 @@ class Line: comment.type = STANDALONE_COMMENT comment.prefix = "" return False + last_leaf = self.leaves[-2] self.comments.setdefault(id(last_leaf), []).append(comment) return True @@ -1551,7 +1625,7 @@ class EmptyLineTracker: is_pyi: bool = False previous_line: Optional[Line] = None previous_after: int = 0 - previous_defs: List[int] = Factory(list) + previous_defs: List[int] = field(default_factory=list) def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: """Return the number of extra empty lines before and after the `current_line`. @@ -1665,7 +1739,7 @@ class LineGenerator(Visitor[Line]): is_pyi: bool = False normalize_strings: bool = True - current_line: Line = Factory(Line) + current_line: Line = field(default_factory=Line) remove_u_prefix: bool = False def line(self, indent: int = 0) -> Iterator[Line]: @@ -1714,46 +1788,13 @@ class LineGenerator(Visitor[Line]): self.current_line.append(node) yield from super().visit_default(node) - def visit_atom(self, node: Node) -> Iterator[Line]: - # Always make parentheses invisible around a single node, because it should - # not be needed (except in the case of yield, where removing the parentheses - # produces a SyntaxError). - if ( - len(node.children) == 3 - and isinstance(node.children[0], Leaf) - and node.children[0].type == token.LPAR - and isinstance(node.children[2], Leaf) - and node.children[2].type == token.RPAR - and isinstance(node.children[1], Leaf) - and not ( - node.children[1].type == token.NAME - and node.children[1].value == "yield" - ) - ): - node.children[0].value = "" - node.children[2].value = "" - yield from super().visit_default(node) - - def visit_factor(self, node: Node) -> Iterator[Line]: - """Force parentheses between a unary op and a binary power: - - -2 ** 8 -> -(2 ** 8) - """ - child = node.children[1] - if child.type == syms.power and len(child.children) == 3: - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) - yield from self.visit_default(node) - - def visit_INDENT(self, node: Node) -> Iterator[Line]: + def visit_INDENT(self, node: Leaf) -> Iterator[Line]: """Increase indentation level, maybe yield a line.""" # In blib2to3 INDENT never holds comments. yield from self.line(+1) yield from self.visit_default(node) - def visit_DEDENT(self, node: Node) -> Iterator[Line]: + def visit_DEDENT(self, node: Leaf) -> Iterator[Line]: """Decrease indentation level, maybe yield a line.""" # The current line might still wait for trailing comments. At DEDENT time # there won't be any (they would be prefixes on the preceding NEWLINE). @@ -1846,7 +1887,24 @@ class LineGenerator(Visitor[Line]): yield from self.line() yield from self.visit_default(leaf) - def __attrs_post_init__(self) -> None: + def visit_factor(self, node: Node) -> Iterator[Line]: + """Force parentheses between a unary op and a binary power: + + -2 ** 8 -> -(2 ** 8) + """ + _operator, operand = node.children + if ( + operand.type == syms.power + and len(operand.children) == 3 + and operand.children[1].type == token.DOUBLESTAR + ): + lpar = Leaf(token.LPAR, "(") + rpar = Leaf(token.RPAR, ")") + index = operand.remove() or 0 + node.insert_child(index, Node(syms.atom, [lpar, operand, rpar])) + yield from self.visit_default(node) + + def __post_init__(self) -> None: """You are in a twisty little maze of passages.""" v = self.visit_stmt Ø: Set[str] = set() @@ -2419,7 +2477,9 @@ def split_line( # All splits failed, best effort split with no omits. # This mostly happens to multiline strings that are by definition # reported as not fitting a single line. - yield from right_hand_split(line, line_length, features=features) + # line_length=1 here was historically a bug that somehow became a feature. + # See #762 and #781 for the full story. + yield from right_hand_split(line, line_length=1, features=features) if line.inside_brackets: split_funcs = [delimiter_split, standalone_comment_split, rhs] @@ -2462,7 +2522,7 @@ def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[ body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] current_leaves = head_leaves - matching_bracket = None + matching_bracket: Optional[Leaf] = None for leaf in line.leaves: if ( current_leaves is body_leaves @@ -2505,8 +2565,8 @@ def right_hand_split( body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] current_leaves = tail_leaves - opening_bracket = None - closing_bracket = None + opening_bracket: Optional[Leaf] = None + closing_bracket: Optional[Leaf] = None for leaf in reversed(line.leaves): if current_leaves is body_leaves: if leaf is opening_bracket: @@ -2618,20 +2678,22 @@ def bracket_split_build_line( # Since body is a new indent level, remove spurious leading whitespace. normalize_prefix(leaves[0], inside_brackets=True) # Ensure a trailing comma for imports and standalone function arguments, but - # be careful not to add one after any comments. - no_commas = original.is_def and not any( - l.type == token.COMMA for l in leaves + # be careful not to add one after any comments or within type annotations. + no_commas = ( + original.is_def + and opening_bracket.value == "(" + and not any(l.type == token.COMMA for l in leaves) ) if original.is_import or no_commas: for i in range(len(leaves) - 1, -1, -1): if leaves[i].type == STANDALONE_COMMENT: continue - elif leaves[i].type == token.COMMA: - break - else: + + if leaves[i].type != token.COMMA: leaves.insert(i + 1, Leaf(token.COMMA, ",")) - break + break + # Populate the line for leaf in leaves: result.append(leaf, preformatted=True) @@ -2777,9 +2839,7 @@ def is_type_comment(leaf: Leaf, suffix: str = "") -> bool: Only returns true for type comments for now.""" t = leaf.type v = leaf.value - return t in {token.COMMENT, t == STANDALONE_COMMENT} and v.startswith( - "# type:" + suffix - ) + return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix) def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None: @@ -2810,7 +2870,7 @@ def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None: match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL) assert match is not None, f"failed to match string {leaf.value!r}" orig_prefix = match.group(1) - new_prefix = orig_prefix.lower() + new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u") if remove_u_prefix: new_prefix = new_prefix.replace("u", "") leaf.value = f"{new_prefix}{match.group(2)}" @@ -2877,6 +2937,7 @@ def normalize_string_quotes(leaf: Leaf) -> None: if "\\" in str(m): # Do not introduce backslashes in interpolated expressions return + if new_quote == '"""' and new_body[-1:] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -2949,7 +3010,6 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if pc.value in FMT_OFF: # This `node` has a prefix with `# fmt: off`, don't mess with parens. return - check_lpar = False for index, child in enumerate(list(node.children)): # Add parentheses around long tuple unpacking in assignments. @@ -2963,26 +3023,12 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if check_lpar: if is_walrus_assignment(child): continue - if child.type == syms.atom: - # Determines if the underlying atom should be surrounded with - # invisible params - also makes parens invisible recursively - # within the atom and removes repeated invisible parens within - # the atom - should_surround_with_parens = maybe_make_parens_invisible_in_atom( - child, parent=node - ) - if should_surround_with_parens: - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + if child.type == syms.atom: + if maybe_make_parens_invisible_in_atom(child, parent=node): + wrap_in_parentheses(node, child, visible=False) elif is_one_tuple(child): - # wrap child in visible parentheses - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - child.remove() - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + wrap_in_parentheses(node, child, visible=True) elif node.type == syms.import_from: # "import from" nodes store parentheses directly as part of # the statement @@ -2997,15 +3043,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: break elif not (isinstance(child, Leaf) and is_multiline_string(child)): - # wrap child in invisible parentheses - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - prefix = child.prefix - child.prefix = "" - new_child = Node(syms.atom, [lpar, child, rpar]) - new_child.prefix = prefix - node.insert_child(index, new_child) + wrap_in_parentheses(node, child, visible=False) check_lpar = isinstance(child, Leaf) and child.value in parens_after @@ -3049,7 +3087,7 @@ def convert_one_fmt_off_pair(node: Node) -> bool: # That happens when one of the `ignored_nodes` ended with a NEWLINE # leaf (possibly followed by a DEDENT). hidden_value = hidden_value[:-1] - first_idx = None + first_idx: Optional[int] = None for ignored in ignored_nodes: index = ignored.remove() if first_idx is None: @@ -3078,13 +3116,49 @@ def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]: """ container: Optional[LN] = container_of(leaf) while container is not None and container.type != token.ENDMARKER: - for comment in list_comments(container.prefix, is_endmarker=False): - if comment.value in FMT_ON: - return + if fmt_on(container): + return + + # fix for fmt: on in children + if contains_fmt_on_at_column(container, leaf.column): + for child in container.children: + if contains_fmt_on_at_column(child, leaf.column): + return + yield child + else: + yield container + container = container.next_sibling + + +def fmt_on(container: LN) -> bool: + is_fmt_on = False + for comment in list_comments(container.prefix, is_endmarker=False): + if comment.value in FMT_ON: + is_fmt_on = True + elif comment.value in FMT_OFF: + is_fmt_on = False + return is_fmt_on - yield container - container = container.next_sibling +def contains_fmt_on_at_column(container: LN, column: int) -> bool: + for child in container.children: + if ( + isinstance(child, Node) + and first_leaf_column(child) == column + or isinstance(child, Leaf) + and child.column == column + ): + if fmt_on(child): + return True + + return False + + +def first_leaf_column(node: Node) -> Optional[int]: + for child in node.children: + if isinstance(child, Leaf): + return child.column + return None def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool: @@ -3157,6 +3231,7 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: Parenthesis can be optional. Returns None otherwise""" if len(node.children) != 3: return None + lpar, wrapped, rpar = node.children if not (lpar.type == token.LPAR and rpar.type == token.RPAR): return None @@ -3164,6 +3239,24 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: return wrapped +def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None: + """Wrap `child` in parentheses. + + This replaces `child` with an atom holding the parentheses and the old + child. That requires moving the prefix. + + If `visible` is False, the leaves will be valueless (and thus invisible). + """ + lpar = Leaf(token.LPAR, "(" if visible else "") + rpar = Leaf(token.RPAR, ")" if visible else "") + prefix = child.prefix + child.prefix = "" + index = child.remove() or 0 + new_child = Node(syms.atom, [lpar, child, rpar]) + new_child.prefix = prefix + parent.insert_child(index, new_child) + + def is_one_tuple(node: LN) -> bool: """Return True if `node` holds a tuple with one element, with or without parens.""" if node.type == syms.atom: @@ -3396,8 +3489,8 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf yield omit length = 4 * line.depth - opening_bracket = None - closing_bracket = None + opening_bracket: Optional[Leaf] = None + closing_bracket: Optional[Leaf] = None inner_brackets: Set[LeafID] = set() for index, leaf, leaf_length in enumerate_with_length(line, reversed=True): length += leaf_length @@ -3441,19 +3534,23 @@ def get_future_imports(node: Node) -> Set[str]: if isinstance(child, Leaf): if child.type == token.NAME: yield child.value + elif child.type == syms.import_as_name: orig_name = child.children[0] assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports" assert orig_name.type == token.NAME, "Invalid syntax parsing imports" yield orig_name.value + elif child.type == syms.import_as_names: yield from get_imports_from_children(child.children) + else: raise AssertionError("Invalid syntax parsing imports") for child in node.children: if child.type != syms.simple_stmt: break + first_child = child.children[0] if isinstance(first_child, Leaf): # Continue looking if we see a docstring; otherwise stop. @@ -3463,24 +3560,39 @@ def get_future_imports(node: Node) -> Set[str]: and child.children[1].type == token.NEWLINE ): continue - else: - break + + break + elif first_child.type == syms.import_from: module_name = first_child.children[1] if not isinstance(module_name, Leaf) or module_name.value != "__future__": break + imports |= set(get_imports_from_children(first_child.children[3:])) else: break + return imports +@lru_cache() +def get_gitignore(root: Path) -> PathSpec: + """ Return a PathSpec matching gitignore content if present.""" + gitignore = root / ".gitignore" + lines: List[str] = [] + if gitignore.is_file(): + with gitignore.open() as gf: + lines = gf.readlines() + return PathSpec.from_lines("gitwildmatch", lines) + + def gen_python_files_in_dir( path: Path, root: Path, include: Pattern[str], exclude: Pattern[str], report: "Report", + gitignore: PathSpec, ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the `exclude` regex, but are included by the `include` regex. @@ -3491,8 +3603,18 @@ def gen_python_files_in_dir( """ assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in path.iterdir(): + # First ignore files matching .gitignore + if gitignore.match_file(child.as_posix()): + report.path_ignored(child, "matches the .gitignore file content") + continue + + # Then ignore with `exclude` option. try: normalized_path = "/" + child.resolve().relative_to(root).as_posix() + except OSError as e: + report.path_ignored(child, f"cannot be read because {e}") + continue + except ValueError: if child.is_symlink(): report.path_ignored( @@ -3504,13 +3626,16 @@ def gen_python_files_in_dir( if child.is_dir(): normalized_path += "/" + exclude_match = exclude.search(normalized_path) if exclude_match and exclude_match.group(0): - report.path_ignored(child, f"matches the --exclude regular expression") + report.path_ignored(child, "matches the --exclude regular expression") continue if child.is_dir(): - yield from gen_python_files_in_dir(child, root, include, exclude, report) + yield from gen_python_files_in_dir( + child, root, include, exclude, report, gitignore + ) elif child.is_file(): include_match = include.search(normalized_path) @@ -3536,7 +3661,7 @@ def find_project_root(srcs: Iterable[str]) -> Path: # Append a fake file so `parents` below returns `common_base_dir`, too. common_base /= "fake-file" for directory in common_base.parents: - if (directory / ".git").is_dir(): + if (directory / ".git").exists(): return directory if (directory / ".hg").is_dir(): @@ -3553,6 +3678,7 @@ class Report: """Provides a reformatting counter. Can be rendered with `str(report)`.""" check: bool = False + diff: bool = False quiet: bool = False verbose: bool = False change_count: int = 0 @@ -3562,7 +3688,7 @@ class Report: def done(self, src: Path, changed: Changed) -> None: """Increment the counter for successful reformatting. Write out a message.""" if changed is Changed.YES: - reformatted = "would reformat" if self.check else "reformatted" + reformatted = "would reformat" if self.check or self.diff else "reformatted" if self.verbose or not self.quiet: out(f"{reformatted} {src}") self.change_count += 1 @@ -3608,7 +3734,7 @@ class Report: Use `click.unstyle` to remove colors. """ - if self.check: + if self.check or self.diff: reformatted = "would be reformatted" unchanged = "would be left unchanged" failed = "would fail to reformat" @@ -3656,14 +3782,15 @@ def _fixup_ast_constants( node: Union[ast.AST, ast3.AST, ast27.AST] ) -> Union[ast.AST, ast3.AST, ast27.AST]: """Map ast nodes deprecated in 3.8 to Constant.""" - # casts are required until this is released: - # https://github.com/python/typeshed/pull/3142 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)): - return cast(ast.AST, ast.Constant(value=node.s)) - elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)): - return cast(ast.AST, ast.Constant(value=node.n)) - elif isinstance(node, (ast.NameConstant, ast3.NameConstant)): - return cast(ast.AST, ast.Constant(value=node.value)) + return ast.Constant(value=node.s) + + if isinstance(node, (ast.Num, ast3.Num, ast27.Num)): + return ast.Constant(value=node.n) + + if isinstance(node, (ast.NameConstant, ast3.NameConstant)): + return ast.Constant(value=node.value) + return node @@ -3677,7 +3804,7 @@ def assert_equivalent(src: str, dst: str) -> None: yield f"{' ' * depth}{node.__class__.__name__}(" - for field in sorted(node._fields): + for field in sorted(node._fields): # noqa: F402 # TypeIgnore has only one field 'lineno' which breaks this comparison type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) if sys.version_info >= (3, 8): @@ -3703,6 +3830,7 @@ def assert_equivalent(src: str, dst: str) -> None: ): for item in item.elts: yield from _v(item, depth + 2) + elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): yield from _v(item, depth + 2) @@ -3744,7 +3872,7 @@ def assert_equivalent(src: str, dst: str) -> None: ) from None -def assert_stable(src: str, dst: str, mode: FileMode) -> None: +def assert_stable(src: str, dst: str, mode: Mode) -> None: """Raise AssertionError if `dst` reformats differently the second time.""" newdst = format_str(dst, mode=mode) if dst != newdst: @@ -3760,6 +3888,7 @@ def assert_stable(src: str, dst: str, mode: FileMode) -> None: ) from None +@mypyc_attr(patchable=True) def dump_to_file(*output: str) -> str: """Dump `output` to a temporary file. Return path to the file.""" with tempfile.NamedTemporaryFile( @@ -3774,8 +3903,10 @@ def dump_to_file(*output: str) -> str: @contextmanager def nullcontext() -> Iterator[None]: - """Return context manager that does nothing. - Similar to `nullcontext` from python 3.7""" + """Return an empty context manager. + + To be used like `nullcontext` in Python 3.7. + """ yield @@ -3783,14 +3914,14 @@ def diff(a: str, b: str, a_name: str, b_name: str) -> str: """Return a unified diff string between strings `a` and `b`.""" import difflib - a_lines = [line + "\n" for line in a.split("\n")] - b_lines = [line + "\n" for line in b.split("\n")] + a_lines = [line + "\n" for line in a.splitlines()] + b_lines = [line + "\n" for line in b.splitlines()] return "".join( difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5) ) -def cancel(tasks: Iterable[asyncio.Task]) -> None: +def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None: """asyncio signal handler that cancels all `tasks` and reports to stderr.""" err("Aborted!") for task in tasks: @@ -4012,11 +4143,11 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool: return False -def get_cache_file(mode: FileMode) -> Path: +def get_cache_file(mode: Mode) -> Path: return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle" -def read_cache(mode: FileMode) -> Cache: +def read_cache(mode: Mode) -> Cache: """Read the cache if it exists and is well formed. If it is not well formed, the call to write_cache later should resolve the issue. @@ -4028,7 +4159,7 @@ def read_cache(mode: FileMode) -> Cache: with cache_file.open("rb") as fobj: try: cache: Cache = pickle.load(fobj) - except pickle.UnpicklingError: + except (pickle.UnpicklingError, ValueError): return {} return cache @@ -4056,14 +4187,14 @@ def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set return todo, done -def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None: +def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None: """Update the cache file.""" cache_file = get_cache_file(mode) try: CACHE_DIR.mkdir(parents=True, exist_ok=True) new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: - pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL) + pickle.dump(new_cache, f, protocol=4) os.replace(f.name, cache_file) except OSError: pass