X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/5938106ac4932cb9015122352c87e2f1daba55e8..6bedb5c58a7d8c25aa9509f8217bc24e9797e90d:/black.py?ds=sidebyside diff --git a/black.py b/black.py index 957e51a..a48f647 100644 --- a/black.py +++ b/black.py @@ -12,7 +12,7 @@ from multiprocessing import Manager, freeze_support import os from pathlib import Path import pickle -import re +import regex as re import signal import sys import tempfile @@ -43,6 +43,7 @@ from attr import dataclass, evolve, Factory import click import toml from typed_ast import ast3, ast27 +from pathspec import PathSpec # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr @@ -51,12 +52,10 @@ from blib2to3.pgen2 import driver, token from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError -from _version import version as __version__ +from _black_version import version as __version__ DEFAULT_LINE_LENGTH = 88 -DEFAULT_EXCLUDES = ( - r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/" -) +DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950 DEFAULT_INCLUDES = r"\.pyi?$" CACHE_DIR = Path(user_cache_dir("black", version=__version__)) @@ -438,7 +437,9 @@ def main( p = Path(s) if p.is_dir(): sources.update( - gen_python_files_in_dir(p, root, include_regex, exclude_regex, report) + gen_python_files_in_dir( + p, root, include_regex, exclude_regex, report, get_gitignore(root) + ) ) elif p.is_file() or s == "-": # if a file was explicitly given, we don't care about its extension @@ -786,7 +787,8 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - elif all(version.is_python2() for version in target_versions): + + if all(version.is_python2() for version in target_versions): # Python 2-only code, so try Python 2 grammars. return [ # Python 2.7 with future print_function import @@ -794,21 +796,21 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - else: - # Python 3-compatible code, so only try Python 3 grammar. - grammars = [] - # If we have to parse both, try to parse async as a keyword first - if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): - # Python 3.7+ - grammars.append( - pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords # noqa: B950 - ) - if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): - # Python 3.0-3.6 - grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) - # At least one of the above branches must have been taken, because every Python - # version has exactly one of the two 'ASYNC_*' flags - return grammars + + # Python 3-compatible code, so only try Python 3 grammar. + grammars = [] + # If we have to parse both, try to parse async as a keyword first + if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): + # Python 3.7+ + grammars.append( + pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords + ) + if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): + # Python 3.0-3.6 + grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) + # At least one of the above branches must have been taken, because every Python + # version has exactly one of the two 'ASYNC_*' flags + return grammars def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node: @@ -1240,6 +1242,69 @@ class Line: Leaf(token.DOT, ".") for _ in range(3) ] + @property + def is_collection_with_optional_trailing_comma(self) -> bool: + """Is this line a collection literal with a trailing comma that's optional? + + Note that the trailing comma in a 1-tuple is not optional. + """ + if not self.leaves or len(self.leaves) < 4: + return False + + # Look for and address a trailing colon. + if self.leaves[-1].type == token.COLON: + closer = self.leaves[-2] + close_index = -2 + else: + closer = self.leaves[-1] + close_index = -1 + if closer.type not in CLOSING_BRACKETS or self.inside_brackets: + return False + + if closer.type == token.RPAR: + # Tuples require an extra check, because if there's only + # one element in the tuple removing the comma unmakes the + # tuple. + # + # We also check for parens before looking for the trailing + # comma because in some cases (eg assigning a dict + # literal) the literal gets wrapped in temporary parens + # during parsing. This case is covered by the + # collections.py test data. + opener = closer.opening_bracket + for _open_index, leaf in enumerate(self.leaves): + if leaf is opener: + break + + else: + # Couldn't find the matching opening paren, play it safe. + return False + + commas = 0 + comma_depth = self.leaves[close_index - 1].bracket_depth + for leaf in self.leaves[_open_index + 1 : close_index]: + if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA: + commas += 1 + if commas > 1: + # We haven't looked yet for the trailing comma because + # we might also have caught noop parens. + return self.leaves[close_index - 1].type == token.COMMA + + elif commas == 1: + return False # it's either a one-tuple or didn't have a trailing comma + + if self.leaves[close_index - 1].type in CLOSING_BRACKETS: + close_index -= 1 + closer = self.leaves[close_index] + if closer.type == token.RPAR: + # TODO: this is a gut feeling. Will we ever see this? + return False + + if self.leaves[close_index - 1].type != token.COMMA: + return False + + return True + @property def is_def(self) -> bool: """Is this a function definition? (Also returns True for async defs.)""" @@ -1287,9 +1352,9 @@ class Line: def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: - if leaf.type == STANDALONE_COMMENT: - if leaf.bracket_depth <= depth_limit: - return True + if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit: + return True + return False def contains_uncollapsable_type_comments(self) -> bool: @@ -1321,7 +1386,38 @@ class Line: if leaf_id not in ignored_ids or comment_seen: return True - comment_seen = True + comment_seen = True + + return False + + def contains_unsplittable_type_ignore(self) -> bool: + if not self.leaves: + return False + + # If a 'type: ignore' is attached to the end of a line, we + # can't split the line, because we can't know which of the + # subexpressions the ignore was meant to apply to. + # + # We only want this to apply to actual physical lines from the + # original source, though: we don't want the presence of a + # 'type: ignore' at the end of a multiline expression to + # justify pushing it all onto one line. Thus we + # (unfortunately) need to check the actual source lines and + # only report an unsplittable 'type: ignore' if this line was + # one line in the original code. + + # Grab the first and last line numbers, skipping generated leaves + first_line = next((l.lineno for l in self.leaves if l.lineno != 0), 0) + last_line = next((l.lineno for l in reversed(self.leaves) if l.lineno != 0), 0) + + if first_line == last_line: + # We look at the last two leaves since a comma or an + # invisible paren could have been added at the end of the + # line. + for node in self.leaves[-2:]: + for comment in self.comments.get(id(node), []): + if is_type_comment(comment, " ignore"): + return True return False @@ -1334,60 +1430,40 @@ class Line: def maybe_remove_trailing_comma(self, closing: Leaf) -> bool: """Remove trailing comma if there is one and it's safe.""" + if not (self.leaves and self.leaves[-1].type == token.COMMA): + return False + + # We remove trailing commas only in the case of importing a + # single name from a module. if not ( self.leaves + and self.is_import + and len(self.leaves) > 4 and self.leaves[-1].type == token.COMMA and closing.type in CLOSING_BRACKETS + and self.leaves[-4].type == token.NAME + and ( + # regular `from foo import bar,` + self.leaves[-4].value == "import" + # `from foo import (bar as baz,) + or ( + len(self.leaves) > 6 + and self.leaves[-6].value == "import" + and self.leaves[-3].value == "as" + ) + # `from foo import bar as baz,` + or ( + len(self.leaves) > 5 + and self.leaves[-5].value == "import" + and self.leaves[-3].value == "as" + ) + ) + and closing.type == token.RPAR ): return False - if closing.type == token.RBRACE: - self.remove_trailing_comma() - return True - - if closing.type == token.RSQB: - comma = self.leaves[-1] - if comma.parent and comma.parent.type == syms.listmaker: - self.remove_trailing_comma() - return True - - # For parens let's check if it's safe to remove the comma. - # Imports are always safe. - if self.is_import: - self.remove_trailing_comma() - return True - - # Otherwise, if the trailing one is the only one, we might mistakenly - # change a tuple into a different type by removing the comma. - depth = closing.bracket_depth + 1 - commas = 0 - opening = closing.opening_bracket - for _opening_index, leaf in enumerate(self.leaves): - if leaf is opening: - break - - else: - return False - - for leaf in self.leaves[_opening_index + 1 :]: - if leaf is closing: - break - - bracket_depth = leaf.bracket_depth - if bracket_depth == depth and leaf.type == token.COMMA: - commas += 1 - if leaf.parent and leaf.parent.type in { - syms.arglist, - syms.typedargslist, - }: - commas += 1 - break - - if commas > 1: - self.remove_trailing_comma() - return True - - return False + self.remove_trailing_comma() + return True def append_comment(self, comment: Leaf) -> bool: """Add an inline or standalone comment to the line.""" @@ -1421,6 +1497,7 @@ class Line: comment.type = STANDALONE_COMMENT comment.prefix = "" return False + last_leaf = self.leaves[-2] self.comments.setdefault(id(last_leaf), []).append(comment) return True @@ -1651,39 +1728,6 @@ class LineGenerator(Visitor[Line]): self.current_line.append(node) yield from super().visit_default(node) - def visit_atom(self, node: Node) -> Iterator[Line]: - # Always make parentheses invisible around a single node, because it should - # not be needed (except in the case of yield, where removing the parentheses - # produces a SyntaxError). - if ( - len(node.children) == 3 - and isinstance(node.children[0], Leaf) - and node.children[0].type == token.LPAR - and isinstance(node.children[2], Leaf) - and node.children[2].type == token.RPAR - and isinstance(node.children[1], Leaf) - and not ( - node.children[1].type == token.NAME - and node.children[1].value == "yield" - ) - ): - node.children[0].value = "" - node.children[2].value = "" - yield from super().visit_default(node) - - def visit_factor(self, node: Node) -> Iterator[Line]: - """Force parentheses between a unary op and a binary power: - - -2 ** 8 -> -(2 ** 8) - """ - child = node.children[1] - if child.type == syms.power and len(child.children) == 3: - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) - yield from self.visit_default(node) - def visit_INDENT(self, node: Node) -> Iterator[Line]: """Increase indentation level, maybe yield a line.""" # In blib2to3 INDENT never holds comments. @@ -1783,6 +1827,23 @@ class LineGenerator(Visitor[Line]): yield from self.line() yield from self.visit_default(leaf) + def visit_factor(self, node: Node) -> Iterator[Line]: + """Force parentheses between a unary op and a binary power: + + -2 ** 8 -> -(2 ** 8) + """ + _operator, operand = node.children + if ( + operand.type == syms.power + and len(operand.children) == 3 + and operand.children[1].type == token.DOUBLESTAR + ): + lpar = Leaf(token.LPAR, "(") + rpar = Leaf(token.RPAR, ")") + index = operand.remove() or 0 + node.insert_child(index, Node(syms.atom, [lpar, operand, rpar])) + yield from self.visit_default(node) + def __attrs_post_init__(self) -> None: """You are in a twisty little maze of passages.""" v = self.visit_stmt @@ -2332,7 +2393,11 @@ def split_line( if ( not line.contains_uncollapsable_type_comments() and not line.should_explode - and is_line_short_enough(line, line_length=line_length, line_str=line_str) + and not line.is_collection_with_optional_trailing_comma + and ( + is_line_short_enough(line, line_length=line_length, line_str=line_str) + or line.contains_unsplittable_type_ignore() + ) ): yield line return @@ -2352,7 +2417,9 @@ def split_line( # All splits failed, best effort split with no omits. # This mostly happens to multiline strings that are by definition # reported as not fitting a single line. - yield from right_hand_split(line, line_length, features=features) + # line_length=1 here was historically a bug that somehow became a feature. + # See #762 and #781 for the full story. + yield from right_hand_split(line, line_length=1, features=features) if line.inside_brackets: split_funcs = [delimiter_split, standalone_comment_split, rhs] @@ -2551,20 +2618,22 @@ def bracket_split_build_line( # Since body is a new indent level, remove spurious leading whitespace. normalize_prefix(leaves[0], inside_brackets=True) # Ensure a trailing comma for imports and standalone function arguments, but - # be careful not to add one after any comments. - no_commas = original.is_def and not any( - l.type == token.COMMA for l in leaves + # be careful not to add one after any comments or within type annotations. + no_commas = ( + original.is_def + and opening_bracket.value == "(" + and not any(l.type == token.COMMA for l in leaves) ) if original.is_import or no_commas: for i in range(len(leaves) - 1, -1, -1): if leaves[i].type == STANDALONE_COMMENT: continue - elif leaves[i].type == token.COMMA: - break - else: + + if leaves[i].type != token.COMMA: leaves.insert(i + 1, Leaf(token.COMMA, ",")) - break + break + # Populate the line for leaf in leaves: result.append(leaf, preformatted=True) @@ -2705,12 +2774,12 @@ def is_import(leaf: Leaf) -> bool: ) -def is_type_comment(leaf: Leaf) -> bool: +def is_type_comment(leaf: Leaf, suffix: str = "") -> bool: """Return True if the given leaf is a special comment. Only returns true for type comments for now.""" t = leaf.type v = leaf.value - return t in {token.COMMENT, t == STANDALONE_COMMENT} and v.startswith("# type:") + return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix) def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None: @@ -2808,6 +2877,7 @@ def normalize_string_quotes(leaf: Leaf) -> None: if "\\" in str(m): # Do not introduce backslashes in interpolated expressions return + if new_quote == '"""' and new_body[-1:] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -2880,7 +2950,6 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if pc.value in FMT_OFF: # This `node` has a prefix with `# fmt: off`, don't mess with parens. return - check_lpar = False for index, child in enumerate(list(node.children)): # Add parentheses around long tuple unpacking in assignments. @@ -2894,26 +2963,12 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if check_lpar: if is_walrus_assignment(child): continue - if child.type == syms.atom: - # Determines if the underlying atom should be surrounded with - # invisible params - also makes parens invisible recursively - # within the atom and removes repeated invisible parens within - # the atom - should_surround_with_parens = maybe_make_parens_invisible_in_atom( - child, parent=node - ) - if should_surround_with_parens: - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + if child.type == syms.atom: + if maybe_make_parens_invisible_in_atom(child, parent=node): + wrap_in_parentheses(node, child, visible=False) elif is_one_tuple(child): - # wrap child in visible parentheses - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - child.remove() - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + wrap_in_parentheses(node, child, visible=True) elif node.type == syms.import_from: # "import from" nodes store parentheses directly as part of # the statement @@ -2928,15 +2983,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: break elif not (isinstance(child, Leaf) and is_multiline_string(child)): - # wrap child in invisible parentheses - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - prefix = child.prefix - child.prefix = "" - new_child = Node(syms.atom, [lpar, child, rpar]) - new_child.prefix = prefix - node.insert_child(index, new_child) + wrap_in_parentheses(node, child, visible=False) check_lpar = isinstance(child, Leaf) and child.value in parens_after @@ -3009,9 +3056,14 @@ def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]: """ container: Optional[LN] = container_of(leaf) while container is not None and container.type != token.ENDMARKER: + is_fmt_on = False for comment in list_comments(container.prefix, is_endmarker=False): if comment.value in FMT_ON: - return + is_fmt_on = True + elif comment.value in FMT_OFF: + is_fmt_on = False + if is_fmt_on: + return yield container @@ -3088,6 +3140,7 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: Parenthesis can be optional. Returns None otherwise""" if len(node.children) != 3: return None + lpar, wrapped, rpar = node.children if not (lpar.type == token.LPAR and rpar.type == token.RPAR): return None @@ -3095,6 +3148,24 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: return wrapped +def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None: + """Wrap `child` in parentheses. + + This replaces `child` with an atom holding the parentheses and the old + child. That requires moving the prefix. + + If `visible` is False, the leaves will be valueless (and thus invisible). + """ + lpar = Leaf(token.LPAR, "(" if visible else "") + rpar = Leaf(token.RPAR, ")" if visible else "") + prefix = child.prefix + child.prefix = "" + index = child.remove() or 0 + new_child = Node(syms.atom, [lpar, child, rpar]) + new_child.prefix = prefix + parent.insert_child(index, new_child) + + def is_one_tuple(node: LN) -> bool: """Return True if `node` holds a tuple with one element, with or without parens.""" if node.type == syms.atom: @@ -3372,19 +3443,23 @@ def get_future_imports(node: Node) -> Set[str]: if isinstance(child, Leaf): if child.type == token.NAME: yield child.value + elif child.type == syms.import_as_name: orig_name = child.children[0] assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports" assert orig_name.type == token.NAME, "Invalid syntax parsing imports" yield orig_name.value + elif child.type == syms.import_as_names: yield from get_imports_from_children(child.children) + else: raise AssertionError("Invalid syntax parsing imports") for child in node.children: if child.type != syms.simple_stmt: break + first_child = child.children[0] if isinstance(first_child, Leaf): # Continue looking if we see a docstring; otherwise stop. @@ -3394,24 +3469,39 @@ def get_future_imports(node: Node) -> Set[str]: and child.children[1].type == token.NEWLINE ): continue - else: - break + + break + elif first_child.type == syms.import_from: module_name = first_child.children[1] if not isinstance(module_name, Leaf) or module_name.value != "__future__": break + imports |= set(get_imports_from_children(first_child.children[3:])) else: break + return imports +@lru_cache() +def get_gitignore(root: Path) -> PathSpec: + """ Return a PathSpec matching gitignore content if present.""" + gitignore = root / ".gitignore" + lines: List[str] = [] + if gitignore.is_file(): + with gitignore.open() as gf: + lines = gf.readlines() + return PathSpec.from_lines("gitwildmatch", lines) + + def gen_python_files_in_dir( path: Path, root: Path, include: Pattern[str], exclude: Pattern[str], report: "Report", + gitignore: PathSpec, ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the `exclude` regex, but are included by the `include` regex. @@ -3422,8 +3512,18 @@ def gen_python_files_in_dir( """ assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in path.iterdir(): + # First ignore files matching .gitignore + if gitignore.match_file(child.as_posix()): + report.path_ignored(child, f"matches the .gitignore file content") + continue + + # Then ignore with `exclude` option. try: normalized_path = "/" + child.resolve().relative_to(root).as_posix() + except OSError as e: + report.path_ignored(child, f"cannot be read because {e}") + continue + except ValueError: if child.is_symlink(): report.path_ignored( @@ -3435,13 +3535,16 @@ def gen_python_files_in_dir( if child.is_dir(): normalized_path += "/" + exclude_match = exclude.search(normalized_path) if exclude_match and exclude_match.group(0): report.path_ignored(child, f"matches the --exclude regular expression") continue if child.is_dir(): - yield from gen_python_files_in_dir(child, root, include, exclude, report) + yield from gen_python_files_in_dir( + child, root, include, exclude, report, gitignore + ) elif child.is_file(): include_match = include.search(normalized_path) @@ -3587,14 +3690,15 @@ def _fixup_ast_constants( node: Union[ast.AST, ast3.AST, ast27.AST] ) -> Union[ast.AST, ast3.AST, ast27.AST]: """Map ast nodes deprecated in 3.8 to Constant.""" - # casts are required until this is released: - # https://github.com/python/typeshed/pull/3142 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)): - return cast(ast.AST, ast.Constant(value=node.s)) - elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)): - return cast(ast.AST, ast.Constant(value=node.n)) - elif isinstance(node, (ast.NameConstant, ast3.NameConstant)): - return cast(ast.AST, ast.Constant(value=node.value)) + return ast.Constant(value=node.s) + + if isinstance(node, (ast.Num, ast3.Num, ast27.Num)): + return ast.Constant(value=node.n) + + if isinstance(node, (ast.NameConstant, ast3.NameConstant)): + return ast.Constant(value=node.value) + return node @@ -3634,6 +3738,7 @@ def assert_equivalent(src: str, dst: str) -> None: ): for item in item.elts: yield from _v(item, depth + 2) + elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): yield from _v(item, depth + 2) @@ -3705,8 +3810,10 @@ def dump_to_file(*output: str) -> str: @contextmanager def nullcontext() -> Iterator[None]: - """Return context manager that does nothing. - Similar to `nullcontext` from python 3.7""" + """Return an empty context manager. + + To be used like `nullcontext` in Python 3.7. + """ yield @@ -3770,7 +3877,8 @@ def re_compile_maybe_verbose(regex: str) -> Pattern[str]: """ if "\n" in regex: regex = "(?x)" + regex - return re.compile(regex) + compiled: Pattern[str] = re.compile(regex) + return compiled def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]: @@ -3958,7 +4066,7 @@ def read_cache(mode: FileMode) -> Cache: with cache_file.open("rb") as fobj: try: cache: Cache = pickle.load(fobj) - except pickle.UnpicklingError: + except (pickle.UnpicklingError, ValueError): return {} return cache @@ -3993,7 +4101,7 @@ def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None: CACHE_DIR.mkdir(parents=True, exist_ok=True) new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: - pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL) + pickle.dump(new_cache, f, protocol=4) os.replace(f.name, cache_file) except OSError: pass