X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/b73ec93fa71ae8980d0743555a9f49f46528e6e1..f2203a77c53a7790ad2bf7691f59de8599b16025:/black.py diff --git a/black.py b/black.py index 65545e9..a48f647 100644 --- a/black.py +++ b/black.py @@ -43,6 +43,7 @@ from attr import dataclass, evolve, Factory import click import toml from typed_ast import ast3, ast27 +from pathspec import PathSpec # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr @@ -51,7 +52,7 @@ from blib2to3.pgen2 import driver, token from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError -from _version import version as __version__ +from _black_version import version as __version__ DEFAULT_LINE_LENGTH = 88 DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950 @@ -436,7 +437,9 @@ def main( p = Path(s) if p.is_dir(): sources.update( - gen_python_files_in_dir(p, root, include_regex, exclude_regex, report) + gen_python_files_in_dir( + p, root, include_regex, exclude_regex, report, get_gitignore(root) + ) ) elif p.is_file() or s == "-": # if a file was explicitly given, we don't care about its extension @@ -784,7 +787,8 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - elif all(version.is_python2() for version in target_versions): + + if all(version.is_python2() for version in target_versions): # Python 2-only code, so try Python 2 grammars. return [ # Python 2.7 with future print_function import @@ -792,21 +796,21 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 2.7 pygram.python_grammar, ] - else: - # Python 3-compatible code, so only try Python 3 grammar. - grammars = [] - # If we have to parse both, try to parse async as a keyword first - if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): - # Python 3.7+ - grammars.append( - pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords # noqa: B950 - ) - if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): - # Python 3.0-3.6 - grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) - # At least one of the above branches must have been taken, because every Python - # version has exactly one of the two 'ASYNC_*' flags - return grammars + + # Python 3-compatible code, so only try Python 3 grammar. + grammars = [] + # If we have to parse both, try to parse async as a keyword first + if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): + # Python 3.7+ + grammars.append( + pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords + ) + if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): + # Python 3.0-3.6 + grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) + # At least one of the above branches must have been taken, because every Python + # version has exactly one of the two 'ASYNC_*' flags + return grammars def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node: @@ -1246,6 +1250,7 @@ class Line: """ if not self.leaves or len(self.leaves) < 4: return False + # Look for and address a trailing colon. if self.leaves[-1].type == token.COLON: closer = self.leaves[-2] @@ -1255,6 +1260,7 @@ class Line: close_index = -1 if closer.type not in CLOSING_BRACKETS or self.inside_brackets: return False + if closer.type == token.RPAR: # Tuples require an extra check, because if there's only # one element in the tuple removing the comma unmakes the @@ -1269,9 +1275,11 @@ class Line: for _open_index, leaf in enumerate(self.leaves): if leaf is opener: break + else: # Couldn't find the matching opening paren, play it safe. return False + commas = 0 comma_depth = self.leaves[close_index - 1].bracket_depth for leaf in self.leaves[_open_index + 1 : close_index]: @@ -1281,16 +1289,20 @@ class Line: # We haven't looked yet for the trailing comma because # we might also have caught noop parens. return self.leaves[close_index - 1].type == token.COMMA + elif commas == 1: return False # it's either a one-tuple or didn't have a trailing comma + if self.leaves[close_index - 1].type in CLOSING_BRACKETS: close_index -= 1 closer = self.leaves[close_index] if closer.type == token.RPAR: # TODO: this is a gut feeling. Will we ever see this? return False + if self.leaves[close_index - 1].type != token.COMMA: return False + return True @property @@ -1340,9 +1352,9 @@ class Line: def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: - if leaf.type == STANDALONE_COMMENT: - if leaf.bracket_depth <= depth_limit: - return True + if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit: + return True + return False def contains_uncollapsable_type_comments(self) -> bool: @@ -1420,6 +1432,7 @@ class Line: """Remove trailing comma if there is one and it's safe.""" if not (self.leaves and self.leaves[-1].type == token.COMMA): return False + # We remove trailing commas only in the case of importing a # single name from a module. if not ( @@ -1484,6 +1497,7 @@ class Line: comment.type = STANDALONE_COMMENT comment.prefix = "" return False + last_leaf = self.leaves[-2] self.comments.setdefault(id(last_leaf), []).append(comment) return True @@ -1714,39 +1728,6 @@ class LineGenerator(Visitor[Line]): self.current_line.append(node) yield from super().visit_default(node) - def visit_atom(self, node: Node) -> Iterator[Line]: - # Always make parentheses invisible around a single node, because it should - # not be needed (except in the case of yield, where removing the parentheses - # produces a SyntaxError). - if ( - len(node.children) == 3 - and isinstance(node.children[0], Leaf) - and node.children[0].type == token.LPAR - and isinstance(node.children[2], Leaf) - and node.children[2].type == token.RPAR - and isinstance(node.children[1], Leaf) - and not ( - node.children[1].type == token.NAME - and node.children[1].value == "yield" - ) - ): - node.children[0].value = "" - node.children[2].value = "" - yield from super().visit_default(node) - - def visit_factor(self, node: Node) -> Iterator[Line]: - """Force parentheses between a unary op and a binary power: - - -2 ** 8 -> -(2 ** 8) - """ - child = node.children[1] - if child.type == syms.power and len(child.children) == 3: - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) - yield from self.visit_default(node) - def visit_INDENT(self, node: Node) -> Iterator[Line]: """Increase indentation level, maybe yield a line.""" # In blib2to3 INDENT never holds comments. @@ -1846,6 +1827,23 @@ class LineGenerator(Visitor[Line]): yield from self.line() yield from self.visit_default(leaf) + def visit_factor(self, node: Node) -> Iterator[Line]: + """Force parentheses between a unary op and a binary power: + + -2 ** 8 -> -(2 ** 8) + """ + _operator, operand = node.children + if ( + operand.type == syms.power + and len(operand.children) == 3 + and operand.children[1].type == token.DOUBLESTAR + ): + lpar = Leaf(token.LPAR, "(") + rpar = Leaf(token.RPAR, ")") + index = operand.remove() or 0 + node.insert_child(index, Node(syms.atom, [lpar, operand, rpar])) + yield from self.visit_default(node) + def __attrs_post_init__(self) -> None: """You are in a twisty little maze of passages.""" v = self.visit_stmt @@ -2419,7 +2417,9 @@ def split_line( # All splits failed, best effort split with no omits. # This mostly happens to multiline strings that are by definition # reported as not fitting a single line. - yield from right_hand_split(line, line_length, features=features) + # line_length=1 here was historically a bug that somehow became a feature. + # See #762 and #781 for the full story. + yield from right_hand_split(line, line_length=1, features=features) if line.inside_brackets: split_funcs = [delimiter_split, standalone_comment_split, rhs] @@ -2618,20 +2618,22 @@ def bracket_split_build_line( # Since body is a new indent level, remove spurious leading whitespace. normalize_prefix(leaves[0], inside_brackets=True) # Ensure a trailing comma for imports and standalone function arguments, but - # be careful not to add one after any comments. - no_commas = original.is_def and not any( - l.type == token.COMMA for l in leaves + # be careful not to add one after any comments or within type annotations. + no_commas = ( + original.is_def + and opening_bracket.value == "(" + and not any(l.type == token.COMMA for l in leaves) ) if original.is_import or no_commas: for i in range(len(leaves) - 1, -1, -1): if leaves[i].type == STANDALONE_COMMENT: continue - elif leaves[i].type == token.COMMA: - break - else: + + if leaves[i].type != token.COMMA: leaves.insert(i + 1, Leaf(token.COMMA, ",")) - break + break + # Populate the line for leaf in leaves: result.append(leaf, preformatted=True) @@ -2777,9 +2779,7 @@ def is_type_comment(leaf: Leaf, suffix: str = "") -> bool: Only returns true for type comments for now.""" t = leaf.type v = leaf.value - return t in {token.COMMENT, t == STANDALONE_COMMENT} and v.startswith( - "# type:" + suffix - ) + return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix) def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None: @@ -2877,6 +2877,7 @@ def normalize_string_quotes(leaf: Leaf) -> None: if "\\" in str(m): # Do not introduce backslashes in interpolated expressions return + if new_quote == '"""' and new_body[-1:] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -2949,7 +2950,6 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if pc.value in FMT_OFF: # This `node` has a prefix with `# fmt: off`, don't mess with parens. return - check_lpar = False for index, child in enumerate(list(node.children)): # Add parentheses around long tuple unpacking in assignments. @@ -2963,26 +2963,12 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if check_lpar: if is_walrus_assignment(child): continue - if child.type == syms.atom: - # Determines if the underlying atom should be surrounded with - # invisible params - also makes parens invisible recursively - # within the atom and removes repeated invisible parens within - # the atom - should_surround_with_parens = maybe_make_parens_invisible_in_atom( - child, parent=node - ) - if should_surround_with_parens: - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + if child.type == syms.atom: + if maybe_make_parens_invisible_in_atom(child, parent=node): + wrap_in_parentheses(node, child, visible=False) elif is_one_tuple(child): - # wrap child in visible parentheses - lpar = Leaf(token.LPAR, "(") - rpar = Leaf(token.RPAR, ")") - child.remove() - node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + wrap_in_parentheses(node, child, visible=True) elif node.type == syms.import_from: # "import from" nodes store parentheses directly as part of # the statement @@ -2997,15 +2983,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: break elif not (isinstance(child, Leaf) and is_multiline_string(child)): - # wrap child in invisible parentheses - lpar = Leaf(token.LPAR, "") - rpar = Leaf(token.RPAR, "") - index = child.remove() or 0 - prefix = child.prefix - child.prefix = "" - new_child = Node(syms.atom, [lpar, child, rpar]) - new_child.prefix = prefix - node.insert_child(index, new_child) + wrap_in_parentheses(node, child, visible=False) check_lpar = isinstance(child, Leaf) and child.value in parens_after @@ -3078,9 +3056,14 @@ def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]: """ container: Optional[LN] = container_of(leaf) while container is not None and container.type != token.ENDMARKER: + is_fmt_on = False for comment in list_comments(container.prefix, is_endmarker=False): if comment.value in FMT_ON: - return + is_fmt_on = True + elif comment.value in FMT_OFF: + is_fmt_on = False + if is_fmt_on: + return yield container @@ -3157,6 +3140,7 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: Parenthesis can be optional. Returns None otherwise""" if len(node.children) != 3: return None + lpar, wrapped, rpar = node.children if not (lpar.type == token.LPAR and rpar.type == token.RPAR): return None @@ -3164,6 +3148,24 @@ def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]: return wrapped +def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None: + """Wrap `child` in parentheses. + + This replaces `child` with an atom holding the parentheses and the old + child. That requires moving the prefix. + + If `visible` is False, the leaves will be valueless (and thus invisible). + """ + lpar = Leaf(token.LPAR, "(" if visible else "") + rpar = Leaf(token.RPAR, ")" if visible else "") + prefix = child.prefix + child.prefix = "" + index = child.remove() or 0 + new_child = Node(syms.atom, [lpar, child, rpar]) + new_child.prefix = prefix + parent.insert_child(index, new_child) + + def is_one_tuple(node: LN) -> bool: """Return True if `node` holds a tuple with one element, with or without parens.""" if node.type == syms.atom: @@ -3441,19 +3443,23 @@ def get_future_imports(node: Node) -> Set[str]: if isinstance(child, Leaf): if child.type == token.NAME: yield child.value + elif child.type == syms.import_as_name: orig_name = child.children[0] assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports" assert orig_name.type == token.NAME, "Invalid syntax parsing imports" yield orig_name.value + elif child.type == syms.import_as_names: yield from get_imports_from_children(child.children) + else: raise AssertionError("Invalid syntax parsing imports") for child in node.children: if child.type != syms.simple_stmt: break + first_child = child.children[0] if isinstance(first_child, Leaf): # Continue looking if we see a docstring; otherwise stop. @@ -3463,24 +3469,39 @@ def get_future_imports(node: Node) -> Set[str]: and child.children[1].type == token.NEWLINE ): continue - else: - break + + break + elif first_child.type == syms.import_from: module_name = first_child.children[1] if not isinstance(module_name, Leaf) or module_name.value != "__future__": break + imports |= set(get_imports_from_children(first_child.children[3:])) else: break + return imports +@lru_cache() +def get_gitignore(root: Path) -> PathSpec: + """ Return a PathSpec matching gitignore content if present.""" + gitignore = root / ".gitignore" + lines: List[str] = [] + if gitignore.is_file(): + with gitignore.open() as gf: + lines = gf.readlines() + return PathSpec.from_lines("gitwildmatch", lines) + + def gen_python_files_in_dir( path: Path, root: Path, include: Pattern[str], exclude: Pattern[str], report: "Report", + gitignore: PathSpec, ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the `exclude` regex, but are included by the `include` regex. @@ -3491,8 +3512,18 @@ def gen_python_files_in_dir( """ assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in path.iterdir(): + # First ignore files matching .gitignore + if gitignore.match_file(child.as_posix()): + report.path_ignored(child, f"matches the .gitignore file content") + continue + + # Then ignore with `exclude` option. try: normalized_path = "/" + child.resolve().relative_to(root).as_posix() + except OSError as e: + report.path_ignored(child, f"cannot be read because {e}") + continue + except ValueError: if child.is_symlink(): report.path_ignored( @@ -3504,13 +3535,16 @@ def gen_python_files_in_dir( if child.is_dir(): normalized_path += "/" + exclude_match = exclude.search(normalized_path) if exclude_match and exclude_match.group(0): report.path_ignored(child, f"matches the --exclude regular expression") continue if child.is_dir(): - yield from gen_python_files_in_dir(child, root, include, exclude, report) + yield from gen_python_files_in_dir( + child, root, include, exclude, report, gitignore + ) elif child.is_file(): include_match = include.search(normalized_path) @@ -3656,14 +3690,15 @@ def _fixup_ast_constants( node: Union[ast.AST, ast3.AST, ast27.AST] ) -> Union[ast.AST, ast3.AST, ast27.AST]: """Map ast nodes deprecated in 3.8 to Constant.""" - # casts are required until this is released: - # https://github.com/python/typeshed/pull/3142 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)): - return cast(ast.AST, ast.Constant(value=node.s)) - elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)): - return cast(ast.AST, ast.Constant(value=node.n)) - elif isinstance(node, (ast.NameConstant, ast3.NameConstant)): - return cast(ast.AST, ast.Constant(value=node.value)) + return ast.Constant(value=node.s) + + if isinstance(node, (ast.Num, ast3.Num, ast27.Num)): + return ast.Constant(value=node.n) + + if isinstance(node, (ast.NameConstant, ast3.NameConstant)): + return ast.Constant(value=node.value) + return node @@ -3703,6 +3738,7 @@ def assert_equivalent(src: str, dst: str) -> None: ): for item in item.elts: yield from _v(item, depth + 2) + elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): yield from _v(item, depth + 2) @@ -3774,8 +3810,10 @@ def dump_to_file(*output: str) -> str: @contextmanager def nullcontext() -> Iterator[None]: - """Return context manager that does nothing. - Similar to `nullcontext` from python 3.7""" + """Return an empty context manager. + + To be used like `nullcontext` in Python 3.7. + """ yield @@ -4028,7 +4066,7 @@ def read_cache(mode: FileMode) -> Cache: with cache_file.open("rb") as fobj: try: cache: Cache = pickle.load(fobj) - except pickle.UnpicklingError: + except (pickle.UnpicklingError, ValueError): return {} return cache @@ -4063,7 +4101,7 @@ def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None: CACHE_DIR.mkdir(parents=True, exist_ok=True) new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: - pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL) + pickle.dump(new_cache, f, protocol=4) os.replace(f.name, cache_file) except OSError: pass