diff --git a/src/black/__init__.py b/src/black/__init__.py
index 3ab4bc7..2250943 100644
--- a/src/black/__init__.py
+++ b/src/black/__init__.py
@@ -65,7 +65,7 @@ if TYPE_CHECKING:
     import colorama  # noqa: F401
 
 DEFAULT_LINE_LENGTH = 88
-DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
+DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
 DEFAULT_INCLUDES = r"\.pyi?$"
 
 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
@@ -240,6 +240,7 @@ class Mode:
     target_versions: Set[TargetVersion] = field(default_factory=set)
     line_length: int = DEFAULT_LINE_LENGTH
     string_normalization: bool = True
+    experimental_string_processing: bool = False
     is_pyi: bool = False
 
     def get_cache_key(self) -> str:
@@ -267,7 +268,7 @@ def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> b
     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 
 
-def find_pyproject_toml(path_search_start: str) -> Optional[str]:
+def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
     """Find the absolute filepath to a pyproject.toml if it exists"""
     path_project_root = find_project_root(path_search_start)
     path_pyproject_toml = path_project_root / "pyproject.toml"
@@ -306,11 +307,19 @@ def read_pyproject_toml(
 
     if not config:
         return None
+    else:
+        # Sanitize the values to be Click friendly. For more information please see:
+        # https://github.com/psf/black/issues/1458
+        # https://github.com/pallets/click/issues/1567
+        config = {
+            k: str(v) if not isinstance(v, (list, dict)) else v
+            for k, v in config.items()
+        }
 
     target_version = config.get("target_version")
     if target_version is not None and not isinstance(target_version, list):
         raise click.BadOptionUsage(
-            "target-version", f"Config key target-version must be a list"
+            "target-version", "Config key target-version must be a list"
         )
 
     default_map: Dict[str, Any] = {}
@@ -368,6 +377,15 @@ def target_version_option_callback(
     is_flag=True,
     help="Don't normalize string quotes or prefixes.",
 )
+@click.option(
+    "--experimental-string-processing",
+    is_flag=True,
+    hidden=True,
+    help=(
+        "Experimental option that performs more normalization on string literals."
+        " Currently disabled because it leads to some crashes."
+    ),
+)
 @click.option(
     "--check",
     is_flag=True,
@@ -463,7 +481,7 @@ def target_version_option_callback(
     ),
     is_eager=True,
     callback=read_pyproject_toml,
-    help="Read configuration from PATH.",
+    help="Read configuration from FILE path.",
 )
 @click.pass_context
 def main(
@@ -477,6 +495,7 @@ def main(
     fast: bool,
     pyi: bool,
     skip_string_normalization: bool,
+    experimental_string_processing: bool,
     quiet: bool,
     verbose: bool,
     include: str,
@@ -497,6 +516,7 @@ def main(
         line_length=line_length,
         is_pyi=pyi,
         string_normalization=not skip_string_normalization,
+        experimental_string_processing=experimental_string_processing,
     )
     if config and verbose:
         out(f"Using configuration from {config}.", bold=False, fg="blue")
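
Note on the new Mode field: `experimental_string_processing` defaults to False, so behavior only changes for callers that opt in. A minimal sketch of exercising it through the public API (the sample sources are invented; the call shape mirrors the format_str doctest later in this diff):

    import black

    # Default mode: the experimental string transformers stay off.
    print(black.format_str("x = {'a': 37,'b': 42,}", mode=black.Mode()))

    # Opting in to the experimental (still crash-prone) string processing.
    mode = black.Mode(experimental_string_processing=True)
    print(black.format_str("s = 'one ' 'very long ' 'string'", mode=mode))
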
@@ -575,9 +595,7 @@ def get_sources(
     root = find_project_root(src)
     sources: Set[Path] = set()
     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
-    exclude_regexes = [exclude_regex]
-    if force_exclude_regex is not None:
-        exclude_regexes.append(force_exclude_regex)
+    gitignore = get_gitignore(root)
 
     for s in src:
         p = Path(s)
@@ -587,19 +605,30 @@ def get_sources(
                     p.iterdir(),
                     root,
                     include_regex,
-                    exclude_regexes,
+                    exclude_regex,
+                    force_exclude_regex,
                     report,
-                    get_gitignore(root),
+                    gitignore,
                 )
             )
         elif s == "-":
             sources.add(p)
         elif p.is_file():
-            sources.update(
-                gen_python_files(
-                    [p], root, None, exclude_regexes, report, get_gitignore(root)
-                )
-            )
+            normalized_path = normalize_path_maybe_ignore(p, root, report)
+            if normalized_path is None:
+                continue
+
+            normalized_path = "/" + normalized_path
+            # Hard-exclude any files that match the `--force-exclude` regex.
+            if force_exclude_regex:
+                force_exclude_match = force_exclude_regex.search(normalized_path)
+            else:
+                force_exclude_match = None
+            if force_exclude_match and force_exclude_match.group(0):
+                report.path_ignored(p, "matches the --force-exclude regular expression")
+                continue
+
+            sources.add(p)
         else:
             err(f"invalid path: {s}")
     return sources
@@ -647,6 +676,8 @@ def reformat_one(
                 write_cache(cache, [src], mode)
         report.done(src, changed)
     except Exception as exc:
+        if report.verbose:
+            traceback.print_exc()
         report.failed(src, str(exc))
 
 
@@ -662,9 +693,9 @@ def reformat_many(
         worker_count = min(worker_count, 61)
     try:
         executor = ProcessPoolExecutor(max_workers=worker_count)
-    except OSError:
+    except (ImportError, OSError):
         # we arrive here if the underlying system does not support multi-processing
-        # like in AWS Lambda, in which case we gracefully fall back to
+        # like in AWS Lambda or Termux, in which case we gracefully fall back to
         # a ThreadPoolExecutor with just a single worker (more workers would not do us
         # any good due to the Global Interpreter Lock)
         executor = ThreadPoolExecutor(max_workers=1)
@@ -921,6 +952,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 
     A more complex example:
+
     >>> print(
     ...   black.format_str(
     ...     "def f(arg:str='')->None: hey",
@@ -965,10 +997,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent:
             before, after = elt.maybe_empty_lines(current_line)
             dst_contents.append(str(empty_line) * before)
             for line in transform_line(
-                current_line,
-                line_length=mode.line_length,
-                normalize_strings=mode.string_normalization,
-                features=split_line_features,
+                current_line, mode=mode, features=split_line_features
             ):
                 dst_contents.append(str(line))
     return "".join(dst_contents)
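
The reformat_many hunk above broadens the executor fallback: platforms without usable multiprocessing (AWS Lambda, Termux) may raise ImportError rather than OSError when the process pool is created. A condensed, standard-library-only sketch of that pattern (make_executor is an illustrative name, not black's API):

    from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor

    def make_executor(worker_count: int) -> Executor:
        try:
            return ProcessPoolExecutor(max_workers=worker_count)
        except (ImportError, OSError):
            # No multiprocessing support: one worker thread suffices, since
            # the GIL keeps CPU-bound formatting from scaling across threads.
            return ThreadPoolExecutor(max_workers=1)
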
@@ -1465,69 +1494,6 @@ class Line:
             Leaf(token.DOT, ".") for _ in range(3)
         ]
 
-    @property
-    def is_collection_with_optional_trailing_comma(self) -> bool:
-        """Is this line a collection literal with a trailing comma that's optional?
-
-        Note that the trailing comma in a 1-tuple is not optional.
-        """
-        if not self.leaves or len(self.leaves) < 4:
-            return False
-
-        # Look for and address a trailing colon.
-        if self.leaves[-1].type == token.COLON:
-            closer = self.leaves[-2]
-            close_index = -2
-        else:
-            closer = self.leaves[-1]
-            close_index = -1
-        if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
-            return False
-
-        if closer.type == token.RPAR:
-            # Tuples require an extra check, because if there's only
-            # one element in the tuple removing the comma unmakes the
-            # tuple.
-            #
-            # We also check for parens before looking for the trailing
-            # comma because in some cases (eg assigning a dict
-            # literal) the literal gets wrapped in temporary parens
-            # during parsing. This case is covered by the
-            # collections.py test data.
-            opener = closer.opening_bracket
-            for _open_index, leaf in enumerate(self.leaves):
-                if leaf is opener:
-                    break
-
-            else:
-                # Couldn't find the matching opening paren, play it safe.
-                return False
-
-            commas = 0
-            comma_depth = self.leaves[close_index - 1].bracket_depth
-            for leaf in self.leaves[_open_index + 1 : close_index]:
-                if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
-                    commas += 1
-            if commas > 1:
-                # We haven't looked yet for the trailing comma because
-                # we might also have caught noop parens.
-                return self.leaves[close_index - 1].type == token.COMMA
-
-            elif commas == 1:
-                return False  # it's either a one-tuple or didn't have a trailing comma
-
-            if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
-                close_index -= 1
-                closer = self.leaves[close_index]
-                if closer.type == token.RPAR:
-                    # TODO: this is a gut feeling. Will we ever see this?
-                    return False
-
-            if self.leaves[close_index - 1].type != token.COMMA:
-                return False
-
-        return True
-
     @property
     def is_def(self) -> bool:
         """Is this a function definition? (Also returns True for async defs.)"""
@@ -1633,8 +1599,10 @@ class Line:
         # one line in the original code.
 
         # Grab the first and last line numbers, skipping generated leaves
-        first_line = next((l.lineno for l in self.leaves if l.lineno != 0), 0)
-        last_line = next((l.lineno for l in reversed(self.leaves) if l.lineno != 0), 0)
+        first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
+        last_line = next(
+            (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
+        )
 
         if first_line == last_line:
             # We look at the last two leaves since a comma or an
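
The maybe_remove_trailing_comma rewrite below rests on one language fact: inside square or curly brackets a trailing comma is purely cosmetic, while in parentheses it can be the only thing making the value a tuple. A runnable illustration:

    assert [1,] == [1] and {1,} == {1}  # comma is cosmetic in lists and sets
    assert (1,) != (1)                  # but (1,) is a tuple and (1) is just 1
    assert type((1,)) is tuple and type((1)) is int
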
@@ -1652,40 +1620,60 @@ class Line:
     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
         """Remove trailing comma if there is one and it's safe."""
-        if not (self.leaves and self.leaves[-1].type == token.COMMA):
-            return False
-
-        # We remove trailing commas only in the case of importing a
-        # single name from a module.
         if not (
             self.leaves
-            and self.is_import
-            and len(self.leaves) > 4
             and self.leaves[-1].type == token.COMMA
             and closing.type in CLOSING_BRACKETS
-            and self.leaves[-4].type == token.NAME
-            and (
-                # regular `from foo import bar,`
-                self.leaves[-4].value == "import"
-                # `from foo import (bar as baz,)
-                or (
-                    len(self.leaves) > 6
-                    and self.leaves[-6].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-                # `from foo import bar as baz,`
-                or (
-                    len(self.leaves) > 5
-                    and self.leaves[-5].value == "import"
-                    and self.leaves[-3].value == "as"
-                )
-            )
-            and closing.type == token.RPAR
         ):
             return False
 
-        self.remove_trailing_comma()
-        return True
+        if closing.type == token.RBRACE:
+            self.remove_trailing_comma()
+            return True
+
+        if closing.type == token.RSQB:
+            comma = self.leaves[-1]
+            if comma.parent and comma.parent.type == syms.listmaker:
+                self.remove_trailing_comma()
+                return True
+
+        # For parens let's check if it's safe to remove the comma.
+        # Imports are always safe.
+        if self.is_import:
+            self.remove_trailing_comma()
+            return True
+
+        # Otherwise, if the trailing one is the only one, we might mistakenly
+        # change a tuple into a different type by removing the comma.
+        depth = closing.bracket_depth + 1
+        commas = 0
+        opening = closing.opening_bracket
+        for _opening_index, leaf in enumerate(self.leaves):
+            if leaf is opening:
+                break
+
+        else:
+            return False
+
+        for leaf in self.leaves[_opening_index + 1 :]:
+            if leaf is closing:
+                break
+
+            bracket_depth = leaf.bracket_depth
+            if bracket_depth == depth and leaf.type == token.COMMA:
+                commas += 1
+                if leaf.parent and leaf.parent.type in {
+                    syms.arglist,
+                    syms.typedargslist,
+                }:
+                    commas += 1
+                    break
+
+        if commas > 1:
+            self.remove_trailing_comma()
+            return True
+
+        return False
 
     def append_comment(self, comment: Leaf) -> bool:
         """Add an inline or standalone comment to the line."""
@@ -2628,10 +2616,7 @@ def make_comment(content: str) -> str:
 
 
 def transform_line(
-    line: Line,
-    line_length: int,
-    normalize_strings: bool,
-    features: Collection[Feature] = (),
+    line: Line, mode: Mode, features: Collection[Feature] = ()
 ) -> Iterator[Line]:
     """Transform a `line`, potentially splitting it into many lines.
 
@@ -2647,7 +2632,7 @@ def transform_line(
 
     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
         """Initialize StringTransformer"""
-        return ST(line_length, normalize_strings)
+        return ST(mode.line_length, mode.string_normalization)
 
     string_merge = init_st(StringMerger)
     string_paren_strip = init_st(StringParenStripper)
@@ -2658,23 +2643,27 @@ def transform_line(
     if (
         not line.contains_uncollapsable_type_comments()
         and not line.should_explode
-        and not line.is_collection_with_optional_trailing_comma
         and (
-            is_line_short_enough(line, line_length=line_length, line_str=line_str)
+            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
        and not (line.contains_standalone_comments() and line.inside_brackets)
     ):
         # Only apply basic string preprocessing, since lines shouldn't be split here.
-        transformers = [string_merge, string_paren_strip]
+        if mode.experimental_string_processing:
+            transformers = [string_merge, string_paren_strip]
+        else:
+            transformers = []
     elif line.is_def:
         transformers = [left_hand_split]
     else:
 
         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
-            for omit in generate_trailers_to_omit(line, line_length):
-                lines = list(right_hand_split(line, line_length, features, omit=omit))
-                if is_line_short_enough(lines[0], line_length=line_length):
+            for omit in generate_trailers_to_omit(line, mode.line_length):
+                lines = list(
+                    right_hand_split(line, mode.line_length, features, omit=omit)
+                )
+                if is_line_short_enough(lines[0], line_length=mode.line_length):
                     yield from lines
                     return
 
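
For orientation, the transformer pipeline that transform_line drives (selection above, consumption in the loop that follows) reduces to this self-contained sketch; strings stand in for Line objects, and the recursion into freshly split lines is omitted:

    from typing import Callable, Iterator, List

    class CannotTransform(Exception):
        pass  # a transformer signals that it does not apply

    Transformer = Callable[[str], Iterator[str]]

    def run_pipeline(line: str, transformers: List[Transformer]) -> List[str]:
        for transform in transformers:
            result: List[str] = []  # accumulate so a late abort emits nothing
            try:
                for out in transform(line):
                    if out == line:
                        # An unchanged result would otherwise recurse forever.
                        raise CannotTransform("unchanged result")
                    result.append(out)
            except CannotTransform:
                continue  # this transformer bowed out; try the next one
            return result
        return [line]  # nothing applied; keep the line as-is
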
@@ -2685,24 +2674,30 @@ def transform_line(
             # See #762 and #781 for the full story.
             yield from right_hand_split(line, line_length=1, features=features)
 
-        if line.inside_brackets:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                delimiter_split,
-                standalone_comment_split,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+        if mode.experimental_string_processing:
+            if line.inside_brackets:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    delimiter_split,
+                    standalone_comment_split,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
+            else:
+                transformers = [
+                    string_merge,
+                    string_paren_strip,
+                    string_split,
+                    string_paren_wrap,
+                    rhs,
+                ]
         else:
-            transformers = [
-                string_merge,
-                string_paren_strip,
-                string_split,
-                string_paren_wrap,
-                rhs,
-            ]
+            if line.inside_brackets:
+                transformers = [delimiter_split, standalone_comment_split, rhs]
+            else:
+                transformers = [rhs]
 
     for transform in transformers:
         # We are accumulating lines in `result` because we might want to abort
@@ -2710,19 +2705,14 @@ def transform_line(
         # split altogether.
         result: List[Line] = []
         try:
-            for l in transform(line, features):
-                if str(l).strip("\n") == line_str:
+            for transformed_line in transform(line, features):
+                if str(transformed_line).strip("\n") == line_str:
                     raise CannotTransform(
                         "Line transformer returned an unchanged result"
                     )
 
                 result.extend(
-                    transform_line(
-                        l,
-                        line_length=line_length,
-                        normalize_strings=normalize_strings,
-                        features=features,
-                    )
+                    transform_line(transformed_line, mode=mode, features=features)
                 )
         except CannotTransform:
             continue
@@ -2752,7 +2742,7 @@ class StringTransformer(ABC):
             StringTransformer to be applied?
 
     Transformations:
-        If the given Line meets all of the above requirments, which string
+        If the given Line meets all of the above requirements, which string
         transformations can you expect to be applied to it by this
         StringTransformer?
 
@@ -3228,7 +3218,9 @@ class StringParenStripper(StringTransformer):
     Requirements:
        The line contains a string which is surrounded by parentheses and:
            - The target string is NOT the only argument to a function call).
-            - The RPAR is NOT followed by an attribute access (i.e. a dot).
+            - If the target string contains a PERCENT, the brackets are not
+              preceded or followed by an operator with higher precedence than
+              PERCENT.
 
     Transformations:
         The parentheses mentioned in the 'Requirements' section are stripped.
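
The PERCENT requirement above exists because % shares precedence with * and /, and binds looser than **, subscripts, and calls, so stripping the parentheses can re-associate the expression. Two concrete cases the new checks guard against, in plain runnable Python:

    x = 7
    assert 2 * ("%d" % x) == "77"  # format first, then repeat
    # Stripped, it parses as (2 * "%d") % x, i.e. "%d%d" % 7 -> TypeError.

    nums = [1, 2, 3]
    assert ("%s" % nums)[0] == "["  # index into the formatted string
    assert "%s" % nums[0] == "1"    # [] binds tighter and formats nums[0]
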
@@ -3271,14 +3263,51 @@ class StringParenStripper(StringTransformer):
 
             string_parser = StringParser()
             next_idx = string_parser.parse(LL, string_idx)
 
+            # if the leaves in the parsed string include a PERCENT, we need to
+            # make sure the initial LPAR is NOT preceded by an operator with
+            # higher or equal precedence to PERCENT
+            if is_valid_index(idx - 2):
+                # mypy can't quite follow unless we name this
+                before_lpar = LL[idx - 2]
+                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
+                    (
+                        before_lpar.type
+                        in {
+                            token.STAR,
+                            token.AT,
+                            token.SLASH,
+                            token.DOUBLESLASH,
+                            token.PERCENT,
+                            token.TILDE,
+                            token.DOUBLESTAR,
+                            token.AWAIT,
+                            token.LSQB,
+                            token.LPAR,
+                        }
+                    )
+                    or (
+                        # only unary PLUS/MINUS
+                        before_lpar.parent
+                        and before_lpar.parent.type == syms.factor
+                        and (before_lpar.type in {token.PLUS, token.MINUS})
+                    )
+                ):
+                    continue
+
             # Should be followed by a non-empty RPAR...
             if (
                 is_valid_index(next_idx)
                 and LL[next_idx].type == token.RPAR
                 and not is_empty_rpar(LL[next_idx])
             ):
-                # That RPAR should NOT be followed by a '.' symbol.
-                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
+                # That RPAR should NOT be followed by anything with higher
+                # precedence than PERCENT
+                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
+                    token.DOUBLESTAR,
+                    token.LSQB,
+                    token.LPAR,
+                    token.DOT,
+                }:
                     continue
 
                 return Ok(string_idx)
@@ -3308,7 +3337,7 @@ class StringParenStripper(StringTransformer):
             new_line.append(string_leaf)
 
             append_leaves(
-                new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
+                new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
             )
 
             LL[rpar_idx].remove()
@@ -3611,7 +3640,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
 
         # We MAY choose to drop the 'f' prefix from substrings that don't
         # contain any f-expressions, but ONLY if the original f-string
-        # containes at least one f-expression. Otherwise, we will alter the AST
+        # contains at least one f-expression. Otherwise, we will alter the AST
         # of the program.
         drop_pointless_f_prefix = ("f" in prefix) and re.search(
             self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
@@ -3701,7 +3730,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
             max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
             maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
             if maybe_break_idx is None:
-                # If we are unable to algorthmically determine a good split
+                # If we are unable to algorithmically determine a good split
                 # and this string has custom splits registered to it, we
                 # fall back to using them--which means we have to start
                 # over from the beginning.
@@ -4278,7 +4307,7 @@ class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
 class StringParser:
     """
     A state machine that aids in parsing a string's "trailer", which can be
-    either non-existant, an old-style formatting sequence (e.g. `% varX` or `%
+    either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
     varY)`).
 
@@ -4372,7 +4401,7 @@ class StringParser:
             was directly after the string leaf in question (e.g. if our target
             string is `line.leaves[i]` then the first call to this method must
             be `line.leaves[i + 1]`).
-            * On the next call to this function, the leaf paramater passed in
+            * On the next call to this function, the leaf parameter passed in
             MUST be the leaf directly following @leaf.
 
         Returns:
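
For reference, the three "trailer" shapes that StringParser's docstring describes, written out as ordinary expressions:

    name, a, b = "world", 1, 2
    s1 = "hello"                  # no trailer
    s2 = "%s and %s" % (a, b)     # old-style formatting trailer
    s3 = "hello {}".format(name)  # method-call / attribute-access trailer
    assert (s1, s2, s3) == ("hello", "1 and 2", "hello world")
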
""" for old_leaf in leaves: - assert old_leaf in old_line.leaves - new_leaf = Leaf(old_leaf.type, old_leaf.value) replace_child(old_leaf, new_leaf) new_line.append(new_leaf) @@ -4836,7 +4863,7 @@ def bracket_split_build_line( no_commas = ( original.is_def and opening_bracket.value == "(" - and not any(l.type == token.COMMA for l in leaves) + and not any(leaf.type == token.COMMA for leaf in leaves) ) if original.is_import or no_commas: @@ -4866,9 +4893,9 @@ def dont_increase_indentation(split_func: Transformer) -> Transformer: @wraps(split_func) def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]: - for l in split_func(line, features): - normalize_prefix(l.leaves[0], inside_brackets=True) - yield l + for line in split_func(line, features): + normalize_prefix(line.leaves[0], inside_brackets=True) + yield line return split_wrapper @@ -5749,16 +5776,40 @@ def get_gitignore(root: Path) -> PathSpec: return PathSpec.from_lines("gitwildmatch", lines) +def normalize_path_maybe_ignore( + path: Path, root: Path, report: "Report" +) -> Optional[str]: + """Normalize `path`. May return `None` if `path` was ignored. + + `report` is where "path ignored" output goes. + """ + try: + normalized_path = path.resolve().relative_to(root).as_posix() + except OSError as e: + report.path_ignored(path, f"cannot be read because {e}") + return None + + except ValueError: + if path.is_symlink(): + report.path_ignored(path, f"is a symbolic link that points outside {root}") + return None + + raise + + return normalized_path + + def gen_python_files( paths: Iterable[Path], root: Path, include: Optional[Pattern[str]], - exclude_regexes: Iterable[Pattern[str]], + exclude: Pattern[str], + force_exclude: Optional[Pattern[str]], report: "Report", gitignore: PathSpec, ) -> Iterator[Path]: """Generate all files under `path` whose paths are not excluded by the - `exclude` regex, but are included by the `include` regex. + `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex. Symbolic links pointing outside of the `root` directory are ignored. @@ -5766,43 +5817,41 @@ def gen_python_files( """ assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in paths: - # Then ignore with `exclude` option. - try: - normalized_path = child.resolve().relative_to(root).as_posix() - except OSError as e: - report.path_ignored(child, f"cannot be read because {e}") + normalized_path = normalize_path_maybe_ignore(child, root, report) + if normalized_path is None: continue - except ValueError: - if child.is_symlink(): - report.path_ignored( - child, f"is a symbolic link that points outside {root}" - ) - continue - - raise # First ignore files matching .gitignore if gitignore.match_file(normalized_path): report.path_ignored(child, "matches the .gitignore file content") continue + # Then ignore with `--exclude` and `--force-exclude` options. 
normalized_path = "/" + normalized_path if child.is_dir(): normalized_path += "/" - is_excluded = False - for exclude in exclude_regexes: - exclude_match = exclude.search(normalized_path) if exclude else None - if exclude_match and exclude_match.group(0): - report.path_ignored(child, "matches the --exclude regular expression") - is_excluded = True - break - if is_excluded: + exclude_match = exclude.search(normalized_path) if exclude else None + if exclude_match and exclude_match.group(0): + report.path_ignored(child, "matches the --exclude regular expression") + continue + + force_exclude_match = ( + force_exclude.search(normalized_path) if force_exclude else None + ) + if force_exclude_match and force_exclude_match.group(0): + report.path_ignored(child, "matches the --force-exclude regular expression") continue if child.is_dir(): yield from gen_python_files( - child.iterdir(), root, include, exclude_regexes, report, gitignore + child.iterdir(), + root, + include, + exclude, + force_exclude, + report, + gitignore, ) elif child.is_file(): @@ -5815,8 +5864,8 @@ def gen_python_files( def find_project_root(srcs: Iterable[str]) -> Path: """Return a directory containing .git, .hg, or pyproject.toml. - That directory can be one of the directories passed in `srcs` or their - common parent. + That directory will be a common parent of all files and directories + passed in `srcs`. If no directory in the tree contains a marker that would specify it's the project root, the root of the file system is returned. @@ -5824,11 +5873,20 @@ def find_project_root(srcs: Iterable[str]) -> Path: if not srcs: return Path("/").resolve() - common_base = min(Path(src).resolve() for src in srcs) - if common_base.is_dir(): - # Append a fake file so `parents` below returns `common_base_dir`, too. - common_base /= "fake-file" - for directory in common_base.parents: + path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs] + + # A list of lists of parents for each 'src'. 'src' is included as a + # "parent" of itself if it is a directory + src_parents = [ + list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs + ] + + common_base = max( + set.intersection(*(set(parents) for parents in src_parents)), + key=lambda path: path.parts, + ) + + for directory in (common_base, *common_base.parents): if (directory / ".git").exists(): return directory @@ -6006,13 +6064,14 @@ def _stringify_ast( else: # Constant strings may be indented across newlines, if they are - # docstrings; fold spaces after newlines when comparing + # docstrings; fold spaces after newlines when comparing. Similarly, + # trailing and leading space may be removed. if ( isinstance(node, ast.Constant) and field == "value" and isinstance(value, str) ): - normalized = re.sub(r"\n[ \t]+", "\n ", value) + normalized = re.sub(r" *\n[ \t]*", "\n", value).strip() else: normalized = value yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"