X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/4ca92ac91c944b252caf8c2265e5854f839a66f4..4310f39bd94c7fa29fc31079247d2aab4bd50f1a:/src/black/__init__.py?ds=inline diff --git a/src/black/__init__.py b/src/black/__init__.py index 96dd0e4..91f70d9 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -42,7 +42,6 @@ from typing import ( cast, TYPE_CHECKING, ) -from typing_extensions import Final from mypy_extensions import mypyc_attr from appdirs import user_cache_dir @@ -61,6 +60,11 @@ from blib2to3.pgen2.parse import ParseError from _black_version import version as __version__ +if sys.version_info < (3, 8): + from typing_extensions import Final +else: + from typing import Final + if TYPE_CHECKING: import colorama # noqa: F401 @@ -68,6 +72,7 @@ DEFAULT_LINE_LENGTH = 88 DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950 DEFAULT_INCLUDES = r"\.pyi?$" CACHE_DIR = Path(user_cache_dir("black", version=__version__)) +STDIN_PLACEHOLDER = "__BLACK_STDIN_FILENAME__" STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters. @@ -112,6 +117,10 @@ class InvalidInput(ValueError): """Raised when input source code fails all parse attempts.""" +class BracketMatchError(KeyError): + """Raised when an opening bracket is unable to be matched to a closing bracket.""" + + T = TypeVar("T") E = TypeVar("E", bound=Exception) @@ -174,14 +183,12 @@ class TargetVersion(Enum): PY36 = 6 PY37 = 7 PY38 = 8 + PY39 = 9 def is_python2(self) -> bool: return self is TargetVersion.PY27 -PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38} - - class Feature(Enum): # All string literals are unicode UNICODE_LITERALS = 1 @@ -195,6 +202,7 @@ class Feature(Enum): ASYNC_KEYWORDS = 7 ASSIGNMENT_EXPRESSIONS = 8 POS_ONLY_ARGUMENTS = 9 + RELAXED_DECORATORS = 10 FORCE_OPTIONAL_PARENTHESES = 50 @@ -233,6 +241,17 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = { Feature.ASSIGNMENT_EXPRESSIONS, Feature.POS_ONLY_ARGUMENTS, }, + TargetVersion.PY39: { + Feature.UNICODE_LITERALS, + Feature.F_STRINGS, + Feature.NUMERIC_UNDERSCORES, + Feature.TRAILING_COMMA_IN_CALL, + Feature.TRAILING_COMMA_IN_DEF, + Feature.ASYNC_KEYWORDS, + Feature.ASSIGNMENT_EXPRESSIONS, + Feature.RELAXED_DECORATORS, + Feature.POS_ONLY_ARGUMENTS, + }, } @@ -440,7 +459,16 @@ def target_version_option_callback( type=str, help=( "Like --exclude, but files and directories matching this regex will be " - "excluded even when they are passed explicitly as arguments" + "excluded even when they are passed explicitly as arguments." + ), +) +@click.option( + "--stdin-filename", + type=str, + help=( + "The name of the file when passing it through stdin. Useful to make " + "sure Black will respect --force-exclude option on some " + "editors that rely on using stdin." ), ) @click.option( @@ -502,6 +530,7 @@ def main( include: str, exclude: str, force_exclude: Optional[str], + stdin_filename: Optional[str], src: Tuple[str, ...], config: Optional[str], ) -> None: @@ -534,6 +563,7 @@ def main( exclude=exclude, force_exclude=force_exclude, report=report, + stdin_filename=stdin_filename, ) path_empty( @@ -573,6 +603,7 @@ def get_sources( exclude: str, force_exclude: Optional[str], report: "Report", + stdin_filename: Optional[str], ) -> Set[Path]: """Compute the set of files to be formatted.""" try: @@ -599,22 +630,14 @@ def get_sources( gitignore = get_gitignore(root) for s in src: - p = Path(s) - if p.is_dir(): - sources.update( - gen_python_files( - p.iterdir(), - root, - include_regex, - exclude_regex, - force_exclude_regex, - report, - gitignore, - ) - ) - elif s == "-": - sources.add(p) - elif p.is_file(): + if s == "-" and stdin_filename: + p = Path(stdin_filename) + is_stdin = True + else: + p = Path(s) + is_stdin = False + + if is_stdin or p.is_file(): normalized_path = normalize_path_maybe_ignore(p, root, report) if normalized_path is None: continue @@ -629,6 +652,23 @@ def get_sources( report.path_ignored(p, "matches the --force-exclude regular expression") continue + if is_stdin: + p = Path(f"{STDIN_PLACEHOLDER}{str(p)}") + + sources.add(p) + elif p.is_dir(): + sources.update( + gen_python_files( + p.iterdir(), + root, + include_regex, + exclude_regex, + force_exclude_regex, + report, + gitignore, + ) + ) + elif s == "-": sources.add(p) else: err(f"invalid path: {s}") @@ -656,12 +696,23 @@ def reformat_one( """ try: changed = Changed.NO - if not src.is_file() and str(src) == "-": + + if str(src) == "-": + is_stdin = True + elif str(src).startswith(STDIN_PLACEHOLDER): + is_stdin = True + # Use the original name again in case we want to print something + # to the user + src = Path(str(src)[len(STDIN_PLACEHOLDER) :]) + else: + is_stdin = False + + if is_stdin: if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode): changed = Changed.YES else: cache: Cache = {} - if write_back != WriteBack.DIFF: + if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): cache = read_cache(mode) res_src = src.resolve() if res_src in cache and cache[res_src] == get_cache_info(res_src): @@ -690,7 +741,7 @@ def reformat_many( worker_count = os.cpu_count() if sys.platform == "win32": # Work around https://bugs.python.org/issue26903 - worker_count = min(worker_count, 61) + worker_count = min(worker_count, 60) try: executor = ProcessPoolExecutor(max_workers=worker_count) except (ImportError, OSError): @@ -735,7 +786,7 @@ async def schedule_formatting( :func:`format_file_in_place`. """ cache: Cache = {} - if write_back != WriteBack.DIFF: + if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): cache = read_cache(mode) sources, cached = filter_cached(cache, sources) for src in sorted(cached): @@ -746,7 +797,7 @@ async def schedule_formatting( cancelled = [] sources_to_cache = [] lock = None - if write_back == WriteBack.DIFF: + if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF): # For diff output, we need locks to ensure we don't interleave output # from different processes. manager = Manager() @@ -845,9 +896,9 @@ def color_diff(contents: str) -> str: for i, line in enumerate(lines): if line.startswith("+++") or line.startswith("---"): line = "\033[1;37m" + line + "\033[0m" # bold white, reset - if line.startswith("@@"): + elif line.startswith("@@"): line = "\033[36m" + line + "\033[0m" # cyan, reset - if line.startswith("+"): + elif line.startswith("+"): line = "\033[32m" + line + "\033[0m" # green, reset elif line.startswith("-"): line = "\033[31m" + line + "\033[0m" # red, reset @@ -857,30 +908,22 @@ def color_diff(contents: str) -> str: def wrap_stream_for_windows( f: io.TextIOWrapper, -) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]: +) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32"]: """ - Wrap the stream in colorama's wrap_stream so colors are shown on Windows. + Wrap stream with colorama's wrap_stream so colors are shown on Windows. - If `colorama` is not found, then no change is made. If `colorama` does - exist, then it handles the logic to determine whether or not to change - things. + If `colorama` is unavailable, the original stream is returned unmodified. + Otherwise, the `wrap_stream()` function determines whether the stream needs + to be wrapped for a Windows environment and will accordingly either return + an `AnsiToWin32` wrapper or the original stream. """ try: - from colorama import initialise - - # We set `strip=False` so that we can don't have to modify - # test_express_diff_with_color. - f = initialise.wrap_stream( - f, convert=None, strip=False, autoreset=False, wrap=True - ) - - # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object - # which does not have a `detach()` method. So we fake one. - f.detach = lambda *args, **kwargs: None # type: ignore + from colorama.initialise import wrap_stream except ImportError: - pass - - return f + return f + else: + # Set `strip=False` to avoid needing to modify test_express_diff_with_color. + return wrap_stream(f, convert=None, strip=False, autoreset=False, wrap=True) def format_stdin_to_stdout( @@ -921,7 +964,7 @@ def format_stdin_to_stdout( def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent: - """Reformat contents a file and return new contents. + """Reformat contents of a file and return new contents. If `fast` is False, additionally confirm that the reformatted code is valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it. @@ -947,7 +990,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent: allowed. Example: >>> import black - >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode())) + >>> print(black.format_str("def f(arg:str='')->None:...", mode=black.Mode())) def f(arg: str = "") -> None: ... @@ -1308,7 +1351,13 @@ class BracketTracker: self.maybe_decrement_after_lambda_arguments(leaf) if leaf.type in CLOSING_BRACKETS: self.depth -= 1 - opening_bracket = self.bracket_match.pop((self.depth, leaf.type)) + try: + opening_bracket = self.bracket_match.pop((self.depth, leaf.type)) + except KeyError as e: + raise BracketMatchError( + "Unable to match a closing bracket to the following opening" + f" bracket: {leaf}" + ) from e leaf.opening_bracket = opening_bracket if not leaf.value: self.invisible.append(leaf) @@ -1824,6 +1873,10 @@ class EmptyLineTracker: return 0, 0 if self.previous_line.is_decorator: + if self.is_pyi and current_line.is_stub_class: + # Insert an empty line after a decorated stub class + return 0, 1 + return 0, 0 if self.previous_line.depth < current_line.depth and ( @@ -1847,8 +1900,11 @@ class EmptyLineTracker: newlines = 0 else: newlines = 1 - elif current_line.is_def and not self.previous_line.is_def: - # Blank line between a block of functions and a block of non-functions + elif ( + current_line.is_def or current_line.is_decorator + ) and not self.previous_line.is_def: + # Blank line between a block of functions (maybe with preceding + # decorators) and a block of non-functions newlines = 1 else: newlines = 0 @@ -2049,7 +2105,6 @@ class LineGenerator(Visitor[Line]): if leaf.value[tail_len + 1] == docstring[-1]: docstring = docstring + " " leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:] - normalize_string_quotes(leaf) yield from self.visit_default(leaf) @@ -2168,6 +2223,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901 ): # Python 2 print chevron return NO + elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator: + # no space in decorators + return NO elif prev.type in OPENING_BRACKETS: return NO @@ -2669,9 +2727,9 @@ def transform_line( transformers = [ string_merge, string_paren_strip, + string_split, delimiter_split, standalone_comment_split, - string_split, string_paren_wrap, rhs, ] @@ -2890,11 +2948,8 @@ class StringMerger(CustomSplitMapMixin, StringTransformer): """StringTransformer that merges strings together. Requirements: - (A) The line contains adjacent strings such that at most one substring - has inline comments AND none of those inline comments are pragmas AND - the set of all substring prefixes is either of length 1 or equal to - {"", "f"} AND none of the substrings are raw strings (i.e. are prefixed - with 'r'). + (A) The line contains adjacent strings such that ALL of the validation checks + listed in StringMerger.__validate_msg(...)'s docstring pass. OR (B) The line contains a string which uses line continuation backslashes. @@ -3143,6 +3198,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer): * Ok(None), if ALL validation checks (listed below) pass. OR * Err(CannotTransform), if any of the following are true: + - The target string group does not contain ANY stand-alone comments. - The target string is not in a string group (i.e. it has no adjacent strings). - The string group has more than one inline comment. @@ -3151,6 +3207,26 @@ class StringMerger(CustomSplitMapMixin, StringTransformer): length greater than one and is not equal to {"", "f"}. - The string group consists of raw strings. """ + # We first check for "inner" stand-alone comments (i.e. stand-alone + # comments that have a string leaf before them AND after them). + for inc in [1, -1]: + i = string_idx + found_sa_comment = False + is_valid_index = is_valid_index_factory(line.leaves) + while is_valid_index(i) and line.leaves[i].type in [ + token.STRING, + STANDALONE_COMMENT, + ]: + if line.leaves[i].type == STANDALONE_COMMENT: + found_sa_comment = True + elif found_sa_comment: + return TErr( + "StringMerger does NOT merge string groups which contain " + "stand-alone comments." + ) + + i += inc + num_of_inline_string_comments = 0 set_of_prefixes = set() num_of_strings = 0 @@ -3199,7 +3275,8 @@ class StringParenStripper(StringTransformer): Requirements: The line contains a string which is surrounded by parentheses and: - - The target string is NOT the only argument to a function call). + - The target string is NOT the only argument to a function call. + - The target string is NOT a "pointless" string. - If the target string contains a PERCENT, the brackets are not preceeded or followed by an operator with higher precedence than PERCENT. @@ -3223,6 +3300,14 @@ class StringParenStripper(StringTransformer): if leaf.type != token.STRING: continue + # If this is a "pointless" string... + if ( + leaf.parent + and leaf.parent.parent + and leaf.parent.parent.type == syms.simple_stmt + ): + continue + # Should be preceded by a non-empty LPAR... if ( not is_valid_index(idx - 1) @@ -3307,10 +3392,17 @@ class StringParenStripper(StringTransformer): yield TErr( "Will not strip parentheses which have comments attached to them." ) + return new_line = line.clone() new_line.comments = line.comments.copy() - append_leaves(new_line, line, LL[: string_idx - 1]) + try: + append_leaves(new_line, line, LL[: string_idx - 1]) + except BracketMatchError: + # HACK: I believe there is currently a bug somewhere in + # right_hand_split() that is causing brackets to not be tracked + # properly by a shared BracketTracker. + append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True) string_leaf = Leaf(token.STRING, LL[string_idx].value) LL[string_idx - 1].remove() @@ -3473,9 +3565,12 @@ class BaseStringSplitter(StringTransformer): # WMA4 a single space. offset += 1 - # WMA4 the lengths of any leaves that came before that space. - for leaf in LL[: p_idx + 1]: + # WMA4 the lengths of any leaves that came before that space, + # but after any closing bracket before that space. + for leaf in reversed(LL[: p_idx + 1]): offset += len(str(leaf)) + if leaf.type in CLOSING_BRACKETS: + break if is_valid_index(string_idx + 1): N = LL[string_idx + 1] @@ -3541,7 +3636,8 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): MIN_SUBSTR_SIZE characters. The string will ONLY be split on spaces (i.e. each new substring should - start with a space). + start with a space). Note that the string will NOT be split on a space + which is escaped with a backslash. If the string is an f-string, it will NOT be split in the middle of an f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x @@ -3561,13 +3657,14 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): MIN_SUBSTR_SIZE = 6 # Matches an "f-expression" (e.g. {var}) that might be found in an f-string. RE_FEXPR = r""" - (? TMatchResult: @@ -3881,11 +3978,23 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): section of this classes' docstring would be be met by returning @i. """ is_space = string[i] == " " + + is_not_escaped = True + j = i - 1 + while is_valid_index(j) and string[j] == "\\": + is_not_escaped = not is_not_escaped + j -= 1 + is_big_enough = ( len(string[i:]) >= self.MIN_SUBSTR_SIZE and len(string[:i]) >= self.MIN_SUBSTR_SIZE ) - return is_space and is_big_enough and not breaks_fstring_expression(i) + return ( + is_space + and is_not_escaped + and is_big_enough + and not breaks_fstring_expression(i) + ) # First, we check all indices BELOW @max_break_idx. break_idx = max_break_idx @@ -4444,10 +4553,11 @@ def contains_pragma_comment(comment_list: List[Leaf]) -> bool: of the more common static analysis tools for python (e.g. mypy, flake8, pylint). """ - return any( - comment.value.startswith(("# type:", "# noqa", "# pylint:")) - for comment in comment_list - ) + for comment in comment_list: + if comment.value.startswith(("# type:", "# noqa", "# pylint:")): + return True + + return False def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]: @@ -4580,7 +4690,9 @@ def line_to_string(line: Line) -> str: return str(line).strip("\n") -def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None: +def append_leaves( + new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False +) -> None: """ Append leaves (taken from @old_line) to @new_line, making sure to fix the underlying Node structure where appropriate. @@ -4596,7 +4708,7 @@ def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None: for old_leaf in leaves: new_leaf = Leaf(old_leaf.type, old_leaf.value) replace_child(old_leaf, new_leaf) - new_line.append(new_leaf) + new_line.append(new_leaf, preformatted=preformatted) for comment_leaf in old_line.comments_after(old_leaf): new_line.append(comment_leaf, preformatted=True) @@ -5126,31 +5238,52 @@ def normalize_numeric_literal(leaf: Leaf) -> None: # Leave octal and binary literals alone. pass elif text.startswith("0x"): - # Change hex literals to upper case. - before, after = text[:2], text[2:] - text = f"{before}{after.upper()}" + text = format_hex(text) elif "e" in text: - before, after = text.split("e") - sign = "" - if after.startswith("-"): - after = after[1:] - sign = "-" - elif after.startswith("+"): - after = after[1:] - before = format_float_or_int_string(before) - text = f"{before}e{sign}{after}" + text = format_scientific_notation(text) elif text.endswith(("j", "l")): - number = text[:-1] - suffix = text[-1] - # Capitalize in "2L" because "l" looks too similar to "1". - if suffix == "l": - suffix = "L" - text = f"{format_float_or_int_string(number)}{suffix}" + text = format_long_or_complex_number(text) else: text = format_float_or_int_string(text) leaf.value = text +def format_hex(text: str) -> str: + """ + Formats a hexadecimal string like "0x12b3" + + Uses lowercase because of similarity between "B" and "8", which + can cause security issues. + see: https://github.com/psf/black/issues/1692 + """ + + before, after = text[:2], text[2:] + return f"{before}{after.lower()}" + + +def format_scientific_notation(text: str) -> str: + """Formats a numeric string utilizing scentific notation""" + before, after = text.split("e") + sign = "" + if after.startswith("-"): + after = after[1:] + sign = "-" + elif after.startswith("+"): + after = after[1:] + before = format_float_or_int_string(before) + return f"{before}e{sign}{after}" + + +def format_long_or_complex_number(text: str) -> str: + """Formats a long or complex string like `10L` or `10j`""" + number = text[:-1] + suffix = text[-1] + # Capitalize in "2L" because "l" looks too similar to "1". + if suffix == "l": + suffix = "L" + return f"{format_float_or_int_string(number)}{suffix}" + + def format_float_or_int_string(text: str) -> str: """Formats a float string like "1.0".""" if "." not in text: @@ -5190,9 +5323,9 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: if check_lpar: if is_walrus_assignment(child): - continue + pass - if child.type == syms.atom: + elif child.type == syms.atom: if maybe_make_parens_invisible_in_atom(child, parent=node): wrap_in_parentheses(node, child, visible=False) elif is_one_tuple(child): @@ -5452,6 +5585,49 @@ def is_walrus_assignment(node: LN) -> bool: return inner is not None and inner.type == syms.namedexpr_test +def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool: + """Return True iff `node` is a trailer valid in a simple decorator""" + return node.type == syms.trailer and ( + ( + len(node.children) == 2 + and node.children[0].type == token.DOT + and node.children[1].type == token.NAME + ) + # last trailer can be arguments + or ( + last + and len(node.children) == 3 + and node.children[0].type == token.LPAR + # and node.children[1].type == syms.argument + and node.children[2].type == token.RPAR + ) + ) + + +def is_simple_decorator_expression(node: LN) -> bool: + """Return True iff `node` could be a 'dotted name' decorator + + This function takes the node of the 'namedexpr_test' of the new decorator + grammar and test if it would be valid under the old decorator grammar. + + The old grammar was: decorator: @ dotted_name [arguments] NEWLINE + The new grammar is : decorator: @ namedexpr_test NEWLINE + """ + if node.type == token.NAME: + return True + if node.type == syms.power: + if node.children: + return ( + node.children[0].type == token.NAME + and all(map(is_simple_decorator_trailer, node.children[1:-1])) + and ( + len(node.children) < 2 + or is_simple_decorator_trailer(node.children[-1], last=True) + ) + ) + return False + + def is_yield(node: LN) -> bool: """Return True if `node` holds a `yield` or `yield from` expression.""" if node.type == syms.yield_expr: @@ -5637,6 +5813,8 @@ def get_features_used(node: Node) -> Set[Feature]: - underscores in numeric literals; - trailing commas after * or ** in function signatures and calls; - positional only arguments in function signatures and lambdas; + - assignment expression; + - relaxed decorator syntax; """ features: Set[Feature] = set() for n in node.pre_order(): @@ -5656,6 +5834,12 @@ def get_features_used(node: Node) -> Set[Feature]: elif n.type == token.COLONEQUAL: features.add(Feature.ASSIGNMENT_EXPRESSIONS) + elif n.type == syms.decorator: + if len(n.children) > 1 and not is_simple_decorator_expression( + n.children[1] + ): + features.add(Feature.RELAXED_DECORATORS) + elif ( n.type in {syms.typedargslist, syms.arglist} and n.children @@ -5831,7 +6015,8 @@ def normalize_path_maybe_ignore( `report` is where "path ignored" output goes. """ try: - normalized_path = path.resolve().relative_to(root).as_posix() + abspath = path if path.is_absolute() else Path.cwd() / path + normalized_path = abspath.resolve().relative_to(root).as_posix() except OSError as e: report.path_ignored(path, f"cannot be read because {e}") return None @@ -6626,13 +6811,33 @@ def is_docstring(leaf: Leaf) -> bool: return False +def lines_with_leading_tabs_expanded(s: str) -> List[str]: + """ + Splits string into lines and expands only leading tabs (following the normal + Python rules) + """ + lines = [] + for line in s.splitlines(): + # Find the index of the first non-whitespace character after a string of + # whitespace that includes at least one tab + match = re.match(r"\s*\t+\s*(\S)", line) + if match: + first_non_whitespace_idx = match.start(1) + + lines.append( + line[:first_non_whitespace_idx].expandtabs() + + line[first_non_whitespace_idx:] + ) + else: + lines.append(line) + return lines + + def fix_docstring(docstring: str, prefix: str) -> str: # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation if not docstring: return "" - # Convert tabs to spaces (following the normal Python rules) - # and split into a list of lines: - lines = docstring.expandtabs().splitlines() + lines = lines_with_leading_tabs_expanded(docstring) # Determine minimum indentation (first line doesn't count): indent = sys.maxsize for line in lines[1:]: