X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/8842c5ffa888aab2e4961fc54db45e7f3ca32ad9..bc71685d229baa28fdee8bb1928b82e9f632e0a4:/src/black/__init__.py?ds=inline diff --git a/src/black/__init__.py b/src/black/__init__.py index 349da1e..8d0c70f 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -240,6 +240,7 @@ class Mode: target_versions: Set[TargetVersion] = field(default_factory=set) line_length: int = DEFAULT_LINE_LENGTH string_normalization: bool = True + experimental_string_processing: bool = False is_pyi: bool = False def get_cache_key(self) -> str: @@ -376,6 +377,15 @@ def target_version_option_callback( is_flag=True, help="Don't normalize string quotes or prefixes.", ) +@click.option( + "--experimental-string-processing", + is_flag=True, + hidden=True, + help=( + "Experimental option that performs more normalization on string literals." + " Currently disabled because it leads to some crashes." + ), +) @click.option( "--check", is_flag=True, @@ -485,6 +495,7 @@ def main( fast: bool, pyi: bool, skip_string_normalization: bool, + experimental_string_processing: bool, quiet: bool, verbose: bool, include: str, @@ -505,6 +516,7 @@ def main( line_length=line_length, is_pyi=pyi, string_normalization=not skip_string_normalization, + experimental_string_processing=experimental_string_processing, ) if config and verbose: out(f"Using configuration from {config}.", bold=False, fg="blue") @@ -940,6 +952,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent: ... A more complex example: + >>> print( ... black.format_str( ... "def f(arg:str='')->None: hey", @@ -984,10 +997,7 @@ def format_str(src_contents: str, *, mode: Mode) -> FileContent: before, after = elt.maybe_empty_lines(current_line) dst_contents.append(str(empty_line) * before) for line in transform_line( - current_line, - line_length=mode.line_length, - normalize_strings=mode.string_normalization, - features=split_line_features, + current_line, mode=mode, features=split_line_features ): dst_contents.append(str(line)) return "".join(dst_contents) @@ -1432,7 +1442,8 @@ class Line: ) if self.inside_brackets or not preformatted: self.bracket_tracker.mark(leaf) - self.maybe_remove_trailing_comma(leaf) + if self.maybe_should_explode(leaf): + self.should_explode = True if not self.append_comment(leaf): self.leaves.append(leaf) @@ -1484,69 +1495,6 @@ class Line: Leaf(token.DOT, ".") for _ in range(3) ] - @property - def is_collection_with_optional_trailing_comma(self) -> bool: - """Is this line a collection literal with a trailing comma that's optional? - - Note that the trailing comma in a 1-tuple is not optional. - """ - if not self.leaves or len(self.leaves) < 4: - return False - - # Look for and address a trailing colon. - if self.leaves[-1].type == token.COLON: - closer = self.leaves[-2] - close_index = -2 - else: - closer = self.leaves[-1] - close_index = -1 - if closer.type not in CLOSING_BRACKETS or self.inside_brackets: - return False - - if closer.type == token.RPAR: - # Tuples require an extra check, because if there's only - # one element in the tuple removing the comma unmakes the - # tuple. - # - # We also check for parens before looking for the trailing - # comma because in some cases (eg assigning a dict - # literal) the literal gets wrapped in temporary parens - # during parsing. This case is covered by the - # collections.py test data. - opener = closer.opening_bracket - for _open_index, leaf in enumerate(self.leaves): - if leaf is opener: - break - - else: - # Couldn't find the matching opening paren, play it safe. - return False - - commas = 0 - comma_depth = self.leaves[close_index - 1].bracket_depth - for leaf in self.leaves[_open_index + 1 : close_index]: - if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA: - commas += 1 - if commas > 1: - # We haven't looked yet for the trailing comma because - # we might also have caught noop parens. - return self.leaves[close_index - 1].type == token.COMMA - - elif commas == 1: - return False # it's either a one-tuple or didn't have a trailing comma - - if self.leaves[close_index - 1].type in CLOSING_BRACKETS: - close_index -= 1 - closer = self.leaves[close_index] - if closer.type == token.RPAR: - # TODO: this is a gut feeling. Will we ever see this? - return False - - if self.leaves[close_index - 1].type != token.COMMA: - return False - - return True - @property def is_def(self) -> bool: """Is this a function definition? (Also returns True for async defs.)""" @@ -1671,42 +1619,29 @@ class Line: def contains_multiline_strings(self) -> bool: return any(is_multiline_string(leaf) for leaf in self.leaves) - def maybe_remove_trailing_comma(self, closing: Leaf) -> bool: - """Remove trailing comma if there is one and it's safe.""" - if not (self.leaves and self.leaves[-1].type == token.COMMA): - return False - - # We remove trailing commas only in the case of importing a - # single name from a module. + def maybe_should_explode(self, closing: Leaf) -> bool: + """Return True if this line should explode (always be split), that is when: + - there's a pre-existing trailing comma here; and + - it's not a one-tuple. + """ if not ( - self.leaves - and self.is_import - and len(self.leaves) > 4 + closing.type in CLOSING_BRACKETS + and self.leaves and self.leaves[-1].type == token.COMMA - and closing.type in CLOSING_BRACKETS - and self.leaves[-4].type == token.NAME - and ( - # regular `from foo import bar,` - self.leaves[-4].value == "import" - # `from foo import (bar as baz,) - or ( - len(self.leaves) > 6 - and self.leaves[-6].value == "import" - and self.leaves[-3].value == "as" - ) - # `from foo import bar as baz,` - or ( - len(self.leaves) > 5 - and self.leaves[-5].value == "import" - and self.leaves[-3].value == "as" - ) - ) - and closing.type == token.RPAR + and not self.leaves[-1].was_checked # pre-existing ): return False - self.remove_trailing_comma() - return True + if closing.type in {token.RBRACE, token.RSQB}: + return True + + if self.is_import: + return True + + if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves): + return True + + return False def append_comment(self, comment: Leaf) -> bool: """Add an inline or standalone comment to the line.""" @@ -2649,10 +2584,7 @@ def make_comment(content: str) -> str: def transform_line( - line: Line, - line_length: int, - normalize_strings: bool, - features: Collection[Feature] = (), + line: Line, mode: Mode, features: Collection[Feature] = () ) -> Iterator[Line]: """Transform a `line`, potentially splitting it into many lines. @@ -2668,7 +2600,7 @@ def transform_line( def init_st(ST: Type[StringTransformer]) -> StringTransformer: """Initialize StringTransformer""" - return ST(line_length, normalize_strings) + return ST(mode.line_length, mode.string_normalization) string_merge = init_st(StringMerger) string_paren_strip = init_st(StringParenStripper) @@ -2679,23 +2611,27 @@ def transform_line( if ( not line.contains_uncollapsable_type_comments() and not line.should_explode - and not line.is_collection_with_optional_trailing_comma and ( - is_line_short_enough(line, line_length=line_length, line_str=line_str) + is_line_short_enough(line, line_length=mode.line_length, line_str=line_str) or line.contains_unsplittable_type_ignore() ) - and not (line.contains_standalone_comments() and line.inside_brackets) + and not (line.inside_brackets and line.contains_standalone_comments()) ): # Only apply basic string preprocessing, since lines shouldn't be split here. - transformers = [string_merge, string_paren_strip] + if mode.experimental_string_processing: + transformers = [string_merge, string_paren_strip] + else: + transformers = [] elif line.is_def: transformers = [left_hand_split] else: def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]: - for omit in generate_trailers_to_omit(line, line_length): - lines = list(right_hand_split(line, line_length, features, omit=omit)) - if is_line_short_enough(lines[0], line_length=line_length): + for omit in generate_trailers_to_omit(line, mode.line_length): + lines = list( + right_hand_split(line, mode.line_length, features, omit=omit) + ) + if is_line_short_enough(lines[0], line_length=mode.line_length): yield from lines return @@ -2706,24 +2642,30 @@ def transform_line( # See #762 and #781 for the full story. yield from right_hand_split(line, line_length=1, features=features) - if line.inside_brackets: - transformers = [ - string_merge, - string_paren_strip, - delimiter_split, - standalone_comment_split, - string_split, - string_paren_wrap, - rhs, - ] + if mode.experimental_string_processing: + if line.inside_brackets: + transformers = [ + string_merge, + string_paren_strip, + delimiter_split, + standalone_comment_split, + string_split, + string_paren_wrap, + rhs, + ] + else: + transformers = [ + string_merge, + string_paren_strip, + string_split, + string_paren_wrap, + rhs, + ] else: - transformers = [ - string_merge, - string_paren_strip, - string_split, - string_paren_wrap, - rhs, - ] + if line.inside_brackets: + transformers = [delimiter_split, standalone_comment_split, rhs] + else: + transformers = [rhs] for transform in transformers: # We are accumulating lines in `result` because we might want to abort @@ -2738,12 +2680,7 @@ def transform_line( ) result.extend( - transform_line( - transformed_line, - line_length=line_length, - normalize_strings=normalize_strings, - features=features, - ) + transform_line(transformed_line, mode=mode, features=features) ) except CannotTransform: continue @@ -3249,7 +3186,9 @@ class StringParenStripper(StringTransformer): Requirements: The line contains a string which is surrounded by parentheses and: - The target string is NOT the only argument to a function call). - - The RPAR is NOT followed by an attribute access (i.e. a dot). + - If the target string contains a PERCENT, the brackets are not + preceeded or followed by an operator with higher precedence than + PERCENT. Transformations: The parentheses mentioned in the 'Requirements' section are stripped. @@ -3292,14 +3231,51 @@ class StringParenStripper(StringTransformer): string_parser = StringParser() next_idx = string_parser.parse(LL, string_idx) + # if the leaves in the parsed string include a PERCENT, we need to + # make sure the initial LPAR is NOT preceded by an operator with + # higher or equal precedence to PERCENT + if is_valid_index(idx - 2): + # mypy can't quite follow unless we name this + before_lpar = LL[idx - 2] + if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and ( + ( + before_lpar.type + in { + token.STAR, + token.AT, + token.SLASH, + token.DOUBLESLASH, + token.PERCENT, + token.TILDE, + token.DOUBLESTAR, + token.AWAIT, + token.LSQB, + token.LPAR, + } + ) + or ( + # only unary PLUS/MINUS + before_lpar.parent + and before_lpar.parent.type == syms.factor + and (before_lpar.type in {token.PLUS, token.MINUS}) + ) + ): + continue + # Should be followed by a non-empty RPAR... if ( is_valid_index(next_idx) and LL[next_idx].type == token.RPAR and not is_empty_rpar(LL[next_idx]) ): - # That RPAR should NOT be followed by a '.' symbol. - if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT: + # That RPAR should NOT be followed by anything with higher + # precedence than PERCENT + if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in { + token.DOUBLESTAR, + token.LSQB, + token.LPAR, + token.DOT, + }: continue return Ok(string_idx) @@ -4764,10 +4740,8 @@ def right_hand_split( tail = bracket_split_build_line(tail_leaves, line, opening_bracket) bracket_split_succeeded_or_raise(head, body, tail) if ( - # the body shouldn't be exploded - not body.should_explode # the opening bracket is an optional paren - and opening_bracket.type == token.LPAR + opening_bracket.type == token.LPAR and not opening_bracket.value # the closing bracket is an optional paren and closing_bracket.type == token.RPAR @@ -4864,7 +4838,9 @@ def bracket_split_build_line( continue if leaves[i].type != token.COMMA: - leaves.insert(i + 1, Leaf(token.COMMA, ",")) + new_comma = Leaf(token.COMMA, ",") + new_comma.was_checked = True + leaves.insert(i + 1, new_comma) break # Populate the line @@ -4872,8 +4848,8 @@ def bracket_split_build_line( result.append(leaf, preformatted=True) for comment_after in original.comments_after(leaf): result.append(comment_after, preformatted=True) - if is_body: - result.should_explode = should_explode(result, opening_bracket) + if is_body and should_split_body_explode(result, opening_bracket): + result.should_explode = True return result @@ -4958,7 +4934,9 @@ def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[ and current_line.leaves[-1].type != token.COMMA and current_line.leaves[-1].type != STANDALONE_COMMENT ): - current_line.append(Leaf(token.COMMA, ",")) + new_comma = Leaf(token.COMMA, ",") + new_comma.was_checked = True + current_line.append(new_comma) yield current_line @@ -5580,24 +5558,60 @@ def ensure_visible(leaf: Leaf) -> None: leaf.value = ")" -def should_explode(line: Line, opening_bracket: Leaf) -> bool: +def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool: """Should `line` immediately be split with `delimiter_split()` after RHS?""" - if not ( - opening_bracket.parent - and opening_bracket.parent.type in {syms.atom, syms.import_from} - and opening_bracket.value in "[{(" - ): + if not (opening_bracket.parent and opening_bracket.value in "[{("): return False + # We're essentially checking if the body is delimited by commas and there's more + # than one of them (we're excluding the trailing comma and if the delimiter priority + # is still commas, that means there's more). + exclude = set() + pre_existing_trailing_comma = False try: last_leaf = line.leaves[-1] - exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set() + if last_leaf.type == token.COMMA: + pre_existing_trailing_comma = not last_leaf.was_checked + exclude.add(id(last_leaf)) max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude) except (IndexError, ValueError): return False - return max_priority == COMMA_PRIORITY + return max_priority == COMMA_PRIORITY and ( + # always explode imports + opening_bracket.parent.type in {syms.atom, syms.import_from} + or pre_existing_trailing_comma + ) + + +def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool: + """Return True if content between `opening` and `closing` looks like a one-tuple.""" + depth = closing.bracket_depth + 1 + for _opening_index, leaf in enumerate(leaves): + if leaf is opening: + break + + else: + raise LookupError("Opening paren not found in `leaves`") + + commas = 0 + _opening_index += 1 + for leaf in leaves[_opening_index:]: + if leaf is closing: + break + + bracket_depth = leaf.bracket_depth + if bracket_depth == depth and leaf.type == token.COMMA: + commas += 1 + if leaf.parent and leaf.parent.type in { + syms.arglist, + syms.typedargslist, + }: + commas += 1 + break + + return commas < 2 def get_features_used(node: Node) -> Set[Feature]: @@ -6063,7 +6077,7 @@ def _stringify_ast( and field == "value" and isinstance(value, str) ): - normalized = re.sub(r" *\n[ \t]+", "\n ", value).strip() + normalized = re.sub(r" *\n[ \t]*", "\n", value).strip() else: normalized = value yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"