X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/e2adcd7de10eb570987bb894d95f2ff8c8693b9f..34a93a8d49951828dacd4237feda5db1f6ec854b:/src/black/trans.py diff --git a/src/black/trans.py b/src/black/trans.py index 74b932b..a6a416e 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -30,10 +30,9 @@ else: from mypy_extensions import trait -from black.brackets import BracketMatchError from black.comments import contains_pragma_comment from black.lines import Line, append_leaves -from black.mode import Feature +from black.mode import Feature, Mode from black.nodes import ( CLOSING_BRACKETS, OPENING_BRACKETS, @@ -41,6 +40,7 @@ from black.nodes import ( is_empty_lpar, is_empty_par, is_empty_rpar, + is_part_of_annotation, parent_type, replace_child, syms, @@ -63,13 +63,13 @@ class CannotTransform(Exception): # types T = TypeVar("T") LN = Union[Leaf, Node] -Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]] +Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]] Index = int NodeType = int ParserState = int StringID = int TResult = Result[T, CannotTransform] # (T)ransform Result -TMatchResult = TResult[Index] +TMatchResult = TResult[List[Index]] def TErr(err_msg: str) -> Err[CannotTransform]: @@ -81,7 +81,9 @@ def TErr(err_msg: str) -> Err[CannotTransform]: return Err(cant_transform) -def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]: +def hug_power_op( + line: Line, features: Collection[Feature], mode: Mode +) -> Iterator[Line]: """A transformer which normalizes spacing around power operators.""" # Performance optimization to avoid unnecessary Leaf clones and other ops. @@ -198,14 +200,19 @@ class StringTransformer(ABC): def do_match(self, line: Line) -> TMatchResult: """ Returns: - * Ok(string_idx) such that `line.leaves[string_idx]` is our target - string, if a match was able to be made. + * Ok(string_indices) such that for each index, `line.leaves[index]` + is our target string if a match was able to be made. For + transformers that don't result in more lines (e.g. StringMerger, + StringParenStripper), multiple matches and transforms are done at + once to reduce the complexity. OR - * Err(CannotTransform), if a match was not able to be made. + * Err(CannotTransform), if no match could be made. """ @abstractmethod - def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: + def do_transform( + self, line: Line, string_indices: List[int] + ) -> Iterator[TResult[Line]]: """ Yields: * Ok(new_line) where new_line is the new transformed line. @@ -223,7 +230,9 @@ class StringTransformer(ABC): yield an CannotTransform after that point.) """ - def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]: + def __call__( + self, line: Line, _features: Collection[Feature], _mode: Mode + ) -> Iterator[Line]: """ StringTransformer instances have a call signature that mirrors that of the Transformer type. @@ -246,9 +255,9 @@ class StringTransformer(ABC): " this line as one that it can transform." ) from cant_transform - string_idx = match_result.ok() + string_indices = match_result.ok() - for line_result in self.do_transform(line, string_idx): + for line_result in self.do_transform(line, string_indices): if isinstance(line_result, Err): cant_transform = line_result.err() raise CannotTransform( @@ -351,7 +360,7 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): Requirements: (A) The line contains adjacent strings such that ALL of the validation checks - listed in StringMerger.__validate_msg(...)'s docstring pass. + listed in StringMerger._validate_msg(...)'s docstring pass. OR (B) The line contains a string which uses line continuation backslashes. @@ -371,28 +380,50 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): is_valid_index = is_valid_index_factory(LL) - for i, leaf in enumerate(LL): + string_indices = [] + idx = 0 + while is_valid_index(idx): + leaf = LL[idx] if ( leaf.type == token.STRING - and is_valid_index(i + 1) - and LL[i + 1].type == token.STRING + and is_valid_index(idx + 1) + and LL[idx + 1].type == token.STRING ): - return Ok(i) + if not is_part_of_annotation(leaf): + string_indices.append(idx) + + # Advance to the next non-STRING leaf. + idx += 2 + while is_valid_index(idx) and LL[idx].type == token.STRING: + idx += 1 - if leaf.type == token.STRING and "\\\n" in leaf.value: - return Ok(i) + elif leaf.type == token.STRING and "\\\n" in leaf.value: + string_indices.append(idx) + # Advance to the next non-STRING leaf. + idx += 1 + while is_valid_index(idx) and LL[idx].type == token.STRING: + idx += 1 - return TErr("This line has no strings that need merging.") + else: + idx += 1 + + if string_indices: + return Ok(string_indices) + else: + return TErr("This line has no strings that need merging.") - def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: + def do_transform( + self, line: Line, string_indices: List[int] + ) -> Iterator[TResult[Line]]: new_line = line + rblc_result = self._remove_backslash_line_continuation_chars( - new_line, string_idx + new_line, string_indices ) if isinstance(rblc_result, Ok): new_line = rblc_result.ok() - msg_result = self._merge_string_group(new_line, string_idx) + msg_result = self._merge_string_group(new_line, string_indices) if isinstance(msg_result, Ok): new_line = msg_result.ok() @@ -413,7 +444,7 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): @staticmethod def _remove_backslash_line_continuation_chars( - line: Line, string_idx: int + line: Line, string_indices: List[int] ) -> TResult[Line]: """ Merge strings that were split across multiple lines using @@ -427,34 +458,44 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): """ LL = line.leaves - string_leaf = LL[string_idx] - if not ( - string_leaf.type == token.STRING - and "\\\n" in string_leaf.value - and not has_triple_quotes(string_leaf.value) - ): + indices_to_transform = [] + for string_idx in string_indices: + string_leaf = LL[string_idx] + if ( + string_leaf.type == token.STRING + and "\\\n" in string_leaf.value + and not has_triple_quotes(string_leaf.value) + ): + indices_to_transform.append(string_idx) + + if not indices_to_transform: return TErr( - f"String leaf {string_leaf} does not contain any backslash line" - " continuation characters." + "Found no string leaves that contain backslash line continuation" + " characters." ) new_line = line.clone() new_line.comments = line.comments.copy() append_leaves(new_line, line, LL) - new_string_leaf = new_line.leaves[string_idx] - new_string_leaf.value = new_string_leaf.value.replace("\\\n", "") + for string_idx in indices_to_transform: + new_string_leaf = new_line.leaves[string_idx] + new_string_leaf.value = new_string_leaf.value.replace("\\\n", "") return Ok(new_line) - def _merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]: + def _merge_string_group( + self, line: Line, string_indices: List[int] + ) -> TResult[Line]: """ - Merges string group (i.e. set of adjacent strings) where the first - string in the group is `line.leaves[string_idx]`. + Merges string groups (i.e. set of adjacent strings). + + Each index from `string_indices` designates one string group's first + leaf in `line.leaves`. Returns: Ok(new_line), if ALL of the validation checks found in - __validate_msg(...) pass. + _validate_msg(...) pass. OR Err(CannotTransform), otherwise. """ @@ -462,10 +503,54 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): is_valid_index = is_valid_index_factory(LL) - vresult = self._validate_msg(line, string_idx) - if isinstance(vresult, Err): - return vresult + # A dict of {string_idx: tuple[num_of_strings, string_leaf]}. + merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {} + for string_idx in string_indices: + vresult = self._validate_msg(line, string_idx) + if isinstance(vresult, Err): + continue + merged_string_idx_dict[string_idx] = self._merge_one_string_group( + LL, string_idx, is_valid_index + ) + + if not merged_string_idx_dict: + return TErr("No string group is merged") + # Build the final line ('new_line') that this method will later return. + new_line = line.clone() + previous_merged_string_idx = -1 + previous_merged_num_of_strings = -1 + for i, leaf in enumerate(LL): + if i in merged_string_idx_dict: + previous_merged_string_idx = i + previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i] + new_line.append(string_leaf) + + if ( + previous_merged_string_idx + <= i + < previous_merged_string_idx + previous_merged_num_of_strings + ): + for comment_leaf in line.comments_after(LL[i]): + new_line.append(comment_leaf, preformatted=True) + continue + + append_leaves(new_line, line, [leaf]) + + return Ok(new_line) + + def _merge_one_string_group( + self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool] + ) -> Tuple[int, Leaf]: + """ + Merges one string group where the first string in the group is + `LL[string_idx]`. + + Returns: + A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the + number of strings merged and `leaf` is the newly merged string + to be replaced in the new line. + """ # If the string group is wrapped inside an Atom node, we must make sure # to later replace that Atom with our new (merged) string leaf. atom_node = LL[string_idx].parent @@ -491,6 +576,12 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): characters have been escaped. """ assert_is_leaf_string(string) + if "f" in string_prefix: + string = _toggle_fexpr_quotes(string, QUOTE) + # After quotes toggling, quotes in expressions won't be escaped + # because quotes can't be reused in f-strings. So we can simply + # let the escaping logic below run without knowing f-string + # expressions. RE_EVEN_BACKSLASHES = r"(?:(? TResult[None]: """Validate (M)erge (S)tring (G)roup - Transform-time string validation logic for __merge_string_group(...). + Transform-time string validation logic for _merge_string_group(...). Returns: * Ok(None), if ALL validation checks (listed below) pass. @@ -622,6 +700,11 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): - The set of all string prefixes in the string group is of length greater than one and is not equal to {"", "f"}. - The string group consists of raw strings. + - The string group is stringified type annotations. We don't want to + process stringified type annotations since pyright doesn't support + them spanning multiple string values. (NOTE: mypy, pytype, pyre do + support them, so we can change if pyright also gains support in the + future. See https://github.com/microsoft/pyright/issues/4359.) """ # We first check for "inner" stand-alone comments (i.e. stand-alone # comments that have a string leaf before them AND after them). @@ -711,7 +794,15 @@ class StringParenStripper(StringTransformer): is_valid_index = is_valid_index_factory(LL) - for idx, leaf in enumerate(LL): + string_indices = [] + + idx = -1 + while True: + idx += 1 + if idx >= len(LL): + break + leaf = LL[idx] + # Should be a string... if leaf.type != token.STRING: continue @@ -793,45 +884,73 @@ class StringParenStripper(StringTransformer): }: continue - return Ok(string_idx) + string_indices.append(string_idx) + idx = string_idx + while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING: + idx += 1 + if string_indices: + return Ok(string_indices) return TErr("This line has no strings wrapped in parens.") - def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: + def do_transform( + self, line: Line, string_indices: List[int] + ) -> Iterator[TResult[Line]]: LL = line.leaves - string_parser = StringParser() - rpar_idx = string_parser.parse(LL, string_idx) + string_and_rpar_indices: List[int] = [] + for string_idx in string_indices: + string_parser = StringParser() + rpar_idx = string_parser.parse(LL, string_idx) + + should_transform = True + for leaf in (LL[string_idx - 1], LL[rpar_idx]): + if line.comments_after(leaf): + # Should not strip parentheses which have comments attached + # to them. + should_transform = False + break + if should_transform: + string_and_rpar_indices.extend((string_idx, rpar_idx)) - for leaf in (LL[string_idx - 1], LL[rpar_idx]): - if line.comments_after(leaf): - yield TErr( - "Will not strip parentheses which have comments attached to them." - ) - return + if string_and_rpar_indices: + yield Ok(self._transform_to_new_line(line, string_and_rpar_indices)) + else: + yield Err( + CannotTransform("All string groups have comments attached to them.") + ) + + def _transform_to_new_line( + self, line: Line, string_and_rpar_indices: List[int] + ) -> Line: + LL = line.leaves new_line = line.clone() new_line.comments = line.comments.copy() - try: - append_leaves(new_line, line, LL[: string_idx - 1]) - except BracketMatchError: - # HACK: I believe there is currently a bug somewhere in - # right_hand_split() that is causing brackets to not be tracked - # properly by a shared BracketTracker. - append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True) - - string_leaf = Leaf(token.STRING, LL[string_idx].value) - LL[string_idx - 1].remove() - replace_child(LL[string_idx], string_leaf) - new_line.append(string_leaf) - - append_leaves( - new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :] - ) - LL[rpar_idx].remove() + previous_idx = -1 + # We need to sort the indices, since string_idx and its matching + # rpar_idx may not come in order, e.g. in + # `("outer" % ("inner".join(items)))`, the "inner" string's + # string_idx is smaller than "outer" string's rpar_idx. + for idx in sorted(string_and_rpar_indices): + leaf = LL[idx] + lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx + append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx]) + if leaf.type == token.STRING: + string_leaf = Leaf(token.STRING, LL[idx].value) + LL[lpar_or_rpar_idx].remove() # Remove lpar. + replace_child(LL[idx], string_leaf) + new_line.append(string_leaf) + else: + LL[lpar_or_rpar_idx].remove() # This is a rpar. + + previous_idx = idx + + # Append the leaves after the last idx: + append_leaves(new_line, line, LL[idx + 1 :]) - yield Ok(new_line) + return new_line class BaseStringSplitter(StringTransformer): @@ -884,7 +1003,12 @@ class BaseStringSplitter(StringTransformer): if isinstance(match_result, Err): return match_result - string_idx = match_result.ok() + string_indices = match_result.ok() + assert len(string_indices) == 1, ( + f"{self.__class__.__name__} should only find one match at a time, found" + f" {len(string_indices)}" + ) + string_idx = string_indices[0] vresult = self._validate(line, string_idx) if isinstance(vresult, Err): return vresult @@ -1058,33 +1182,19 @@ class BaseStringSplitter(StringTransformer): if LL[0].type != token.STRING: return None - matching_nodes = [ - syms.listmaker, - syms.dictsetmaker, - syms.testlist_gexp, - ] - # If the string is an immediate child of a list/set/tuple literal... - if ( - parent_type(LL[0]) in matching_nodes - or parent_type(LL[0].parent) in matching_nodes + # If the string is surrounded by commas (or is the first/last child)... + prev_sibling = LL[0].prev_sibling + next_sibling = LL[0].next_sibling + if not prev_sibling and not next_sibling and parent_type(LL[0]) == syms.atom: + # If it's an atom string, we need to check the parent atom's siblings. + parent = LL[0].parent + assert parent is not None # For type checkers. + prev_sibling = parent.prev_sibling + next_sibling = parent.next_sibling + if (not prev_sibling or prev_sibling.type == token.COMMA) and ( + not next_sibling or next_sibling.type == token.COMMA ): - # And the string is surrounded by commas (or is the first/last child)... - prev_sibling = LL[0].prev_sibling - next_sibling = LL[0].next_sibling - if ( - not prev_sibling - and not next_sibling - and parent_type(LL[0]) == syms.atom - ): - # If it's an atom string, we need to check the parent atom's siblings. - parent = LL[0].parent - assert parent is not None # For type checkers. - prev_sibling = parent.prev_sibling - next_sibling = parent.next_sibling - if (not prev_sibling or prev_sibling.type == token.COMMA) and ( - not next_sibling or next_sibling.type == token.COMMA - ): - return 0 + return 0 return None @@ -1140,6 +1250,30 @@ def fstring_contains_expr(s: str) -> bool: return any(iter_fexpr_spans(s)) +def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str: + """ + Toggles quotes used in f-string expressions that are `old_quote`. + + f-string expressions can't contain backslashes, so we need to toggle the + quotes if the f-string itself will end up using the same quote. We can + simply toggle without escaping because, quotes can't be reused in f-string + expressions. They will fail to parse. + + NOTE: If PEP 701 is accepted, above statement will no longer be true. + Though if quotes can be reused, we can simply reuse them without updates or + escaping, once Black figures out how to parse the new grammar. + """ + new_quote = "'" if old_quote == '"' else '"' + parts = [] + previous_index = 0 + for start, end in iter_fexpr_spans(fstring): + parts.append(fstring[previous_index:start]) + parts.append(fstring[start:end].replace(old_quote, new_quote)) + previous_index = end + parts.append(fstring[previous_index:]) + return "".join(parts) + + class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits "atom" strings (i.e. strings which exist on @@ -1232,10 +1366,17 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): if is_valid_index(idx): return TErr("This line does not end with a string.") - return Ok(string_idx) + return Ok([string_idx]) - def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: + def do_transform( + self, line: Line, string_indices: List[int] + ) -> Iterator[TResult[Line]]: LL = line.leaves + assert len(string_indices) == 1, ( + f"{self.__class__.__name__} should only find one match at a time, found" + f" {len(string_indices)}" + ) + string_idx = string_indices[0] QUOTE = LL[string_idx].value[-1] @@ -1256,7 +1397,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): string_op_leaves = self._get_string_operator_leaves(LL) string_op_leaves_length = ( - sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1 + sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1 if string_op_leaves else 0 ) @@ -1373,9 +1514,14 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): # prefix, and the current custom split did NOT originally use a # prefix... if ( - next_value != self._normalize_f_string(next_value, prefix) - and use_custom_breakpoints + use_custom_breakpoints and not csplit.has_prefix + and ( + # `next_value == prefix + QUOTE` happens when the custom + # split is an empty string. + next_value == prefix + QUOTE + or next_value != self._normalize_f_string(next_value, prefix) + ) ): # Then `csplit.break_idx` will be off by one after removing # the 'f' prefix. @@ -1652,10 +1798,11 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): * The line is a dictionary key assignment where some valid key is being assigned the value of some string. OR + * The line is an lambda expression and the value is a string. + OR * The line starts with an "atom" string that prefers to be wrapped in - parens. It's preferred to be wrapped when it's is an immediate child of - a list/set/tuple literal, AND the string is surrounded by commas (or is - the first/last child). + parens. It's preferred to be wrapped when the string is surrounded by + commas (or is the first/last child). Transformations: The chosen string is wrapped in parentheses and then split at the LPAR. @@ -1698,7 +1845,7 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): or self._else_match(LL) or self._assert_match(LL) or self._assign_match(LL) - or self._dict_match(LL) + or self._dict_or_lambda_match(LL) or self._prefer_paren_wrap_match(LL) ) @@ -1717,7 +1864,7 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): " resultant line would still be over the specified line" " length and can't be split further by StringSplitter." ) - return Ok(string_idx) + return Ok([string_idx]) return TErr("This line does not contain any non-atomic strings.") @@ -1856,23 +2003,24 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): return None @staticmethod - def _dict_match(LL: List[Leaf]) -> Optional[int]: + def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]: """ Returns: string_idx such that @LL[string_idx] is equal to our target (i.e. matched) string, if this line matches the dictionary key assignment - statement requirements listed in the 'Requirements' section of this - classes' docstring. + statement or lambda expression requirements listed in the + 'Requirements' section of this classes' docstring. OR None, otherwise. """ - # If this line is apart of a dictionary key assignment... - if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]: + # If this line is a part of a dictionary key assignment or lambda expression... + parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)] + if syms.dictsetmaker in parent_types or syms.lambdef in parent_types: is_valid_index = is_valid_index_factory(LL) for i, leaf in enumerate(LL): - # We MUST find a colon... - if leaf.type == token.COLON: + # We MUST find a colon, it can either be dict's or lambda's colon... + if leaf.type == token.COLON and i < len(LL) - 1: idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1 # That colon MUST be followed by a string... @@ -1893,8 +2041,15 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): return None - def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: + def do_transform( + self, line: Line, string_indices: List[int] + ) -> Iterator[TResult[Line]]: LL = line.leaves + assert len(string_indices) == 1, ( + f"{self.__class__.__name__} should only find one match at a time, found" + f" {len(string_indices)}" + ) + string_idx = string_indices[0] is_valid_index = is_valid_index_factory(LL) insert_str_child = insert_str_child_factory(LL[string_idx]) @@ -1966,6 +2121,25 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): f" (left_leaves={left_leaves}, right_leaves={right_leaves})" ) old_rpar_leaf = right_leaves.pop() + elif right_leaves and right_leaves[-1].type == token.RPAR: + # Special case for lambda expressions as dict's value, e.g.: + # my_dict = { + # "key": lambda x: f"formatted: {x}, + # } + # After wrapping the dict's value with parentheses, the string is + # followed by a RPAR but its opening bracket is lambda's, not + # the string's: + # "key": (lambda x: f"formatted: {x}), + opening_bracket = right_leaves[-1].opening_bracket + if opening_bracket is not None and opening_bracket in left_leaves: + index = left_leaves.index(opening_bracket) + if ( + index > 0 + and index < len(left_leaves) - 1 + and left_leaves[index - 1].type == token.COLON + and left_leaves[index + 1].value == "lambda" + ): + right_leaves.pop() append_leaves(string_line, line, right_leaves)