X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/a36878eb2f375e2ac1e13052f663909f3835ec46..7aa37ea0adf864baf3ef3dfbcfaf5ff1ff780250:/src/black/trans.py diff --git a/src/black/trans.py b/src/black/trans.py index 2360c13..c0cc926 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -2,7 +2,6 @@ String transformers that can split and merge strings. """ import re -import sys from abc import ABC, abstractmethod from collections import defaultdict from dataclasses import dataclass @@ -12,9 +11,11 @@ from typing import ( ClassVar, Collection, Dict, + Final, Iterable, Iterator, List, + Literal, Optional, Sequence, Set, @@ -23,16 +24,11 @@ from typing import ( Union, ) -if sys.version_info < (3, 8): - from typing_extensions import Final, Literal -else: - from typing import Literal, Final - from mypy_extensions import trait from black.comments import contains_pragma_comment from black.lines import Line, append_leaves -from black.mode import Feature +from black.mode import Feature, Mode from black.nodes import ( CLOSING_BRACKETS, OPENING_BRACKETS, @@ -48,9 +44,11 @@ from black.nodes import ( from black.rusty import Err, Ok, Result from black.strings import ( assert_is_leaf_string, + count_chars_in_width, get_string_prefix, has_triple_quotes, normalize_string_quotes, + str_width, ) from blib2to3.pgen2 import token from blib2to3.pytree import Leaf, Node @@ -63,7 +61,7 @@ class CannotTransform(Exception): # types T = TypeVar("T") LN = Union[Leaf, Node] -Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]] +Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]] Index = int NodeType = int ParserState = int @@ -71,6 +69,8 @@ StringID = int TResult = Result[T, CannotTransform] # (T)ransform Result TMatchResult = TResult[List[Index]] +SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"]) # East Asian stops + def TErr(err_msg: str) -> Err[CannotTransform]: """(T)ransform Err @@ -81,7 +81,9 @@ def TErr(err_msg: str) -> Err[CannotTransform]: return Err(cant_transform) -def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]: +def hug_power_op( + line: Line, features: Collection[Feature], mode: Mode +) -> Iterator[Line]: """A transformer which normalizes spacing around power operators.""" # Performance optimization to avoid unnecessary Leaf clones and other ops. @@ -199,11 +201,11 @@ class StringTransformer(ABC): """ Returns: * Ok(string_indices) such that for each index, `line.leaves[index]` - is our target string if a match was able to be made. For - transformers that don't result in more lines (e.g. StringMerger, - StringParenStripper), multiple matches and transforms are done at - once to reduce the complexity. - OR + is our target string if a match was able to be made. For + transformers that don't result in more lines (e.g. StringMerger, + StringParenStripper), multiple matches and transforms are done at + once to reduce the complexity. + OR * Err(CannotTransform), if no match could be made. """ @@ -214,12 +216,12 @@ class StringTransformer(ABC): """ Yields: * Ok(new_line) where new_line is the new transformed line. - OR + OR * Err(CannotTransform) if the transformation failed for some reason. The - `do_match(...)` template method should usually be used to reject - the form of the given Line, but in some cases it is difficult to - know whether or not a Line meets the StringTransformer's - requirements until the transformation is already midway. + `do_match(...)` template method should usually be used to reject + the form of the given Line, but in some cases it is difficult to + know whether or not a Line meets the StringTransformer's + requirements until the transformation is already midway. Side Effects: This method should NOT mutate @line directly, but it MAY mutate the @@ -228,7 +230,9 @@ class StringTransformer(ABC): yield an CannotTransform after that point.) """ - def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]: + def __call__( + self, line: Line, _features: Collection[Feature], _mode: Mode + ) -> Iterator[Line]: """ StringTransformer instances have a call signature that mirrors that of the Transformer type. @@ -327,8 +331,8 @@ class CustomSplitMapMixin: Returns: * A list of the custom splits that are mapped to @string, if any - exist. - OR + exist. + OR * [], otherwise. Side Effects: @@ -357,14 +361,14 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): Requirements: (A) The line contains adjacent strings such that ALL of the validation checks listed in StringMerger._validate_msg(...)'s docstring pass. - OR + OR (B) The line contains a string which uses line continuation backslashes. Transformations: Depending on which of the two requirements above where met, either: (A) The string group associated with the target string is merged. - OR + OR (B) All line-continuation backslashes are removed from the target string. Collaborations: @@ -938,6 +942,9 @@ class StringParenStripper(StringTransformer): LL[lpar_or_rpar_idx].remove() # Remove lpar. replace_child(LL[idx], string_leaf) new_line.append(string_leaf) + # replace comments + old_comments = new_line.comments.pop(id(LL[idx]), []) + new_line.comments.setdefault(id(string_leaf), []).extend(old_comments) else: LL[lpar_or_rpar_idx].remove() # This is a rpar. @@ -957,17 +964,20 @@ class BaseStringSplitter(StringTransformer): Requirements: * The target string value is responsible for the line going over the - line length limit. It follows that after all of black's other line - split methods have been exhausted, this line (or one of the resulting - lines after all line splits are performed) would still be over the - line_length limit unless we split this string. - AND + line length limit. It follows that after all of black's other line + split methods have been exhausted, this line (or one of the resulting + lines after all line splits are performed) would still be over the + line_length limit unless we split this string. + AND + * The target string is NOT a "pointless" string (i.e. a string that has - no parent or siblings). - AND + no parent or siblings). + AND + * The target string is not followed by an inline comment that appears - to be a pragma. - AND + to be a pragma. + AND + * The target string is not a multiline (i.e. triple-quote) string. """ @@ -1019,7 +1029,7 @@ class BaseStringSplitter(StringTransformer): Returns: * Ok(None), if ALL of the requirements are met. - OR + OR * Err(CannotTransform), if ANY of the requirements are NOT met. """ LL = line.leaves @@ -1160,7 +1170,7 @@ class BaseStringSplitter(StringTransformer): # WMA4 the length of the inline comment. offset += len(comment_leaf.value) - max_string_length = self.line_length - offset + max_string_length = count_chars_in_width(str(line), self.line_length - offset) return max_string_length @staticmethod @@ -1178,19 +1188,33 @@ class BaseStringSplitter(StringTransformer): if LL[0].type != token.STRING: return None - # If the string is surrounded by commas (or is the first/last child)... - prev_sibling = LL[0].prev_sibling - next_sibling = LL[0].next_sibling - if not prev_sibling and not next_sibling and parent_type(LL[0]) == syms.atom: - # If it's an atom string, we need to check the parent atom's siblings. - parent = LL[0].parent - assert parent is not None # For type checkers. - prev_sibling = parent.prev_sibling - next_sibling = parent.next_sibling - if (not prev_sibling or prev_sibling.type == token.COMMA) and ( - not next_sibling or next_sibling.type == token.COMMA + matching_nodes = [ + syms.listmaker, + syms.dictsetmaker, + syms.testlist_gexp, + ] + # If the string is an immediate child of a list/set/tuple literal... + if ( + parent_type(LL[0]) in matching_nodes + or parent_type(LL[0].parent) in matching_nodes ): - return 0 + # And the string is surrounded by commas (or is the first/last child)... + prev_sibling = LL[0].prev_sibling + next_sibling = LL[0].next_sibling + if ( + not prev_sibling + and not next_sibling + and parent_type(LL[0]) == syms.atom + ): + # If it's an atom string, we need to check the parent atom's siblings. + parent = LL[0].parent + assert parent is not None # For type checkers. + prev_sibling = parent.prev_sibling + next_sibling = parent.next_sibling + if (not prev_sibling or prev_sibling.type == token.COMMA) and ( + not next_sibling or next_sibling.type == token.COMMA + ): + return 0 return None @@ -1277,9 +1301,9 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): Requirements: * The line consists ONLY of a single string (possibly prefixed by a - string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE - a trailing comma. - AND + string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE + a trailing comma. + AND * All of the requirements listed in BaseStringSplitter's docstring. Transformations: @@ -1415,11 +1439,13 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA ) - def max_last_string() -> int: + def max_last_string_column() -> int: """ Returns: - The max allowed length of the string value used for the last - line we will construct. + The max allowed width of the string value used for the last + line we will construct. Note that this value means the width + rather than the number of characters (e.g., many East Asian + characters expand to two columns). """ result = self.line_length result -= line.depth * 4 @@ -1427,14 +1453,14 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): result -= string_op_leaves_length return result - # --- Calculate Max Break Index (for string value) + # --- Calculate Max Break Width (for string value) # We start with the line length limit - max_break_idx = self.line_length + max_break_width = self.line_length # The last index of a string of length N is N-1. - max_break_idx -= 1 + max_break_width -= 1 # Leading whitespace is not present in the string value (e.g. Leaf.value). - max_break_idx -= line.depth * 4 - if max_break_idx < 0: + max_break_width -= line.depth * 4 + if max_break_width < 0: yield TErr( f"Unable to split {LL[string_idx].value} at such high of a line depth:" f" {line.depth}" @@ -1447,7 +1473,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): # line limit. use_custom_breakpoints = bool( custom_splits - and all(csplit.break_idx <= max_break_idx for csplit in custom_splits) + and all(csplit.break_idx <= max_break_width for csplit in custom_splits) ) # Temporary storage for the remaining chunk of the string line that @@ -1463,7 +1489,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): if use_custom_breakpoints: return len(custom_splits) > 1 else: - return len(rest_value) > max_last_string() + return str_width(rest_value) > max_last_string_column() string_line_results: List[Ok[Line]] = [] while more_splits_should_be_made(): @@ -1473,7 +1499,10 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): break_idx = csplit.break_idx else: # Algorithmic Split (automatic) - max_bidx = max_break_idx - string_op_leaves_length + max_bidx = ( + count_chars_in_width(rest_value, max_break_width) + - string_op_leaves_length + ) maybe_break_idx = self._get_break_idx(rest_value, max_bidx) if maybe_break_idx is None: # If we are unable to algorithmically determine a good split @@ -1570,7 +1599,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): # Try to fit them all on the same line with the last substring... if ( - len(temp_value) <= max_last_string() + str_width(temp_value) <= max_last_string_column() or LL[string_idx + 1].type == token.COMMA ): last_line.append(rest_leaf) @@ -1690,6 +1719,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): section of this classes' docstring would be be met by returning @i. """ is_space = string[i] == " " + is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS is_not_escaped = True j = i - 1 @@ -1702,7 +1732,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): and len(string[:i]) >= self.MIN_SUBSTR_SIZE ) return ( - is_space + (is_space or is_split_safe) and is_not_escaped and is_big_enough and not breaks_unsplittable_expression(i) @@ -1780,25 +1810,26 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): addition to the requirements listed below: * The line is a return/yield statement, which returns/yields a string. - OR + OR * The line is part of a ternary expression (e.g. `x = y if cond else - z`) such that the line starts with `else `, where is - some string. - OR + z`) such that the line starts with `else `, where is + some string. + OR * The line is an assert statement, which ends with a string. - OR + OR * The line is an assignment statement (e.g. `x = ` or `x += - `) such that the variable is being assigned the value of some - string. - OR + `) such that the variable is being assigned the value of some + string. + OR * The line is a dictionary key assignment where some valid key is being - assigned the value of some string. - OR + assigned the value of some string. + OR * The line is an lambda expression and the value is a string. - OR + OR * The line starts with an "atom" string that prefers to be wrapped in - parens. It's preferred to be wrapped when the string is surrounded by - commas (or is the first/last child). + parens. It's preferred to be wrapped when it's is an immediate child of + a list/set/tuple literal, AND the string is surrounded by commas (or is + the first/last child). Transformations: The chosen string is wrapped in parentheses and then split at the LPAR. @@ -1847,11 +1878,13 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): if string_idx is not None: string_value = line.leaves[string_idx].value - # If the string has no spaces... - if " " not in string_value: + # If the string has neither spaces nor East Asian stops... + if not any( + char == " " or char in SPLIT_SAFE_CHARS for char in string_value + ): # And will still violate the line length limit when split... - max_string_length = self.line_length - ((line.depth + 1) * 4) - if len(string_value) > max_string_length: + max_string_width = self.line_length - ((line.depth + 1) * 4) + if str_width(string_value) > max_string_width: # And has no associated custom splits... if not self.has_custom_splits(string_value): # Then we should NOT put this string on its own line. @@ -2242,7 +2275,7 @@ class StringParser: Returns: The index directly after the last leaf which is apart of the string trailer, if a "trailer" exists. - OR + OR @string_idx + 1, if no string "trailer" exists. """ assert leaves[string_idx].type == token.STRING @@ -2256,11 +2289,11 @@ class StringParser: """ Pre-conditions: * On the first call to this function, @leaf MUST be the leaf that - was directly after the string leaf in question (e.g. if our target - string is `line.leaves[i]` then the first call to this method must - be `line.leaves[i + 1]`). + was directly after the string leaf in question (e.g. if our target + string is `line.leaves[i]` then the first call to this method must + be `line.leaves[i + 1]`). * On the next call to this function, the leaf parameter passed in - MUST be the leaf directly following @leaf. + MUST be the leaf directly following @leaf. Returns: True iff @leaf is apart of the string's trailer.