X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/3feff21eca0eee4b9fe72d12b506ac273cb5bcd0..3decbd6db9f120e8d7c8fa86b5b2f64f7861da0c:/src/black/trans.py diff --git a/src/black/trans.py b/src/black/trans.py index ec07f5e..1e28ed0 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -32,7 +32,7 @@ from mypy_extensions import trait from black.comments import contains_pragma_comment from black.lines import Line, append_leaves -from black.mode import Feature +from black.mode import Feature, Mode from black.nodes import ( CLOSING_BRACKETS, OPENING_BRACKETS, @@ -48,9 +48,11 @@ from black.nodes import ( from black.rusty import Err, Ok, Result from black.strings import ( assert_is_leaf_string, + count_chars_in_width, get_string_prefix, has_triple_quotes, normalize_string_quotes, + str_width, ) from blib2to3.pgen2 import token from blib2to3.pytree import Leaf, Node @@ -63,7 +65,7 @@ class CannotTransform(Exception): # types T = TypeVar("T") LN = Union[Leaf, Node] -Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]] +Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]] Index = int NodeType = int ParserState = int @@ -71,6 +73,8 @@ StringID = int TResult = Result[T, CannotTransform] # (T)ransform Result TMatchResult = TResult[List[Index]] +SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"]) # East Asian stops + def TErr(err_msg: str) -> Err[CannotTransform]: """(T)ransform Err @@ -81,7 +85,9 @@ def TErr(err_msg: str) -> Err[CannotTransform]: return Err(cant_transform) -def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]: +def hug_power_op( + line: Line, features: Collection[Feature], mode: Mode +) -> Iterator[Line]: """A transformer which normalizes spacing around power operators.""" # Performance optimization to avoid unnecessary Leaf clones and other ops. @@ -228,7 +234,9 @@ class StringTransformer(ABC): yield an CannotTransform after that point.) """ - def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]: + def __call__( + self, line: Line, _features: Collection[Feature], _mode: Mode + ) -> Iterator[Line]: """ StringTransformer instances have a call signature that mirrors that of the Transformer type. @@ -572,6 +580,12 @@ class StringMerger(StringTransformer, CustomSplitMapMixin): characters have been escaped. """ assert_is_leaf_string(string) + if "f" in string_prefix: + string = _toggle_fexpr_quotes(string, QUOTE) + # After quotes toggling, quotes in expressions won't be escaped + # because quotes can't be reused in f-strings. So we can simply + # let the escaping logic below run without knowing f-string + # expressions. RE_EVEN_BACKSLASHES = r"(?:(? bool: return any(iter_fexpr_spans(s)) +def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str: + """ + Toggles quotes used in f-string expressions that are `old_quote`. + + f-string expressions can't contain backslashes, so we need to toggle the + quotes if the f-string itself will end up using the same quote. We can + simply toggle without escaping because, quotes can't be reused in f-string + expressions. They will fail to parse. + + NOTE: If PEP 701 is accepted, above statement will no longer be true. + Though if quotes can be reused, we can simply reuse them without updates or + escaping, once Black figures out how to parse the new grammar. + """ + new_quote = "'" if old_quote == '"' else '"' + parts = [] + previous_index = 0 + for start, end in iter_fexpr_spans(fstring): + parts.append(fstring[previous_index:start]) + parts.append(fstring[start:end].replace(old_quote, new_quote)) + previous_index = end + parts.append(fstring[previous_index:]) + return "".join(parts) + + class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits "atom" strings (i.e. strings which exist on @@ -1385,11 +1437,13 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA ) - def max_last_string() -> int: + def max_last_string_column() -> int: """ Returns: - The max allowed length of the string value used for the last - line we will construct. + The max allowed width of the string value used for the last + line we will construct. Note that this value means the width + rather than the number of characters (e.g., many East Asian + characters expand to two columns). """ result = self.line_length result -= line.depth * 4 @@ -1397,14 +1451,14 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): result -= string_op_leaves_length return result - # --- Calculate Max Break Index (for string value) + # --- Calculate Max Break Width (for string value) # We start with the line length limit - max_break_idx = self.line_length + max_break_width = self.line_length # The last index of a string of length N is N-1. - max_break_idx -= 1 + max_break_width -= 1 # Leading whitespace is not present in the string value (e.g. Leaf.value). - max_break_idx -= line.depth * 4 - if max_break_idx < 0: + max_break_width -= line.depth * 4 + if max_break_width < 0: yield TErr( f"Unable to split {LL[string_idx].value} at such high of a line depth:" f" {line.depth}" @@ -1417,7 +1471,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): # line limit. use_custom_breakpoints = bool( custom_splits - and all(csplit.break_idx <= max_break_idx for csplit in custom_splits) + and all(csplit.break_idx <= max_break_width for csplit in custom_splits) ) # Temporary storage for the remaining chunk of the string line that @@ -1433,7 +1487,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): if use_custom_breakpoints: return len(custom_splits) > 1 else: - return len(rest_value) > max_last_string() + return str_width(rest_value) > max_last_string_column() string_line_results: List[Ok[Line]] = [] while more_splits_should_be_made(): @@ -1443,7 +1497,10 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): break_idx = csplit.break_idx else: # Algorithmic Split (automatic) - max_bidx = max_break_idx - string_op_leaves_length + max_bidx = ( + count_chars_in_width(rest_value, max_break_width) + - string_op_leaves_length + ) maybe_break_idx = self._get_break_idx(rest_value, max_bidx) if maybe_break_idx is None: # If we are unable to algorithmically determine a good split @@ -1540,7 +1597,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): # Try to fit them all on the same line with the last substring... if ( - len(temp_value) <= max_last_string() + str_width(temp_value) <= max_last_string_column() or LL[string_idx + 1].type == token.COMMA ): last_line.append(rest_leaf) @@ -1660,6 +1717,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): section of this classes' docstring would be be met by returning @i. """ is_space = string[i] == " " + is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS is_not_escaped = True j = i - 1 @@ -1672,7 +1730,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): and len(string[:i]) >= self.MIN_SUBSTR_SIZE ) return ( - is_space + (is_space or is_split_safe) and is_not_escaped and is_big_enough and not breaks_unsplittable_expression(i) @@ -1767,8 +1825,9 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): * The line is an lambda expression and the value is a string. OR * The line starts with an "atom" string that prefers to be wrapped in - parens. It's preferred to be wrapped when the string is surrounded by - commas (or is the first/last child). + parens. It's preferred to be wrapped when it's is an immediate child of + a list/set/tuple literal, AND the string is surrounded by commas (or is + the first/last child). Transformations: The chosen string is wrapped in parentheses and then split at the LPAR. @@ -1817,11 +1876,13 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): if string_idx is not None: string_value = line.leaves[string_idx].value - # If the string has no spaces... - if " " not in string_value: + # If the string has neither spaces nor East Asian stops... + if not any( + char == " " or char in SPLIT_SAFE_CHARS for char in string_value + ): # And will still violate the line length limit when split... - max_string_length = self.line_length - ((line.depth + 1) * 4) - if len(string_value) > max_string_length: + max_string_width = self.line_length - ((line.depth + 1) * 4) + if str_width(string_value) > max_string_width: # And has no associated custom splits... if not self.has_custom_splits(string_value): # Then we should NOT put this string on its own line.