All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 Simple formatting on strings. Further string formatting code is in trans.py.
7 from typing import List, Pattern
10 STRING_PREFIX_CHARS = "furbFURB" # All possible string prefix characters.
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single `sub` pass cannot rewrite matches that overlap one another, so
    string normalization runs the substitution a second time over the result
    of the first pass.

    Args:
        regex: Compiled pattern to search for.
        replacement: Replacement template handed to `regex.sub`.
        original: Text to rewrite.

    Returns:
        The text after both substitution passes.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once any leading prefix characters (those listed in
        STRING_PREFIX_CHARS) are stripped, starts with three quotation
        characters.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
def lines_with_leading_tabs_expanded(s: str) -> List[str]:
    """
    Splits string into lines and expands only the tabs found in each line's
    leading whitespace.

    A line is rewritten only when its leading whitespace contains at least one
    tab and is followed by a non-whitespace character. Tabs that appear after
    the first non-whitespace character, and whitespace-only lines, are
    returned untouched.

    Args:
        s: The text to split. Splitting uses `str.splitlines`, so line endings
            are not part of the returned lines.

    Returns:
        The lines of @s, with the tabs in each indentation expanded by
        `str.expandtabs`.
    """
    lines = []
    for line in s.splitlines():
        # Locate the first non-whitespace character with `lstrip` instead of a
        # regex like r"\s*\t+\s*(\S)": the adjacent whitespace quantifiers of
        # that pattern backtrack polynomially on a long run of tabs that is
        # not followed by any text.
        stripped = line.lstrip()
        first_non_whitespace_idx = len(line) - len(stripped)
        leading = line[:first_non_whitespace_idx]
        if stripped and "\t" in leading:
            lines.append(leading.expandtabs() + stripped)
        else:
            # No tab in the indentation, or nothing but whitespace: keep as is.
            lines.append(line)
    return lines
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent the lines of `docstring` so that they start with `prefix`.

    The first line is stripped on both sides and never receives `prefix`.
    For the remaining lines the smallest indentation among the non-blank ones
    is removed, trailing whitespace is dropped and `prefix` is prepended.
    Blank lines become empty strings, except the very last line, which still
    receives `prefix`. When no line after the first has any content, only the
    stripped first line is returned.

    Args:
        docstring: The docstring text; an empty value is returned unchanged.
        prefix: The indentation to put in front of every continuation line.

    Returns:
        The re-indented text, joined with "\\n".
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return docstring

    lines = lines_with_leading_tabs_expanded(docstring)
    continuation = lines[1:]

    # Smallest indentation of the non-blank continuation lines; the first
    # line does not take part.
    margins = [
        len(line) - len(line.lstrip()) for line in continuation if line.lstrip()
    ]
    indent = min(margins, default=sys.maxsize)

    # The first line is special: it is stripped rather than dedented.
    trimmed = [lines[0].strip()]
    if indent == sys.maxsize:
        # Nothing but blank lines after the first one.
        return "\n".join(trimmed)

    final_idx = len(continuation) - 1
    for idx, line in enumerate(continuation):
        dedented = line[indent:].rstrip()
        # The last line keeps the prefix even when it is blank.
        keep_prefix = bool(dedented) or idx == final_idx
        trimmed.append(prefix + dedented if keep_prefix else "")
    return "\n".join(trimmed)
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        @string's prefix (e.g. '', 'r', 'f', or 'rf'), i.e. the leading run of
        characters that all belong to STRING_PREFIX_CHARS.
    """
    assert_is_leaf_string(string)

    # Advance past every leading prefix character; the pre-condition above
    # checks that a quote character follows the prefix.
    prefix_end = 0
    while string[prefix_end] in STRING_PREFIX_CHARS:
        prefix_end += 1

    return string[:prefix_end]
def assert_is_leaf_string(string: str) -> None:
    """
    Verifies that @string looks like the `leaf.value` of a Leaf whose
    `leaf.type` is `token.STRING`. The exact conditions that are checked are
    spelled out below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if any of the pre-conditions above does not hold.
    """
    # Position of the earliest quote character of either kind, or -1 when the
    # string contains no quote at all.
    quote_positions = [
        idx for idx in (string.find('"'), string.find("'")) if idx != -1
    ]
    quote_idx = min(quote_positions, default=-1)

    # The opening quote must exist and must not be the last character.
    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in (
        "'",
        '"',
    ), f"{string!r} is missing an ending quote character (' or \")."

    # Everything in front of the opening quote has to be a prefix character.
    prefix_chars = set(string[:quote_idx])
    allowed_chars = set(STRING_PREFIX_CHARS)
    assert prefix_chars.issubset(
        allowed_chars
    ), f"{prefix_chars} is NOT a subset of {allowed_chars}."
def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str:
    """Lowercase the F, B and U characters of a string literal's prefix.

    An R in the prefix is left in whatever case it was written.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Args:
        s: Source text of the string literal, prefix included.
        remove_u_prefix: When true, drop every u/U from the prefix.

    Returns:
        @s with its prefix normalized; the rest of the literal is untouched.
    """
    # Group 1 is the (possibly empty) prefix, group 2 everything after it.
    match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", s, re.DOTALL)
    assert match is not None, f"failed to match string {s!r}"
    orig_prefix, remainder = match.groups()
    new_prefix = orig_prefix.translate(str.maketrans("FBU", "fbu"))
    if remove_u_prefix:
        new_prefix = new_prefix.replace("u", "")
    return new_prefix + remainder
def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Backslashes are added or removed as needed when the quote character is
    switched. Strings nested inside f-string replacement fields are not
    parsed or fixed.

    Args:
        s: Source text of a string literal, including any prefix characters.

    Returns:
        The literal rewritten with the preferred quotes, or a literal using
        the original quotes when switching is impossible or would not reduce
        escaping.
    """
    value = s.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        # Already using the preferred triple quotes.
        return s

    if value[:3] == "'''":
        orig_quote, new_quote = "'''", '"""'
    elif value[0] == '"':
        orig_quote, new_quote = '"', "'"
    else:
        orig_quote, new_quote = "'", '"'

    first_quote_pos = s.find(orig_quote)
    if first_quote_pos == -1:
        return s  # There's an internal error

    prefix = s[:first_quote_pos]
    folded_prefix = prefix.casefold()
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]

    if "r" in folded_prefix:
        if unescaped_new_quote.search(body):
            # The raw string holds at least one unescaped new_quote, which
            # makes the conversion impossible.
            return s

        # Backslashes are neither introduced nor removed in raw strings.
        new_body = body
    else:
        # Drop escapes in front of new_quote that were never needed.
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # From here on, the string without the unnecessary escapes counts
            # as the original.
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

    if "f" in folded_prefix:
        matches = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        if any("\\" in str(m) for m in matches):
            # Do not introduce backslashes in interpolated expressions
            return s

    if new_quote == '"""' and new_body[-1:] == '"':
        # A body ending in " would sit directly against the closing """, so
        # its last quote is escaped.
        new_body = new_body[:-1] + '\\"'

    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    more_escaping = new_escape_count > orig_escape_count
    tie_with_double_quotes = (
        new_escape_count == orig_escape_count and orig_quote == '"'
    )
    if more_escaping or tie_with_double_quotes:
        # Do not introduce more escaping, and prefer double quotes on a tie.
        return s

    return f"{prefix}{new_quote}{new_body}{new_quote}"