git.madduck.net Git - etc/vim.git/blob - src/black/strings.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add @zzzeek testimonial to README and docs
[etc/vim.git] / src / black / strings.py
1 """
2 Simple formatting on strings. Further string formatting code is in trans.py.
3 """
4
5 import regex as re
6 import sys
7 from typing import List, Pattern
8
9
# The letters f, u, r and b in both lower and upper case. The helpers in this
# module strip these from the left of a string token to reach its opening quote.
STRING_PREFIX_CHARS = "furbFURB"  # All possible string prefix characters.
11
12
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single substitution pass cannot rewrite matches that overlap one
    another, so string normalization runs a second pass over the result of
    the first one.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
20
21
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once any leading prefix characters are stripped,
        begins with three double quotes or three single quotes.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
29
30
def lines_with_leading_tabs_expanded(s: str) -> List[str]:
    """
    Break @s into lines and expand tabs only inside each line's leading
    whitespace (via str.expandtabs with its default tab size). Tabs located
    after the first non-whitespace character are left as they are.
    """
    expanded: List[str] = []
    for raw_line in s.splitlines():
        # Only matches when the leading whitespace contains at least one tab
        # and is followed by a non-whitespace character (captured as group 1).
        found = re.match(r"\s*\t+\s*(\S)", raw_line)
        if not found:
            # No tab in the indentation, or the line is whitespace-only.
            expanded.append(raw_line)
            continue

        text_start = found.start(1)
        indentation = raw_line[:text_start]
        remainder = raw_line[text_start:]
        expanded.append(indentation.expandtabs() + remainder)
    return expanded
51
52
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent @docstring so its continuation lines start with @prefix.

    The first line is stripped of surrounding whitespace. From every later
    line the smallest indentation found among the non-blank later lines is
    removed, along with trailing whitespace; the line is then prepended with
    @prefix. Blank lines stay empty, except for the very last line, which
    always receives @prefix. An empty @docstring yields an empty string.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return ""

    lines = lines_with_leading_tabs_expanded(docstring)
    head, tail = lines[0], lines[1:]

    # Indentation widths of the non-blank continuation lines (the first line
    # does not take part in the computation).
    indents = [len(ln) - len(ln.lstrip()) for ln in tail if ln.lstrip()]

    result = [head.strip()]
    if indents:
        margin = min(indents)
        final_idx = len(tail) - 1
        for idx, ln in enumerate(tail):
            text = ln[margin:].rstrip()
            keep_prefix = bool(text) or idx == final_idx
            result.append(prefix + text if keep_prefix else "")
    return "\n".join(result)
75
76
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The lowercased run of prefix characters that @string starts with
        (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Walk forward until the first character that is not a prefix character;
    # everything before that position is the prefix.
    prefix_end = 0
    while string[prefix_end] in STRING_PREFIX_CHARS:
        prefix_end += 1

    return string[:prefix_end].lower()
94
95
def assert_is_leaf_string(string: str) -> None:
    """
    Verifies that @string is shaped like the `leaf.value` of a Leaf whose
    `leaf.type == token.STRING`. The exact conditions checked are listed
    below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 when
    # @string contains no quote at all.
    found = [idx for idx in (string.find('"'), string.find("'")) if idx != -1]
    quote_idx = min(found) if found else -1

    # The opening quote must exist and must not be the final character.
    assert (
        quote_idx >= 0 and quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string.endswith(
        ("'", '"')
    ), f"{string!r} is missing an ending quote character (' or \")."
    # Everything before the opening quote must be made of prefix characters.
    assert set(string[:quote_idx]) <= set(
        STRING_PREFIX_CHARS
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
129
130
def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str:
    """Lowercase the F, B and U characters of the string prefix.

    An R in the prefix is kept exactly as written. If remove_u_prefix is
    given, any u prefix is additionally dropped from the string. The part of
    @s after the prefix is returned unchanged.
    """
    # Split @s into its leading run of prefix characters and the rest.
    rest = s.lstrip(STRING_PREFIX_CHARS)
    prefix = s[: len(s) - len(rest)]

    for upper in ("F", "B", "U"):
        prefix = prefix.replace(upper, upper.lower())
    if remove_u_prefix:
        prefix = prefix.replace("u", "")
    return prefix + rest
143
144
def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings.

    Args:
        s: the text of a string literal, made of an optional run of prefix
            characters, an opening quote, the body and a closing quote.

    Returns:
        The literal rewritten with the other quote style when that does not
        increase the number of backslashes in the body; otherwise @s. On the
        non-raw path @s may first have had unnecessary escapes of the other
        quote character removed, and that cleaned-up form is what is
        returned when the quotes are not swapped.
    """
    value = s.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        # Already uses the preferred triple quotes.
        return s

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        # Double-quoted: the only possible change is towards single quotes,
        # which is taken only if it strictly reduces escaping (see the end).
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = s.find(orig_quote)
    if first_quote_pos == -1:
        return s  # There's an internal error

    prefix = s[:first_quote_pos]
    # new_quote preceded by an even number (possibly zero) of backslashes;
    # group 1 is the preceding character plus those backslash pairs.
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # new_quote / orig_quote preceded by an odd number of backslashes;
    # group 1 is the preceding character, group 2 the backslash pairs that
    # come after the single escaping backslash.
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    # Text between the opening and closing quotes.
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return s

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Drop the escape in front of each escaped orig_quote, then add one in
        # front of each unescaped new_quote.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Collect the text found between single braces of the converted body.
        matches = re.findall(
            r"""
            (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:[^}]|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return s

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in " would run into the closing """, so
        # that final quote is escaped.
        new_body = new_body[:-1] + '\\"'
    # Compare how many backslashes each variant of the body contains.
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return s  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"