compared to their non-async version. (#3609)
- `with` statements that contain two context managers will be consistently wrapped in
parentheses (#3589)
+- Let string splitters respect [East Asian Width](https://www.unicode.org/reports/tr11/)
+ (#3445)
+- Long string literals can now be split after East Asian commas and periods (`、` U+3001
+ IDEOGRAPHIC COMMA, `。` U+3002 IDEOGRAPHIC FULL STOP, & `,` U+FF0C FULLWIDTH COMMA)
+ in addition to before spaces (#3445)
- For stubs, enforce one blank line after a nested class with a body other than just
`...` (#3564)
--- /dev/null
+"""Generates a width table for Unicode characters.
+
+This script generates a width table for Unicode characters that are not
+narrow (width 1). The table is written to src/black/_width_table.py (note
+that although this file is generated, it is checked into Git) and is used
+by the char_width() function in src/black/strings.py.
+
+You should run this script when you upgrade wcwidth, which is expected to
+happen when a new Unicode version is released. The generated table contains
+the version of wcwidth and Unicode that it was generated for.
+
+In order to run this script, you need to install the latest version of wcwidth.
+You can do this by running:
+
+ pip install -U wcwidth
+
+"""
+import sys
+from os.path import basename, dirname, join
+from typing import Iterable, Tuple
+
+import wcwidth
+
+
def make_width_table() -> Iterable[Tuple[int, int, int]]:
    """Yield ``(first, last, width)`` runs covering every wide codepoint.

    Each codepoint from 0 through ``sys.maxunicode`` is measured with
    ``wcwidth.wcwidth()``.  Consecutive codepoints that share the same width
    greater than 1 are merged into a single run whose bounds are inclusive,
    and the runs are yielded in ascending codepoint order.
    """
    run_first = -1  # stays negative until the first wide codepoint is seen
    run_last = -1
    run_width = -2
    for cp in range(sys.maxunicode + 1):
        measured = wcwidth.wcwidth(chr(cp))
        # Only wide characters are recorded.  Narrow characters and zero-width
        # characters are left out so that they are treated as single-width,
        # which is consistent with the heuristics built on top of
        # str.isascii() in the str_width() function in strings.py.
        if measured > 1:
            if run_first < 0:
                run_first, run_width = cp, measured
            elif measured != run_width or cp != run_last + 1:
                # The width changed or a gap was skipped: close the current
                # run and open a new one at this codepoint.
                yield run_first, run_last, run_width
                run_first, run_width = cp, measured
            run_last = cp
    if run_first >= 0:
        # Flush the run that was still open when the scan ended.
        yield run_first, run_last, run_width
+
+
def main() -> None:
    """Regenerate ``src/black/_width_table.py`` from the installed wcwidth.

    The output path is resolved relative to this script's own location.  The
    generated module starts with a header recording this script's file name,
    the wcwidth version and the last Unicode version listed by
    ``wcwidth.list_versions()``, followed by the ``WIDTH_TABLE`` list with one
    tuple from ``make_width_table()`` per line.
    """
    table_path = join(dirname(__file__), "..", "src", "black", "_width_table.py")
    # Pin the encoding so the generated file does not depend on the locale of
    # whoever runs the script.
    with open(table_path, "w", encoding="utf-8") as f:
        f.write(
            f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
import sys
from typing import List, Tuple

if sys.version_info < (3, 8):
 from typing_extensions import Final
else:
 from typing import Final

WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
"""
        )
        for triple in make_width_table():
            f.write(f" {triple!r},\n")
        f.write("]\n")
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+# Generated by make_width_table.py
+# wcwidth 0.2.6
+# Unicode 15.0.0
+import sys
+from typing import List, Tuple
+
+if sys.version_info < (3, 8):
+ from typing_extensions import Final
+else:
+ from typing import Final
+
# Each entry is (first codepoint, last codepoint, width) with inclusive bounds,
# in ascending codepoint order.  Only wide (width 2) ranges are listed, which
# is what make_width_table() emits now that it skips every character whose
# width is <= 1; char_width() reports 1 for any codepoint not covered here.
WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
    (4352, 4447, 2),
    (8986, 8987, 2),
    (9001, 9002, 2),
    (9193, 9196, 2),
    (9200, 9200, 2),
    (9203, 9203, 2),
    (9725, 9726, 2),
    (9748, 9749, 2),
    (9800, 9811, 2),
    (9855, 9855, 2),
    (9875, 9875, 2),
    (9889, 9889, 2),
    (9898, 9899, 2),
    (9917, 9918, 2),
    (9924, 9925, 2),
    (9934, 9934, 2),
    (9940, 9940, 2),
    (9962, 9962, 2),
    (9970, 9971, 2),
    (9973, 9973, 2),
    (9978, 9978, 2),
    (9981, 9981, 2),
    (9989, 9989, 2),
    (9994, 9995, 2),
    (10024, 10024, 2),
    (10060, 10060, 2),
    (10062, 10062, 2),
    (10067, 10069, 2),
    (10071, 10071, 2),
    (10133, 10135, 2),
    (10160, 10160, 2),
    (10175, 10175, 2),
    (11035, 11036, 2),
    (11088, 11088, 2),
    (11093, 11093, 2),
    (11904, 11929, 2),
    (11931, 12019, 2),
    (12032, 12245, 2),
    (12272, 12283, 2),
    (12288, 12329, 2),
    (12334, 12350, 2),
    (12353, 12438, 2),
    (12443, 12543, 2),
    (12549, 12591, 2),
    (12593, 12686, 2),
    (12688, 12771, 2),
    (12784, 12830, 2),
    (12832, 12871, 2),
    (12880, 19903, 2),
    (19968, 42124, 2),
    (42128, 42182, 2),
    (43360, 43388, 2),
    (44032, 55203, 2),
    (63744, 64255, 2),
    (65040, 65049, 2),
    (65072, 65106, 2),
    (65108, 65126, 2),
    (65128, 65131, 2),
    (65281, 65376, 2),
    (65504, 65510, 2),
    (94176, 94179, 2),
    (94192, 94193, 2),
    (94208, 100343, 2),
    (100352, 101589, 2),
    (101632, 101640, 2),
    (110576, 110579, 2),
    (110581, 110587, 2),
    (110589, 110590, 2),
    (110592, 110882, 2),
    (110898, 110898, 2),
    (110928, 110930, 2),
    (110933, 110933, 2),
    (110948, 110951, 2),
    (110960, 111355, 2),
    (126980, 126980, 2),
    (127183, 127183, 2),
    (127374, 127374, 2),
    (127377, 127386, 2),
    (127488, 127490, 2),
    (127504, 127547, 2),
    (127552, 127560, 2),
    (127568, 127569, 2),
    (127584, 127589, 2),
    (127744, 127776, 2),
    (127789, 127797, 2),
    (127799, 127868, 2),
    (127870, 127891, 2),
    (127904, 127946, 2),
    (127951, 127955, 2),
    (127968, 127984, 2),
    (127988, 127988, 2),
    (127992, 128062, 2),
    (128064, 128064, 2),
    (128066, 128252, 2),
    (128255, 128317, 2),
    (128331, 128334, 2),
    (128336, 128359, 2),
    (128378, 128378, 2),
    (128405, 128406, 2),
    (128420, 128420, 2),
    (128507, 128591, 2),
    (128640, 128709, 2),
    (128716, 128716, 2),
    (128720, 128722, 2),
    (128725, 128727, 2),
    (128732, 128735, 2),
    (128747, 128748, 2),
    (128756, 128764, 2),
    (128992, 129003, 2),
    (129008, 129008, 2),
    (129292, 129338, 2),
    (129340, 129349, 2),
    (129351, 129535, 2),
    (129648, 129660, 2),
    (129664, 129672, 2),
    (129680, 129725, 2),
    (129727, 129733, 2),
    (129742, 129755, 2),
    (129760, 129768, 2),
    (129776, 129784, 2),
    (131072, 196605, 2),
    (196608, 262141, 2),
]
syms,
whitespace,
)
+from black.strings import str_width
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node
if not line_str:
line_str = line_to_string(line)
+ width = str_width if mode.preview else len
+
if Preview.multiline_string_handling not in mode:
return (
- len(line_str) <= mode.line_length
+ width(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
- return len(line_str) <= mode.line_length
+ return width(line_str) <= mode.line_length
first, *_, last = line_str.split("\n")
- if len(first) > mode.line_length or len(last) > mode.line_length:
+ if width(first) > mode.line_length or width(last) > mode.line_length:
return False
# Traverse the AST to examine the context of the multiline string (MLS),
else:
from typing import Final
+from black._width_table import WIDTH_TABLE
STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters.
STRING_PREFIX_RE: Final = re.compile(
return back_slashes + "N{" + groups["N"].upper() + "}"
leaf.value = re.sub(UNICODE_ESCAPE_RE, replace, text)
+
+
@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return the width of a single character as it would be displayed in a
    terminal or editor (which respects Unicode East Asian Width).

    Full width characters are counted as 2, while half width characters are
    counted as 1.

    The lookup is a binary search over ``WIDTH_TABLE``, whose entries are
    ``(start_codepoint, end_codepoint, width)`` ranges with inclusive bounds
    (assumed to be sorted by codepoint).  A codepoint that no range covers
    has width 1, and a range recorded with a negative width is reported as 0.
    """
    table = WIDTH_TABLE
    codepoint = ord(char)
    lowest = 0
    highest = len(table) - 1
    # Bounding the loop by the search window, instead of looping forever and
    # breaking out, also lets an empty table fall through to the default
    # width rather than being indexed.
    while lowest <= highest:
        idx = (lowest + highest) // 2
        start_codepoint, end_codepoint, width = table[idx]
        if codepoint < start_codepoint:
            highest = idx - 1
        elif codepoint > end_codepoint:
            lowest = idx + 1
        else:
            return 0 if width < 0 else width
    return 1
+
+
def str_width(line_str: str) -> int:
    """Measure how many columns `line_str` takes up when displayed in a
    terminal or editor (which respects Unicode East Asian Width).

    This is useful, for example, to decide whether a string is too wide to
    display in a terminal or editor.
    """
    if not line_str.isascii():
        # At least one non-ASCII character: add up the per-character widths.
        return sum(char_width(ch) for ch in line_str)
    # Fast path: an ASCII-only line is measured by its length.
    return len(line_str)
+
+
def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Return how many leading characters of `line_str` fit in a terminal
    or editor that is `max_width` columns wide (which respects Unicode East
    Asian Width).

    The whole length of `line_str` is returned when every character fits.
    """
    used_columns = 0
    for index, ch in enumerate(line_str):
        used_columns += char_width(ch)
        if used_columns > max_width:
            # This character is the first one that overflows, so exactly
            # `index` characters precede it.
            return index
    return len(line_str)
from black.rusty import Err, Ok, Result
from black.strings import (
assert_is_leaf_string,
+ count_chars_in_width,
get_string_prefix,
has_triple_quotes,
normalize_string_quotes,
+ str_width,
)
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node
TResult = Result[T, CannotTransform] # (T)ransform Result
TMatchResult = TResult[List[Index]]
+SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"]) # East Asian stops
+
def TErr(err_msg: str) -> Err[CannotTransform]:
"""(T)ransform Err
# WMA4 the length of the inline comment.
offset += len(comment_leaf.value)
- max_string_length = self.line_length - offset
+ max_string_length = count_chars_in_width(str(line), self.line_length - offset)
return max_string_length
@staticmethod
is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
)
- def max_last_string() -> int:
+ def max_last_string_column() -> int:
"""
Returns:
- The max allowed length of the string value used for the last
- line we will construct.
+ The max allowed width of the string value used for the last
+ line we will construct. Note that this value means the width
+ rather than the number of characters (e.g., many East Asian
+ characters expand to two columns).
"""
result = self.line_length
result -= line.depth * 4
result -= string_op_leaves_length
return result
- # --- Calculate Max Break Index (for string value)
+ # --- Calculate Max Break Width (for string value)
# We start with the line length limit
- max_break_idx = self.line_length
+ max_break_width = self.line_length
# The last index of a string of length N is N-1.
- max_break_idx -= 1
+ max_break_width -= 1
# Leading whitespace is not present in the string value (e.g. Leaf.value).
- max_break_idx -= line.depth * 4
- if max_break_idx < 0:
+ max_break_width -= line.depth * 4
+ if max_break_width < 0:
yield TErr(
f"Unable to split {LL[string_idx].value} at such high of a line depth:"
f" {line.depth}"
# line limit.
use_custom_breakpoints = bool(
custom_splits
- and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
+ and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
)
# Temporary storage for the remaining chunk of the string line that
if use_custom_breakpoints:
return len(custom_splits) > 1
else:
- return len(rest_value) > max_last_string()
+ return str_width(rest_value) > max_last_string_column()
string_line_results: List[Ok[Line]] = []
while more_splits_should_be_made():
break_idx = csplit.break_idx
else:
# Algorithmic Split (automatic)
- max_bidx = max_break_idx - string_op_leaves_length
+ max_bidx = (
+ count_chars_in_width(rest_value, max_break_width)
+ - string_op_leaves_length
+ )
maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
if maybe_break_idx is None:
# If we are unable to algorithmically determine a good split
# Try to fit them all on the same line with the last substring...
if (
- len(temp_value) <= max_last_string()
+ str_width(temp_value) <= max_last_string_column()
or LL[string_idx + 1].type == token.COMMA
):
last_line.append(rest_leaf)
section of this class's docstring would be met by returning @i.
"""
is_space = string[i] == " "
+ is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
is_not_escaped = True
j = i - 1
and len(string[:i]) >= self.MIN_SUBSTR_SIZE
)
return (
- is_space
+ (is_space or is_split_safe)
and is_not_escaped
and is_big_enough
and not breaks_unsplittable_expression(i)
if string_idx is not None:
string_value = line.leaves[string_idx].value
- # If the string has no spaces...
- if " " not in string_value:
+ # If the string has neither spaces nor East Asian stops...
+ if not any(
+ char == " " or char in SPLIT_SAFE_CHARS for char in string_value
+ ):
# And will still violate the line length limit when split...
- max_string_length = self.line_length - ((line.depth + 1) * 4)
- if len(string_value) > max_string_length:
+ max_string_width = self.line_length - ((line.depth + 1) * 4)
+ if str_width(string_value) > max_string_width:
# And has no associated custom splits...
if not self.has_custom_splits(string_value):
# Then we should NOT put this string on its own line.
--- /dev/null
+# The following strings do not have many chars, but are long enough\r
+# when these are rendered in a monospace font (if the renderer respects\r
+# Unicode East Asian Width properties).\r
+hangul = '코드포인트 수는 적으나 실제 터미널이나 에디터에서 렌더링될 땐 너무 길어서 줄바꿈이 필요한 문자열'\r
+hanzi = '中文測試:代碼點數量少,但在真正的終端模擬器或編輯器中呈現時太長,因此需要換行的字符串。'\r
+japanese = 'コードポイントの数は少ないが、実際の端末エミュレータやエディタでレンダリングされる時は長すぎる為、改行が要る文字列'\r
+\r
+# output\r
+\r
+# The following strings do not have many chars, but are long enough\r
+# when these are rendered in a monospace font (if the renderer respects\r
+# Unicode East Asian Width properties).\r
+hangul = (\r
+ "코드포인트 수는 적으나 실제 터미널이나 에디터에서 렌더링될 땐 너무 길어서 줄바꿈이"\r
+ " 필요한 문자열"\r
+)\r
+hanzi = (\r
+ "中文測試:代碼點數量少,但在真正的終端模擬器或編輯器中呈現時太長,"\r
+ "因此需要換行的字符串。"\r
+)\r
+japanese = (\r
+ "コードポイントの数は少ないが、"\r
+ "実際の端末エミュレータやエディタでレンダリングされる時は長すぎる為、"\r
+ "改行が要る文字列"\r
+)\r