git.madduck.net Git - etc/vim.git/blob - scripts/make_width_table.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

061fdc8d95da77d16fbc1617e84a1ccf370fb229
[etc/vim.git] / scripts / make_width_table.py
1 """Generates a width table for Unicode characters.
2
3 This script generates a width table for Unicode characters that are not
4 narrow (width 1). The table is written to src/black/_width_table.py (note
5 that although this file is generated, it is checked into Git) and is used
6 by the char_width() function in src/black/strings.py.
7
8 You should run this script when you upgrade wcwidth, which is expected to
9 happen when a new Unicode version is released. The generated table contains
10 the version of wcwidth and Unicode that it was generated for.
11
12 In order to run this script, you need to install the latest version of wcwidth.
13 You can do this by running:
14
15     pip install -U wcwidth
16
17 """
18
19 import sys
20 from os.path import basename, dirname, join
21 from typing import Iterable, Tuple
22
23 import wcwidth
24
25
def make_width_table() -> Iterable[Tuple[int, int, int]]:
    """Yield ``(first, last, width)`` runs of wide Unicode code points.

    Every code point from 0 through ``sys.maxunicode`` is measured with
    ``wcwidth.wcwidth()``.  Code points whose reported width is greater than 1
    are coalesced into inclusive ranges of consecutive code points that share
    the same width; each range is yielded once it is complete, in ascending
    code point order.  Code points with a width of 1 or less never appear in
    the output.
    """
    # Negative sentinels mean "no run has been opened yet".
    first = last = -1
    run_width = -2
    for cp in range(sys.maxunicode + 1):
        cell_width = wcwidth.wcwidth(chr(cp))
        if cell_width > 1:
            # Only wide characters are recorded.  Narrow characters and
            # zero-width characters are left out so that consumers treat them
            # as single-width, which is consistent with the heuristics built
            # on top of str.isascii() in the str_width() function in
            # strings.py.
            run_open = first >= 0
            if run_open and cell_width == run_width and cp == last + 1:
                # Same width and directly adjacent: grow the current run.
                last = cp
                continue
            if run_open:
                # Width changed or a gap was skipped: flush the finished run.
                yield (first, last, run_width)
            first, last, run_width = cp, cp, cell_width
    # Flush the trailing run, if any wide character was seen at all.
    if first >= 0:
        yield (first, last, run_width)
48
49
def main() -> None:
    """Regenerate ``src/black/_width_table.py`` next to this script's parent.

    The output file is overwritten.  It starts with a comment header naming
    this script, the installed wcwidth version and the last entry reported by
    ``wcwidth.list_versions()``, followed by a ``WIDTH_TABLE`` list literal
    holding one ``(start, end, width)`` tuple per line as produced by
    ``make_width_table()``.
    """
    output_path = join(dirname(__file__), "..", "src", "black", "_width_table.py")
    with open(output_path, "w") as table_file:
        header = f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
from typing import Final, List, Tuple

WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
"""
        table_file.write(header)
        # One table row per range, streamed straight from the generator.
        table_file.writelines(
            f"    {triple!r},\n" for triple in make_width_table()
        )
        table_file.write("]\n")
63
64
# Regenerate the width table only when executed as a script, not on import.
if __name__ == "__main__":
    main()