git.madduck.net Git - etc/vim.git/blob - scripts/make_width_table.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

09aca9c34b56bba43241a6e6f277b6fc83352d0d
[etc/vim.git] / scripts / make_width_table.py
1 """Generates a width table for Unicode characters.
2
3 This script generates a width table for Unicode characters that are not
4 narrow (width 1). The table is written to src/black/_width_table.py (note
5 that although this file is generated, it is checked into Git) and is used
6 by the char_width() function in src/black/strings.py.
7
8 You should run this script when you upgrade wcwidth, which is expected to
9 happen when a new Unicode version is released. The generated table contains
10 the version of wcwidth and Unicode that it was generated for.
11
12 In order to run this script, you need to install the latest version of wcwidth.
13 You can do this by running:
14
15     pip install -U wcwidth
16
17 """
18 import sys
19 from os.path import basename, dirname, join
20 from typing import Iterable, Tuple
21
22 import wcwidth
23
24
def make_width_table() -> Iterable[Tuple[int, int, int]]:
    """Yield ``(first, last, width)`` runs of characters wider than one cell.

    Every code point from 0 through ``sys.maxunicode`` is measured with
    ``wcwidth.wcwidth()``.  Code points whose reported width is 1 or less are
    skipped entirely, so they never appear in the table.  The remaining code
    points are coalesced into inclusive ranges: a range grows only while the
    next kept code point is directly adjacent and has the same width.

    Yields:
        Tuples ``(first_codepoint, last_codepoint, width)`` in ascending
        code point order.
    """
    # A negative ``run_first`` means that no run has been opened yet.
    run_first = -1
    run_last = -1
    run_width = -2
    for cp in range(sys.maxunicode + 1):
        cell_width = wcwidth.wcwidth(chr(cp))
        if cell_width > 1:
            # Anything at or below width 1 (narrow and zero-width characters)
            # is deliberately left out so that it is treated as single-width,
            # matching the str.isascii()-based heuristics of str_width() in
            # strings.py.
            run_is_open = run_first >= 0
            extends_run = (
                run_is_open and cell_width == run_width and cp == run_last + 1
            )
            if not extends_run:
                if run_is_open:
                    # Width changed or a gap appeared: flush the finished run.
                    yield (run_first, run_last, run_width)
                run_first = cp
                run_width = cell_width
            run_last = cp
    # Flush the trailing run, if any wide character was seen at all.
    if run_first >= 0:
        yield (run_first, run_last, run_width)
47
48
def main() -> None:
    """Write the generated width table module next to the black sources.

    The output file is ``../src/black/_width_table.py`` relative to the
    directory containing this script.  It starts with a header that records
    this script's file name, the installed wcwidth version and the last entry
    of ``wcwidth.list_versions()``, followed by a ``WIDTH_TABLE`` list with
    one line per triple produced by ``make_width_table()``.
    """
    script_dir = dirname(__file__)
    table_path = join(script_dir, "..", "src", "black", "_width_table.py")
    with open(table_path, "w") as out:
        # The header is built after the file is opened, exactly as before.
        header = f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
import sys
from typing import List, Tuple

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
"""
        out.write(header)
        # Stream the rows lazily; each triple becomes one indented list entry.
        out.writelines(f"    {row!r},\n" for row in make_width_table())
        out.write("]\n")
70
71
# Regenerate the table only when this file is executed as a script; importing
# it merely defines make_width_table() and main().
if __name__ == "__main__":
    main()