From 6aef6c9d458e8df88f510e94c18f216232b6a786 Mon Sep 17 00:00:00 2001 From: Andrey Date: Sun, 13 Oct 2019 20:21:15 +0300 Subject: [PATCH] #455 Fix bug with tricky unicode symbols (#1047) * add test for special unicode symbol which usual re can not process correctly add regex lib which supports unicode 12.1.0 standard replace re usage in project in favor to regex * #455 fix dependency --- Pipfile | 1 + Pipfile.lock | 142 ++++++++++++++------------- black.py | 5 +- blib2to3/pgen2/conv.py | 2 +- blib2to3/pgen2/literals.py | 2 +- blib2to3/pgen2/tokenize.py | 2 +- docs/conf.py | 2 +- pyproject.toml | 1 + setup.py | 1 + tests/data/tricky_unicode_symbols.py | 6 ++ tests/test_black.py | 9 +- 11 files changed, 100 insertions(+), 73 deletions(-) create mode 100644 tests/data/tricky_unicode_symbols.py diff --git a/Pipfile b/Pipfile index 623f603..d82e2fe 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ toml = ">=0.9.4" black = {path = ".",extras = ["d"],editable = true} aiohttp-cors = "*" typed-ast = ">=1.3.1" +regex = "*" [dev-packages] pre-commit = "*" diff --git a/Pipfile.lock b/Pipfile.lock index c5108b0..45899b8 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "1e7537ef8102c7c4b5746b53247645a75fca24db7e0d94721fdcc8a62eb8a090" + "sha256": "5cceced346048c294218b3ecc9a550fd7667656d7115114cc5e7d3be18b40818" }, "pipfile-spec": 6, "requires": {}, @@ -16,23 +16,22 @@ "default": { "aiohttp": { "hashes": [ - "sha256:1ab7ab0a710135133dcc2980dd48fdd92f6f6066b66ef0356f458f395aa375af", - "sha256:1cf5b433a0aa3cf45b0acd4adb14cb20d99166aaa967ab89f629635ac263ca64", - "sha256:27b2bc8ca5555d5dadeee07cc2d6f8c06092c9d9c3f203c79c124d07474d3cf8", - "sha256:315f55a8469284f3ee54534d76f525b5c104dc514999dca4a007524a458aaba2", - "sha256:4f3c1572716ce2c8f22877a8185414ec213c057df35d27f7195f185691828608", - "sha256:635bef0626e28446372511e1fd31585205db2f18dab37a43d8adb30b0483e1bf", - "sha256:6907359de725e7ccd04b458a0f3322c7d1ba78df3df02e2ceb5abb0e21c975e6", - "sha256:772cfc0ff7c088d9e211377951a51c8a5173110cf56214f3e3d08a89be07badc", - "sha256:a91251585acf5203842551e37d2700c13c0bb411fa61b13485ab9e8d2dd400e9", - "sha256:acbbf0c47aa713d7a4baf52f11a356b01b82cabb53da452328546acaa21c6605", - "sha256:af7809ce7de6709afc7770403a70dfdbc4e988c91451108c8e123fac46b870d9", - "sha256:de611d7b95c1067d9a415979c63503dbdc735b943d08779506886614b410644a", - "sha256:e0fe698d1e6a852a27a88d2844a1a63839ee764d7cf214fd58cbea480407cc1d", - "sha256:fa155e309cc2277d6f9d099aecaf3ce78d86a31f5a62a994debc872e4c34ddf4" + "sha256:022c400e30848b1994236e31fb38db1dc4b551efe049f737cbac690ab2cdf5c4", + "sha256:10f9316ef068536dec0b9f09531fa1cb6bfa8394f278022cb96e789c77811ad2", + "sha256:2599b93fd5ba1120b3bd1366d67a7e26bd45b3d5d5548069e00b2fbef7f20ab0", + "sha256:2a1c71e7fb8c50e60fb4c9bab8bd5cf7c07f91a6b27dc2556d7354cd2ebb3689", + "sha256:6a19d34cc01414d94dd5a4466f8f397293fcb8929df8eeb8989119cc5ef928bb", + "sha256:7aab39c2a61a5c6b15bb7e561218ef64770ca1fbf4cc1878c96e630e2b7cc3cc", + "sha256:8959e28bc1b87542b0ee4a8302128f633bee296252f261bf03e118c4dff725f0", + "sha256:89820f7c488f4e9b1f74371da33403181e11e006663ddf074317aacd690838a6", + "sha256:ab761cf0f0b0b90887e276b4a7918f11e323f2228bbb30814bbd538c122028bf", + "sha256:cc648ecaca79e37c6e26f370e802e7ae640a069913f661f66c0421084bef219a", + "sha256:d6f26e80cd55ac88e1f0397fc8d547933225a5dc1add040e27788c2a028c64c6", + "sha256:e7d6ae4a36bfe6d7f93c6f42a0bfa1659f7d011006cb6e8207c85ef5acdb2986", + "sha256:fc55b1fec0e4cc1134ffb09ea3970783ee2906dc5dfd7cd16917913f2cfed65b" ], "index": "pypi", - "version": "==3.6.0" + "version": "==3.6.1" }, "aiohttp-cors": { "hashes": [ @@ -59,11 +58,11 @@ }, "attrs": { "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" + "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2", + "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396" ], "index": "pypi", - "version": "==19.1.0" + "version": "==19.2.0" }, "black": { "editable": true, @@ -128,6 +127,23 @@ ], "version": "==4.5.2" }, + "regex": { + "hashes": [ + "sha256:1e9f9bc44ca195baf0040b1938e6801d2f3409661c15fe57f8164c678cfc663f", + "sha256:587b62d48ca359d2d4f02d486f1f0aa9a20fbaf23a9d4198c4bed72ab2f6c849", + "sha256:835ccdcdc612821edf132c20aef3eaaecfb884c9454fdc480d5887562594ac61", + "sha256:93f6c9da57e704e128d90736430c5c59dd733327882b371b0cae8833106c2a21", + "sha256:a46f27d267665016acb3ec8c6046ec5eae8cf80befe85ba47f43c6f5ec636dcd", + "sha256:c5c8999b3a341b21ac2c6ec704cfcccbc50f1fedd61b6a8ee915ca7fd4b0a557", + "sha256:d4d1829cf97632673aa49f378b0a2c3925acd795148c5ace8ef854217abbee89", + "sha256:d96479257e8e4d1d7800adb26bf9c5ca5bab1648a1eddcac84d107b73dc68327", + "sha256:f20f4912daf443220436759858f96fefbfc6c6ba9e67835fd6e4e9b73582791a", + "sha256:f2b37b5b2c2a9d56d9e88efef200ec09c36c7f323f9d58d0b985a90923df386d", + "sha256:fe765b809a1f7ce642c2edeee351e7ebd84391640031ba4b60af8d91a9045890" + ], + "index": "pypi", + "version": "==2019.8.19" + }, "toml": { "hashes": [ "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", @@ -191,11 +207,11 @@ }, "attrs": { "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" + "sha256:ec20e7a4825331c1b5ebf261d111e16fa9612c1f7a5e1f884f12bd53a664dfd2", + "sha256:f913492e1663d3c36f502e5e9ba6cd13cf19d7fab50aa13239e420fef95e1396" ], "index": "pypi", - "version": "==19.1.0" + "version": "==19.2.0" }, "babel": { "hashes": [ @@ -234,10 +250,10 @@ }, "commonmark": { "hashes": [ - "sha256:14c3df31e8c9c463377e287b2a1eefaa6019ab97b22dad36e2f32be59d61d68d", - "sha256:867fc5db078ede373ab811e16b6789e9d033b15ccd7296f370ca52d1ee792ce0" + "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", + "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9" ], - "version": "==0.9.0" + "version": "==0.9.1" }, "coverage": { "hashes": [ @@ -316,12 +332,6 @@ "index": "pypi", "version": "==17.8.0" }, - "future": { - "hashes": [ - "sha256:67045236dcfd6816dc439556d009594abf643e5eb48992e36beac09c2ca659b8" - ], - "version": "==0.17.1" - }, "identify": { "hashes": [ "sha256:4f1fe9a59df4e80fcb0213086fcf502bc1765a01ea4fe8be48da3b65afd2a017", @@ -352,10 +362,10 @@ }, "jinja2": { "hashes": [ - "sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", - "sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b" + "sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f", + "sha256:9fe95f19286cfefaa917656583d020be14e7859c6b0252588391e47db34527de" ], - "version": "==2.10.1" + "version": "==2.10.3" }, "markupsafe": { "hashes": [ @@ -406,27 +416,26 @@ }, "mypy": { "hashes": [ - "sha256:0107bff4f46a289f0e4081d59b77cef1c48ea43da5a0dbf0005d54748b26df2a", - "sha256:07957f5471b3bb768c61f08690c96d8a09be0912185a27a68700f3ede99184e4", - "sha256:10af62f87b6921eac50271e667cc234162a194e742d8e02fc4ddc121e129a5b0", - "sha256:11fd60d2f69f0cefbe53ce551acf5b1cec1a89e7ce2d47b4e95a84eefb2899ae", - "sha256:15e43d3b1546813669bd1a6ec7e6a11d2888db938e0607f7b5eef6b976671339", - "sha256:352c24ba054a89bb9a35dd064ee95ab9b12903b56c72a8d3863d882e2632dc76", - "sha256:437020a39417e85e22ea8edcb709612903a9924209e10b3ec6d8c9f05b79f498", - "sha256:49925f9da7cee47eebf3420d7c0e00ec662ec6abb2780eb0a16260a7ba25f9c4", - "sha256:6724fcd5777aa6cebfa7e644c526888c9d639bd22edd26b2a8038c674a7c34bd", - "sha256:7a17613f7ea374ab64f39f03257f22b5755335b73251d0d253687a69029701ba", - "sha256:cdc1151ced496ca1496272da7fc356580e95f2682be1d32377c22ddebdf73c91" + "sha256:1d98fd818ad3128a5408148c9e4a5edce6ed6b58cc314283e631dd5d9216527b", + "sha256:22ee018e8fc212fe601aba65d3699689dd29a26410ef0d2cc1943de7bec7e3ac", + "sha256:3a24f80776edc706ec8d05329e854d5b9e464cd332e25cde10c8da2da0a0db6c", + "sha256:42a78944e80770f21609f504ca6c8173f7768043205b5ac51c9144e057dcf879", + "sha256:4b2b20106973548975f0c0b1112eceb4d77ed0cafe0a231a1318f3b3a22fc795", + "sha256:591a9625b4d285f3ba69f541c84c0ad9e7bffa7794da3fa0585ef13cf95cb021", + "sha256:5b4b70da3d8bae73b908a90bb2c387b977e59d484d22c604a2131f6f4397c1a3", + "sha256:84edda1ffeda0941b2ab38ecf49302326df79947fa33d98cdcfbf8ca9cf0bb23", + "sha256:b2b83d29babd61b876ae375786960a5374bba0e4aba3c293328ca6ca5dc448dd", + "sha256:cc4502f84c37223a1a5ab700649b5ab1b5e4d2bf2d426907161f20672a21930b", + "sha256:e29e24dd6e7f39f200a5bb55dcaa645d38a397dd5a6674f6042ef02df5795046" ], "index": "pypi", - "version": "==0.720" + "version": "==0.730" }, "mypy-extensions": { "hashes": [ - "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", - "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e" + "sha256:a161e3b917053de87dbe469987e173e49fb454eca10ef28b48b384538cc11458" ], - "version": "==0.4.1" + "version": "==0.4.2" }, "nodeenv": { "hashes": [ @@ -436,10 +445,10 @@ }, "packaging": { "hashes": [ - "sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9", - "sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe" + "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47", + "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108" ], - "version": "==19.1" + "version": "==19.2" }, "pkginfo": { "hashes": [ @@ -486,10 +495,10 @@ }, "pytz": { "hashes": [ - "sha256:26c0b32e437e54a18161324a2fca3c4b9846b74a8dccddd843113109e1116b32", - "sha256:c894d57500a4cd2d5c71114aaab77dbab5eabd9022308ce5ac9bb93a60a6f0c7" + "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d", + "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be" ], - "version": "==2019.2" + "version": "==2019.3" }, "pyyaml": { "hashes": [ @@ -556,9 +565,10 @@ }, "snowballstemmer": { "hashes": [ - "sha256:713e53b79cbcf97bc5245a06080a33d54a77e7cce2f789c835a143bcdb5c033e" + "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0", + "sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52" ], - "version": "==1.9.1" + "version": "==2.0.0" }, "sphinx": { "hashes": [ @@ -620,18 +630,18 @@ }, "tqdm": { "hashes": [ - "sha256:4c34f077399736e5dbf403183b5f0f1bda46e06433a3f93812386a9d56b28004", - "sha256:74d40d49cab95a93735323e450161f1e580dac42b25bf39770f6e3501d36ebfb" + "sha256:abc25d0ce2397d070ef07d8c7e706aede7920da163c64997585d42d3537ece3d", + "sha256:dd3fcca8488bb1d416aa7469d2f277902f26260c45aa86b667b074cd44b3b115" ], - "version": "==4.36.0" + "version": "==4.36.1" }, "twine": { "hashes": [ - "sha256:630fadd6e342e725930be6c696537e3f9ccc54331742b16245dab292a17d0460", - "sha256:a3d22aab467b4682a22de4a422632e79d07eebd07ff2a7079effb13f8a693787" + "sha256:5319dd3e02ac73fcddcd94f035b9631589ab5d23e1f4699d57365199d85261e1", + "sha256:9fe7091715c7576df166df8ef6654e61bada39571783f2fd415bdcba867c6993" ], "index": "pypi", - "version": "==1.15.0" + "version": "==2.0.0" }, "typed-ast": { "hashes": [ @@ -664,10 +674,10 @@ }, "urllib3": { "hashes": [ - "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", - "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" + "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398", + "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86" ], - "version": "==1.25.3" + "version": "==1.25.6" }, "virtualenv": { "hashes": [ diff --git a/black.py b/black.py index b8736e7..ff373c8 100644 --- a/black.py +++ b/black.py @@ -12,7 +12,7 @@ from multiprocessing import Manager, freeze_support import os from pathlib import Path import pickle -import re +import regex as re import signal import sys import tempfile @@ -3810,7 +3810,8 @@ def re_compile_maybe_verbose(regex: str) -> Pattern[str]: """ if "\n" in regex: regex = "(?x)" + regex - return re.compile(regex) + compiled: Pattern[str] = re.compile(regex) + return compiled def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]: diff --git a/blib2to3/pgen2/conv.py b/blib2to3/pgen2/conv.py index ed0cac5..dd70752 100644 --- a/blib2to3/pgen2/conv.py +++ b/blib2to3/pgen2/conv.py @@ -27,7 +27,7 @@ without having to invoke the Python pgen C program. """ # Python imports -import re +import regex as re # Local imports from pgen2 import grammar, token diff --git a/blib2to3/pgen2/literals.py b/blib2to3/pgen2/literals.py index b9b63e6..baa17e1 100644 --- a/blib2to3/pgen2/literals.py +++ b/blib2to3/pgen2/literals.py @@ -3,7 +3,7 @@ """Safely evaluate Python string literals without using eval().""" -import re +import regex as re simple_escapes = {"a": "\a", "b": "\b", diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py index a5c6462..9775489 100644 --- a/blib2to3/pgen2/tokenize.py +++ b/blib2to3/pgen2/tokenize.py @@ -29,7 +29,7 @@ __author__ = 'Ka-Ping Yee ' __credits__ = \ 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' -import re +import regex as re from codecs import BOM_UTF8, lookup from blib2to3.pgen2.token import * diff --git a/docs/conf.py b/docs/conf.py index 37a6f84..ce7536d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,7 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # from pathlib import Path -import re +import regex as re import shutil import string diff --git a/pyproject.toml b/pyproject.toml index 071a80e..17476bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ click = "^6.5" toml = "^0.9.4" appdirs = "^1.4" aiohttp = { version = "^3.4", optional = true } +regex = "^2019.8" [tool.poetry.extras] d = ["aiohttp"] diff --git a/setup.py b/setup.py index d4e9405..1ba7a00 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ setup( "appdirs", "toml>=0.9.4", "typed-ast>=1.3.1", + "regex", ], extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]}, test_suite="tests.test_black", diff --git a/tests/data/tricky_unicode_symbols.py b/tests/data/tricky_unicode_symbols.py new file mode 100644 index 0000000..366a92f --- /dev/null +++ b/tests/data/tricky_unicode_symbols.py @@ -0,0 +1,6 @@ +ä = 1 +µ = 2 +蟒 = 3 +x󠄀 = 4 +មុ = 1 +Q̇_per_meter = 4 diff --git a/tests/test_black.py b/tests/test_black.py index 66a0761..92d77ac 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -7,7 +7,7 @@ from functools import partial from io import BytesIO, TextIOWrapper import os from pathlib import Path -import re +import regex as re import sys from tempfile import TemporaryDirectory from typing import Any, BinaryIO, Generator, List, Tuple, Iterator, TypeVar @@ -1245,6 +1245,13 @@ class BlackTestCase(unittest.TestCase): two = black.read_cache(short_mode) self.assertNotIn(path, two) + def test_tricky_unicode_symbols(self) -> None: + source, expected = read_data("tricky_unicode_symbols") + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, black.FileMode()) + def test_single_file_force_pyi(self) -> None: reg_mode = black.FileMode() pyi_mode = black.FileMode(is_pyi=True) -- 2.39.5