From e74117f172e29e8a980e2c9de929ad50d3769150 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C5=81ukasz=20Langa?= Date: Wed, 14 Mar 2018 12:55:32 -0700 Subject: [PATCH 1/1] Initial commit --- .flake8 | 8 + .gitignore | 1 + .travis.yml | 15 + LICENSE | 21 + MANIFEST.in | 2 + Pipfile | 17 + Pipfile.lock | 243 +++ README.md | 260 ++++ black.py | 1478 +++++++++++++++++++ blib2to3/Grammar.txt | 173 +++ blib2to3/Grammar3.6.4.final.0.pickle | Bin 0 -> 33148 bytes blib2to3/PatternGrammar.txt | 28 + blib2to3/PatternGrammar3.6.4.final.0.pickle | Bin 0 -> 2093 bytes blib2to3/README | 7 + blib2to3/__init__.py | 1 + blib2to3/__init__.pyi | 1 + blib2to3/pgen2/__init__.py | 4 + blib2to3/pgen2/__init__.pyi | 10 + blib2to3/pgen2/conv.py | 257 ++++ blib2to3/pgen2/driver.py | 178 +++ blib2to3/pgen2/driver.pyi | 24 + blib2to3/pgen2/grammar.py | 211 +++ blib2to3/pgen2/grammar.pyi | 29 + blib2to3/pgen2/literals.py | 60 + blib2to3/pgen2/literals.pyi | 9 + blib2to3/pgen2/parse.py | 201 +++ blib2to3/pgen2/parse.pyi | 29 + blib2to3/pgen2/pgen.py | 386 +++++ blib2to3/pgen2/pgen.pyi | 49 + blib2to3/pgen2/token.py | 83 ++ blib2to3/pgen2/token.pyi | 73 + blib2to3/pgen2/tokenize.py | 518 +++++++ blib2to3/pgen2/tokenize.pyi | 30 + blib2to3/pygram.py | 40 + blib2to3/pygram.pyi | 119 ++ blib2to3/pytree.py | 854 +++++++++++ blib2to3/pytree.pyi | 86 ++ mypy.ini | 31 + setup.py | 67 + tests/.flake8 | 8 + tests/cantfit.py | 27 + tests/comments.py | 60 + tests/comments2.py | 202 +++ tests/composition.py | 21 + tests/expression.py | 240 +++ tests/function.py | 137 ++ tests/import_spacing.py | 77 + tests/test_black.py | 220 +++ 48 files changed, 6595 insertions(+) create mode 100644 .flake8 create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 README.md create mode 100644 black.py create mode 100644 blib2to3/Grammar.txt create mode 100644 blib2to3/Grammar3.6.4.final.0.pickle create mode 100644 blib2to3/PatternGrammar.txt create mode 100644 blib2to3/PatternGrammar3.6.4.final.0.pickle create mode 100644 blib2to3/README create mode 100644 blib2to3/__init__.py create mode 100644 blib2to3/__init__.pyi create mode 100644 blib2to3/pgen2/__init__.py create mode 100644 blib2to3/pgen2/__init__.pyi create mode 100644 blib2to3/pgen2/conv.py create mode 100644 blib2to3/pgen2/driver.py create mode 100644 blib2to3/pgen2/driver.pyi create mode 100644 blib2to3/pgen2/grammar.py create mode 100644 blib2to3/pgen2/grammar.pyi create mode 100644 blib2to3/pgen2/literals.py create mode 100644 blib2to3/pgen2/literals.pyi create mode 100644 blib2to3/pgen2/parse.py create mode 100644 blib2to3/pgen2/parse.pyi create mode 100644 blib2to3/pgen2/pgen.py create mode 100644 blib2to3/pgen2/pgen.pyi create mode 100755 blib2to3/pgen2/token.py create mode 100644 blib2to3/pgen2/token.pyi create mode 100644 blib2to3/pgen2/tokenize.py create mode 100644 blib2to3/pgen2/tokenize.pyi create mode 100644 blib2to3/pygram.py create mode 100644 blib2to3/pygram.pyi create mode 100644 blib2to3/pytree.py create mode 100644 blib2to3/pytree.pyi create mode 100644 mypy.ini create mode 100644 setup.py create mode 100644 tests/.flake8 create mode 100644 tests/cantfit.py create mode 100644 tests/comments.py create mode 100644 tests/comments2.py create mode 100644 tests/composition.py create mode 100644 tests/expression.py create mode 100644 tests/function.py create mode 100644 tests/import_spacing.py create mode 100644 tests/test_black.py diff --git 
a/.flake8 b/.flake8 new file mode 100644 index 0000000..cf36923 --- /dev/null +++ b/.flake8 @@ -0,0 +1,8 @@ +# This is an example .flake8 config, used when developing *Black* itself. +# Keep in sync with setup.cfg which is used for source packages. + +[flake8] +ignore = E266, E501 +max-line-length = 80 +max-complexity = 12 +select = B,C,E,F,W,T4,B9 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6350e98 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.coverage diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e434c44 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +sudo: false +language: python +before_script: +- pip install -e . +# test script +script: python setup.py test +notifications: + on_success: change + on_failure: always +matrix: + include: + - python: 3.6 + - python: 3.6-dev + - python: 3.7-dev + - python: 3.8-dev diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7a9b891 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Łukasz Langa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9ae6851 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.rst *.md LICENSE +recursive-include tests *.txt *.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..3c20aff --- /dev/null +++ b/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +attrs = "*" +click = "*" + +[dev-packages] +coverage = "*" +flake8 = "*" +flake8-bugbear = "*" +flake8-mypy = "*" +mypy = "*" +pypandoc = "*" +twine = "*" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..7c173f4 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,243 @@ +{ + "_meta": { + "hash": { + "sha256": "e2dc877c2f32df83197fc3dc0f49e0a66d0d099aab106b99d64fdbe5b14cc91b" + }, + "host-environment-markers": { + "implementation_name": "cpython", + "implementation_version": "3.6.4", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_python_implementation": "CPython", + "platform_release": "17.4.0", + "platform_system": "Darwin", + "platform_version": "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64", + "python_full_version": "3.6.4", + "python_version": "3.6", + "sys_platform": "darwin" + }, + "pipfile-spec": 6, + "requires": {}, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "click": { + "hashes": [ + "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", + "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" + ], + "version": "==6.7" + } + }, + "develop": { + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "certifi": { + "hashes": [ + "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296", + "sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d" + ], + "version": "==2018.1.18" + }, + "chardet": { + "hashes": [ + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" + ], + "version": "==3.0.4" + }, + "coverage": { + "hashes": [ + "sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc", + "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694", + "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80", + "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed", + "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249", + "sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1", + "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9", + "sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5", + "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508", + "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f", + "sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba", + "sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e", + 
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd", + "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba", + "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162", + "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d", + "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558", + "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c", + "sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062", + "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640", + "sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99", + "sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287", + "sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000", + "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", + "sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc", + "sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653", + "sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a", + "sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1", + "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91", + "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2", + "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d", + "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a", + "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4", + "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd", + "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77", + "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e" + ], + "version": "==4.5.1" + }, + "flake8": { + "hashes": [ + "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37", + "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0" + ], + "version": "==3.5.0" + }, + "flake8-bugbear": { + "hashes": [ + "sha256:541746f0f3b2f1a8d7278e1d2d218df298996b60b02677708560db7c7e620e3b", + "sha256:5f14a99d458e29cb92be9079c970030e0dd398b2decb179d76d39a5266ea1578" + ], + "version": "==18.2.0" + }, + "flake8-mypy": { + "hashes": [ + "sha256:cff009f4250e8391bf48990093cff85802778c345c8449d6498b62efefeebcbc", + "sha256:47120db63aff631ee1f84bac6fe8e64731dc66da3efc1c51f85e15ade4a3ba18" + ], + "version": "==17.8.0" + }, + "idna": { + "hashes": [ + "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", + "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f" + ], + "version": "==2.6" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "mypy": { + "hashes": [ + "sha256:884f18f3a40cfcf24cdd5860b84958cfb35e6563e439c5adc1503878df221dc3", + "sha256:83d798f66323f2de6191d66d9ae5ab234e4ee5b400010e19c58d75d308049f25" + ], + "version": "==0.570" + }, + "pkginfo": { + "hashes": [ + "sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024", + "sha256:bb1a6aeabfc898f5df124e7e00303a5b3ec9a489535f346bfbddb081af93f89e" + ], + "version": "==1.4.1" + }, + "pycodestyle": { + "hashes": [ + "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9", + 
"sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766" + ], + "version": "==2.3.1" + }, + "pyflakes": { + "hashes": [ + "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f", + "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805" + ], + "version": "==1.6.0" + }, + "pypandoc": { + "hashes": [ + "sha256:e914e6d5f84a76764887e4d909b09d63308725f0cbb5293872c2c92f07c11a5b" + ], + "version": "==1.4" + }, + "requests": { + "hashes": [ + "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", + "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" + ], + "version": "==2.18.4" + }, + "requests-toolbelt": { + "hashes": [ + "sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237", + "sha256:f6a531936c6fa4c6cfce1b9c10d5c4f498d16528d2a54a22ca00011205a187b5" + ], + "version": "==0.8.0" + }, + "tqdm": { + "hashes": [ + "sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9", + "sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df" + ], + "version": "==4.19.6" + }, + "twine": { + "hashes": [ + "sha256:d3ce5c480c22ccfb761cd358526e862b32546d2fe4bc93d46b5cf04ea3cc46ca", + "sha256:caa45b7987fc96321258cd7668e3be2ff34064f5c66d2d975b641adca659c1ab" + ], + "version": "==1.9.1" + }, + "typed-ast": { + "hashes": [ + "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", + "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", + "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", + "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", + "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", + "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", + "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", + "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6", + "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", + "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", + "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", + "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", + "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", + "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", + "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", + "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", + "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", + "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa" + ], + "version": "==1.1.0" + }, + "urllib3": { + "hashes": [ + "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", + "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" + ], + "version": "==1.22" + }, + "wheel": { + "hashes": [ + "sha256:e721e53864f084f956f40f96124a74da0631ac13fbbd1ba99e8e2b5e9cafdf64", + "sha256:9515fe0a94e823fd90b08d22de45d7bde57c90edce705b22f5e1ecf7e1b653c8" + ], + "version": "==0.30.0" + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d1724ca --- /dev/null +++ b/README.md @@ -0,0 +1,260 @@ +# black + +[![Build Status](https://travis-ci.org/ambv/black.svg?branch=master)](https://travis-ci.org/ambv/black) + +> Any color you like. + + +*Black* is the uncompromising Python code formatter. 
By using it, you +agree to cede control over minutiae of hand-formatting. In return, +*Black* gives you speed, determinism, and freedom from `pycodestyle` +nagging about formatting. You will save time and mental energy for +more important matters. + +Blackened code looks the same regardless of the project you're reading. +Formatting becomes transparent after a while and you can focus on the +content instead. + +*Black* makes code review faster by producing the smallest diffs +possible. + + +## NOTE: This is an early pre-release + +*Black* can already successfully format itself and the standard library. +It also sports a decent test suite. However, it is still very new. +Things will probably be wonky for a while. This is made explicit by the +"Alpha" trove classifier, as well as by the "a" in the version number. +What this means for you is that **until the formatter becomes stable, +you should expect some formatting to change in the future**. + +Also, as a temporary safety measure, *Black* will check that the +reformatted code still produces a valid AST that is equivalent to the +original. This slows it down. If you're feeling confident, use +``--fast``. + + +## Usage + +*Black* can be installed by running `pip install black`. + +``` +black [OPTIONS] [SRC]... + +Options: + -l, --line-length INTEGER Where to wrap around. [default: 88] + --fast / --safe If --fast given, skip temporary sanity checks. + [default: --safe] + --version Show the version and exit. + --help Show this message and exit. +``` + + +## The philosophy behind *Black* + +*Black* reformats entire files in place. It is not configurable. It +doesn't take previous formatting into account. It doesn't reformat +blocks that start with `# fmt: off` and end with `# fmt: on`. It also +recognizes [YAPF](https://github.com/google/yapf)'s block comments to +the same effect, as a courtesy for straddling code. + + +### How *Black* formats files + +*Black* ignores previous formatting and applies uniform horizontal +and vertical whitespace to your code. The rules for horizontal +whitespace are pretty obvious and can be summarized as: do whatever +makes `pycodestyle` happy. + +As for vertical whitespace, *Black* tries to render one full expression +or simple statement per line. If this fits the allotted line length, +great. +```!py3 +# in: +l = [1, + 2, + 3, +] + +# out: +l = [1, 2, 3] +``` + +If not, *Black* will look at the contents of the first outer matching +brackets and put that in a separate indented line. +```!py3 +# in: +l = [[n for n in list_bosses()], [n for n in list_employees()]] + +# out: +l = [ + [n for n in list_bosses()], [n for n in list_employees()] +] +``` + +If that still doesn't fit the bill, it will decompose the internal +expression further using the same rule, indenting matching brackets +every time. If the contents of the matching brackets pair are +comma-separated (like an argument list, or a dict literal, and so on) +then *Black* will first try to keep them on the same line with the +matching brackets. If that doesn't work, it will put all of them in +separate lines. +```!py3 +# in: +def very_important_function(template: str, *variables, file: os.PathLike, debug: bool = False): + """Applies `variables` to the `template` and writes to `file`.""" + with open(file, 'w') as f: + ... + +# out: +def very_important_function( + template: str, + *variables, + file: os.PathLike, + debug: bool = False, +): + """Applies `variables` to the `template` and writes to `file`.""" + with open(file, 'w') as f: + ...
+``` + +You might have noticed that closing brackets are always dedented and +that a trailing comma is always added. Such formatting produces smaller +diffs; when you add or remove an element, it's always just one line. +Also, having the closing bracket dedented provides a clear delimiter +between two distinct sections of the code that otherwise share the same +indentation level (like the arguments list and the docstring in the +example above). + +Unnecessary trailing commas are removed if an expression fits in one +line. This makes it 1% more likely that your line won't exceed the +allotted line length limit. + +*Black* avoids spurious vertical whitespace. This is in the spirit of +PEP 8 which says that in-function vertical whitespace should only be +used sparingly. One exception is control flow statements: *Black* will +always emit an extra empty line after ``return``, ``raise``, ``break``, +``continue``, and ``yield``. This is to make changes in control flow +more prominent to readers of your code. + +That's it. The rest of the whitespace formatting rules follow PEP 8 and +are designed to keep `pycodestyle` quiet. + + +### Line length + +You probably noticed the peculiar default line length. *Black* defaults +to 88 characters per line, which happens to be 10% over 80. This number +was found to produce significantly shorter files than sticking with 80 +(the most popular), or even 79 (used by the standard library). In +general, [90-ish seems like the wise choice](https://youtu.be/wf-BqAjZb8M?t=260). + +If you're paid by the line of code you write, you can pass +`--line-length` with a lower number. *Black* will try to respect that. +However, sometimes it won't be able to without breaking other rules. In +those rare cases, auto-formatted code will exceed your allotted limit. + +You can also increase it, but remember that people with sight disabilities +find it harder to work with line lengths exceeding 100 characters. +It also adversely affects side-by-side diff review on typical screen +resolutions. Long lines also make it harder to present code neatly +in documentation or talk slides. + +If you're using Flake8, you can bump `max-line-length` to 88 and forget +about it. Alternatively, use [Bugbear](https://github.com/PyCQA/flake8-bugbear)'s +B950 warning instead of E501 and keep the max line length at 80 which +you are probably already using. You'd do it like this: +```!ini +[flake8] +max-line-length = 80 +... +select = C,E,F,W,B,B950 +ignore = E501 +``` + +You'll find *Black*'s own .flake8 config file is configured like this. +If you're curious about the reasoning behind B950, Bugbear's documentation +explains it. The tl;dr is "it's like highway speed limits, we won't +bother you if you overdo it by a few km/h". + + +### Editor integration + +There is currently no integration with any text editors. Vim and +Atom/Nuclide integration is planned by the author, others will require +external contributions. + +Patches welcome! ✨ 🍰 ✨ + + +## Testimonials + +**Dusty Phillips**, [writer](https://smile.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=dusty+phillips): + +> Black is opinionated so you don't have to be. + +**Hynek Schlawack**, [creator of `attrs`](http://www.attrs.org/), core +developer of Twisted and CPython: + +> An auto-formatter that doesn't suck is all I want for Xmas! + +**Carl Meyer**, [Django](https://www.djangoproject.com/) core developer: + +> At least the name is good. 
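(Editorial aside, not part of the original patch.) The control-flow rule described above, an extra empty line after `return`, `raise`, `break`, `continue`, and `yield`, is stated in prose only; a minimal before/after sketch of that rule, using a made-up `greeting()` function and assuming the behavior described for `EmptyLineTracker` in `black.py` later in this patch, would look like this:

```!py3
# in:
def greeting(name):
    if not name:
        return 'Hello, stranger!'
    return f'Hello, {name}!'

# out:
def greeting(name):
    if not name:
        return 'Hello, stranger!'

    return f'Hello, {name}!'
```

The extra empty line after the inner `return` is the point of the rule; the exact output of this early version may differ in other details.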
+ + +## Tests + +Just run: + +``` +python setup.py test +``` + +## This tool requires Python 3.6.0+ to run + +But you can reformat Python 2 code with it, too. *Black* is able to parse +all of the new syntax supported on Python 3.6 but also *effectively all* +the Python 2 syntax at the same time, as long as you're not using print +statements. + +By making the code exclusively Python 3.6+, I'm able to focus on the +quality of the formatting and re-use all the nice features of the new +releases (check out [pathlib](https://docs.python.org/3/library/pathlib.html) or +f-strings) instead of wasting cycles on Unicode compatibility, and so on. + + +## License + +MIT + + +## Contributing + +In terms of inspiration, *Black* is about as configurable as *gofmt* and +*rustfmt* are. This is deliberate. + +Bug reports and fixes are always welcome! However, before you suggest a +new feature or configuration knob, ask yourself why you want it. If it +enables better integration with some workflow, fixes an inconsistency, +speeds things up, and so on - go for it! On the other hand, if your +answer is "because I don't like a particular formatting" then you're not +ready to embrace *Black* yet. Such changes are unlikely to get accepted. +You can still try but prepare to be disappointed. + + +## Change Log + +### 18.3a0 + +* first published version, Happy 🍰 Day 2018! + +* alpha quality + +* date-versioned (see: http://calver.org/) + + +## Authors + +Glued together by [Łukasz Langa](mailto:lukasz@langa.pl). diff --git a/black.py b/black.py new file mode 100644 index 0000000..24c57ca --- /dev/null +++ b/black.py @@ -0,0 +1,1478 @@ +#!/usr/bin/env python3 +import asyncio +from asyncio.base_events import BaseEventLoop +from concurrent.futures import Executor, ProcessPoolExecutor +from functools import partial +import keyword +import os +from pathlib import Path +import tokenize +from typing import ( + Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union +) + +from attr import attrib, dataclass, Factory +import click + +# lib2to3 fork +from blib2to3.pytree import Node, Leaf, type_repr +from blib2to3 import pygram, pytree +from blib2to3.pgen2 import driver, token +from blib2to3.pgen2.parse import ParseError + +__version__ = "18.3a0" +DEFAULT_LINE_LENGTH = 88 +# types +syms = pygram.python_symbols +FileContent = str +Encoding = str +Depth = int +NodeType = int +LeafID = int +Priority = int +LN = Union[Leaf, Node] +out = partial(click.secho, bold=True, err=True) +err = partial(click.secho, fg='red', err=True) + + +class NothingChanged(UserWarning): + """Raised by `format_file` when the reformatted code is the same as source.""" + + +class CannotSplit(Exception): + """A readable split that fits the allotted line length is impossible. + + Raised by `left_hand_split()` and `right_hand_split()`. + """ + + +@click.command() +@click.option( + '-l', + '--line-length', + type=int, + default=DEFAULT_LINE_LENGTH, + help='How many characters per line to allow.', + show_default=True, +) +@click.option( + '--fast/--safe', + is_flag=True, + help='If --fast given, skip temporary sanity checks.
[default: --safe]', +) +@click.version_option(version=__version__) +@click.argument( + 'src', + nargs=-1, + type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True), +) +@click.pass_context +def main(ctx: click.Context, line_length: int, fast: bool, src: List[str]) -> None: + """The uncompromising code formatter.""" + sources: List[Path] = [] + for s in src: + p = Path(s) + if p.is_dir(): + sources.extend(gen_python_files_in_dir(p)) + elif p.is_file(): + # if a file was explicitly given, we don't care about its extension + sources.append(p) + else: + err(f'invalid path: {s}') + if len(sources) == 0: + ctx.exit(0) + elif len(sources) == 1: + p = sources[0] + report = Report() + try: + changed = format_file_in_place(p, line_length=line_length, fast=fast) + report.done(p, changed) + except Exception as exc: + report.failed(p, str(exc)) + ctx.exit(report.return_code) + else: + loop = asyncio.get_event_loop() + executor = ProcessPoolExecutor(max_workers=os.cpu_count()) + return_code = 1 + try: + return_code = loop.run_until_complete( + schedule_formatting(sources, line_length, fast, loop, executor) + ) + finally: + loop.close() + ctx.exit(return_code) + + +async def schedule_formatting( + sources: List[Path], + line_length: int, + fast: bool, + loop: BaseEventLoop, + executor: Executor, +) -> int: + tasks = { + src: loop.run_in_executor( + executor, format_file_in_place, src, line_length, fast + ) + for src in sources + } + await asyncio.wait(tasks.values()) + cancelled = [] + report = Report() + for src, task in tasks.items(): + if not task.done(): + report.failed(src, 'timed out, cancelling') + task.cancel() + cancelled.append(task) + elif task.exception(): + report.failed(src, str(task.exception())) + else: + report.done(src, task.result()) + if cancelled: + await asyncio.wait(cancelled, timeout=2) + out('All done! ✨ 🍰 ✨') + click.echo(str(report)) + return report.return_code + + +def format_file_in_place(src: Path, line_length: int, fast: bool) -> bool: + """Format the file and rewrite if changed. 
Return True if changed.""" + try: + contents, encoding = format_file(src, line_length=line_length, fast=fast) + except NothingChanged: + return False + + with open(src, "w", encoding=encoding) as f: + f.write(contents) + return True + + +def format_file( + src: Path, line_length: int, fast: bool +) -> Tuple[FileContent, Encoding]: + """Reformats a file and returns its contents and encoding.""" + with tokenize.open(src) as src_buffer: + src_contents = src_buffer.read() + if src_contents.strip() == '': + raise NothingChanged(src) + + dst_contents = format_str(src_contents, line_length=line_length) + if src_contents == dst_contents: + raise NothingChanged(src) + + if not fast: + assert_equivalent(src_contents, dst_contents) + assert_stable(src_contents, dst_contents, line_length=line_length) + return dst_contents, src_buffer.encoding + + +def format_str(src_contents: str, line_length: int) -> FileContent: + """Reformats a string and returns new contents.""" + src_node = lib2to3_parse(src_contents) + dst_contents = "" + comments: List[Line] = [] + lines = LineGenerator() + elt = EmptyLineTracker() + empty_line = Line() + after = 0 + for current_line in lines.visit(src_node): + for _ in range(after): + dst_contents += str(empty_line) + before, after = elt.maybe_empty_lines(current_line) + for _ in range(before): + dst_contents += str(empty_line) + if not current_line.is_comment: + for comment in comments: + dst_contents += str(comment) + comments = [] + for line in split_line(current_line, line_length=line_length): + dst_contents += str(line) + else: + comments.append(current_line) + for comment in comments: + dst_contents += str(comment) + return dst_contents + + +def lib2to3_parse(src_txt: str) -> Node: + """Given a string with source, return the lib2to3 Node.""" + grammar = pygram.python_grammar_no_print_statement + drv = driver.Driver(grammar, pytree.convert) + if src_txt[-1] != '\n': + nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n' + src_txt += nl + try: + result = drv.parse_string(src_txt, True) + except ParseError as pe: + lineno, column = pe.context[1] + lines = src_txt.splitlines() + try: + faulty_line = lines[lineno - 1] + except IndexError: + faulty_line = "" + raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None + + if isinstance(result, Leaf): + result = Node(syms.file_input, [result]) + return result + + +def lib2to3_unparse(node: Node) -> str: + """Given a lib2to3 node, return its string representation.""" + code = str(node) + return code + + +T = TypeVar('T') + + +class Visitor(Generic[T]): + """Basic lib2to3 visitor that yields things on visiting.""" + + def visit(self, node: LN) -> Iterator[T]: + if node.type < 256: + name = token.tok_name[node.type] + else: + name = type_repr(node.type) + yield from getattr(self, f'visit_{name}', self.visit_default)(node) + + def visit_default(self, node: LN) -> Iterator[T]: + if isinstance(node, Node): + for child in node.children: + yield from self.visit(child) + + +@dataclass +class DebugVisitor(Visitor[T]): + tree_depth: int = attrib(default=0) + + def visit_default(self, node: LN) -> Iterator[T]: + indent = ' ' * (2 * self.tree_depth) + if isinstance(node, Node): + _type = type_repr(node.type) + out(f'{indent}{_type}', fg='yellow') + self.tree_depth += 1 + for child in node.children: + yield from self.visit(child) + + self.tree_depth -= 1 + out(f'{indent}/{_type}', fg='yellow', bold=False) + else: + _type = token.tok_name.get(node.type, str(node.type)) + out(f'{indent}{_type}', fg='blue', nl=False) + if 
node.prefix: + # We don't have to handle prefixes for `Node` objects since + # that delegates to the first child anyway. + out(f' {node.prefix!r}', fg='green', bold=False, nl=False) + out(f' {node.value!r}', fg='blue', bold=False) + + +KEYWORDS = set(keyword.kwlist) +WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE} +FLOW_CONTROL = {'return', 'raise', 'break', 'continue'} +STATEMENT = { + syms.if_stmt, + syms.while_stmt, + syms.for_stmt, + syms.try_stmt, + syms.except_clause, + syms.with_stmt, + syms.funcdef, + syms.classdef, +} +STANDALONE_COMMENT = 153 +LOGIC_OPERATORS = {'and', 'or'} +COMPARATORS = { + token.LESS, + token.GREATER, + token.EQEQUAL, + token.NOTEQUAL, + token.LESSEQUAL, + token.GREATEREQUAL, +} +MATH_OPERATORS = { + token.PLUS, + token.MINUS, + token.STAR, + token.SLASH, + token.VBAR, + token.AMPER, + token.PERCENT, + token.CIRCUMFLEX, + token.LEFTSHIFT, + token.RIGHTSHIFT, + token.DOUBLESTAR, + token.DOUBLESLASH, +} +COMPREHENSION_PRIORITY = 20 +COMMA_PRIORITY = 10 +LOGIC_PRIORITY = 5 +STRING_PRIORITY = 4 +COMPARATOR_PRIORITY = 3 +MATH_PRIORITY = 1 + + +@dataclass +class BracketTracker: + depth: int = attrib(default=0) + bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict)) + delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict)) + previous: Optional[Leaf] = attrib(default=None) + + def mark(self, leaf: Leaf) -> None: + if leaf.type == token.COMMENT: + return + + if leaf.type in CLOSING_BRACKETS: + self.depth -= 1 + opening_bracket = self.bracket_match.pop((self.depth, leaf.type)) + leaf.opening_bracket = opening_bracket # type: ignore + leaf.bracket_depth = self.depth # type: ignore + if self.depth == 0: + delim = is_delimiter(leaf) + if delim: + self.delimiters[id(leaf)] = delim + elif self.previous is not None: + if leaf.type == token.STRING and self.previous.type == token.STRING: + self.delimiters[id(self.previous)] = STRING_PRIORITY + elif ( + leaf.type == token.NAME and + leaf.value == 'for' and + leaf.parent and + leaf.parent.type in {syms.comp_for, syms.old_comp_for} + ): + self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY + elif ( + leaf.type == token.NAME and + leaf.value == 'if' and + leaf.parent and + leaf.parent.type in {syms.comp_if, syms.old_comp_if} + ): + self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY + if leaf.type in OPENING_BRACKETS: + self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf + self.depth += 1 + self.previous = leaf + + def any_open_brackets(self) -> bool: + """Returns True if there is an yet unmatched open bracket on the line.""" + return bool(self.bracket_match) + + def max_priority(self, exclude: Iterable[LeafID] = ()) -> int: + """Returns the highest priority of a delimiter found on the line. + + Values are consistent with what `is_delimiter()` returns. + """ + return max(v for k, v in self.delimiters.items() if k not in exclude) + + +@dataclass +class Line: + depth: int = attrib(default=0) + leaves: List[Leaf] = attrib(default=Factory(list)) + comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict)) + bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker)) + inside_brackets: bool = attrib(default=False) + + def append(self, leaf: Leaf, preformatted: bool = False) -> None: + has_value = leaf.value.strip() + if not has_value: + return + + if self.leaves and not preformatted: + # Note: at this point leaf.prefix should be empty except for + # imports, for which we only preserve newlines. 
+ leaf.prefix += whitespace(leaf) + if self.inside_brackets or not preformatted: + self.bracket_tracker.mark(leaf) + self.maybe_remove_trailing_comma(leaf) + if self.maybe_adapt_standalone_comment(leaf): + return + + if not self.append_comment(leaf): + self.leaves.append(leaf) + + @property + def is_comment(self) -> bool: + return bool(self) and self.leaves[0].type == STANDALONE_COMMENT + + @property + def is_decorator(self) -> bool: + return bool(self) and self.leaves[0].type == token.AT + + @property + def is_import(self) -> bool: + return bool(self) and is_import(self.leaves[0]) + + @property + def is_class(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value == 'class' + ) + + @property + def is_def(self) -> bool: + """Also returns True for async defs.""" + try: + first_leaf = self.leaves[0] + except IndexError: + return False + + try: + second_leaf: Optional[Leaf] = self.leaves[1] + except IndexError: + second_leaf = None + return ( + (first_leaf.type == token.NAME and first_leaf.value == 'def') or + ( + first_leaf.type == token.NAME and + first_leaf.value == 'async' and + second_leaf is not None and + second_leaf.type == token.NAME and + second_leaf.value == 'def' + ) + ) + + @property + def is_flow_control(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value in FLOW_CONTROL + ) + + @property + def is_yield(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value == 'yield' + ) + + def maybe_remove_trailing_comma(self, closing: Leaf) -> bool: + if not ( + self.leaves and + self.leaves[-1].type == token.COMMA and + closing.type in CLOSING_BRACKETS + ): + return False + + if closing.type == token.RSQB or closing.type == token.RBRACE: + self.leaves.pop() + return True + + # For parens let's check if it's safe to remove the comma. If the + # trailing one is the only one, we might mistakenly change a tuple + # into a different type by removing the comma. + depth = closing.bracket_depth + 1 # type: ignore + commas = 0 + opening = closing.opening_bracket # type: ignore + for _opening_index, leaf in enumerate(self.leaves): + if leaf is opening: + break + + else: + return False + + for leaf in self.leaves[_opening_index + 1:]: + if leaf is closing: + break + + bracket_depth = leaf.bracket_depth # type: ignore + if bracket_depth == depth and leaf.type == token.COMMA: + commas += 1 + if commas > 1: + self.leaves.pop() + return True + + return False + + def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool: + """Hack a standalone comment to act as a trailing comment for line splitting. + + If this line has brackets and a standalone `comment`, we need to adapt + it to be able to still reformat the line. + + This is not perfect, the line to which the standalone comment gets + appended will appear "too long" when splitting. 
+ """ + if not ( + comment.type == STANDALONE_COMMENT and + self.bracket_tracker.any_open_brackets() + ): + return False + + comment.type = token.COMMENT + comment.prefix = '\n' + ' ' * (self.depth + 1) + return self.append_comment(comment) + + def append_comment(self, comment: Leaf) -> bool: + if comment.type != token.COMMENT: + return False + + try: + after = id(self.last_non_delimiter()) + except LookupError: + comment.type = STANDALONE_COMMENT + comment.prefix = '' + return False + + else: + if after in self.comments: + self.comments[after].value += str(comment) + else: + self.comments[after] = comment + return True + + def last_non_delimiter(self) -> Leaf: + for i in range(len(self.leaves)): + last = self.leaves[-i - 1] + if not is_delimiter(last): + return last + + raise LookupError("No non-delimiters found") + + def __str__(self) -> str: + if not self: + return '\n' + + indent = ' ' * self.depth + leaves = iter(self.leaves) + first = next(leaves) + res = f'{first.prefix}{indent}{first.value}' + for leaf in leaves: + res += str(leaf) + for comment in self.comments.values(): + res += str(comment) + return res + '\n' + + def __bool__(self) -> bool: + return bool(self.leaves or self.comments) + + +@dataclass +class EmptyLineTracker: + """Provides a stateful method that returns the number of potential extra + empty lines needed before and after the currently processed line. + + Note: this tracker works on lines that haven't been split yet. + """ + previous_line: Optional[Line] = attrib(default=None) + previous_after: int = attrib(default=0) + previous_defs: List[int] = attrib(default=Factory(list)) + + def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: + """Returns the number of extra empty lines before and after the `current_line`. + + This is for separating `def`, `async def` and `class` with extra empty lines + (two on module-level), as well as providing an extra empty line after flow + control keywords to make them more prominent. + """ + before, after = self._maybe_empty_lines(current_line) + self.previous_after = after + self.previous_line = current_line + return before, after + + def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: + before = 0 + depth = current_line.depth + while self.previous_defs and self.previous_defs[-1] >= depth: + self.previous_defs.pop() + before = (1 if depth else 2) - self.previous_after + is_decorator = current_line.is_decorator + if is_decorator or current_line.is_def or current_line.is_class: + if not is_decorator: + self.previous_defs.append(depth) + if self.previous_line is None: + # Don't insert empty lines before the first line in the file. + return 0, 0 + + if self.previous_line and self.previous_line.is_decorator: + # Don't insert empty lines between decorators. + return 0, 0 + + newlines = 2 + if current_line.depth: + newlines -= 1 + newlines -= self.previous_after + return newlines, 0 + + if current_line.is_flow_control: + return before, 1 + + if ( + self.previous_line and + self.previous_line.is_import and + not current_line.is_import and + depth == self.previous_line.depth + ): + return (before or 1), 0 + + if ( + self.previous_line and + self.previous_line.is_yield and + (not current_line.is_yield or depth != self.previous_line.depth) + ): + return (before or 1), 0 + + return before, 0 + + +@dataclass +class LineGenerator(Visitor[Line]): + """Generates reformatted Line objects. Empty lines are not emitted. 
+ + Note: destroys the tree it's visiting by mutating prefixes of its leaves + in ways that will no longer stringify to valid Python code on the tree. + """ + current_line: Line = attrib(default=Factory(Line)) + standalone_comments: List[Leaf] = attrib(default=Factory(list)) + + def line(self, indent: int = 0) -> Iterator[Line]: + """Generate a line. + + If the line is empty, only emit if it makes sense. + If the line is too long, split it first and then generate. + + If any lines were generated, set up a new current_line. + """ + if not self.current_line: + self.current_line.depth += indent + return # Line is empty, don't emit. Creating a new one unnecessary. + + complete_line = self.current_line + self.current_line = Line(depth=complete_line.depth + indent) + yield complete_line + + def visit_default(self, node: LN) -> Iterator[Line]: + if isinstance(node, Leaf): + for comment in generate_comments(node): + if self.current_line.bracket_tracker.any_open_brackets(): + # any comment within brackets is subject to splitting + self.current_line.append(comment) + elif comment.type == token.COMMENT: + # regular trailing comment + self.current_line.append(comment) + yield from self.line() + + else: + # regular standalone comment, to be processed later (see + # docstring in `generate_comments()` + self.standalone_comments.append(comment) + normalize_prefix(node) + if node.type not in WHITESPACE: + for comment in self.standalone_comments: + yield from self.line() + + self.current_line.append(comment) + yield from self.line() + + self.standalone_comments = [] + self.current_line.append(node) + yield from super().visit_default(node) + + def visit_suite(self, node: Node) -> Iterator[Line]: + """Body of a statement after a colon.""" + children = iter(node.children) + # Process newline before indenting. It might contain an inline + # comment that should go right after the colon. + newline = next(children) + yield from self.visit(newline) + yield from self.line(+1) + + for child in children: + yield from self.visit(child) + + yield from self.line(-1) + + def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]: + """Visit a statement. + + The relevant Python language keywords for this statement are NAME leaves + within it. 
+ """ + for child in node.children: + if child.type == token.NAME and child.value in keywords: # type: ignore + yield from self.line() + + yield from self.visit(child) + + def visit_simple_stmt(self, node: Node) -> Iterator[Line]: + """A statement without nested statements.""" + is_suite_like = node.parent and node.parent.type in STATEMENT + if is_suite_like: + yield from self.line(+1) + yield from self.visit_default(node) + yield from self.line(-1) + + else: + yield from self.line() + yield from self.visit_default(node) + + def visit_async_stmt(self, node: Node) -> Iterator[Line]: + yield from self.line() + + children = iter(node.children) + for child in children: + yield from self.visit(child) + + if child.type == token.NAME and child.value == 'async': # type: ignore + break + + internal_stmt = next(children) + for child in internal_stmt.children: + yield from self.visit(child) + + def visit_decorators(self, node: Node) -> Iterator[Line]: + for child in node.children: + yield from self.line() + yield from self.visit(child) + + def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]: + yield from self.line() + + def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]: + yield from self.visit_default(leaf) + yield from self.line() + + def __attrs_post_init__(self) -> None: + """You are in a twisty little maze of passages.""" + v = self.visit_stmt + self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'}) + self.visit_while_stmt = partial(v, keywords={'while', 'else'}) + self.visit_for_stmt = partial(v, keywords={'for', 'else'}) + self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'}) + self.visit_except_clause = partial(v, keywords={'except'}) + self.visit_funcdef = partial(v, keywords={'def'}) + self.visit_with_stmt = partial(v, keywords={'with'}) + self.visit_classdef = partial(v, keywords={'class'}) + self.visit_async_funcdef = self.visit_async_stmt + self.visit_decorated = self.visit_decorators + + +BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE} +OPENING_BRACKETS = set(BRACKET.keys()) +CLOSING_BRACKETS = set(BRACKET.values()) +BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS + + +def whitespace(leaf: Leaf) -> str: + """Return whitespace prefix if needed for the given `leaf`.""" + NO = '' + SPACE = ' ' + DOUBLESPACE = ' ' + t = leaf.type + p = leaf.parent + if t == token.COLON: + return NO + + if t == token.COMMA: + return NO + + if t == token.RPAR: + return NO + + if t == token.COMMENT: + return DOUBLESPACE + + if t == STANDALONE_COMMENT: + return NO + + assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}" + if p.type in {syms.parameters, syms.arglist}: + # untyped function signatures or calls + if t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if not prev or prev.type != token.COMMA: + return NO + + if p.type == syms.varargslist: + # lambdas + if t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if prev and prev.type != token.COMMA: + return NO + + elif p.type == syms.typedargslist: + # typed function signatures + prev = leaf.prev_sibling + if not prev: + return NO + + if t == token.EQUAL: + if prev.type != syms.tname: + return NO + + elif prev.type == token.EQUAL: + # A bit hacky: if the equal sign has whitespace, it means we + # previously found it's a typed argument. So, we're using that, too. 
+ return prev.prefix + + elif prev.type != token.COMMA: + return NO + + elif p.type == syms.tname: + # type names + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type != token.COMMA: + return NO + + elif p.type == syms.trailer: + # attributes and calls + if t == token.LPAR or t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if not prev: + if t == token.DOT: + prevp = preceding_leaf(p) + if not prevp or prevp.type != token.NUMBER: + return NO + + elif t == token.LSQB: + return NO + + elif prev.type != token.COMMA: + return NO + + elif p.type == syms.argument: + # single argument + if t == token.EQUAL: + return NO + + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type == token.LPAR: + return NO + + elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR: + return NO + + elif p.type == syms.decorator: + # decorators + return NO + + elif p.type == syms.dotted_name: + prev = leaf.prev_sibling + if prev: + return NO + + prevp = preceding_leaf(p) + if not prevp or prevp.type == token.AT: + return NO + + elif p.type == syms.classdef: + if t == token.LPAR: + return NO + + prev = leaf.prev_sibling + if prev and prev.type == token.LPAR: + return NO + + elif p.type == syms.subscript: + # indexing + if t == token.COLON: + return NO + + prev = leaf.prev_sibling + if not prev or prev.type == token.COLON: + return NO + + elif p.type in { + syms.test, + syms.not_test, + syms.xor_expr, + syms.or_test, + syms.and_test, + syms.arith_expr, + syms.shift_expr, + syms.yield_expr, + syms.term, + syms.power, + syms.comparison, + }: + # various arithmetic and logic expressions + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type in OPENING_BRACKETS: + return NO + + if prevp.type == token.EQUAL: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.argument + }: + return NO + + return SPACE + + elif p.type == syms.atom: + if t in CLOSING_BRACKETS: + return NO + + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp: + return NO + + if prevp.type in OPENING_BRACKETS: + return NO + + if prevp.type == token.EQUAL: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.argument + }: + return NO + + if prevp.type == token.DOUBLESTAR: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.dictsetmaker + }: + return NO + + elif prev.type in OPENING_BRACKETS: + return NO + + elif t == token.DOT: + # dots, but not the first one. 
+ return NO + + elif ( + p.type == syms.listmaker or + p.type == syms.testlist_gexp or + p.type == syms.subscriptlist + ): + # list interior, including unpacking + prev = leaf.prev_sibling + if not prev: + return NO + + elif p.type == syms.dictsetmaker: + # dict and set interior, including unpacking + prev = leaf.prev_sibling + if not prev: + return NO + + if prev.type == token.DOUBLESTAR: + return NO + + elif p.type == syms.factor or p.type == syms.star_expr: + # unary ops + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type in OPENING_BRACKETS: + return NO + + prevp_parent = prevp.parent + assert prevp_parent is not None + if prevp.type == token.COLON and prevp_parent.type in { + syms.subscript, syms.sliceop + }: + return NO + + elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: + return NO + + elif t == token.NAME or t == token.NUMBER: + return NO + + elif p.type == syms.import_from and t == token.NAME: + prev = leaf.prev_sibling + if prev and prev.type == token.DOT: + return NO + + elif p.type == syms.sliceop: + return NO + + return SPACE + + +def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: + """Returns the first leaf that precedes `node`, if any.""" + while node: + res = node.prev_sibling + if res: + if isinstance(res, Leaf): + return res + + try: + return list(res.leaves())[-1] + + except IndexError: + return None + + node = node.parent + return None + + +def is_delimiter(leaf: Leaf) -> int: + """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter. + + Higher numbers are higher priority. + """ + if leaf.type == token.COMMA: + return COMMA_PRIORITY + + if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS: + return LOGIC_PRIORITY + + if leaf.type in COMPARATORS: + return COMPARATOR_PRIORITY + + if ( + leaf.type in MATH_OPERATORS and + leaf.parent and + leaf.parent.type not in {syms.factor, syms.star_expr} + ): + return MATH_PRIORITY + + return 0 + + +def generate_comments(leaf: Leaf) -> Iterator[Leaf]: + """Cleans the prefix of the `leaf` and generates comments from it, if any. + + Comments in lib2to3 are shoved into the whitespace prefix. This happens + in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation + move because it does away with modifying the grammar to include all the + possible places in which comments can be placed. + + The sad consequence for us though is that comments don't "belong" anywhere. + This is why this function generates simple parentless Leaf objects for + comments. We simply don't know what the correct parent should be. + + No matter though, we can live without this. We really only need to + differentiate between inline and standalone comments. The latter don't + share the line with any code. + + Inline comments are emitted as regular token.COMMENT leaves. Standalone + are emitted with a fake STANDALONE_COMMENT token identifier. 
+ """ + if not leaf.prefix: + return + + if '#' not in leaf.prefix: + return + + before_comment, content = leaf.prefix.split('#', 1) + content = content.rstrip() + if content and (content[0] not in {' ', '!', '#'}): + content = ' ' + content + is_standalone_comment = ( + '\n' in before_comment or '\n' in content or leaf.type == token.DEDENT + ) + if not is_standalone_comment: + # simple trailing comment + yield Leaf(token.COMMENT, value='#' + content) + return + + for line in ('#' + content).split('\n'): + line = line.lstrip() + if not line.startswith('#'): + continue + + yield Leaf(STANDALONE_COMMENT, line) + + +def split_line(line: Line, line_length: int, inner: bool = False) -> Iterator[Line]: + """Splits a `line` into potentially many lines. + + They should fit in the allotted `line_length` but might not be able to. + `inner` signifies that there were a pair of brackets somewhere around the + current `line`, possibly transitively. This means we can fallback to splitting + by delimiters if the LHS/RHS don't yield any results. + """ + line_str = str(line).strip('\n') + if len(line_str) <= line_length and '\n' not in line_str: + yield line + return + + if line.is_def: + split_funcs = [left_hand_split] + elif line.inside_brackets: + split_funcs = [delimiter_split] + if '\n' not in line_str: + # Only attempt RHS if we don't have multiline strings or comments + # on this line. + split_funcs.append(right_hand_split) + else: + split_funcs = [right_hand_split] + for split_func in split_funcs: + # We are accumulating lines in `result` because we might want to abort + # mission and return the original line in the end, or attempt a different + # split altogether. + result: List[Line] = [] + try: + for l in split_func(line): + if str(l).strip('\n') == line_str: + raise CannotSplit("Split function returned an unchanged result") + + result.extend(split_line(l, line_length=line_length, inner=True)) + except CannotSplit as cs: + continue + + else: + yield from result + break + + else: + yield line + + +def left_hand_split(line: Line) -> Iterator[Line]: + """Split line into many lines, starting with the first matching bracket pair. + + Note: this usually looks weird, only use this for function definitions. + Prefer RHS otherwise. + """ + head = Line(depth=line.depth) + body = Line(depth=line.depth + 1, inside_brackets=True) + tail = Line(depth=line.depth) + tail_leaves: List[Leaf] = [] + body_leaves: List[Leaf] = [] + head_leaves: List[Leaf] = [] + current_leaves = head_leaves + matching_bracket = None + for leaf in line.leaves: + if ( + current_leaves is body_leaves and + leaf.type in CLOSING_BRACKETS and + leaf.opening_bracket is matching_bracket # type: ignore + ): + current_leaves = tail_leaves + current_leaves.append(leaf) + if current_leaves is head_leaves: + if leaf.type in OPENING_BRACKETS: + matching_bracket = leaf + current_leaves = body_leaves + # Since body is a new indent level, remove spurious leading whitespace. + if body_leaves: + normalize_prefix(body_leaves[0]) + # Build the new lines. + for result, leaves in ( + (head, head_leaves), (body, body_leaves), (tail, tail_leaves) + ): + for leaf in leaves: + result.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + result.append(comment_after, preformatted=True) + # Check if the split succeeded. 
+ tail_len = len(str(tail)) + if not body: + if tail_len == 0: + raise CannotSplit("Splitting brackets produced the same line") + + elif tail_len < 3: + raise CannotSplit( + f"Splitting brackets on an empty body to save " + f"{tail_len} characters is not worth it" + ) + + for result in (head, body, tail): + if result: + yield result + + +def right_hand_split(line: Line) -> Iterator[Line]: + """Split line into many lines, starting with the last matching bracket pair.""" + head = Line(depth=line.depth) + body = Line(depth=line.depth + 1, inside_brackets=True) + tail = Line(depth=line.depth) + tail_leaves: List[Leaf] = [] + body_leaves: List[Leaf] = [] + head_leaves: List[Leaf] = [] + current_leaves = tail_leaves + opening_bracket = None + for leaf in reversed(line.leaves): + if current_leaves is body_leaves: + if leaf is opening_bracket: + current_leaves = head_leaves + current_leaves.append(leaf) + if current_leaves is tail_leaves: + if leaf.type in CLOSING_BRACKETS: + opening_bracket = leaf.opening_bracket # type: ignore + current_leaves = body_leaves + tail_leaves.reverse() + body_leaves.reverse() + head_leaves.reverse() + # Since body is a new indent level, remove spurious leading whitespace. + if body_leaves: + normalize_prefix(body_leaves[0]) + # Build the new lines. + for result, leaves in ( + (head, head_leaves), (body, body_leaves), (tail, tail_leaves) + ): + for leaf in leaves: + result.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + result.append(comment_after, preformatted=True) + # Check if the split succeeded. + tail_len = len(str(tail).strip('\n')) + if not body: + if tail_len == 0: + raise CannotSplit("Splitting brackets produced the same line") + + elif tail_len < 3: + raise CannotSplit( + f"Splitting brackets on an empty body to save " + f"{tail_len} characters is not worth it" + ) + + for result in (head, body, tail): + if result: + yield result + + +def delimiter_split(line: Line) -> Iterator[Line]: + """Split according to delimiters of the highest priority. + + This kind of split doesn't increase indentation. 
+ """ + try: + last_leaf = line.leaves[-1] + except IndexError: + raise CannotSplit("Line empty") + + delimiters = line.bracket_tracker.delimiters + try: + delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)}) + except ValueError: + raise CannotSplit("No delimiters found") + + current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) + for leaf in line.leaves: + current_line.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + current_line.append(comment_after, preformatted=True) + leaf_priority = delimiters.get(id(leaf)) + if leaf_priority == delimiter_priority: + normalize_prefix(current_line.leaves[0]) + yield current_line + + current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) + if current_line: + if ( + delimiter_priority == COMMA_PRIORITY and + current_line.leaves[-1].type != token.COMMA + ): + current_line.append(Leaf(token.COMMA, ',')) + normalize_prefix(current_line.leaves[0]) + yield current_line + + +def is_import(leaf: Leaf) -> bool: + """Returns True if the given leaf starts an import statement.""" + p = leaf.parent + t = leaf.type + v = leaf.value + return bool( + t == token.NAME and + ( + (v == 'import' and p and p.type == syms.import_name) or + (v == 'from' and p and p.type == syms.import_from) + ) + ) + + +def normalize_prefix(leaf: Leaf) -> None: + """Leave existing extra newlines for imports. Remove everything else.""" + if is_import(leaf): + spl = leaf.prefix.split('#', 1) + nl_count = spl[0].count('\n') + if len(spl) > 1: + # Skip one newline since it was for a standalone comment. + nl_count -= 1 + leaf.prefix = '\n' * nl_count + return + + leaf.prefix = '' + + +PYTHON_EXTENSIONS = {'.py'} +BLACKLISTED_DIRECTORIES = { + 'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv' +} + + +def gen_python_files_in_dir(path: Path) -> Iterator[Path]: + for child in path.iterdir(): + if child.is_dir(): + if child.name in BLACKLISTED_DIRECTORIES: + continue + + yield from gen_python_files_in_dir(child) + + elif child.suffix in PYTHON_EXTENSIONS: + yield child + + +@dataclass +class Report: + """Provides a reformatting counter.""" + change_count: int = attrib(default=0) + same_count: int = attrib(default=0) + failure_count: int = attrib(default=0) + + def done(self, src: Path, changed: bool) -> None: + """Increment the counter for successful reformatting. Write out a message.""" + if changed: + out(f'reformatted {src}') + self.change_count += 1 + else: + out(f'{src} already well formatted, good job.', bold=False) + self.same_count += 1 + + def failed(self, src: Path, message: str) -> None: + """Increment the counter for failed reformatting. Write out a message.""" + err(f'error: cannot format {src}: {message}') + self.failure_count += 1 + + @property + def return_code(self) -> int: + """Which return code should the app use considering the current state.""" + return 1 if self.failure_count else 0 + + def __str__(self) -> str: + """A color report of the current state. + + Use `click.unstyle` to remove colors. 
+ """ + report = [] + if self.change_count: + s = 's' if self.change_count > 1 else '' + report.append( + click.style(f'{self.change_count} file{s} reformatted', bold=True) + ) + if self.same_count: + s = 's' if self.same_count > 1 else '' + report.append(f'{self.same_count} file{s} left unchanged') + if self.failure_count: + s = 's' if self.failure_count > 1 else '' + report.append( + click.style( + f'{self.failure_count} file{s} failed to reformat', fg='red' + ) + ) + return ', '.join(report) + '.' + + +def assert_equivalent(src: str, dst: str) -> None: + """Raises AssertionError if `src` and `dst` aren't equivalent. + + This is a temporary sanity check until Black becomes stable. + """ + + import ast + import traceback + + def _v(node: ast.AST, depth: int = 0) -> Iterator[str]: + """Simple visitor generating strings to compare ASTs by content.""" + yield f"{' ' * depth}{node.__class__.__name__}(" + + for field in sorted(node._fields): + try: + value = getattr(node, field) + except AttributeError: + continue + + yield f"{' ' * (depth+1)}{field}=" + + if isinstance(value, list): + for item in value: + if isinstance(item, ast.AST): + yield from _v(item, depth + 2) + + elif isinstance(value, ast.AST): + yield from _v(value, depth + 2) + + else: + yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}" + + yield f"{' ' * depth}) # /{node.__class__.__name__}" + + try: + src_ast = ast.parse(src) + except Exception as exc: + raise AssertionError(f"cannot parse source: {exc}") from None + + try: + dst_ast = ast.parse(dst) + except Exception as exc: + log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst) + raise AssertionError( + f"INTERNAL ERROR: Black produced invalid code: {exc}. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This invalid output might be helpful: {log}", + ) from None + + src_ast_str = '\n'.join(_v(src_ast)) + dst_ast_str = '\n'.join(_v(dst_ast)) + if src_ast_str != dst_ast_str: + log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst')) + raise AssertionError( + f"INTERNAL ERROR: Black produced code that is not equivalent to " + f"the source. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This diff might be helpful: {log}", + ) from None + + +def assert_stable(src: str, dst: str, line_length: int) -> None: + """Raises AssertionError if `dst` reformats differently the second time. + + This is a temporary sanity check until Black becomes stable. + """ + newdst = format_str(dst, line_length=line_length) + if dst != newdst: + log = dump_to_file( + diff(src, dst, 'source', 'first pass'), + diff(dst, newdst, 'first pass', 'second pass'), + ) + raise AssertionError( + f"INTERNAL ERROR: Black produced different code on the second pass " + f"of the formatter. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This diff might be helpful: {log}", + ) from None + + +def dump_to_file(*output: str) -> str: + """Dumps `output` to a temporary file. 
Returns path to the file.""" + import tempfile + + with tempfile.NamedTemporaryFile( + mode='w', prefix='blk_', suffix='.log', delete=False + ) as f: + for lines in output: + f.write(lines) + f.write('\n') + return f.name + + +def diff(a: str, b: str, a_name: str, b_name: str) -> str: + """Returns a udiff string between strings `a` and `b`.""" + import difflib + + a_lines = [line + '\n' for line in a.split('\n')] + b_lines = [line + '\n' for line in b.split('\n')] + return ''.join( + difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5) + ) + + +if __name__ == '__main__': + main() diff --git a/blib2to3/Grammar.txt b/blib2to3/Grammar.txt new file mode 100644 index 0000000..b19b4a2 --- /dev/null +++ b/blib2to3/Grammar.txt @@ -0,0 +1,173 @@ +# Grammar for 2to3. This grammar supports Python 2.x and 3.x. + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# file_input is a module or sequence of commands read from an input file; +# single_input is a single interactive statement; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +file_input: (NEWLINE | stmt)* ENDMARKER +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: ((tfpdef ['=' test] ',')* + ('*' [tname] (',' tname ['=' test])* [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) +tname: NAME [':' test] +tfpdef: tname | '(' tfplist ')' +tfplist: tfpdef (',' tfpdef)* [','] +varargslist: ((vfpdef ['=' test] ',')* + ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]] | '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) +vname: NAME +vfpdef: vname | '(' vfplist ')' +vfplist: vfpdef (',' vfpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: 
dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +with_var: 'as' expr +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [(',' | 'as') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: ['await'] atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+ | '.' '.' '.') +listmaker: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) +testlist_gexp: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
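The rules above are the statement and expression grammar that blib2to3 compiles into parse tables and that black.py parses source files against. A hedged sketch of exercising it directly, assuming the blib2to3 package from this commit is importable and its grammar pickle can be loaded or regenerated:

    from blib2to3 import pygram, pytree
    from blib2to3.pgen2 import driver

    drv = driver.Driver(pygram.python_grammar, convert=pytree.convert)
    tree = drv.parse_string("x = [1, 2, 3]\n")
    print(pytree.type_repr(tree.type))  # file_input, the grammar's start symbol
    print(str(tree), end="")            # round-trips the original source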
+argument: ( test [comp_for] | + test '=' test | + '**' expr | + star_expr ) + +comp_iter: comp_for | comp_if +comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +# As noted above, testlist_safe extends the syntax allowed in list +# comprehensions and generators. We can't use it indiscriminately in all +# derivations using a comp_for-like pattern because the testlist_safe derivation +# contains comma which clashes with trailing comma in arglist. +# +# This was an issue because the parser would not follow the correct derivation +# when parsing syntactically valid Python code. Since testlist_safe was created +# specifically to handle list comprehensions and generator expressions enclosed +# with parentheses, it's safe to only use it in those. That avoids the issue; we +# can parse code like set(x for x in [],). +# +# The syntax supported by this set of rules is not a valid Python 3 syntax, +# hence the prefix "old". +# +# See https://bugs.python.org/issue27494 +old_comp_iter: old_comp_for | old_comp_if +old_comp_for: ['async'] 'for' exprlist 'in' testlist_safe [old_comp_iter] +old_comp_if: 'if' old_test [old_comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/blib2to3/Grammar3.6.4.final.0.pickle b/blib2to3/Grammar3.6.4.final.0.pickle new file mode 100644 index 0000000000000000000000000000000000000000..da228142c52b22d4e8d48b206b6741e972af18fd GIT binary patch literal 33148 zcmd6v1$Y%#*ZniNySo$I-K{`}i%XCYLI{vcNXR6?YSdCny@eL2E2Zx4?(XjH?*0GP z-jhJTx8;4l_xsz!6ZX1m?X%B5GIP(&oim%tWo6~(=4NLV<>coTR+|_jnBy{ zs#cKFWoAK;npPntC1u(2%))|lE5(#gO{4_r1r<`$3o53DK^T-2R0_+5DJ2D!!}2m! 
zveOHyhA9%NWoBEM>gI@75LV!?B7c?mtIS^&{;Kjhf2Q zzxw<&;IAQnjreQKUlabC^4E;N=KQtbuO)x2_-oBy8~)nz*N(sT{B_{3BY&Ov>&%}r z>kdS85Wotk6RaoC@d-Hny5g~ZUpTf_EFFt`1VW< z`zk~)&%8I0`lPzt*Ms+q8`Iw#^IwO3?;g|e{X?U|el|P<5*5i|AUO<56;)C&IP9dT_@V_I^{SuYPb0T?8 z3I{38$)vD_qDc)s?xPPYZd$9k0pr<7&Q_pnjnW2NGq+llX%<=}!2F}s& z(9XHu5%Xwnnh*4R)CI6~$-=nhixh%^PES-J=wgCq=qM~HSb}e6I9UC*6qx0~mc_-g zm z50ok9>_AJ_a)Kb|bo=>eeu z|LeJFXQFzLUT`{Hb4F@7P6KmhI9}$gaDvR);eImbgcD`X4JXN*7fzNrKipsDf-sc1 zFq|TDQJ9d~6;748IGiT4o0eQcqkxyj*Sd|y&Q=>6N9IvQd=vG8^r(^wrAL)9 zd;t)!3Mex@swzIr;EWqplctV{s)PKz8`UsfE0X`#7kN~Za*6s>TuTAcquLf{iK5p5 zhM7Wjg_a7{6B;5^UudvU1EDOThC<7P8VO|!H5M8w)C5FdH5Fm$Hv`3e)f|{=izm*) zwouX{NsA*!cJDLR3XkEkn%hIA95`5biz#nto>$s&+ydIA%T zsHPWyYI?^~Nz?~KHGM^_RDga?^vC%5AsQg997kp}5JYW*Ow)-X8Z1rcifD*yLq+II zFd7CT@^BGWdV~`rMOficBD_mRi}YN_027VLbu55f$4Sajmy8Dyae{bf_LFAaI}sGm zzeyrJ)?{F!39AoDT#M`5+O!L>vr?hw2cZfAv@7X#J-X(7#<5N1KR~XvR1n zs(|UyVHge&X3pUtQawU^nAC6|Da~eK35d*&646-$vl)c*(e44UMTF0)W86!#>R4c+ zIkg=Jptj>Btx|0#fCzA+ct)0U?n%fM7u$B<0T^Oc(@c42fqyH1N1l8Wi2+kuHTr=o) zTXKCGK(5b7%2Te-f(Y=O_%L77>v?I`e=mUI>Gh)N|NZ5`OO#8rqvDqpAU%2o6RuIk zuY#!fHSt`N(o3&PD^TyeAr$GX^`?*|^A?Dvzl}*H0_VJUq>T`I*R}UV*s1k>5Wzpd zq%v~}`JuECLLY$$^s#AH@h8%>PK`d5=Dqis>1y$Rcur#4eol-;ds_d6f~7}aT1;D( zeFfxuhmQCfL_Xh`rn?Byx6-W9-?{d^X;$tBX;$t>_rm`td3n}9o9DmFx+AUX#2@F2 zJvV)#UnraCK(@asNP6^}#o~Z1iGBy_7F9{~hY)2;qCbTSm2XNpYe9esi$s(I(f#E` z*d(tY!ZWN0-1n*A@3X5!xkN{@tE>R&Q5B0*tW2r`!?i-ygw_dF7g{e=Lui9gO%Od@ zON4cAZBU$69bl^6Wz!wD(z|6HY;j!|Ezyap>&YoSs*lmylBFw%hBg#o3z9}oG!|h8 zdJ`v_ipbTBZstUD5j{2MS~$^C#0(wNt(<6$Newq@_0UF|uAZW{AezulL{EKTw+E3y z2N5&0Cg|uyClRcuN}|ppyzf&*k~n9CF2F=*+SC<5o4QG2&CFsHM8qBMPA!(+@;~{YBUrY5<6E14ZbfG#cc@U=dbih!aCa=q^GuOoV4V zTqJ3c@d#idm5fIM$as{D?t7L*qvgdN&yr{ii0a3Ru;j;qsD3;~XH#4zNV8eFA1FR< zCYnw#h2hAZ#OFj8Dw`~Ur6|U(OrsD)z$qf^`zGPUR1rPZJ=2_+E@FmyXND6qF{$A} z%3+qYjT-&gLUwMS10wagB6@19=YgmxO@#H_d?yx&u#Q?N!aHh_NUnA0sB~bWE4eQQ zkb8zC4s}+cAR=ap_f^nRX?84RfvA3&2wMeZgK%E%9<*zP2(N9Wd#Sb@V4@qfB>~j7 zO47lqEf++9)#B-TW>TIst1TZyZEH+3?*iBGillixg(7`bQv^(Or=DT}^{kb|oo|+* zAnI8!o)r=iHb`5kvDgTro&)x12TE(IVL7PG>tOdfq)gfbBIKbMovRq9!=zQzupBOQ zh|m!rf*&5b0-hxyJvtks9~w%cb3mkcu6QOcIi4r&aCQIrAZoZkge`6^ z1mS#<2=m?L#Kj`)FxV}^+jEIX>YzQB0u#Nd{xSg7UoPX{gICC_mom8$L~U1@W)tpe zX-6oNYlN(k*Sgnr=G9*l;d*J7-VN?W8*X&3o17rP%^)(r1ryKaTU{f-ZJ>C@-fsH8 z`vTs_r3`(22QdLFzY-T(l!h7;T z;J&|fiT?M$qC^i-Hqn=C9#)X_=n;#>ZI+VgQ6S$ZRQ?!~ zr$y+FVf2g>&x$C~A@-aT&tp=qAk7wLFM^2wk`qWTgW|@$0xY|eNB6zT=R`je zeoaDp^g71R-EV+M>P->4w-~+U#M>BENt*9Sv+jErM3wJ3f%HBouJQwsezyG(nCMS! 
z9|5TCV;SZg-SdgOHVb_UBGzXjY!ZGBier5NEK6TOX~LIqi2(%tN@9BSHO7y&Z$LER zTk(DbeJ9P1w(mjI@`H%E>f#?kIR7NVj<%nj_(g;rZNG}}Ir5uGKiYl=CI(Xd9{{TV zQzkrGElDYl*KDBxL~Z3nSewd&;#d`c`<`$6J_0IIJ~4;{Dk%_aK1{epwN?Sqq^jb> zW9)XOG)tg5C_W5pn6AU{pDsB6dBWGEY+^8#*HRE}X=3yfCwDYKR9;s+=W`}dJ!!{k z;?xJx9Sua-{AmcnxseF-Z7jn3x`{|aa%O7^Obns6W&mnyE{R`lSr~!{&{Bl8uayW7 z&^q=aKpS9UC;{362+&SOzZtVQl-F^}vjd3iI+hWgKm_lM(d{4_kSgtXId>7VHgpA1 ztec1;&8Y4miuG_0h@K)mr(QrG^eXgw=?^$0VkefrUTgHmH!+N)`=}7NGBNs{np>G5 zYUnRMJVEIUkY+dL1`1hM3<8nZV3){a2#8`sG4WONFllx*I2?rYh*;z7G14{4jdE?Y zX;#S?*Ek`Kb#0u8URs!r2T{ocj4EOIy`MB&i%kSk$t2Uvd$MbI?JvzcDHKUV8MrCH z#Bll`0iX}2O5)0c6(opyri-U$`e24MYxztN^~^HOZfeYy)<~l>$Gz~M>t6Fj^wP;S z4MfQKPS6DloLGoaTPVFqnw@RaLDaF>37j)P@hC45NtQGq6POr51C|14K$fI7YQQoO zwPlOvvWJA0OS1;7015a$TmZpWOFB`(^FRd87ayLaQ@|Q& zCkPb?Sv3);bQNiy`#Kr?{Uv7+@hmBKm@DXbSCo}%_`kXBB|(MAyM zJHRw!4wPo)4sz{a5jHgs0mW_K1oXq_pKdttyL8#JAi7HB=C(Zp!Xnb1|6rx26Zk(q z`_TdI?^}8(btgvC(!-QQdUUwuVz>2<0ES!D#3O~asre-!ns}55>+H=S`r~L3R%(j~ zpYg{4Q^o3p_b)e7xaD7Vs!;p%^Nmg!{2Kay?|GN#SaM8^q5a1xSr*_JzAJ{d$eoZ&Y`VM)WCbUy%yU^)E zJA}><+9`CV(CI>F37sKyw$PbE=Lnr8bS{XD&J$rNo)3yMx&Zk9YJOcv-HCB@!9_}f z8^{<<1@0e%=-=HU>|W_5PFyO&zJo3k;Zx~ykrbm-uK*^-6Y)v_5wDVAo)YqEdD%R@ z21IvUE5f?tI#3+zdf?x8$NwKw@&@utOrU``D$(@lCd=36et+UT< zu5miQUC3H}hkM;=UKZgl*O;<*yLOKg1h^N(fZT^k4K3vTt`YJ9P<&2((DZ$sQ}?mL z(}BS%_aR~=_M?dpD;QT6nD878;iDjucuc(CI(S@~wdM&BZGO@;^M1-TTJy9tpDWLp z{y$6bSz;t6lHhX+mL5HCG3^BR0+2~YoiBpO=Oq#6Y6xBik^CznYzDvT#A_nT>znp< zC*HuMhUY2Bn`PQtptuQdoBmH{&e8?e|G964cc?!xiRQnnM7R{func5;-Urdd55$M( z>x=h8X%&S&61qU>W1$O$J`v(UNUns0Y#cuW#SQ%&=*Qwet=9kkYwHWjB_@;LmkPku zlEt}58Ga4qI)$$N2E_P$E5a)N4n*?bV^X=^V#I!M4d)+0IR7MKwub6w5YE4dup4&2 zI`NwbyX)|~2%mF*0QcRFeH;`V5PwoWu|Mre;XzZ{6JWfOav;(uFWwJ`3ev1S6+z@& z$uvu;vTL-biZoBIs_FmX0YP%rD4z&PuDSxHM>Q~>Tul(k)e=u~jB;&hmRucBoIqXE z|MLXuQ9dz+1nMggms%FjehX;`^jsQ&=#0ju**%~p((E2kQz6T$nR_)iFS`fSLYmzJ zYUy5dUMu%%?F93r4Ty%e#l)k~&NU`Vdr;ia4yLQ2d{_PB1?aw~LPugG5;V4xf^ngS z;i8_IoeCm}F5-PEbd_f9>IS0q-9^mR%ckOe8^mJ>K1X0Z@^BSgEn=5U)E@W4` zmM5aKqvvYxWp{d z-zbr5JB%JGil$IVm2Li27nGb zQ^ptkXUVIdj-IoHcB_xh0g=MF=4EH8^Q7%kADu5`lkWoey3i#C`XUgC?!xG>pt~-X z)?59wTgXmxmw*U{GeY;FseIT6g7hyMl9stGD`$3>jZ+$oErd&T?i^C`8Fk%nOfwj2#S%V%0 zdV?Mlx?CgnIEX5qFfZ$_C#7AX5qnC=M(k<#dd4Ms>sb(4KZl94eqNf5*b727VlRRS z{*rlFPrfYe3XRw+?!}0`>Rzvzm!e>&)^PZ43 z@O>d0u@69G^PvbEv5!DB;$uuaVxLH}5&KlgM(i^X#Xfg}488zS>`P4C_OD!H#J&dM z{EY}3v2Q`~hUSsp5Mkd?e~R#PVM@i)s{I=(;B#UgRhE;$auh>%aUQGyB5XzR{tZ=0n$3dBAabZ; znt4}sjr^)f^UA85u5WFxtOlPGX;fBI0?SK`BvzLoDyt*HYN#v1qtuJNI0@AUCgu~M z0e}DvCACw4Mj!$-7Gbwnn>f)_g#Cu!%!%eA>^J-tPPD}6tjZi~CC$EHT7zg{8xeLI zZwn%RI}!HFeS0T5h_Ff3QG{pQNhE2Jac5v+0U4(P$heCnTjz8I5wV+i|K+~BG;4hi z5Y_h-F+gM73xsoT_kidl!fWdb^x5=JmsGc82NuS|4|0PIrpWwt8%`i=A<2QZBKWif1VR4<%b1?&WeHALzHu=Yr^`c_M74 zrh!Omz6fj80ukP-g}{A}t?m@>dwdrWBauN?=?cbg02pl)NaKggl+41YK%bInmF!btc2pEaVrr;2 z(KqB}MTH`2IQFi94s8~d8BeHTc3k##QBI;N=S^0TIIeEp|(K>N; zEsbT_%cJ%BSTFvV8*R|X`e~eDQ?he&mPZ@)`JnhSc^#n78!gZ5^;y|#ilPI>tagv} z*;&y+`rII`G7*;Nile;fI5BOkf}&_cbi6*cwG!)A=HzBaCx~cgb*#%NS{a?F&+W}&Lr!+?vgjlc z9g5?IEX~d>j82AP9nAl!VT!pR?7ZC)21Z4#6M`x@X(Ea&~zz@F7pqX zEv%dUgC+_~XaAsiz%w}hK@)(7v;BjHpH;Mf&}j2Gj(^Zdvzqn~8f6yL{y_uGlR5rD z!^8L zfV)KX5p}nyAgNOc&)eujb=Z5QPnLe4s78SMMO75_fT$XP2L(C+9+Ear+QUL^g&q;A zCG@D!B%#NIItx86)Jf-2m?a*g6e*UudGx2SV+IK9pOK)VYQ`$f5^{{#dAv&?iCzg+3LkE%X^EYzg>W zpaI|ufmVPo1zG^U5@-ncTA(%H8-XCH6B&LXeiLdT^t(_ap+7)WU4Y?cujpo?Q}|xuwl63E;c%ciloOpU zy1eLmfC{3fimE77UZ|2#b)m{aLxrjcbr7m5R70qmPy?arLc@h>2#pY`DYQhW7Kj`U zupDZO9xS?!=s|$Gu5UEGp7gHL>kD-gY9Q2HsG(4Ip+-VIgc=LY5o#jTSE#8_J)veo z38Cgf{e@Zx^%H6-G(f18P&uL2Akx*B3O8s)&lcTQsG3kap_xMM<=P(5K~yDC9YwVR 
zbP_dIRA*5&0jZ+ui|QgYN~o()Wub0DO@z7&wGrwe)J~|UP)DI&La9Q%g?bA05gH=Y zS7?|}KcO)~{e>n74FH7|0Rsgp0R{;)0Sp$X3m78M8!!~W6Plo5LNkPh3r!aqAv8^B zq|kmsqd;`s!Pa%7MYk3`Mrf|kSfMnbaYFNi#tSVFnjjPi?FS0m0w#)DENT)cJFO*RP49 zDI&}5A&asLi=u?c@c&x}|kcvUk*R}RvuD{7(EiW!APtBazU;;RJKBwI0NEHBQ>(i(4;l&WzcttGv{ zY|+&MTZ3d4u@IXhqI#Tf@rpR#xngU0X3O1SddjxiR83l~@VybQI9IR9(0Xsa z$lBiEtbE9v<uL742Cjp;?etOG17TwPj=$X5?kA=DjVFng?mM|3xZ{mP>2l zBODjAGAqQl^p<7kW#uo+$y<>@a=Fn;DXl!)?6sM>@l?$b-8wE{%k>O~s+c*Gl+wl< zoz1e{8?s7lTb~g&t#d`RD|K5Fx!Y>7?Y$G^W>Yv%WQV}ktIIR9is-|9Q60VX^4$D& zUV4q#PF|Yn;beix&OY*`(>4;9>fPWM2JTxZzKi?jEO)L*TvzXDzep(S>0(LUyr=&v zae*YQyH^owc1f~Me2>5IHUjI#_xuZQb8dt9UY?R&b#N?f6xZ8ZuO9L#ae&x9-g>S) zGVDZgpvb;Hf%0>gW!Mp+L+2m~{k*<1fy3uu@%?>Z_wYJ|4v{ioPl?iOHi;kT^{Rx` zd#K1ko;D+4++pGd`&8m!W3CdmFgjfH5O0kxioDy85IZzT;{=_uCVw6EA1P*7TwE7R z?pPvvxYKsY6aAnT&B0u#VEqau@7cyR)ldb!=h#&3!SI9w8 zcCJ1~!WeI0VQx-VHYb#0#f^<)ujT^PV;m=XoHwwrC^PaJj~6@Mj|i*k1Q8Qrx8kLR zSy2ub!6%B{&&Q*%l(oa+Bqa#@7j#JyPCYx*Y6^sro zKU>NiAK5mlzewyNZ-V)fShP!Ix|8cUbnI+-vBbrq`)v<(dA$^8uLGQsIGK#l??aJxKnjCkf#}0Y+fT& zUg%m7x4*6v!HSCOR!}C)4OpVwD50x_n5CHE|dGS8W2-p{*Mj>}PL zh%ZXy#~$jNxld?;a%viL{>zw(IZ84gDI6KQ&9RrINlweT~LTJgEu z*yk&O2eP;T1yTGf5mwRHPJDw=KatC~(k%DyKm_?-gmwQ9AWHw}9(4atB5?T`OFh3x zsHu8>mB8;gBJAZNzdP}V2n+tF2#Tg~&yb=4#*3B%QM9~xdp%WDL7GLWD5RHEMU_C5 zt}MdhRRJkpRV?wU$@owC)#YP9<<}6hpYm%8>0MP(Eg`+GDyl7{H&#V;g!Iy?sIHLS zTNTv<5xzdg-(D3pkfs+{MGb}Y4y&koJH(5nZK(wYQ#@}ZZHIt@ST1Cx4glr)~ zFSd$Wf^cqy@z+~Lt)=M=S5X@f&TU25CcbtcoZDmkZC6nT*GRS_21D)X5cOn;usv=|oX8Ymd)$^fktIU!#)_6Xk&W>;WJSxRah1>h zS`hJ9I>9{10V$~@mZVlmV1F1-!$@FDgb3Tmmghu1#^00`t&wKOYXOKLkrVW4AxL41 zu!Jp^u~%h9YvnUfHLL^SvR;G@;|7olZNyUO02zB5R&=0z=Bv;_LJNcr7D^L3M2PoQ z@gR)QBB4Wt(uEEank96&kY1e?9U-K*XGKSXsIo+aC2L;E{gu{zj~mq*)CogLFPWMPhoi)qIwy)HeBK3Y{vnROmFJ zAwt`Q1`F*FvbS076j~dQIasPbR|2~{xc4t1Jvv{6 zd0rp_&kM15UL;|;JasvAM>HYccU5dYv?TQ`Ge!g4|%59c4F4vqSVI*KQVJzwX`wBJ!;wtn_V8 z+%Ccj-ywo{cVbEEE(tkG>TU@vc*Hw%uQcnK`#>7L`z7*H6$$lK=s^&L9uhHAEqU09 zM?_dBJSu{sk6|hLxQw2S;V!&7Nf6s4(I2j*(;x( z7qUuT08#A4Jzg(KvsXmDEM)$#2-%Y`uY&M@4Wp=h@4a5;^@e+Ka(vUh-ZC$HMbz8U z>=jY(xEJ}n>t63U!Rh6F5Gj8k!d4w0I`I)Eenr&BuJMiei4b3DEPg=b^qF~COFwsw zFX1m-`%;9xBI+v;b$l(tUJ><;6W?Ou7JVnpo+bMpMC2bt*ejxb1Zhlu!ZId5OIWEP z`9%URDiQCGL;NPq=D_bD%KRb1US#km206?- zmS$DA0a2!{X|^(LC(T}y)ZV@D@8Dh?Mc8cV1R`W-_h1^PiXc!IER}ba&_I=UlfYBS zB5d*A!-<}l_?dS_CW44F2@}7xXtFeWY0>_!g(B==p8_Iy0u#TqXsR@OY0)$gfu@^g70-}n z>xP-q5NAo`ITG`+Z@M}1u_vbI%7^DjOta^!brac>obTEK)2!S=X;yBLd(qHzc@bi< z+*tI;=-GDODk7iCLYYE5>B771gbLL`Ss;on6Hz208$>ggi?9)2Ap)0`SR&`hgvE-I zl+RkBRYL28a)s6ltrprKlm{X}z6k60H6Vp7z!EZ&&`_R*5?K0(uOI zL_KF?bO6(kbEMhWp9|8gI!_{N90|6>y#Pd^3q{!Z>LMq0iLgh=E_PzK2z!L=5+^Rj z#IM=8OqxBcce#+AGOhqo;gurnVZEzB)O587>xOHbxK@O<`Z^JmxgJZBH%PFv_>B_S znkU}BhHjQ-Q}7lLWo{K=UqiQnaK7C=Sl-?t0+&0nguP3Gy$a`U39N9$(_tJl_e!&{ z_kk#LziHfXA*csj)w{)a7#x#GldGjlxA<*_zFasuSM9G)HfiUzZGG=-#PKU2%C35 zh@i}mSnByn#?P@o%cqyB`~`%|ucq0+|0c~gFa0iL8UNv4f0~zVUP|F7P|qU}vd$<6 zB35}Pm~It7q*xIX&#FqU@r_j(q?uDiB8waI;SCu)H6R~*;Y@Y;uO-`h=8?3 zSe3O!;8F)ml)5s05nWF{b`f14giC`mq9KSVjYQZ*bYmx)h_H+3rcN{yVK0hl?nDbr z{Gyna((KEy6^Qt)onQgb2Bf6gVo9o<1UtjDm%y9TMA$`iM<+UA;+Mg6mS#;$1rel+ z6ZB?RkivGu61KaHzxAbud^W4^dV+B2CBjCoH%Nv0U@6pBg3Yyl5_qOoyw9Hj(rm5` z1W{&?2z#r`U=YqjMA%##>clV+HrIxWpv(v?^^BD9*SCz4&uk?(8idOj5tiIokP3~% zQfRz{E%KZofg3pD{go>drCFg#AWfXf5_wIUgbFGYf+#daJO?&MPePi#DrG8&GSfs@ z?@kBdJVS)}&J=;mEG%JXOR#sM%#pxL(nMGS^F-jOrvUJrFXJyiSs)*K`N={ME{n>D zbP!P%W8#;eWJo(+X)Y16tTI6qTPmVR>$)ru#g@4TM79WAmSd@Wg#_O1!7F+tu#^$+ zFEL3SdV)>YOsk=s+tqu!7I7hL@%r10|az(Bcn`V`)b&b>V zI@i{Vu-B4o08zT+P$`juCyvJ`Y1({(G&|%^ z1X0IHPT+hpNW*dpmQ1%wXrp1-CV_XNiTCHLPm^YuZU<3jhZ7{V6Qn4oV~KKx1bhF; 
znG(2jBi`RXa<(*k|HwH)7Vlh;y6Zf-v9gh1?*O>~MDYv7`@2Ifl4frT*#)A^#ikjv zTbh--#I;LB*o?Rgqy#U=Qo|K8eqDH_e70&Nt`gd&wqFgx`5FezX$r1AL1#i*hT7GPgNFjkkjo)s; z8b^d(fIjKOQzGnGe_8}ZpTSb}Ss8zu#B=ho9(*2z%L^i`z866%^b(drFUxq3y&@kw z6kY}4@|tP(I)&F=V+y|^WRc!`kiw&4 zgbgt9TL;Pk8Dj3aloztM4pb1bw+>VkvbPRY60){d1}S(IEWxYFgcm7#HTld|d#cNa zm5vB2R1-wSwJ`A;18Td5a~%-Qbw$k9iK!k4=lUY-+M|IJ4Mo^8uaOAKG{%xclQN;H z1l|ND-Uq3S?1)qkHW~AxTB@n z*>emC=dq@l_c+(^8ZQlTf<#t6=3`wmQ9jl+ljOs~$29Aj{iRtVp=(o2vvLV(R&J_$ z(IM01MTqHgWA!8Bi>I0Lu^r~K1(I?A}F%}OW1`H zcB|_aN#OS@@!oZdrCG)qAj&K;&Ac;RBhRJM5VIt*{E=Xnzu6!HE*I}B(G}7x;7Sl> zazxmQBniTKm3uJ!xgu~`jU{ZJj5j}DKK48E8X@}~xd4Q7WL~z3yHJ{K2`_T3ScGi} zUkjqHbs}s__K$;I{OXT+|^Re&QbLC?jY0nd~t=?XIn(ghrNXVwkF88|FCDw4e zL1c6ZMw69(xm23%6uwNz4&uu}1i!+(tl}%B+1~D}+>68cYWKRvyex%lU1NECoom;} z8V?KH;2JsH=-N%DSp#pDX7$`6WDUGk$X3s{fym}|5w=tK4iJsF6BD0*?viFZh3^)! z(YOahv3s2$gZn@fyB`y`{Q=kb9p*t0&JT&OrSiid&C^G)jK-rfzUp~QK9=m`Lbk2> z2@uXtnwNFOQ_^hT_R~T(8qc`bvo6sS&w(E5>vi+86y9)+(RkCfw_=UHc-u8{c*nJOO|#K>Pny;9zK}KW10frY4?$$} zkq8@&k3lrz6HGiBpGvdQ_)N$~<8u(jzHovJz64S1D@@$>uU%s_z5(I9??4)j z@3D-=4>EpI`%ykuXnFAy2$!ElT&dr=e*sa$uOjT6{+koOi?HkAKSWUGPb^_mcsSkn zT?ZhFmcuAHqL-IuqgVk%nTn=eu5W-!uHjW#8e$cREQ2K2IlUT)fYn7{o(QQvDb7?kPj;!5jLn3K?Iy6!m6Gu0+;=yL`gSMjUMI6a0NEfBXQjjM^hc8e?&S&+sr#3`klMHY#g?qy5&ilmF{lg819 zTiI5T#bQtVmz^RRqEG#sjUr1Vtnes%_K9SQU9xAJ$Wnbi&hsqWC6XmFvZ8x!5?LnJ z_d}QM5y=*pY3Y|e=e}IrKx?IOJmS7W)I6)jDLvi3QskUHemu^dBPzoxmS4O*Bq{bt zYogdayF*q<*cuCaZw|?ou-59^vo~b5$fbLeJd~d&GRJD%YiCHl*yYx&vW+2Y#GUr9 z`$7uDkNvxCA(50-f4eKBP|_wF%)K{-6p3DG4e}k?rCU6TC0u5?@7dw8R^)aYOM41= zojwnUol5t2tQWb%nj$BAw0eWMnf4fRyt`wgm?A5>XLH8^B6nKRvb`M#iW_2sR=Tz0 zAW?&@Eqm|mI9T-Q=2^P2;}B6<)>VJqyb*1ZaP)Uba_`P>^947kEy=i+! z{BY5S?M>U0*hh#y!;&f8L~*33i>%4&6yMQNB65^9xoktnQR2GWz?W^$(0-21;*YSZ z%6R7b(V{oo81A7p-?vCOs?5JMk7Gn1ZB>@-!V$U*muU#A`i(O|~?!AfQ6w&L=d9OVjTg7%Y=e@UZ zY!kiB(l6bpajK}Zz5Jei8mEap+0x&$XJNa@{x+>`>~`pLd3P$^juGt?d5-n+U-xL7 zF5wb?=-#$yoFQU{g|i(RXX^8y_%mnfv-G)_^?&IGis)={xmIN<*C#qh)FLjL_h4I#m3&iZUekQS7*~qCY7c2cdzHwcmU-F6i>t-und6>)7uSfq)+*X-+r_nFN9@H~ z(Cfr*DE7Hw`#}5puF&Xu6#Ejt0g#%U!n5jZ{!Auh%LU1)vgL#1G}#J4a=L8AAUQ*} zQjnY}TRBM1lC2UXXUkR%l5=FM1z zi)8Bt$#mIzL2|Kd{UDhk+aO3Tk!=_xGi4hE$)&Q5gJhO$lOVZFwrP;emTeX!m&-N} zk}G6e1j&`MErVo^Y^xxdlx-a(SIM>slDV>NgXC)2c0n>vwtbMym+cTFL)nxdxkk2Q zkSvhx6eJ_r&Ox$JHZ@2V$#w~n#j;(4W1<46BDdWu@gm{Me5%3~V^!{%RpT~RbuL9}aA8uD-Ft(0%iW46yIb?rb{ih>YRjcX zJAV3V&yQXmxB~CUu7ghOH|WehgjDtzbYZtbSN17%WA{LJ_7e19w?R+#8T4YuLT~QP V_hCmuU$X4Sj)OtrfyKo={}+PRSQG#N literal 0 HcmV?d00001 diff --git a/blib2to3/PatternGrammar.txt b/blib2to3/PatternGrammar.txt new file mode 100644 index 0000000..36bf814 --- /dev/null +++ b/blib2to3/PatternGrammar.txt @@ -0,0 +1,28 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# A grammar to describe tree matching patterns. +# Not shown here: +# - 'TOKEN' stands for any token (leaf node) +# - 'any' stands for any node (leaf or interior) +# With 'any' we can still specify the sub-structure. + +# The start symbol is 'Matcher'. 
+ +Matcher: Alternatives ENDMARKER + +Alternatives: Alternative ('|' Alternative)* + +Alternative: (Unit | NegatedUnit)+ + +Unit: [NAME '='] ( STRING [Repeater] + | NAME [Details] [Repeater] + | '(' Alternatives ')' [Repeater] + | '[' Alternatives ']' + ) + +NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')') + +Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}' + +Details: '<' Alternatives '>' diff --git a/blib2to3/PatternGrammar3.6.4.final.0.pickle b/blib2to3/PatternGrammar3.6.4.final.0.pickle new file mode 100644 index 0000000000000000000000000000000000000000..e0275040acc9a6b65686525aec8035565b84651b GIT binary patch literal 2093 zcmZ{l2Y3`!5XY~i9wCH~1O$6sEU2+}QLv2{FoGWHsh8ZsMUEVAZx6%8 zP7`-BRk2tsKQUJ}iGsBxopdaoHC0a5R7+gqfvL%5O|2v(5tusEdWl&zm2<=RevTxG ztKw_pYxvemyb_p(bR3-i;50f?XMqMty?r#fSu-#)HLGb5n0Q)T#cW*?W?MP|=3p>~ zNXlj`ML|Oy=_j$k3{#M$Bamg51hQ;83CMDQEH4cfWCcK0q;9%e33X-ZFUi2Hg1Rb@ zsH;W2IySx>nBnOv5LO3a4cl~J)+BZtt z=D=OF#ZGN;S5U`ki{nAv4YejPyQ5|tX_MN(OwgwHkik+Hm_4=ky-3SCYT8@xU>{8T z>K)`r?~)-lz8f_k72s+C6DgoRFq5=^0%<)U5W63-lcPH>D)tmS#P&vGQCc(>i)M9d zB~u3tCc87Hm%M!0m~gbcOcG;n$v0)GE@x`^QOtF@#j^3Urs7B}Q;lhKdhECfW?EpT zGn^fP*njU`ZkmVS;$fM|cNbjG%%+k|KMW&_zU%eo{GMs9nL}flhQ7wKnM-Y9?kLyK z^>8Wk=qb$Ya3|$_H$T3&$2apSCA`~CcdCokGYjZBTsonDVI{o8vY+#ObE+-JUNb*^ zw`@+!n$wxSGx(m#R58J4@qW+NU33oWxx8EE@w^-pa6VBNL~G?jT`L!1zBn>nqNYp9 z0Ov9n*X3kfp?l;?z^;n+$kiHhjSab$kn40yTu;aiRKa;SqTZxi;%3xaNPuv++A3}% z;dWgLci_4+S_*e5WA4_0y@$Yil`;1bbbmCk4=B!qcppNAw-4ieL<#YzMSl$R$DM zzljR?Rtxw}nf|>N@B?W-f&kNhBKGI#;(pQNe#HaRe{)?o99Nz7_K>sY_i*C>a2A{H zKP9#JaQ`KJXZvqy{Qqcsv3QJ1`V6-hx3HaM&_l7Pc#3PEm&F^;5I%(u?&;eN@c`9Ufg(F literal 0 HcmV?d00001 diff --git a/blib2to3/README b/blib2to3/README new file mode 100644 index 0000000..2c12c62 --- /dev/null +++ b/blib2to3/README @@ -0,0 +1,7 @@ +A subset of lib2to3 taken from Python 3.7.0b2. +Commit hash: 9c17e3a1987004b8bcfbe423953aad84493a7984 + +Reasons for forking: +- consistent handling of f-strings for users of Python < 3.6.2 +- better ability to debug +- ability to Cythonize diff --git a/blib2to3/__init__.py b/blib2to3/__init__.py new file mode 100644 index 0000000..ea30561 --- /dev/null +++ b/blib2to3/__init__.py @@ -0,0 +1 @@ +#empty diff --git a/blib2to3/__init__.pyi b/blib2to3/__init__.pyi new file mode 100644 index 0000000..145e31b --- /dev/null +++ b/blib2to3/__init__.pyi @@ -0,0 +1 @@ +# Stubs for lib2to3 (Python 3.6) diff --git a/blib2to3/pgen2/__init__.py b/blib2to3/pgen2/__init__.py new file mode 100644 index 0000000..af39048 --- /dev/null +++ b/blib2to3/pgen2/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""The pgen2 package.""" diff --git a/blib2to3/pgen2/__init__.pyi b/blib2to3/pgen2/__init__.pyi new file mode 100644 index 0000000..1adc82a --- /dev/null +++ b/blib2to3/pgen2/__init__.pyi @@ -0,0 +1,10 @@ +# Stubs for lib2to3.pgen2 (Python 3.6) + +import os +import sys +from typing import Text, Union + +if sys.version_info >= (3, 6): + _Path = Union[Text, os.PathLike] +else: + _Path = Text diff --git a/blib2to3/pgen2/conv.py b/blib2to3/pgen2/conv.py new file mode 100644 index 0000000..ed0cac5 --- /dev/null +++ b/blib2to3/pgen2/conv.py @@ -0,0 +1,257 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Convert graminit.[ch] spit out by pgen to Python code. + +Pgen is the Python parser generator. It is useful to quickly create a +parser from a grammar file in Python's grammar notation. 
But I don't +want my parsers to be written in C (yet), so I'm translating the +parsing tables to Python data structures and writing a Python parse +engine. + +Note that the token numbers are constants determined by the standard +Python tokenizer. The standard token module defines these numbers and +their names (the names are not used much). The token numbers are +hardcoded into the Python tokenizer and into pgen. A Python +implementation of the Python tokenizer is also available, in the +standard tokenize module. + +On the other hand, symbol numbers (representing the grammar's +non-terminals) are assigned by pgen based on the actual grammar +input. + +Note: this module is pretty much obsolete; the pgen module generates +equivalent grammar tables directly from the Grammar.txt input file +without having to invoke the Python pgen C program. + +""" + +# Python imports +import re + +# Local imports +from pgen2 import grammar, token + + +class Converter(grammar.Grammar): + """Grammar subclass that reads classic pgen output files. + + The run() method reads the tables as produced by the pgen parser + generator, typically contained in two C files, graminit.h and + graminit.c. The other methods are for internal use only. + + See the base class for more documentation. + + """ + + def run(self, graminit_h, graminit_c): + """Load the grammar tables from the text files written by pgen.""" + self.parse_graminit_h(graminit_h) + self.parse_graminit_c(graminit_c) + self.finish_off() + + def parse_graminit_h(self, filename): + """Parse the .h file written by pgen. (Internal) + + This file is a sequence of #define statements defining the + nonterminals of the grammar as numbers. We build two tables + mapping the numbers to names and back. + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + self.symbol2number = {} + self.number2symbol = {} + lineno = 0 + for line in f: + lineno += 1 + mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line) + if not mo and line.strip(): + print("%s(%s): can't parse %s" % (filename, lineno, + line.strip())) + else: + symbol, number = mo.groups() + number = int(number) + assert symbol not in self.symbol2number + assert number not in self.number2symbol + self.symbol2number[symbol] = number + self.number2symbol[number] = symbol + return True + + def parse_graminit_c(self, filename): + """Parse the .c file written by pgen. (Internal) + + The file looks as follows. The first two lines are always this: + + #include "pgenheaders.h" + #include "grammar.h" + + After that come four blocks: + + 1) one or more state definitions + 2) a table defining dfas + 3) a table defining labels + 4) a struct defining the grammar + + A state definition has the following form: + - one or more arc arrays, each of the form: + static arc arcs__[] = { + {, }, + ... + }; + - followed by a state array, of the form: + static state states_[] = { + {, arcs__}, + ... + }; + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + # The code below essentially uses f's iterator-ness! 
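parse_graminit_h boils down to matching "#define" lines with the regular expression shown below and building a pair of inverse mappings. A tiny illustration of the result, with made-up header lines (real graminit.h numbers depend on the grammar):

    import re

    header = ["#define single_input 256\n", "#define file_input 257\n"]
    symbol2number = {}
    for line in header:
        mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
        symbol2number[mo.group(1)] = int(mo.group(2))
    number2symbol = {n: s for s, n in symbol2number.items()}
    print(symbol2number, number2symbol[257])  # ... 'file_input'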
+ lineno = 0 + + # Expect the two #include lines + lineno, line = lineno+1, next(f) + assert line == '#include "pgenheaders.h"\n', (lineno, line) + lineno, line = lineno+1, next(f) + assert line == '#include "grammar.h"\n', (lineno, line) + + # Parse the state definitions + lineno, line = lineno+1, next(f) + allarcs = {} + states = [] + while line.startswith("static arc "): + while line.startswith("static arc "): + mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", + line) + assert mo, (lineno, line) + n, m, k = list(map(int, mo.groups())) + arcs = [] + for _ in range(k): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), (\d+)},$", line) + assert mo, (lineno, line) + i, j = list(map(int, mo.groups())) + arcs.append((i, j)) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + allarcs[(n, m)] = arcs + lineno, line = lineno+1, next(f) + mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line) + assert mo, (lineno, line) + s, t = list(map(int, mo.groups())) + assert s == len(states), (lineno, line) + state = [] + for _ in range(t): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line) + assert mo, (lineno, line) + k, n, m = list(map(int, mo.groups())) + arcs = allarcs[n, m] + assert k == len(arcs), (lineno, line) + state.append(arcs) + states.append(state) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + lineno, line = lineno+1, next(f) + self.states = states + + # Parse the dfas + dfas = {} + mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + for i in range(ndfas): + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', + line) + assert mo, (lineno, line) + symbol = mo.group(2) + number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) + assert self.symbol2number[symbol] == number, (lineno, line) + assert self.number2symbol[number] == symbol, (lineno, line) + assert x == 0, (lineno, line) + state = states[z] + assert y == len(state), (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line) + assert mo, (lineno, line) + first = {} + rawbitset = eval(mo.group(1)) + for i, c in enumerate(rawbitset): + byte = ord(c) + for j in range(8): + if byte & (1<= os.path.getmtime(b) + + +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. + + """ + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + +def main(*args): + """Main program, when run as a script: produce grammar pickle files. + + Calls load_grammar for each argument, a path to a grammar text file. 
+ """ + if not args: + args = sys.argv[1:] + logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(message)s') + for gt in args: + load_grammar(gt, save=True, force=True) + return True + +if __name__ == "__main__": + sys.exit(int(not main())) diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi new file mode 100644 index 0000000..f098bf5 --- /dev/null +++ b/blib2to3/pgen2/driver.pyi @@ -0,0 +1,24 @@ +# Stubs for lib2to3.pgen2.driver (Python 3.6) + +import os +import sys +from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union + +from logging import Logger +from blib2to3.pytree import _Convert, _NL +from blib2to3.pgen2 import _Path +from blib2to3.pgen2.grammar import Grammar + + +class Driver: + grammar: Grammar + logger: Logger + convert: _Convert + def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ... + def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ... + def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ... + def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ... + def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ... + def parse_string(self, text: Text, debug: bool = ...) -> _NL: ... + +def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ... diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py new file mode 100644 index 0000000..088c58b --- /dev/null +++ b/blib2to3/pgen2/grammar.py @@ -0,0 +1,211 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""This module defines the data structures used to represent a grammar. + +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. + +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +# Python imports +import collections +import pickle + +# Local imports +from . import token + + +class Grammar(object): + """Pgen parsing tables conversion class. + + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. 
(This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. + + """ + + def __init__(self): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, "EMPTY")] + self.keywords = {} + self.tokens = {} + self.symbol2label = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file. + + dump() recursively changes all dict to OrderedDict, so the pickled file + is not exactly the same as what was passed in to dump(). load() uses the + pickled file to create the tables, but only changes OrderedDict to dict + at the top level; it does not recursively change OrderedDict to dict. + So, the loaded tables are different from the original tables that were + passed to load() in that some of the OrderedDict (from the pickled file) + are not changed back to dict. For parsing, this has no effect on + performance because OrderedDict uses dict's __getitem__ with nothing in + between. + """ + with open(filename, "wb") as f: + d = _make_deterministic(self.__dict__) + pickle.dump(d, f, 2) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, "rb") as f: + d = pickle.load(f) + self.__dict__.update(d) + + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + + def copy(self): + """ + Copy the grammar. + """ + new = self.__class__() + for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", + "tokens", "symbol2label"): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print("s2n") + pprint(self.symbol2number) + print("n2s") + pprint(self.number2symbol) + print("states") + pprint(self.states) + print("dfas") + pprint(self.dfas) + print("labels") + pprint(self.labels) + print("start", self.start) + + +def _make_deterministic(top): + if isinstance(top, dict): + return collections.OrderedDict( + sorted(((k, _make_deterministic(v)) for k, v in top.items()))) + if isinstance(top, list): + return [_make_deterministic(e) for e in top] + if isinstance(top, tuple): + return tuple(_make_deterministic(e) for e in top) + return top + + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. 
DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +@= ATEQUAL +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +""" + +opmap = {} +for line in opmap_raw.splitlines(): + if line: + op, name = line.split() + opmap[op] = getattr(token, name) diff --git a/blib2to3/pgen2/grammar.pyi b/blib2to3/pgen2/grammar.pyi new file mode 100644 index 0000000..353086d --- /dev/null +++ b/blib2to3/pgen2/grammar.pyi @@ -0,0 +1,29 @@ +# Stubs for lib2to3.pgen2.grammar (Python 3.6) + +from blib2to3.pgen2 import _Path + +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar + +_P = TypeVar('_P') +_Label = Tuple[int, Optional[Text]] +_DFA = List[List[Tuple[int, int]]] +_DFAS = Tuple[_DFA, Dict[int, int]] + +class Grammar: + symbol2number: Dict[Text, int] + number2symbol: Dict[int, Text] + states: List[_DFA] + dfas: Dict[int, _DFAS] + labels: List[_Label] + keywords: Dict[Text, int] + tokens: Dict[int, int] + symbol2label: Dict[Text, int] + start: int + def __init__(self) -> None: ... + def dump(self, filename: _Path) -> None: ... + def load(self, filename: _Path) -> None: ... + def copy(self: _P) -> _P: ... + def report(self) -> None: ... + +opmap_raw: Text +opmap: Dict[Text, Text] diff --git a/blib2to3/pgen2/literals.py b/blib2to3/pgen2/literals.py new file mode 100644 index 0000000..b9b63e6 --- /dev/null +++ b/blib2to3/pgen2/literals.py @@ -0,0 +1,60 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Safely evaluate Python string literals without using eval().""" + +import re + +simple_escapes = {"a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", + "'": "'", + '"': '"', + "\\": "\\"} + +def escape(m): + all, tail = m.group(0, 1) + assert all.startswith("\\") + esc = simple_escapes.get(tail) + if esc is not None: + return esc + if tail.startswith("x"): + hexes = tail[1:] + if len(hexes) < 2: + raise ValueError("invalid hex string escape ('\\%s')" % tail) + try: + i = int(hexes, 16) + except ValueError: + raise ValueError("invalid hex string escape ('\\%s')" % tail) from None + else: + try: + i = int(tail, 8) + except ValueError: + raise ValueError("invalid octal string escape ('\\%s')" % tail) from None + return chr(i) + +def evalString(s): + assert s.startswith("'") or s.startswith('"'), repr(s[:1]) + q = s[0] + if s[:3] == q*3: + q = q*3 + assert s.endswith(q), repr(s[-len(q):]) + assert len(s) >= 2*len(q) + s = s[len(q):-len(q)] + return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) + +def test(): + for i in range(256): + c = chr(i) + s = repr(c) + e = evalString(s) + if e != c: + print(i, c, s, e) + + +if __name__ == "__main__": + test() diff --git a/blib2to3/pgen2/literals.pyi b/blib2to3/pgen2/literals.pyi new file mode 100644 index 0000000..8719500 --- /dev/null +++ b/blib2to3/pgen2/literals.pyi @@ -0,0 +1,9 @@ +# Stubs for lib2to3.pgen2.literals (Python 3.6) + +from typing import Dict, Match, Text + +simple_escapes: Dict[Text, Text] + +def escape(m: Match) -> Text: ... +def evalString(s: Text) -> Text: ... +def test() -> None: ... 
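evalString is the helper most likely to be exercised on its own; a short usage sketch, assuming the vendored package is importable:

    from blib2to3.pgen2.literals import evalString

    # Hex, octal and simple escapes are decoded without calling eval().
    assert evalString(r"'\x41\101\n'") == "AA\n"
    assert evalString('"""tri\\tple"""') == "tri\tple"
    print("ok")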
diff --git a/blib2to3/pgen2/parse.py b/blib2to3/pgen2/parse.py new file mode 100644 index 0000000..6bebdbb --- /dev/null +++ b/blib2to3/pgen2/parse.py @@ -0,0 +1,201 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" + +# Local imports +from . import token + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % + (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + +class Parser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + : + if p.addtoken(...): # parse a token; may raise ParseError + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when addtoken() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, addtoken() raises + the ParseError exception. There is no error recovery; the parser + cannot be used after a syntax error was reported (but it can be + reinitialized by calling setup()). + + """ + + def __init__(self, grammar, convert=None): + """Constructor. + + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. + + The parser is not ready yet for parsing; you must call the + setup() method to get it started. + + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, value, context, nodes) + tuple, where type is the node type (a token or symbol number), + value is None for symbols and a string for tokens, context is + None or an opaque value used for error reporting (typically a + (lineno, offset) pair), and nodes is a list of children for + symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. + + """ + self.grammar = grammar + self.convert = convert or (lambda grammar, node: node) + + def setup(self, start=None): + """Prepare for parsing. + + This *must* be called before starting to parse. + + The optional argument is an alternative start symbol; it + defaults to the grammar's start symbol. + + You can use a Parser instance to parse any number of programs; + each time you call setup() the parser is reset to an initial + state determined by the (implicit or explicit) start symbol. 
+ + """ + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. + newnode = (start, None, None, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + # Map from token to label + ilabel = self.classify(type, value, context) + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = self.grammar.labels[i] + if ilabel == i: + # Look it up in the list of labels + assert t < 256 + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! + return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, self.grammar.dfas[t], newstate, context) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError("too much input", + type, value, context) + else: + # No success finding a transition + raise ParseError("bad input", type, value, context) + + def classify(self, type, value, context): + """Turn a token into a label. (Internal)""" + if type == token.NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + ilabel = self.grammar.keywords.get(value) + if ilabel is not None: + return ilabel + ilabel = self.grammar.tokens.get(type) + if ilabel is None: + raise ParseError("bad token", type, value, context) + return ilabel + + def shift(self, type, value, newstate, context): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, value, context, None) + newnode = self.convert(self.grammar, newnode) + if newnode is not None: + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def push(self, type, newdfa, newstate, context): + """Push a nonterminal. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def pop(self): + """Pop a nonterminal. 
(Internal)""" + popdfa, popstate, popnode = self.stack.pop() + newnode = self.convert(self.grammar, popnode) + if newnode is not None: + if self.stack: + dfa, state, node = self.stack[-1] + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names diff --git a/blib2to3/pgen2/parse.pyi b/blib2to3/pgen2/parse.pyi new file mode 100644 index 0000000..cbcf941 --- /dev/null +++ b/blib2to3/pgen2/parse.pyi @@ -0,0 +1,29 @@ +# Stubs for lib2to3.pgen2.parse (Python 3.6) + +from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple + +from blib2to3.pgen2.grammar import Grammar, _DFAS +from blib2to3.pytree import _NL, _Convert, _RawNode + +_Context = Sequence[Any] + +class ParseError(Exception): + msg: Text + type: int + value: Optional[Text] + context: _Context + def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ... + +class Parser: + grammar: Grammar + convert: _Convert + stack: List[Tuple[_DFAS, int, _RawNode]] + rootnode: Optional[_NL] + used_names: Set[Text] + def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ... + def setup(self, start: Optional[int] = ...) -> None: ... + def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ... + def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ... + def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ... + def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ... + def pop(self) -> None: ... diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py new file mode 100644 index 0000000..b0cbd16 --- /dev/null +++ b/blib2to3/pgen2/pgen.py @@ -0,0 +1,386 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Pgen imports +from . import grammar, token, tokenize + +class PgenGrammar(grammar.Grammar): + pass + +class ParserGenerator(object): + + def __init__(self, filename, stream=None): + close_stream = None + if stream is None: + stream = open(filename) + close_stream = stream.close + self.filename = filename + self.stream = stream + self.generator = tokenize.generate_tokens(stream.readline) + self.gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self.parse() + if close_stream is not None: + close_stream() + self.first = {} # map from symbol name to set of tokens + self.addfirstsets() + + def make_grammar(self): + c = PgenGrammar() + names = list(self.dfas.keys()) + names.sort() + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in sorted(state.arcs.items()): + arcs.append((self.make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in sorted(rawfirst): + ilabel = self.make_label(c, label) + ##assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? 
+ ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(token, label, None) + assert isinstance(itoken, int), label + assert itoken in token.tok_name, label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in c.keywords: + return c.keywords[value] + else: + c.labels.append((token.NAME, value)) + c.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = grammar.opmap[value] # Fails if unknown token + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self.calcfirst(name) + #print name, self.first[name].keys() + + def calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError("recursion for rule %r" % name) + else: + self.calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self.gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self.expect(token.NAME) + self.expect(token.OP, ":") + a, z = self.parse_rhs() + self.expect(token.NEWLINE) + #self.dump_nfa(name, a, z) + dfa = self.make_dfa(a, z) + #self.dump_dfa(name, dfa) + oldlen = len(dfa) + self.simplify_dfa(dfa) + newlen = len(dfa) + dfas[name] = dfa + #print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. 
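        # Worked example on a hypothetical rule (not taken from Grammar.txt):
        # for  thing: 'a'*  parse_item() builds the NFA
        #     a --'a'--> z --eps--> a        with start == finish == a
        # closure({a}) == {a}, so the initial DFAState is {a} (final, because
        # finish is in it); its single arc on 'a' leads to closure({z}) ==
        # {a, z}, a second final DFAState that loops back to itself on 'a'.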
+ assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + def closure(state): + base = {} + addclosure(state, base) + return base + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in sorted(arcs.items()): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def dump_nfa(self, name, start, finish): + print("Dump of NFA for", name) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + def dump_dfa(self, name, dfa): + print("Dump of DFA for", name) + for i, state in enumerate(dfa): + print(" State", i, state.isfinal and "(final)" or "") + for label, next in sorted(state.arcs.items()): + print(" %s -> %d" % (label, dfa.index(next))) + + def simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. + + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i+1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + #print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self.parse_alt() + if self.value != "|": + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == "|": + self.gettoken() + a, z = self.parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def parse_alt(self): + # ALT: ITEM+ + a, b = self.parse_item() + while (self.value in ("(", "[") or + self.type in (token.NAME, token.STRING)): + c, d = self.parse_item() + b.addarc(c) + b = d + return a, b + + def parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == "[": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, "]") + a.addarc(z) + return a, z + else: + a, z = self.parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self.gettoken() + z.addarc(a) + if value == "+": + return a, z + else: + return a, a + + def parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == "(": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ")") + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self.gettoken() + return a, z + else: + self.raise_error("expected (...) 
or NAME or STRING, got %s/%s", + self.type, self.value) + + def expect(self, type, value=None): + if self.type != type or (value is not None and self.value != value): + self.raise_error("expected %s/%s, got %s/%s", + type, value, self.type, self.value) + value = self.value + self.gettoken() + return value + + def gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + #print token.tok_name[self.type], repr(self.value) + + def raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + raise SyntaxError(msg, (self.filename, self.end[0], + self.end[1], self.line)) + +class NFAState(object): + + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + +class DFAState(object): + + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + +def generate_grammar(filename="Grammar.txt"): + p = ParserGenerator(filename) + return p.make_grammar() diff --git a/blib2to3/pgen2/pgen.pyi b/blib2to3/pgen2/pgen.pyi new file mode 100644 index 0000000..1529ad0 --- /dev/null +++ b/blib2to3/pgen2/pgen.pyi @@ -0,0 +1,49 @@ +# Stubs for lib2to3.pgen2.pgen (Python 3.6) + +from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple +from mypy_extensions import NoReturn + +from blib2to3.pgen2 import _Path, grammar +from blib2to3.pgen2.tokenize import _TokenInfo + +class PgenGrammar(grammar.Grammar): ... + +class ParserGenerator: + filename: _Path + stream: IO[Text] + generator: Iterator[_TokenInfo] + first: Dict[Text, Dict[Text, int]] + def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ... + def make_grammar(self) -> PgenGrammar: ... + def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ... + def make_label(self, c: PgenGrammar, label: Text) -> int: ... + def addfirstsets(self) -> None: ... + def calcfirst(self, name: Text) -> None: ... + def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ... + def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ... + def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> List[DFAState]: ... + def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ... + def simplify_dfa(self, dfa: List[DFAState]) -> None: ... 
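    # Sketch of the overall flow, using only names defined in pgen.py above:
    #     g = generate_grammar("Grammar.txt")   # -> PgenGrammar
    #     states, first = g.dfas[g.start]       # DFA and first set of the start rule
    # The grammar's dfas, labels, keywords and tokens attributes are exactly
    # what parse.Parser consumes.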
+ def parse_rhs(self) -> Tuple[NFAState, NFAState]: ... + def parse_alt(self) -> Tuple[NFAState, NFAState]: ... + def parse_item(self) -> Tuple[NFAState, NFAState]: ... + def parse_atom(self) -> Tuple[NFAState, NFAState]: ... + def expect(self, type: int, value: Optional[Any] = ...) -> Text: ... + def gettoken(self) -> None: ... + def raise_error(self, msg: str, *args: Any) -> NoReturn: ... + +class NFAState: + arcs: List[Tuple[Optional[Text], NFAState]] + def __init__(self) -> None: ... + def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ... + +class DFAState: + nfaset: Dict[NFAState, Any] + isfinal: bool + arcs: Dict[Text, DFAState] + def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ... + def addarc(self, next: DFAState, label: Text) -> None: ... + def unifystate(self, old: DFAState, new: DFAState) -> None: ... + def __eq__(self, other: Any) -> bool: ... + +def generate_grammar(filename: _Path = ...) -> PgenGrammar: ... diff --git a/blib2to3/pgen2/token.py b/blib2to3/pgen2/token.py new file mode 100755 index 0000000..7599396 --- /dev/null +++ b/blib2to3/pgen2/token.py @@ -0,0 +1,83 @@ +#! /usr/bin/env python3 + +"""Token constants (from "token.h").""" + +# Taken from Python (r53757) and modified to include some tokens +# originally monkeypatched in by pgen2.tokenize + +#--start constants-- +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +BACKQUOTE = 25 +LBRACE = 26 +RBRACE = 27 +EQEQUAL = 28 +NOTEQUAL = 29 +LESSEQUAL = 30 +GREATEREQUAL = 31 +TILDE = 32 +CIRCUMFLEX = 33 +LEFTSHIFT = 34 +RIGHTSHIFT = 35 +DOUBLESTAR = 36 +PLUSEQUAL = 37 +MINEQUAL = 38 +STAREQUAL = 39 +SLASHEQUAL = 40 +PERCENTEQUAL = 41 +AMPEREQUAL = 42 +VBAREQUAL = 43 +CIRCUMFLEXEQUAL = 44 +LEFTSHIFTEQUAL = 45 +RIGHTSHIFTEQUAL = 46 +DOUBLESTAREQUAL = 47 +DOUBLESLASH = 48 +DOUBLESLASHEQUAL = 49 +AT = 50 +ATEQUAL = 51 +OP = 52 +COMMENT = 53 +NL = 54 +RARROW = 55 +ERRORTOKEN = 56 +N_TOKENS = 57 +NT_OFFSET = 256 +#--end constants-- + +tok_name = {} +for _name, _value in list(globals().items()): + if type(_value) is type(0): + tok_name[_value] = _name + + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER diff --git a/blib2to3/pgen2/token.pyi b/blib2to3/pgen2/token.pyi new file mode 100644 index 0000000..c256af8 --- /dev/null +++ b/blib2to3/pgen2/token.pyi @@ -0,0 +1,73 @@ +# Stubs for lib2to3.pgen2.token (Python 3.6) + +import sys +from typing import Dict, Text + +ENDMARKER: int +NAME: int +NUMBER: int +STRING: int +NEWLINE: int +INDENT: int +DEDENT: int +LPAR: int +RPAR: int +LSQB: int +RSQB: int +COLON: int +COMMA: int +SEMI: int +PLUS: int +MINUS: int +STAR: int +SLASH: int +VBAR: int +AMPER: int +LESS: int +GREATER: int +EQUAL: int +DOT: int +PERCENT: int +BACKQUOTE: int +LBRACE: int +RBRACE: int +EQEQUAL: int +NOTEQUAL: int +LESSEQUAL: int +GREATEREQUAL: int +TILDE: int +CIRCUMFLEX: int +LEFTSHIFT: int +RIGHTSHIFT: int +DOUBLESTAR: int +PLUSEQUAL: int +MINEQUAL: int +STAREQUAL: int +SLASHEQUAL: int +PERCENTEQUAL: int +AMPEREQUAL: int +VBAREQUAL: int +CIRCUMFLEXEQUAL: int +LEFTSHIFTEQUAL: int +RIGHTSHIFTEQUAL: int +DOUBLESTAREQUAL: int +DOUBLESLASH: int +DOUBLESLASHEQUAL: int +OP: int +COMMENT: int +NL: int +if sys.version_info >= (3,): + RARROW: int +if 
sys.version_info >= (3, 5): + AT: int + ATEQUAL: int + AWAIT: int + ASYNC: int +ERRORTOKEN: int +N_TOKENS: int +NT_OFFSET: int +tok_name: Dict[int, Text] + +def ISTERMINAL(x: int) -> bool: ... +def ISNONTERMINAL(x: int) -> bool: ... +def ISEOF(x: int) -> bool: ... diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py new file mode 100644 index 0000000..14560e4 --- /dev/null +++ b/blib2to3/pgen2/tokenize.py @@ -0,0 +1,518 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. +# All rights reserved. + +"""Tokenization help for Python programs. + +generate_tokens(readline) is a generator that breaks a stream of +text into Python tokens. It accepts a readline-like method which is called +repeatedly to get the next line of input (or "" for EOF). It generates +5-tuples with these members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators + +Older entry points + tokenize_loop(readline, tokeneater) + tokenize(readline, tokeneater=printtoken) +are the same, except instead of generating tokens, tokeneater is a callback +function to which the 5 fields described above are passed as 5 arguments, +each time a new token is found.""" + +__author__ = 'Ka-Ping Yee ' +__credits__ = \ + 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' + +import string, re +from codecs import BOM_UTF8, lookup +from lib2to3.pgen2.token import * + +from . import token +__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize", + "generate_tokens", "untokenize"] +del token + +try: + bytes +except NameError: + # Support bytes type in Python <= 2.5, so 2to3 turns itself into + # valid Python 3 code. + bytes = str + +def group(*choices): return '(' + '|'.join(choices) + ')' +def any(*choices): return group(*choices) + '*' +def maybe(*choices): return group(*choices) + '?' + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'[a-zA-Z_]\w*' + +Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' +Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' +Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' +Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') +Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+(?:_\d+)*' +Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent) +Expfloat = r'\d+(?:_\d+)*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?" +Triple = group(_litprefix + "'''", _litprefix + '"""') +# Single-line ' or " string. 
+String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). +Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", + r"//=?", r"->", + r"[+\-*/%&@|^=<>]=?", + r"~") + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = list(map( + re.compile, (Token, PseudoToken, Single3, Double3))) +endprogs = {"'": re.compile(Single), '"': re.compile(Double), + "'''": single3prog, '"""': double3prog, + "r'''": single3prog, 'r"""': double3prog, + "u'''": single3prog, 'u"""': double3prog, + "b'''": single3prog, 'b"""': double3prog, + "f'''": single3prog, 'f"""': double3prog, + "ur'''": single3prog, 'ur"""': double3prog, + "br'''": single3prog, 'br"""': double3prog, + "rb'''": single3prog, 'rb"""': double3prog, + "R'''": single3prog, 'R"""': double3prog, + "U'''": single3prog, 'U"""': double3prog, + "B'''": single3prog, 'B"""': double3prog, + "F'''": single3prog, 'F"""': double3prog, + "uR'''": single3prog, 'uR"""': double3prog, + "Ur'''": single3prog, 'Ur"""': double3prog, + "UR'''": single3prog, 'UR"""': double3prog, + "bR'''": single3prog, 'bR"""': double3prog, + "Br'''": single3prog, 'Br"""': double3prog, + "BR'''": single3prog, 'BR"""': double3prog, + "rB'''": single3prog, 'rB"""': double3prog, + "Rb'''": single3prog, 'Rb"""': double3prog, + "RB'''": single3prog, 'RB"""': double3prog, + 'r': None, 'R': None, + 'u': None, 'U': None, + 'f': None, 'F': None, + 'b': None, 'B': None} + +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "b'''", 'b"""', "B'''", 'B"""', + "f'''", 'f"""', "F'''", 'F"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""', + "br'''", 'br"""', "Br'''", 'Br"""', + "bR'''", 'bR"""', "BR'''", 'BR"""', + "rb'''", 'rb"""', "Rb'''", 'Rb"""', + "rB'''", 'rB"""', "RB'''", 'RB"""',): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "b'", 'b"', "B'", 'B"', + "f'", 'f"', "F'", 'F"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"', + "br'", 'br"', "Br'", 'Br"', + "bR'", 'bR"', "BR'", 'BR"', + "rb'", 'rb"', "Rb'", 'Rb"', + "rB'", 'rB"', "RB'", 'RB"',): + single_quoted[t] = t + +tabsize = 8 + +class TokenError(Exception): pass + +class StopTokenizing(Exception): pass + +def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing + (srow, scol) = xxx_todo_changeme + (erow, ecol) = xxx_todo_changeme1 + print("%d,%d-%d,%d:\t%s\t%s" % \ + (srow, scol, erow, ecol, tok_name[type], repr(token))) + +def tokenize(readline, tokeneater=printtoken): + """ + The tokenize() function accepts two parameters: one representing the + input stream, and one providing an output mechanism for tokenize(). 
+ + The first parameter, readline, must be a callable object which provides + the same interface as the readline() method of built-in file objects. + Each call to the function should return one line of input as a string. + + The second parameter, tokeneater, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the + tuples generated by generate_tokens(). + """ + try: + tokenize_loop(readline, tokeneater) + except StopTokenizing: + pass + +# backwards compatible interface +def tokenize_loop(readline, tokeneater): + for token_info in generate_tokens(readline): + tokeneater(*token_info) + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + + def add_whitespace(self, start): + row, col = start + assert row <= self.prev_row + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(" " * col_offset) + + def untokenize(self, iterable): + for t in iterable: + if len(t) == 2: + self.compat(t, iterable) + break + tok_type, token, start, end, line = t + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return "".join(self.tokens) + + def compat(self, token, iterable): + startline = False + indents = [] + toks_append = self.tokens.append + toknum, tokval = token + if toknum in (NAME, NUMBER): + tokval += ' ' + if toknum in (NEWLINE, NL): + startline = True + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum in (NAME, NUMBER): + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read + in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, but + disagree, a SyntaxError will be raised. If the encoding cookie is an invalid + charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. 
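For example, a sketch of sniffing the encoding of a source file opened in binary mode (the filename is illustrative):

    with open("some_module.py", "rb") as f:
        encoding, consumed = detect_encoding(f.readline)
    # encoding is e.g. 'utf-8' or 'utf-8-sig'; consumed holds the raw byte
    # lines (at most two) that were read while sniffing.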
+ """ + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return bytes() + + def find_cookie(line): + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError("unknown encoding: " + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited intput: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + return ut.untokenize(iterable) + +def generate_tokens(readline): + """ + The generate_tokens() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + logical line; continuation lines are included. 
+ """ + lnum = parenlev = continued = 0 + namechars, numchars = string.ascii_letters + '_', '0123456789' + contstr, needcont = '', 0 + contline = None + indents = [0] + + while 1: # loop over lines in stream + try: + line = readline() + except StopIteration: + line = '' + lnum = lnum + 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError("EOF in multi-line string", strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield (STRING, contstr + line[:end], + strstart, (lnum, end), contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield (ERRORTOKEN, contstr + line, + strstart, (lnum, len(line)), contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + nl_pos = pos + len(comment_token) + yield (COMMENT, comment_token, + (lnum, pos), (lnum, pos + len(comment_token)), line) + yield (NL, line[nl_pos:], + (lnum, nl_pos), (lnum, len(line)), line) + else: + yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], + (lnum, pos), (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level", + ("", lnum, pos, line)) + indents = indents[:-1] + + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + + else: # continued statement + if not line: + raise TokenError("EOF in multi-line statement", (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = pseudoprog.match(line, pos) + if pseudomatch: # scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if initial in numchars or \ + (initial == '.' 
and token != '.'): # ordinary number + yield (NUMBER, token, spos, epos, line) + elif initial in '\r\n': + newline = NEWLINE + if parenlev > 0: + newline = NL + yield (newline, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith("\n") + yield (COMMENT, token, spos, epos, line) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield (STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + yield (STRING, token, spos, epos, line) + elif initial in namechars: # ordinary name + yield (NAME, token, spos, epos, line) + elif initial == '\\': # continued stmt + # This yield is new; needed for better idempotency: + yield (NL, token, spos, (lnum, pos), line) + continued = 1 + else: + if initial in '([{': parenlev = parenlev + 1 + elif initial in ')]}': parenlev = parenlev - 1 + yield (OP, token, spos, epos, line) + else: + yield (ERRORTOKEN, line[pos], + (lnum, pos), (lnum, pos+1), line) + pos = pos + 1 + + for indent in indents[1:]: # pop remaining indent levels + yield (DEDENT, '', (lnum, 0), (lnum, 0), '') + yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') + +if __name__ == '__main__': # testing + import sys + if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) + else: tokenize(sys.stdin.readline) diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi new file mode 100644 index 0000000..62352e9 --- /dev/null +++ b/blib2to3/pgen2/tokenize.pyi @@ -0,0 +1,30 @@ +# Stubs for lib2to3.pgen2.tokenize (Python 3.6) +# NOTE: Only elements from __all__ are present. + +from typing import Callable, Iterable, Iterator, List, Text, Tuple +from blib2to3.pgen2.token import * # noqa + + +_Coord = Tuple[int, int] +_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None] +_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text] + + +class TokenError(Exception): ... +class StopTokenizing(Exception): ... + +def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ... + +class Untokenizer: + tokens: List[Text] + prev_row: int + prev_col: int + def __init__(self) -> None: ... + def add_whitespace(self, start: _Coord) -> None: ... + def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ... + def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ... + +def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ... +def generate_tokens( + readline: Callable[[], Text] +) -> Iterator[_TokenInfo]: ... diff --git a/blib2to3/pygram.py b/blib2to3/pygram.py new file mode 100644 index 0000000..919624e --- /dev/null +++ b/blib2to3/pygram.py @@ -0,0 +1,40 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Export the Python grammar and symbols.""" + +# Python imports +import os + +# Local imports +from .pgen2 import token +from .pgen2 import driver +from . 
import pytree + +# The grammar file +_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") +_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), + "PatternGrammar.txt") + + +class Symbols(object): + + def __init__(self, grammar): + """Initializer. + + Creates an attribute for each grammar symbol (nonterminal), + whose value is the symbol's type (an int >= 256). + """ + for name, symbol in grammar.symbol2number.items(): + setattr(self, name, symbol) + + +python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) + +python_symbols = Symbols(python_grammar) + +python_grammar_no_print_statement = python_grammar.copy() +del python_grammar_no_print_statement.keywords["print"] + +pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) +pattern_symbols = Symbols(pattern_grammar) diff --git a/blib2to3/pygram.pyi b/blib2to3/pygram.pyi new file mode 100644 index 0000000..3dbc648 --- /dev/null +++ b/blib2to3/pygram.pyi @@ -0,0 +1,119 @@ +# Stubs for lib2to3.pygram (Python 3.6) + +from typing import Any +from blib2to3.pgen2.grammar import Grammar + +class Symbols: + def __init__(self, grammar: Grammar) -> None: ... + +class python_symbols(Symbols): + and_expr: int + and_test: int + annassign: int + arglist: int + argument: int + arith_expr: int + assert_stmt: int + async_funcdef: int + async_stmt: int + atom: int + augassign: int + break_stmt: int + classdef: int + comp_for: int + comp_if: int + comp_iter: int + comp_op: int + comparison: int + compound_stmt: int + continue_stmt: int + decorated: int + decorator: int + decorators: int + del_stmt: int + dictsetmaker: int + dotted_as_name: int + dotted_as_names: int + dotted_name: int + encoding_decl: int + eval_input: int + except_clause: int + exec_stmt: int + expr: int + expr_stmt: int + exprlist: int + factor: int + file_input: int + flow_stmt: int + for_stmt: int + funcdef: int + global_stmt: int + if_stmt: int + import_as_name: int + import_as_names: int + import_from: int + import_name: int + import_stmt: int + lambdef: int + listmaker: int + not_test: int + old_comp_for: int + old_comp_if: int + old_comp_iter: int + old_lambdef: int + old_test: int + or_test: int + parameters: int + pass_stmt: int + power: int + print_stmt: int + raise_stmt: int + return_stmt: int + shift_expr: int + simple_stmt: int + single_input: int + sliceop: int + small_stmt: int + star_expr: int + stmt: int + subscript: int + subscriptlist: int + suite: int + term: int + test: int + testlist: int + testlist1: int + testlist_gexp: int + testlist_safe: int + testlist_star_expr: int + tfpdef: int + tfplist: int + tname: int + trailer: int + try_stmt: int + typedargslist: int + varargslist: int + vfpdef: int + vfplist: int + vname: int + while_stmt: int + with_item: int + with_stmt: int + with_var: int + xor_expr: int + yield_arg: int + yield_expr: int + yield_stmt: int + +class pattern_symbols(Symbols): + Alternative: int + Alternatives: int + Details: int + Matcher: int + NegatedUnit: int + Repeater: int + Unit: int + +python_grammar: Grammar +python_grammar_no_print_statement: Grammar +pattern_grammar: Grammar diff --git a/blib2to3/pytree.py b/blib2to3/pytree.py new file mode 100644 index 0000000..693366f --- /dev/null +++ b/blib2to3/pytree.py @@ -0,0 +1,854 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +""" +Python parse tree definitions. 
+ +This is a very concrete parse tree; we need to keep every token and +even the comments and whitespace between tokens. + +There's also a pattern matching implementation here. +""" + +__author__ = "Guido van Rossum " + +import sys +from io import StringIO + +HUGE = 0x7FFFFFFF # maximum repeat count, default max + +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) + +class Base(object): + + """ + Abstract base class for Node and Leaf. + + This provides some default functionality and boilerplate using the + template pattern. + + A node may be a subnode of at most one parent. + """ + + # Default values for instance variables + type = None # int: token number (< 256) or symbol number (>= 256) + parent = None # Parent node pointer, or None + children = () # Tuple of subnodes + was_changed = False + was_checked = False + + def __new__(cls, *args, **kwds): + """Constructor that prevents Base from being instantiated.""" + assert cls is not Base, "Cannot instantiate Base" + return object.__new__(cls) + + def __eq__(self, other): + """ + Compare two nodes for equality. + + This calls the method _eq(). + """ + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + __hash__ = None # For Py3 compatibility. + + def _eq(self, other): + """ + Compare two nodes for equality. + + This is called by __eq__ and __ne__. It is only called if the two nodes + have the same type. This must be implemented by the concrete subclass. + Nodes should be considered equal if they have the same structure, + ignoring the prefix string and other context information. + """ + raise NotImplementedError + + def clone(self): + """ + Return a cloned (deep) copy of self. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def post_order(self): + """ + Return a post-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def pre_order(self): + """ + Return a pre-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def replace(self, new): + """Replace this node with a new one in the parent.""" + assert self.parent is not None, str(self) + assert new is not None + if not isinstance(new, list): + new = [new] + l_children = [] + found = False + for ch in self.parent.children: + if ch is self: + assert not found, (self.parent.children, self, new) + if new is not None: + l_children.extend(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, self, new) + self.parent.changed() + self.parent.children = l_children + for x in new: + x.parent = self.parent + self.parent = None + + def get_lineno(self): + """Return the line number which generated the invocant node.""" + node = self + while not isinstance(node, Leaf): + if not node.children: + return + node = node.children[0] + return node.lineno + + def changed(self): + if self.parent: + self.parent.changed() + self.was_changed = True + + def remove(self): + """ + Remove the node from the tree. Returns the position of the node in its + parent's children before it was removed. 
+ """ + if self.parent: + for i, node in enumerate(self.parent.children): + if node is self: + self.parent.changed() + del self.parent.children[i] + self.parent = None + return i + + @property + def next_sibling(self): + """ + The node immediately following the invocant in their parent's children + list. If the invocant does not have a next sibling, it is None + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i+1] + except IndexError: + return None + + @property + def prev_sibling(self): + """ + The node immediately preceding the invocant in their parent's children + list. If the invocant does not have a previous sibling, it is None. + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i-1] + + def leaves(self): + for child in self.children: + yield from child.leaves() + + def depth(self): + if self.parent is None: + return 0 + return 1 + self.parent.depth() + + def get_suffix(self): + """ + Return the string immediately following the invocant node. This is + effectively equivalent to node.next_sibling.prefix + """ + next_sib = self.next_sibling + if next_sib is None: + return "" + return next_sib.prefix + + if sys.version_info < (3, 0): + def __str__(self): + return str(self).encode("ascii") + +class Node(Base): + + """Concrete implementation for interior nodes.""" + + def __init__(self,type, children, + context=None, + prefix=None, + fixers_applied=None): + """ + Initializer. + + Takes a type constant (a symbol number >= 256), a sequence of + child nodes, and an optional context keyword argument. + + As a side effect, the parent pointers of the children are updated. + """ + assert type >= 256, type + self.type = type + self.children = list(children) + for ch in self.children: + assert ch.parent is None, repr(ch) + ch.parent = self + if prefix is not None: + self.prefix = prefix + if fixers_applied: + self.fixers_applied = fixers_applied[:] + else: + self.fixers_applied = None + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%s, %r)" % (self.__class__.__name__, + type_repr(self.type), + self.children) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return "".join(map(str, self.children)) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.children) == (other.type, other.children) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Node(self.type, [ch.clone() for ch in self.children], + fixers_applied=self.fixers_applied) + + def post_order(self): + """Return a post-order iterator for the tree.""" + for child in self.children: + yield from child.post_order() + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + for child in self.children: + yield from child.pre_order() + + @property + def prefix(self): + """ + The whitespace and comments preceding this node in the input. 
+ """ + if not self.children: + return "" + return self.children[0].prefix + + @prefix.setter + def prefix(self, prefix): + if self.children: + self.children[0].prefix = prefix + + def set_child(self, i, child): + """ + Equivalent to 'node.children[i] = child'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children[i].parent = None + self.children[i] = child + self.changed() + + def insert_child(self, i, child): + """ + Equivalent to 'node.children.insert(i, child)'. This method also sets + the child's parent attribute appropriately. + """ + child.parent = self + self.children.insert(i, child) + self.changed() + + def append_child(self, child): + """ + Equivalent to 'node.children.append(child)'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children.append(child) + self.changed() + + +class Leaf(Base): + + """Concrete implementation for leaf nodes.""" + + # Default values for instance variables + _prefix = "" # Whitespace and comments preceding this token in the input + lineno = 0 # Line where this token starts in the input + column = 0 # Column where this token tarts in the input + + def __init__(self, type, value, + context=None, + prefix=None, + fixers_applied=[]): + """ + Initializer. + + Takes a type constant (a token number < 256), a string value, and an + optional context keyword argument. + """ + assert 0 <= type < 256, type + if context is not None: + self._prefix, (self.lineno, self.column) = context + self.type = type + self.value = value + if prefix is not None: + self._prefix = prefix + self.fixers_applied = fixers_applied[:] + + def __repr__(self): + """Return a canonical string representation.""" + from .pgen2.token import tok_name + return "%s(%s, %r)" % (self.__class__.__name__, + tok_name.get(self.type, self.type), + self.value) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return self.prefix + str(self.value) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.value) == (other.type, other.value) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Leaf(self.type, self.value, + (self.prefix, (self.lineno, self.column)), + fixers_applied=self.fixers_applied) + + def leaves(self): + yield self + + def post_order(self): + """Return a post-order iterator for the tree.""" + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + + @property + def prefix(self): + """ + The whitespace and comments preceding this token in the input. + """ + return self._prefix + + @prefix.setter + def prefix(self, prefix): + self.changed() + self._prefix = prefix + +def convert(gr, raw_node): + """ + Convert raw node information to a Node or Leaf instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + type, value, context, children = raw_node + if children or type in gr.number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) + + +class BasePattern(object): + + """ + A pattern is a tree matching pattern. 
+ + It looks for a specific node type (token or symbol), and + optionally for a specific content. + + This is an abstract base class. There are three concrete + subclasses: + + - LeafPattern matches a single leaf node; + - NodePattern matches a single node (usually non-leaf); + - WildcardPattern matches a sequence of nodes of variable length. + """ + + # Defaults for instance variables + type = None # Node type (token if < 256, symbol if >= 256) + content = None # Optional content matching pattern + name = None # Optional name used to store match in results dict + + def __new__(cls, *args, **kwds): + """Constructor that prevents BasePattern from being instantiated.""" + assert cls is not BasePattern, "Cannot instantiate BasePattern" + return object.__new__(cls) + + def __repr__(self): + args = [type_repr(self.type), self.content, self.name] + while args and args[-1] is None: + del args[-1] + return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) + + def optimize(self): + """ + A subclass can define this as a hook for optimizations. + + Returns either self or another node with the same effect. + """ + return self + + def match(self, node, results=None): + """ + Does this pattern exactly match a node? + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + Default implementation for non-wildcard patterns. + """ + if self.type is not None and node.type != self.type: + return False + if self.content is not None: + r = None + if results is not None: + r = {} + if not self._submatch(node, r): + return False + if r: + results.update(r) + if results is not None and self.name: + results[self.name] = node + return True + + def match_seq(self, nodes, results=None): + """ + Does this pattern exactly match a sequence of nodes? + + Default implementation for non-wildcard patterns. + """ + if len(nodes) != 1: + return False + return self.match(nodes[0], results) + + def generate_matches(self, nodes): + """ + Generator yielding all matches for this pattern. + + Default implementation for non-wildcard patterns. + """ + r = {} + if nodes and self.match(nodes[0], r): + yield 1, r + + +class LeafPattern(BasePattern): + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given must be a token type (< 256). If not given, + this matches any *leaf* node; the content may still be required. + + The content, if given, must be a string. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert 0 <= type < 256, type + if content is not None: + assert isinstance(content, str), repr(content) + self.type = type + self.content = content + self.name = name + + def match(self, node, results=None): + """Override match() to insist on a leaf node.""" + if not isinstance(node, Leaf): + return False + return BasePattern.match(self, node, results) + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. 
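A small sketch of using LeafPattern; `some_leaf` stands for any Leaf in a parsed tree:

    pattern = LeafPattern(token.NAME, content="self", name="target")
    results = {}
    if pattern.match(some_leaf, results):
        bound = results["target"]   # the matched Leaf, stored under the given name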
+ """ + return self.content == node.value + + +class NodePattern(BasePattern): + + wildcards = False + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given, must be a symbol type (>= 256). If the + type is None this matches *any* single node (leaf or not), + except if content is not None, in which it only matches + non-leaf nodes that also match the content pattern. + + The content, if not None, must be a sequence of Patterns that + must match the node's children exactly. If the content is + given, the type must not be None. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert type >= 256, type + if content is not None: + assert not isinstance(content, str), repr(content) + content = list(content) + for i, item in enumerate(content): + assert isinstance(item, BasePattern), (i, item) + if isinstance(item, WildcardPattern): + self.wildcards = True + self.type = type + self.content = content + self.name = name + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + if self.wildcards: + for c, r in generate_matches(self.content, node.children): + if c == len(node.children): + if results is not None: + results.update(r) + return True + return False + if len(self.content) != len(node.children): + return False + for subpattern, child in zip(self.content, node.children): + if not subpattern.match(child, results): + return False + return True + + +class WildcardPattern(BasePattern): + + """ + A wildcard pattern can match zero or more nodes. + + This has all the flexibility needed to implement patterns like: + + .* .+ .? .{m,n} + (a b c | d e | f) + (...)* (...)+ (...)? (...){m,n} + + except it always uses non-greedy matching. + """ + + def __init__(self, content=None, min=0, max=HUGE, name=None): + """ + Initializer. + + Args: + content: optional sequence of subsequences of patterns; + if absent, matches one node; + if present, each subsequence is an alternative [*] + min: optional minimum number of times to match, default 0 + max: optional maximum number of times to match, default HUGE + name: optional name assigned to this match + + [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is + equivalent to (a b c | d e | f g h); if content is None, + this is equivalent to '.' in regular expression terms. + The min and max parameters work as follows: + min=0, max=maxint: .* + min=1, max=maxint: .+ + min=0, max=1: .? + min=1, max=1: . + If content is not None, replace the dot with the parenthesized + list of alternatives, e.g. 
(a b c | d e | f g h)* + """ + assert 0 <= min <= max <= HUGE, (min, max) + if content is not None: + content = tuple(map(tuple, content)) # Protect against alterations + # Check sanity of alternatives + assert len(content), repr(content) # Can't have zero alternatives + for alt in content: + assert len(alt), repr(alt) # Can have empty alternatives + self.content = content + self.min = min + self.max = max + self.name = name + + def optimize(self): + """Optimize certain stacked wildcard patterns.""" + subpattern = None + if (self.content is not None and + len(self.content) == 1 and len(self.content[0]) == 1): + subpattern = self.content[0][0] + if self.min == 1 and self.max == 1: + if self.content is None: + return NodePattern(name=self.name) + if subpattern is not None and self.name == subpattern.name: + return subpattern.optimize() + if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and + subpattern.min <= 1 and self.name == subpattern.name): + return WildcardPattern(subpattern.content, + self.min*subpattern.min, + self.max*subpattern.max, + subpattern.name) + return self + + def match(self, node, results=None): + """Does this pattern exactly match a node?""" + return self.match_seq([node], results) + + def match_seq(self, nodes, results=None): + """Does this pattern exactly match a sequence of nodes?""" + for c, r in self.generate_matches(nodes): + if c == len(nodes): + if results is not None: + results.update(r) + if self.name: + results[self.name] = list(nodes) + return True + return False + + def generate_matches(self, nodes): + """ + Generator yielding matches for a sequence of nodes. + + Args: + nodes: sequence of nodes + + Yields: + (count, results) tuples where: + count: the match comprises nodes[:count]; + results: dict containing named submatches. + """ + if self.content is None: + # Shortcut for special case (see __init__.__doc__) + for count in range(self.min, 1 + min(len(nodes), self.max)): + r = {} + if self.name: + r[self.name] = nodes[:count] + yield count, r + elif self.name == "bare_name": + yield self._bare_name_matches(nodes) + else: + # The reason for this is that hitting the recursion limit usually + # results in some ugly messages about how RuntimeErrors are being + # ignored. We only have to do this on CPython, though, because other + # implementations don't have this nasty bug in the first place. + if hasattr(sys, "getrefcount"): + save_stderr = sys.stderr + sys.stderr = StringIO() + try: + for count, r in self._recursive_matches(nodes, 0): + if self.name: + r[self.name] = nodes[:count] + yield count, r + except RuntimeError: + # We fall back to the iterative pattern matching scheme if the recursive + # scheme hits the recursion limit. 
+ for count, r in self._iterative_matches(nodes): + if self.name: + r[self.name] = nodes[:count] + yield count, r + finally: + if hasattr(sys, "getrefcount"): + sys.stderr = save_stderr + + def _iterative_matches(self, nodes): + """Helper to iteratively yield the matches.""" + nodelen = len(nodes) + if 0 >= self.min: + yield 0, {} + + results = [] + # generate matches that use just one alt from self.content + for alt in self.content: + for c, r in generate_matches(alt, nodes): + yield c, r + results.append((c, r)) + + # for each match, iterate down the nodes + while results: + new_results = [] + for c0, r0 in results: + # stop if the entire set of nodes has been matched + if c0 < nodelen and c0 <= self.max: + for alt in self.content: + for c1, r1 in generate_matches(alt, nodes[c0:]): + if c1 > 0: + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + new_results.append((c0 + c1, r)) + results = new_results + + def _bare_name_matches(self, nodes): + """Special optimized matcher for bare_name.""" + count = 0 + r = {} + done = False + max = len(nodes) + while not done and count < max: + done = True + for leaf in self.content: + if leaf[0].match(nodes[count], r): + count += 1 + done = False + break + r[self.name] = nodes[:count] + return count, r + + def _recursive_matches(self, nodes, count): + """Helper to recursively yield the matches.""" + assert self.content is not None + if count >= self.min: + yield 0, {} + if count < self.max: + for alt in self.content: + for c0, r0 in generate_matches(alt, nodes): + for c1, r1 in self._recursive_matches(nodes[c0:], count+1): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + + +class NegatedPattern(BasePattern): + + def __init__(self, content=None): + """ + Initializer. + + The argument is either a pattern or None. If it is None, this + only matches an empty sequence (effectively '$' in regex + lingo). If it is not None, this matches whenever the argument + pattern doesn't have any matches. + """ + if content is not None: + assert isinstance(content, BasePattern), repr(content) + self.content = content + + def match(self, node): + # We never match a node in its entirety + return False + + def match_seq(self, nodes): + # We only match an empty sequence of nodes in its entirety + return len(nodes) == 0 + + def generate_matches(self, nodes): + if self.content is None: + # Return a match if there is an empty sequence + if len(nodes) == 0: + yield 0, {} + else: + # Return a match if the argument pattern has no matches + for c, r in self.content.generate_matches(nodes): + return + yield 0, {} + + +def generate_matches(patterns, nodes): + """ + Generator yielding matches for a sequence of patterns and nodes. + + Args: + patterns: a sequence of patterns + nodes: a sequence of nodes + + Yields: + (count, results) tuples where: + count: the entire sequence of patterns matches nodes[:count]; + results: dict containing named submatches. 
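Sketched use of the module-level generate_matches(); `node` stands for any Node whose children should be matched:

    patterns = [LeafPattern(token.NAME), WildcardPattern()]
    for count, results in generate_matches(patterns, node.children):
        matched = node.children[:count]   # the prefix covered by all patterns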
+ """ + if not patterns: + yield 0, {} + else: + p, rest = patterns[0], patterns[1:] + for c0, r0 in p.generate_matches(nodes): + if not rest: + yield c0, r0 + else: + for c1, r1 in generate_matches(rest, nodes[c0:]): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r diff --git a/blib2to3/pytree.pyi b/blib2to3/pytree.pyi new file mode 100644 index 0000000..eb7320b --- /dev/null +++ b/blib2to3/pytree.pyi @@ -0,0 +1,86 @@ +# Stubs for lib2to3.pytree (Python 3.6) + +import sys +from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Tuple, TypeVar, Union + +from blib2to3.pgen2.grammar import Grammar + +_P = TypeVar('_P') +_NL = Union[Node, Leaf] +_Context = Tuple[Text, int, int] +_Results = Dict[Text, _NL] +_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]] +_Convert = Callable[[Grammar, _RawNode], Any] + +HUGE: int + +def type_repr(type_num: int) -> Text: ... + +class Base: + type: int + parent: Optional[Node] + prefix: Text + children: List[_NL] + was_changed: bool + was_checked: bool + def __eq__(self, other: Any) -> bool: ... + def _eq(self: _P, other: _P) -> bool: ... + def clone(self: _P) -> _P: ... + def post_order(self) -> Iterator[_NL]: ... + def pre_order(self) -> Iterator[_NL]: ... + def replace(self, new: Union[_NL, List[_NL]]) -> None: ... + def get_lineno(self) -> int: ... + def changed(self) -> None: ... + def remove(self) -> Optional[int]: ... + @property + def next_sibling(self) -> Optional[_NL]: ... + @property + def prev_sibling(self) -> Optional[_NL]: ... + def leaves(self) -> Iterator[Leaf]: ... + def depth(self) -> int: ... + def get_suffix(self) -> Text: ... + if sys.version_info < (3,): + def get_prefix(self) -> Text: ... + def set_prefix(self, prefix: Text) -> None: ... + +class Node(Base): + fixers_applied: List[Any] + def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ... + def set_child(self, i: int, child: _NL) -> None: ... + def insert_child(self, i: int, child: _NL) -> None: ... + def append_child(self, child: _NL) -> None: ... + +class Leaf(Base): + lineno: int + column: int + value: Text + fixers_applied: List[Any] + def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ... + +def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ... + +class BasePattern: + type: int + content: Optional[Text] + name: Optional[Text] + def optimize(self) -> BasePattern: ... # sic, subclasses are free to optimize themselves into different patterns + def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ... + def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ... + def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ... + +class LeafPattern(BasePattern): + def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... + +class NodePattern(BasePattern): + wildcards: bool + def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... + +class WildcardPattern(BasePattern): + min: int + max: int + def __init__(self, content: Optional[Text] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ... + +class NegatedPattern(BasePattern): + def __init__(self, content: Optional[Text] = ...) -> None: ... 
+
+def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..8fa7236
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,31 @@
+[mypy]
+# Specify the target platform details in config, so your developers are
+# free to run mypy on Windows, Linux, or macOS and get consistent
+# results.
+python_version=3.6
+platform=linux
+
+# flake8-mypy expects the two following for sensible formatting
+show_column_numbers=True
+
+# show error messages from unrelated files
+follow_imports=normal
+
+# suppress errors about unsatisfied imports
+ignore_missing_imports=True
+
+# be strict
+disallow_untyped_calls=True
+warn_return_any=True
+strict_optional=True
+warn_no_return=True
+warn_redundant_casts=True
+warn_unused_ignores=True
+
+# The following are off by default. Flip them on if you feel
+# adventurous.
+disallow_untyped_defs=True
+check_untyped_defs=True
+
+# No incremental mode
+cache_dir=/dev/null
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a64482c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2018 Łukasz Langa
+import ast
+import re
+from setuptools import setup
+import sys
+
+assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+"
+from pathlib import Path  # noqa E402
+
+CURRENT_DIR = Path(__file__).parent
+
+
+def get_long_description():
+    readme_md = CURRENT_DIR / 'README.md'
+    try:
+        import pypandoc
+        return pypandoc.convert_file(str(readme_md), 'rst')
+
+    except (IOError, ImportError):
+        print()
+        print(
+            '\x1b[31m\x1b[1mwarning:\x1b[0m\x1b[31m pandoc not found, '
+            'long description will be ugly (PyPI does not support .md).'
+            '\x1b[0m'
+        )
+        print()
+        with open(readme_md, encoding='utf8') as ld_file:
+            return ld_file.read()
+
+
+def get_version():
+    black_py = CURRENT_DIR / 'black.py'
+    _version_re = re.compile(r'__version__\s+=\s+(?P<version>.*)')
+    with open(black_py, 'r', encoding='utf8') as f:
+        version = _version_re.search(f.read()).group('version')
+    return str(ast.literal_eval(version))
+
+
+setup(
+    name='black',
+    version=get_version(),
+    description="The uncompromising code formatter.",
+    long_description=get_long_description(),
+    keywords='automation formatter yapf autopep8 pyfmt gofmt rustfmt',
+    author='Łukasz Langa',
+    author_email='lukasz@langa.pl',
+    url='https://github.com/ambv/black',
+    license='MIT',
+    py_modules=['black'],
+    packages=['blib2to3', 'blib2to3.pgen2'],
+    python_requires=">=3.6",
+    zip_safe=False,
+    install_requires=['click', 'attrs'],
+    test_suite='tests.test_black',
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Console',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+        'Topic :: Software Development :: Quality Assurance',
+    ],
+    entry_points={'console_scripts': ['black=black:main']},
+)
diff --git a/tests/.flake8 b/tests/.flake8
new file mode 100644
index 0000000..3528ac4
--- /dev/null
+++ b/tests/.flake8
@@ -0,0 +1,8 @@
+# Like the base Black .flake8 but also ignores F811 which is used deliberately
+# in test files.
+ +[flake8] +ignore = E266, E501, F811 +max-line-length = 80 +max-complexity = 12 +select = B,C,E,F,W,T4,B9 diff --git a/tests/cantfit.py b/tests/cantfit.py new file mode 100644 index 0000000..99bcaa0 --- /dev/null +++ b/tests/cantfit.py @@ -0,0 +1,27 @@ +# long variable name +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 0 +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 1 # with a comment +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = [ + 1, 2, 3 +] +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function() +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function( + arg1, arg2, arg3 +) +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function( + [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3 +) +# long function name +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying() +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying( + arg1, arg2, arg3 +) +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying( + [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3 +) +# long arguments +normal_name = normal_function_name( + "but with super long string arguments that on their own exceed the line limit so there's no way it can ever fit", + "eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs", + this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it=0, +) diff --git a/tests/comments.py b/tests/comments.py new file mode 100644 index 0000000..e661ba6 --- /dev/null +++ b/tests/comments.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# Some license here. +# +# Has many lines. Many, many lines. +# Many, many, many lines. +"""Module docstring. + +Possibly also many, many lines. +""" + +import os.path +import sys + +import a +from b.c import X # some noqa comment + +try: + import fast +except ImportError: + import slow as fast + + +# Some comment before a function. +def function(default=None): + """Docstring comes first. + + Possibly many lines. + """ + # FIXME: Some comment about why this function is crap but still in production. + import inner_imports + + if inner_imports.are_evil(): + # Explains why we have this if. + # In great detail indeed. + x = X() + return x.method1() # type: ignore + + # This return is also commented for some reason. + return default + + +# Explains why we use global state. +GLOBAL_STATE = {'a': a(1), 'b': a(2), 'c': a(3)} + + +# Another comment +@fast(really=True) +async def wat(): + async with X.open_async() as x: # Some more comments + result = await x.method1() + # Comment after ending a block. + if result: + print('A OK', file=sys.stdout) + # Comment between things. + print() + + +# Some closing comments. +# Maybe Vim or Emacs directives for formatting. +# Who knows. diff --git a/tests/comments2.py b/tests/comments2.py new file mode 100644 index 0000000..7d5d3a3 --- /dev/null +++ b/tests/comments2.py @@ -0,0 +1,202 @@ +# Please keep __all__ alphabetized within each category. +__all__ = [ + # Super-special typing primitives. + 'Any', + 'Callable', + 'ClassVar', + + # ABCs (from collections.abc). + 'AbstractSet', # collections.abc.Set. + 'ByteString', + 'Container', + + # Concrete collection types. 
+ 'Counter', + 'Deque', + 'Dict', + 'DefaultDict', + 'List', + 'Set', + 'FrozenSet', + 'NamedTuple', # Not really a type. + 'Generator', +] + +def inline_comments_in_brackets_ruin_everything(): + if typedargslist: + parameters.children = [ + parameters.children[0], # (1 + body, + parameters.children[-1], # )1 + ] + else: + parameters.children = [ + parameters.children[0], # (2 what if this was actually long + body, + parameters.children[-1], # )2 + ] + if (self._proc is not None and + # has the child process finished? + self._returncode is None and + # the child process has finished, but the + # transport hasn't been notified yet? + self._proc.poll() is None): + pass + short = [ + # one + 1, + # two + 2] + call(arg1, arg2, """ +short +""", arg3=True) + + ############################################################################ + + call2( + #short + arg1, + #but + arg2, + #multiline + """ +short +""", + # yup + arg3=True) + lcomp = [ + element # yup + for element in collection # yup + if element is not None # right + ] + lcomp2 = [ + # hello + element + # yup + for element in collection + # right + if element is not None + ] + lcomp3 = [ + # This one is actually too long to fit in a single line. + element.split('\n', 1)[0] + # yup + for element in collection.select_elements() + # right + if element is not None + ] + return Node( + syms.simple_stmt, + [ + Node(statement, result), + Leaf(token.NEWLINE, '\n'), # FIXME: \r\n? + ], + ) + +instruction() + +# END COMMENTS +# MORE END COMMENTS + + +# output + + +# Please keep __all__ alphabetized within each category. +__all__ = [ + # Super-special typing primitives. + 'Any', + 'Callable', + 'ClassVar', + # ABCs (from collections.abc). + 'AbstractSet', # collections.abc.Set. + 'ByteString', + 'Container', + # Concrete collection types. + 'Counter', + 'Deque', + 'Dict', + 'DefaultDict', + 'List', + 'Set', + 'FrozenSet', + 'NamedTuple', # Not really a type. + 'Generator', +] + + +def inline_comments_in_brackets_ruin_everything(): + if typedargslist: + parameters.children = [ + parameters.children[0], body, parameters.children[-1] # (1 # )1 + ] + else: + parameters.children = [ + parameters.children[0], # (2 what if this was actually long + body, + parameters.children[-1], # )2 + ] + if ( + self._proc is not None and + # has the child process finished? + self._returncode is None and + # the child process has finished, but the + # transport hasn't been notified yet? + self._proc.poll() is None + ): + pass + short = [ + # one + 1, + # two + 2, + ] + call( + arg1, + arg2, + """ +short +""", + arg3=True, + ) + ############################################################################ + call2( + # short + arg1, + # but + arg2, + # multiline + """ +short +""", + # yup + arg3=True, + ) + lcomp = [ + element for element in collection if element is not None # yup # yup # right + ] + lcomp2 = [ + # hello + element + # yup + for element in collection + # right + if element is not None + ] + lcomp3 = [ + # This one is actually too long to fit in a single line. + element.split('\n', 1)[0] + # yup + for element in collection.select_elements() + # right + if element is not None + ] + return Node( + syms.simple_stmt, + [Node(statement, result), Leaf(token.NEWLINE, '\n')], # FIXME: \r\n? 
+ ) + + +instruction() +# END COMMENTS +# MORE END COMMENTS diff --git a/tests/composition.py b/tests/composition.py new file mode 100644 index 0000000..7b462ac --- /dev/null +++ b/tests/composition.py @@ -0,0 +1,21 @@ +class C: + + def test(self) -> None: + with patch("black.out", print): + self.assertEqual( + unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.' + ) + self.assertEqual( + unstyle(str(report)), + '1 file reformatted, 1 file left unchanged, 1 file failed to reformat.', + ) + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 2 files left unchanged, ' + '2 files failed to reformat.', + ) diff --git a/tests/expression.py b/tests/expression.py new file mode 100644 index 0000000..3291dc2 --- /dev/null +++ b/tests/expression.py @@ -0,0 +1,240 @@ +... +'some_string' +b'\\xa3' +Name +None +True +False +1 +1.0 +1j +True or False +True or False or None +True and False +True and False and None +(Name1 and Name2) or Name3 +Name1 and Name2 or Name3 +Name1 or (Name2 and Name3) +Name1 or Name2 and Name3 +(Name1 and Name2) or (Name3 and Name4) +Name1 and Name2 or Name3 and Name4 +Name1 or (Name2 and Name3) or Name4 +Name1 or Name2 and Name3 or Name4 +v1 << 2 +1 >> v2 +1 % finished +1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8 +((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8) +not great +~great ++value +-1 +~int and not v1 ^ 123 + v2 | True +(~int) and (not ((v1 ^ (123 + v2)) | True)) +lambda arg: None +lambda a=True: a +lambda a, b, c=True: a +lambda a, b, c=True, *, d=(1 << v2), e='str': a +lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b +1 if True else 2 +str or None if True else str or bytes or None +(str or None) if True else (str or bytes or None) +str or None if (1 if True else 2) else str or bytes or None +(str or None) if (1 if True else 2) else (str or bytes or None) +{'2.7': dead, '3.7': (long_live or die_hard)} +{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}} +{**a, **b, **c} +{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')} +({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None +() +(1,) +(1, 2) +(1, 2, 3) +[] +[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)] +{i for i in (1, 2, 3)} +{(i ** 2) for i in (1, 2, 3)} +{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))} +{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)} +[i for i in (1, 2, 3)] +[(i ** 2) for i in (1, 2, 3)] +[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))] +[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)] +{i: 0 for i in (1, 2, 3)} +{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))} +Python3 > Python2 > COBOL +Life is Life +call() +call(arg) +call(kwarg='hey') +call(arg, kwarg='hey') +call(arg, another, kwarg='hey', **kwargs) +lukasz.langa.pl +call.me(maybe) +1 .real +1.0 .real +....__class__ +list[str] +dict[str, int] +tuple[str, ...] 
+tuple[str, int, float, dict[str, int]] +slice[0] +slice[0:1] +slice[0:1:2] +slice[:] +slice[:-1] +slice[1:] +slice[::-1] +(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None) +f'f-string without formatted values is just a string' +f'{{NOT a formatted value}}' +f'some f-string with {a} {few():.2f} {formatted.values!r}' +f"{f'{nested} inner'} outer" +f'space between opening braces: { {a for a in (1, 2, 3)}}' +{'2.7': dead, '3.7': long_live or die_hard} +{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'} +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C] +(SomeName) +SomeName +(Good, Bad, Ugly) +(i for i in (1, 2, 3)) +((i ** 2) for i in (1, 2, 3)) +((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))) +(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)) +(*starred) +a = (1,) +b = 1, +c = 1 +d = (1,) + a + (2,) + + +def gen(): + yield from outside_of_generator + a = (yield) + + +async def f(): + await some.complicated[0].call(with_args=(True or (1 is not 1))) + + +# output + + +... +'some_string' +b'\\xa3' +Name +None +True +False +1 +1.0 +1j +True or False +True or False or None +True and False +True and False and None +(Name1 and Name2) or Name3 +Name1 and Name2 or Name3 +Name1 or (Name2 and Name3) +Name1 or Name2 and Name3 +(Name1 and Name2) or (Name3 and Name4) +Name1 and Name2 or Name3 and Name4 +Name1 or (Name2 and Name3) or Name4 +Name1 or Name2 and Name3 or Name4 +v1 << 2 +1 >> v2 +1 % finished +1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8 +((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8) +not great +~great ++value +-1 +~int and not v1 ^ 123 + v2 | True +(~int) and (not ((v1 ^ (123 + v2)) | True)) +lambda arg: None +lambda a=True: a +lambda a, b, c=True: a +lambda a, b, c=True, *, d=(1 << v2), e='str': a +lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b +1 if True else 2 +str or None if True else str or bytes or None +(str or None) if True else (str or bytes or None) +str or None if (1 if True else 2) else str or bytes or None +(str or None) if (1 if True else 2) else (str or bytes or None) +{'2.7': dead, '3.7': (long_live or die_hard)} +{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}} +{**a, **b, **c} +{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')} +({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None +() +(1,) +(1, 2) +(1, 2, 3) +[] +[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)] +{i for i in (1, 2, 3)} +{(i ** 2) for i in (1, 2, 3)} +{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))} +{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)} +[i for i in (1, 2, 3)] +[(i ** 2) for i in (1, 2, 3)] +[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))] +[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)] +{i: 0 for i in (1, 2, 3)} +{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))} +Python3 > Python2 > COBOL +Life is Life +call() +call(arg) +call(kwarg='hey') +call(arg, kwarg='hey') +call(arg, another, kwarg='hey', **kwargs) +lukasz.langa.pl +call.me(maybe) +1 .real +1.0 .real +....__class__ +list[str] +dict[str, int] +tuple[str, ...] 
+tuple[str, int, float, dict[str, int]] +slice[0] +slice[0:1] +slice[0:1:2] +slice[:] +slice[:-1] +slice[1:] +slice[::-1] +(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None) +f'f-string without formatted values is just a string' +f'{{NOT a formatted value}}' +f'some f-string with {a} {few():.2f} {formatted.values!r}' +f"{f'{nested} inner'} outer" +f'space between opening braces: { {a for a in (1, 2, 3)}}' +{'2.7': dead, '3.7': long_live or die_hard} +{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'} +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C] +(SomeName) +SomeName +(Good, Bad, Ugly) +(i for i in (1, 2, 3)) +((i ** 2) for i in (1, 2, 3)) +((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))) +(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)) +(*starred) +a = (1,) +b = 1, +c = 1 +d = (1,) + a + (2,) + + +def gen(): + yield from outside_of_generator + + a = (yield) + + +async def f(): + await some.complicated[0].call(with_args=(True or (1 is not 1))) diff --git a/tests/function.py b/tests/function.py new file mode 100644 index 0000000..85f7d40 --- /dev/null +++ b/tests/function.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +import asyncio +import sys + +from third_party import X, Y, Z + +from library import some_connection, \ + some_decorator + +def func_no_args(): + a; b; c + if True: raise RuntimeError + if False: ... + for i in range(10): + print(i) + continue + return None +async def coroutine(arg): + "Single-line docstring. Multiline is harder to reformat." + async with some_connection() as conn: + await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2) + await asyncio.sleep(1) +@asyncio.coroutine +@some_decorator( +with_args=True, +many_args=[1,2,3] +) +def function_signature_stress_test(number:int,no_annotation=None,text:str="default",* ,debug:bool=False,**kwargs) -> str: + return text[number:-1] + +def long_lines(): + if True: + typedargslist.extend( + gen_annotated_params(ast_args.kwonlyargs, ast_args.kw_defaults, parameters, implicit_default=True) + ) + _type_comment_re = re.compile( + r""" + ^ + [\t ]* + \#[ ]type:[ ]* + (?P + [^#\t\n]+? + ) + (? to match + # a trailing space which is why we need the silliness below + (? + (?:\#[^\n]*)? + \n? + ) + $ + """, re.MULTILINE | re.VERBOSE + ) + +# output + + +#!/usr/bin/env python3 +import asyncio +import sys + +from third_party import X, Y, Z + +from library import some_connection, some_decorator + + +def func_no_args(): + a + b + c + if True: + raise RuntimeError + + if False: + ... + for i in range(10): + print(i) + continue + + return None + + +async def coroutine(arg): + "Single-line docstring. Multiline is harder to reformat." + async with some_connection() as conn: + await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2) + await asyncio.sleep(1) + + +@asyncio.coroutine +@some_decorator(with_args=True, many_args=[1, 2, 3]) +def function_signature_stress_test( + number: int, + no_annotation=None, + text: str = "default", + *, + debug: bool = False, + **kwargs, +) -> str: + return text[number:-1] + + +def long_lines(): + if True: + typedargslist.extend( + gen_annotated_params( + ast_args.kwonlyargs, + ast_args.kw_defaults, + parameters, + implicit_default=True, + ) + ) + _type_comment_re = re.compile( + r""" + ^ + [\t ]* + \#[ ]type:[ ]* + (?P + [^#\t\n]+? + ) + (? to match + # a trailing space which is why we need the silliness below + (? + (?:\#[^\n]*)? + \n? 
+ ) + $ + """, + re.MULTILINE | re.VERBOSE, + ) diff --git a/tests/import_spacing.py b/tests/import_spacing.py new file mode 100644 index 0000000..0597b62 --- /dev/null +++ b/tests/import_spacing.py @@ -0,0 +1,77 @@ +"""The asyncio package, tracking PEP 3156.""" + +# flake8: noqa + +import sys + +# This relies on each of the submodules having an __all__ variable. +from .base_events import * +from .coroutines import * +from .events import * # comment here + +from .futures import * +from .locks import * # comment here +from .protocols import * + +from .runners import * # comment here +from .queues import * +from .streams import * + +from .subprocess import * +from .tasks import * +from .transports import * + +__all__ = ( + base_events.__all__ + + coroutines.__all__ + + events.__all__ + + futures.__all__ + + locks.__all__ + + protocols.__all__ + + runners.__all__ + + queues.__all__ + + streams.__all__ + + subprocess.__all__ + + tasks.__all__ + + transports.__all__ +) + + +# output + + +"""The asyncio package, tracking PEP 3156.""" +# flake8: noqa +import sys + +# This relies on each of the submodules having an __all__ variable. +from .base_events import * +from .coroutines import * +from .events import * # comment here + +from .futures import * +from .locks import * # comment here +from .protocols import * + +from .runners import * # comment here +from .queues import * +from .streams import * + +from .subprocess import * +from .tasks import * +from .transports import * + +__all__ = ( + base_events.__all__ + + coroutines.__all__ + + events.__all__ + + futures.__all__ + + locks.__all__ + + protocols.__all__ + + runners.__all__ + + queues.__all__ + + streams.__all__ + + subprocess.__all__ + + tasks.__all__ + + transports.__all__ +) diff --git a/tests/test_black.py b/tests/test_black.py new file mode 100644 index 0000000..d9c0c5e --- /dev/null +++ b/tests/test_black.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +from functools import partial +from pathlib import Path +from typing import List, Tuple +import unittest +from unittest.mock import patch + +from click import unstyle + +import black + +ll = 88 +ff = partial(black.format_file, line_length=ll, fast=True) +fs = partial(black.format_str, line_length=ll) +THIS_FILE = Path(__file__) +THIS_DIR = THIS_FILE.parent + + +def dump_to_stderr(*output: str) -> str: + return '\n' + '\n'.join(output) + '\n' + + +def read_data(name: str) -> Tuple[str, str]: + """read_data('test_name') -> 'input', 'output'""" + if not name.endswith('.py'): + name += '.py' + _input: List[str] = [] + _output: List[str] = [] + with open(THIS_DIR / name, 'r', encoding='utf8') as test: + lines = test.readlines() + result = _input + for line in lines: + if line.rstrip() == '# output': + result = _output + continue + + result.append(line) + if _input and not _output: + # If there's no output marker, treat the entire file as already pre-formatted. 
+ _output = _input[:] + return ''.join(_input).strip() + '\n', ''.join(_output).strip() + '\n' + + +class BlackTestCase(unittest.TestCase): + maxDiff = None + + def assertFormatEqual(self, expected: str, actual: str) -> None: + if actual != expected: + black.out('Expected tree:', fg='green') + try: + exp_node = black.lib2to3_parse(expected) + bdv = black.DebugVisitor() + list(bdv.visit(exp_node)) + except Exception as ve: + black.err(str(ve)) + black.out('Actual tree:', fg='red') + try: + exp_node = black.lib2to3_parse(actual) + bdv = black.DebugVisitor() + list(bdv.visit(exp_node)) + except Exception as ve: + black.err(str(ve)) + self.assertEqual(expected, actual) + + @patch("black.dump_to_file", dump_to_stderr) + def test_self(self) -> None: + source, expected = read_data('test_black') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_black(self) -> None: + source, expected = read_data('../black') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_setup(self) -> None: + source, expected = read_data('../setup') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_function(self) -> None: + source, expected = read_data('function') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_expression(self) -> None: + source, expected = read_data('expression') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_comments(self) -> None: + source, expected = read_data('comments') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_comments2(self) -> None: + source, expected = read_data('comments2') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_cantfit(self) -> None: + source, expected = read_data('cantfit') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_import_spacing(self) -> None: + source, expected = read_data('import_spacing') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_composition(self) -> None: + 
source, expected = read_data('composition') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + def test_report(self) -> None: + report = black.Report() + out_lines = [] + err_lines = [] + + def out(msg: str, **kwargs): + out_lines.append(msg) + + def err(msg: str, **kwargs): + err_lines.append(msg) + + with patch("black.out", out), patch("black.err", err): + report.done(Path('f1'), changed=True) + self.assertEqual(len(out_lines), 1) + self.assertEqual(len(err_lines), 0) + self.assertEqual(out_lines[-1], 'reformatted f1') + self.assertEqual(unstyle(str(report)), '1 file reformatted.') + self.assertEqual(report.return_code, 0) + report.failed(Path('e1'), 'boom') + self.assertEqual(len(out_lines), 1) + self.assertEqual(len(err_lines), 1) + self.assertEqual(err_lines[-1], 'error: cannot format e1: boom') + self.assertEqual( + unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.' + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f2'), changed=False) + self.assertEqual(len(out_lines), 2) + self.assertEqual(len(err_lines), 1) + self.assertEqual(out_lines[-1], 'f2 already well formatted, good job.') + self.assertEqual( + unstyle(str(report)), + '1 file reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f3'), changed=True) + self.assertEqual(len(out_lines), 3) + self.assertEqual(len(err_lines), 1) + self.assertEqual(out_lines[-1], 'reformatted f3') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.failed(Path('e2'), 'boom') + self.assertEqual(len(out_lines), 3) + self.assertEqual(len(err_lines), 2) + self.assertEqual(err_lines[-1], 'error: cannot format e2: boom') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '2 files failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f4'), changed=False) + self.assertEqual(len(out_lines), 4) + self.assertEqual(len(err_lines), 2) + self.assertEqual(out_lines[-1], 'f4 already well formatted, good job.') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 2 files left unchanged, ' + '2 files failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + + +if __name__ == '__main__': + unittest.main() -- 2.39.5
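One convention in the suite above is worth spelling out: read_data() treats everything before a bare '# output' line in a fixture as the source handed to Black and everything after it as the expected result, and a fixture with no marker is expected to be already formatted. The sketch below restates that convention in a self-contained form; the helper name split_fixture and the sample snippet are invented here for illustration, whereas the real tests pair read_data() with black.format_str(source, line_length=88).

    # Hypothetical, stand-alone restatement of the '# output' fixture
    # convention used by tests/test_black.py.
    from typing import List, Tuple


    def split_fixture(text: str) -> Tuple[str, str]:
        """Split a fixture into (source, expected) on a bare '# output' line."""
        _input: List[str] = []
        _output: List[str] = []
        current = _input
        for line in text.splitlines(keepends=True):
            if line.rstrip() == '# output':
                current = _output
                continue

            current.append(line)
        if _input and not _output:
            # No marker: the fixture is expected to be formatted already.
            _output = _input[:]
        return ''.join(_input), ''.join(_output)


    fixture = "x = { 'a':37,'b':42}\n# output\nx = {'a': 37, 'b': 42}\n"
    source, expected = split_fixture(fixture)
    print(source.strip())    # x = { 'a':37,'b':42}
    print(expected.strip())  # x = {'a': 37, 'b': 42}
    # In the real suite, black.format_str(source, line_length=88) would be
    # compared against `expected`.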