From: Łukasz Langa
Date: Wed, 14 Mar 2018 19:55:32 +0000 (-0700)
Subject: Initial commit
X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/e74117f172e29e8a980e2c9de929ad50d3769150?ds=inline

Initial commit
---

e74117f172e29e8a980e2c9de929ad50d3769150
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..cf36923
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,8 @@
+# This is an example .flake8 config, used when developing *Black* itself.
+# Keep in sync with setup.cfg which is used for source packages.
+
+[flake8]
+ignore = E266, E501
+max-line-length = 80
+max-complexity = 12
+select = B,C,E,F,W,T4,B9
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6350e98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.coverage
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..e434c44
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+sudo: false
+language: python
+before_script:
+- pip install -e .
+# test script
+script: python setup.py test
+notifications:
+  on_success: change
+  on_failure: always
+matrix:
+  include:
+    - python: 3.6
+    - python: 3.6-dev
+    - python: 3.7-dev
+    - python: 3.8-dev
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7a9b891
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Łukasz Langa
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9ae6851 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.rst *.md LICENSE +recursive-include tests *.txt *.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..3c20aff --- /dev/null +++ b/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +attrs = "*" +click = "*" + +[dev-packages] +coverage = "*" +flake8 = "*" +flake8-bugbear = "*" +flake8-mypy = "*" +mypy = "*" +pypandoc = "*" +twine = "*" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..7c173f4 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,243 @@ +{ + "_meta": { + "hash": { + "sha256": "e2dc877c2f32df83197fc3dc0f49e0a66d0d099aab106b99d64fdbe5b14cc91b" + }, + "host-environment-markers": { + "implementation_name": "cpython", + "implementation_version": "3.6.4", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_python_implementation": "CPython", + "platform_release": "17.4.0", + "platform_system": "Darwin", + "platform_version": "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64", + "python_full_version": "3.6.4", + "python_version": "3.6", + "sys_platform": "darwin" + }, + "pipfile-spec": 6, + "requires": {}, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "click": { + "hashes": [ + "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", + "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" + ], + "version": "==6.7" + } + }, + "develop": { + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "certifi": { + "hashes": [ + "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296", + "sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d" + ], + "version": "==2018.1.18" + }, + "chardet": { + "hashes": [ + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" + ], + "version": "==3.0.4" + }, + "coverage": { + "hashes": [ + "sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc", + "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694", + "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80", + "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed", + "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249", + "sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1", + "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9", + "sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5", + "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508", + "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f", + "sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba", + "sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e", + 
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd", + "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba", + "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162", + "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d", + "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558", + "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c", + "sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062", + "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640", + "sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99", + "sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287", + "sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000", + "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", + "sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc", + "sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653", + "sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a", + "sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1", + "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91", + "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2", + "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d", + "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a", + "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4", + "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd", + "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77", + "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e" + ], + "version": "==4.5.1" + }, + "flake8": { + "hashes": [ + "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37", + "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0" + ], + "version": "==3.5.0" + }, + "flake8-bugbear": { + "hashes": [ + "sha256:541746f0f3b2f1a8d7278e1d2d218df298996b60b02677708560db7c7e620e3b", + "sha256:5f14a99d458e29cb92be9079c970030e0dd398b2decb179d76d39a5266ea1578" + ], + "version": "==18.2.0" + }, + "flake8-mypy": { + "hashes": [ + "sha256:cff009f4250e8391bf48990093cff85802778c345c8449d6498b62efefeebcbc", + "sha256:47120db63aff631ee1f84bac6fe8e64731dc66da3efc1c51f85e15ade4a3ba18" + ], + "version": "==17.8.0" + }, + "idna": { + "hashes": [ + "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", + "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f" + ], + "version": "==2.6" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "mypy": { + "hashes": [ + "sha256:884f18f3a40cfcf24cdd5860b84958cfb35e6563e439c5adc1503878df221dc3", + "sha256:83d798f66323f2de6191d66d9ae5ab234e4ee5b400010e19c58d75d308049f25" + ], + "version": "==0.570" + }, + "pkginfo": { + "hashes": [ + "sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024", + "sha256:bb1a6aeabfc898f5df124e7e00303a5b3ec9a489535f346bfbddb081af93f89e" + ], + "version": "==1.4.1" + }, + "pycodestyle": { + "hashes": [ + "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9", + 
"sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766" + ], + "version": "==2.3.1" + }, + "pyflakes": { + "hashes": [ + "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f", + "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805" + ], + "version": "==1.6.0" + }, + "pypandoc": { + "hashes": [ + "sha256:e914e6d5f84a76764887e4d909b09d63308725f0cbb5293872c2c92f07c11a5b" + ], + "version": "==1.4" + }, + "requests": { + "hashes": [ + "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", + "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" + ], + "version": "==2.18.4" + }, + "requests-toolbelt": { + "hashes": [ + "sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237", + "sha256:f6a531936c6fa4c6cfce1b9c10d5c4f498d16528d2a54a22ca00011205a187b5" + ], + "version": "==0.8.0" + }, + "tqdm": { + "hashes": [ + "sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9", + "sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df" + ], + "version": "==4.19.6" + }, + "twine": { + "hashes": [ + "sha256:d3ce5c480c22ccfb761cd358526e862b32546d2fe4bc93d46b5cf04ea3cc46ca", + "sha256:caa45b7987fc96321258cd7668e3be2ff34064f5c66d2d975b641adca659c1ab" + ], + "version": "==1.9.1" + }, + "typed-ast": { + "hashes": [ + "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", + "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", + "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", + "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", + "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", + "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", + "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", + "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6", + "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", + "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", + "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", + "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", + "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", + "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", + "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", + "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", + "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", + "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa" + ], + "version": "==1.1.0" + }, + "urllib3": { + "hashes": [ + "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", + "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" + ], + "version": "==1.22" + }, + "wheel": { + "hashes": [ + "sha256:e721e53864f084f956f40f96124a74da0631ac13fbbd1ba99e8e2b5e9cafdf64", + "sha256:9515fe0a94e823fd90b08d22de45d7bde57c90edce705b22f5e1ecf7e1b653c8" + ], + "version": "==0.30.0" + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d1724ca --- /dev/null +++ b/README.md @@ -0,0 +1,260 @@ +# black + +[![Build Status](https://travis-ci.org/ambv/black.svg?branch=master)](https://travis-ci.org/ambv/black) + +> Any color you like. + + +*Black* is the uncompromising Python code formatter. 
By using it, you +agree to cease control over minutiae of hand-formatting. In return, +*Black* gives you speed, determinism, and freedom from `pycodestyle` +nagging about formatting. You will save time and mental energy for +more important matters. + +Blackened code looks the same regardless of the project you're reading. +Formatting becomes transparent after a while and you can focus on the +content instead. + +*Black* makes code review faster by producing the smallest diffs +possible. + + +## NOTE: This is an early pre-release + +*Black* can already successfully format itself and the standard library. +It also sports a decent test suite. However, it is still very new. +Things will probably be wonky for a while. This is made explicit by the +"Alpha" trove classifier, as well as by the "a" in the version number. +What this means for you is that **until the formatter becomes stable, +you should expect some formatting to change in the future**. + +Also, as a temporary safety measure, *Black* will check that the +reformatted code still produces a valid AST that is equivalent to the +original. This slows it down. If you're feeling confident, use +``--fast``. + + +## Usage + +*Black* can be installed by running `pip install black`. + +``` +black [OPTIONS] [SRC]... + +Options: + -l, --line-length INTEGER Where to wrap around. [default: 88] + --fast / --safe If --fast given, skip temporary sanity checks. + [default: --safe] + --version Show the version and exit. + --help Show this message and exit. +``` + + +## The philosophy behind *Black* + +*Black* reformats entire files in place. It is not configurable. It +doesn't take previous formatting into account. It doesn't reformat +blocks that start with `# fmt: off` and end with `# fmt: on`. It also +recognizes [YAPF](https://github.com/google/yapf)'s block comments to +the same effect, as a courtesy for straddling code. + + +### How *Black* formats files + +*Black* ignores previous formatting and applies uniform horizontal +and vertical whitespace to your code. The rules for horizontal +whitespace are pretty obvious and can be summarized as: do whatever +makes `pycodestyle` happy. + +As for vertical whitespace, *Black* tries to render one full expression +or simple statement per line. If this fits the allotted line length, +great. +```!py3 +# in: +l = [1, + 2, + 3, +] + +# out: +l = [1, 2, 3] +``` + +If not, *Black* will look at the contents of the first outer matching +brackets and put that in a separate indented line. +```!py3 +# in: +l = [[n for n in list_bosses()], [n for n in list_employees()]] + +# out: +l = [ + [n for n in list_bosses()], [n for n in list_employees()] +] +``` + +If that still doesn't fit the bill, it will decompose the internal +expression further using the same rule, indenting matching brackets +every time. If the contents of the matching brackets pair are +comma-separated (like an argument list, or a dict literal, and so on) +then *Black* will first try to keep them on the same line with the +matching brackets. If that doesn't work, it will put all of them in +separate lines. +```!py3 +# in: +def very_important_function(template: str, *variables, *, file: os.PathLike, debug: bool = False): + """Applies `variables` to the `template` and writes to `file`.""" + with open(file, 'w') as f: + ... + +# out: +def very_important_function( + template: str, + *variables, + *, + file: os.PathLike, + debug: bool = False, +): + """Applies `variables` to the `template` and writes to `file`.""" + with open(file, 'w') as f: + ... 
+``` + +You might have noticed that closing brackets are always dedented and +that a trailing comma is always added. Such formatting produces smaller +diffs; when you add or remove an element, it's always just one line. +Also, having the closing bracket dedented provides a clear delimiter +between two distinct sections of the code that otherwise share the same +indentation level (like the arguments list and the docstring in the +example above). + +Unnecessary trailing commas are removed if an expression fits in one +line. This makes it 1% more likely that your line won't exceed the +allotted line length limit. + +*Black* avoids spurious vertical whitespace. This is in the spirit of +PEP 8 which says that in-function vertical whitespace should only be +used sparingly. One exception is control flow statements: *Black* will +always emit an extra empty line after ``return``, ``raise``, ``break``, +``continue``, and ``yield``. This is to make changes in control flow +more prominent to readers of your code. + +That's it. The rest of the whitespace formatting rules follow PEP 8 and +are designed to keep `pycodestyle` quiet. + + +### Line length + +You probably noticed the peculiar default line length. *Black* defaults +to 88 characters per line, which happens to be 10% over 80. This number +was found to produce significantly shorter files than sticking with 80 +(the most popular), or even 79 (used by the standard library). In +general, [90-ish seems like the wise choice](https://youtu.be/wf-BqAjZb8M?t=260). + +If you're paid by the line of code you write, you can pass +`--line-length` with a lower number. *Black* will try to respect that. +However, sometimes it won't be able to without breaking other rules. In +those rare cases, auto-formatted code will exceed your allotted limit. + +You can also increase it, but remember that people with sight disabilities +find it harder to work with line lengths exceeding 100 characters. +It also adversely affects side-by-side diff review on typical screen +resolutions. Long lines also make it harder to present code neatly +in documentation or talk slides. + +If you're using Flake8, you can bump `max-line-length` to 88 and forget +about it. Alternatively, use [Bugbear](https://github.com/PyCQA/flake8-bugbear)'s +B950 warning instead of E501 and keep the max line length at 80 which +you are probably already using. You'd do it like this: +```!ini +[flake8] +max-line-length = 80 +... +select = C,E,F,W,B,B950 +ignore = E501 +``` + +You'll find *Black*'s own .flake8 config file is configured like this. +If you're curious about the reasoning behind B950, Bugbear's documentation +explains it. The tl;dr is "it's like highway speed limits, we won't +bother you if you overdo it by a few km/h". + + +### Editor integration + +There is currently no integration with any text editors. Vim and +Atom/Nuclide integration is planned by the author, others will require +external contributions. + +Patches welcome! ✨ 🍰 ✨ + + +## Testimonials + +**Dusty Phillips**, [writer](https://smile.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=dusty+phillips): + +> Black is opinionated so you don't have to be. + +**Hynek Schlawack**, [creator of `attrs`](http://www.attrs.org/), core +developer of Twisted and CPython: + +> An auto-formatter that doesn't suck is all I want for Xmas! + +**Carl Meyer**, [Django](https://www.djangoproject.com/) core developer: + +> At least the name is good. 
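
A note for prospective editor integrators (an illustration, not part of this commit's README): besides the command line entry point, the `black.py` module added later in this patch exposes `format_str`, which reformats a source string and is what the file-level functions build on. A minimal sketch, assuming `black.py` from this commit is importable as `black`:

```!py3
import black

# The README's first example, as a single source string.
src = "l = [1,\n    2,\n    3,\n]\n"

# format_str() returns the reformatted module contents as a string.
formatted = black.format_str(src, line_length=88)
print(formatted, end='')
# l = [1, 2, 3]
```

An editor plugin could call `format_str` on a buffer's contents and replace the buffer only when the result differs, mirroring what `format_file_in_place` does for files.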
+ + +## Tests + +Just run: + +``` +python setup.py test +``` + +## This tool requires Python 3.6.0+ to run + +But you can reformat Python 2 code with it, too. *Black* is able to parse +all of the new syntax supported on Python 3.6 but also *effectively all* +the Python 2 syntax at the same time, as long as you're not using print +statements. + +By making the code exclusively Python 3.6+, I'm able to focus on the +quality of the formatting and re-use all the nice features of the new +releases (check out [pathlib](docs.python.org/3/library/pathlib.html) or +f-strings) instead of wasting cycles on Unicode compatibility, and so on. + + +## License + +MIT + + +## Contributing + +In terms of inspiration, *Black* is about as configurable as *gofmt* and +*rustfmt* are. This is deliberate. + +Bug reports and fixes are always welcome! However, before you suggest a +new feature or configuration knob, ask yourself why you want it. If it +enables better integration with some workflow, fixes an inconsistency, +speeds things up, and so on - go for it! On the other hand, if your +answer is "because I don't like a particular formatting" then you're not +ready to embrace *Black* yet. Such changes are unlikely to get accepted. +You can still try but prepare to be disappointed. + + +## Change Log + +### 18.3a0 + +* first published version, Happy 🍰 Day 2018! + +* alpha quality + +* date-versioned (see: http://calver.org/) + + +## Authors + +Glued together by [Łukasz Langa](mailto:lukasz@langa.pl). diff --git a/black.py b/black.py new file mode 100644 index 0000000..24c57ca --- /dev/null +++ b/black.py @@ -0,0 +1,1478 @@ +#!/usr/bin/env python3 +import asyncio +from asyncio.base_events import BaseEventLoop +from concurrent.futures import Executor, ProcessPoolExecutor +from functools import partial +import keyword +import os +from pathlib import Path +import tokenize +from typing import ( + Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union +) + +from attr import attrib, dataclass, Factory +import click + +# lib2to3 fork +from blib2to3.pytree import Node, Leaf, type_repr +from blib2to3 import pygram, pytree +from blib2to3.pgen2 import driver, token +from blib2to3.pgen2.parse import ParseError + +__version__ = "18.3a0" +DEFAULT_LINE_LENGTH = 88 +# types +syms = pygram.python_symbols +FileContent = str +Encoding = str +Depth = int +NodeType = int +LeafID = int +Priority = int +LN = Union[Leaf, Node] +out = partial(click.secho, bold=True, err=True) +err = partial(click.secho, fg='red', err=True) + + +class NothingChanged(UserWarning): + """Raised by `format_file` when the reformatted code is the same as source.""" + + +class CannotSplit(Exception): + """A readable split that fits the allotted line length is impossible. + + Raised by `left_hand_split()` and `right_hand_split()`. + """ + + +@click.command() +@click.option( + '-l', + '--line-length', + type=int, + default=DEFAULT_LINE_LENGTH, + help='How many character per line to allow.', + show_default=True, +) +@click.option( + '--fast/--safe', + is_flag=True, + help='If --fast given, skip temporary sanity checks. 
[default: --safe]', +) +@click.version_option(version=__version__) +@click.argument( + 'src', + nargs=-1, + type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True), +) +@click.pass_context +def main(ctx: click.Context, line_length: int, fast: bool, src: List[str]) -> None: + """The uncompromising code formatter.""" + sources: List[Path] = [] + for s in src: + p = Path(s) + if p.is_dir(): + sources.extend(gen_python_files_in_dir(p)) + elif p.is_file(): + # if a file was explicitly given, we don't care about its extension + sources.append(p) + else: + err(f'invalid path: {s}') + if len(sources) == 0: + ctx.exit(0) + elif len(sources) == 1: + p = sources[0] + report = Report() + try: + changed = format_file_in_place(p, line_length=line_length, fast=fast) + report.done(p, changed) + except Exception as exc: + report.failed(p, str(exc)) + ctx.exit(report.return_code) + else: + loop = asyncio.get_event_loop() + executor = ProcessPoolExecutor(max_workers=os.cpu_count()) + return_code = 1 + try: + return_code = loop.run_until_complete( + schedule_formatting(sources, line_length, fast, loop, executor) + ) + finally: + loop.close() + ctx.exit(return_code) + + +async def schedule_formatting( + sources: List[Path], + line_length: int, + fast: bool, + loop: BaseEventLoop, + executor: Executor, +) -> int: + tasks = { + src: loop.run_in_executor( + executor, format_file_in_place, src, line_length, fast + ) + for src in sources + } + await asyncio.wait(tasks.values()) + cancelled = [] + report = Report() + for src, task in tasks.items(): + if not task.done(): + report.failed(src, 'timed out, cancelling') + task.cancel() + cancelled.append(task) + elif task.exception(): + report.failed(src, str(task.exception())) + else: + report.done(src, task.result()) + if cancelled: + await asyncio.wait(cancelled, timeout=2) + out('All done! ✨ 🍰 ✨') + click.echo(str(report)) + return report.return_code + + +def format_file_in_place(src: Path, line_length: int, fast: bool) -> bool: + """Format the file and rewrite if changed. 
Return True if changed.""" + try: + contents, encoding = format_file(src, line_length=line_length, fast=fast) + except NothingChanged: + return False + + with open(src, "w", encoding=encoding) as f: + f.write(contents) + return True + + +def format_file( + src: Path, line_length: int, fast: bool +) -> Tuple[FileContent, Encoding]: + """Reformats a file and returns its contents and encoding.""" + with tokenize.open(src) as src_buffer: + src_contents = src_buffer.read() + if src_contents.strip() == '': + raise NothingChanged(src) + + dst_contents = format_str(src_contents, line_length=line_length) + if src_contents == dst_contents: + raise NothingChanged(src) + + if not fast: + assert_equivalent(src_contents, dst_contents) + assert_stable(src_contents, dst_contents, line_length=line_length) + return dst_contents, src_buffer.encoding + + +def format_str(src_contents: str, line_length: int) -> FileContent: + """Reformats a string and returns new contents.""" + src_node = lib2to3_parse(src_contents) + dst_contents = "" + comments: List[Line] = [] + lines = LineGenerator() + elt = EmptyLineTracker() + empty_line = Line() + after = 0 + for current_line in lines.visit(src_node): + for _ in range(after): + dst_contents += str(empty_line) + before, after = elt.maybe_empty_lines(current_line) + for _ in range(before): + dst_contents += str(empty_line) + if not current_line.is_comment: + for comment in comments: + dst_contents += str(comment) + comments = [] + for line in split_line(current_line, line_length=line_length): + dst_contents += str(line) + else: + comments.append(current_line) + for comment in comments: + dst_contents += str(comment) + return dst_contents + + +def lib2to3_parse(src_txt: str) -> Node: + """Given a string with source, return the lib2to3 Node.""" + grammar = pygram.python_grammar_no_print_statement + drv = driver.Driver(grammar, pytree.convert) + if src_txt[-1] != '\n': + nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n' + src_txt += nl + try: + result = drv.parse_string(src_txt, True) + except ParseError as pe: + lineno, column = pe.context[1] + lines = src_txt.splitlines() + try: + faulty_line = lines[lineno - 1] + except IndexError: + faulty_line = "" + raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None + + if isinstance(result, Leaf): + result = Node(syms.file_input, [result]) + return result + + +def lib2to3_unparse(node: Node) -> str: + """Given a lib2to3 node, return its string representation.""" + code = str(node) + return code + + +T = TypeVar('T') + + +class Visitor(Generic[T]): + """Basic lib2to3 visitor that yields things on visiting.""" + + def visit(self, node: LN) -> Iterator[T]: + if node.type < 256: + name = token.tok_name[node.type] + else: + name = type_repr(node.type) + yield from getattr(self, f'visit_{name}', self.visit_default)(node) + + def visit_default(self, node: LN) -> Iterator[T]: + if isinstance(node, Node): + for child in node.children: + yield from self.visit(child) + + +@dataclass +class DebugVisitor(Visitor[T]): + tree_depth: int = attrib(default=0) + + def visit_default(self, node: LN) -> Iterator[T]: + indent = ' ' * (2 * self.tree_depth) + if isinstance(node, Node): + _type = type_repr(node.type) + out(f'{indent}{_type}', fg='yellow') + self.tree_depth += 1 + for child in node.children: + yield from self.visit(child) + + self.tree_depth -= 1 + out(f'{indent}/{_type}', fg='yellow', bold=False) + else: + _type = token.tok_name.get(node.type, str(node.type)) + out(f'{indent}{_type}', fg='blue', nl=False) + if 
node.prefix: + # We don't have to handle prefixes for `Node` objects since + # that delegates to the first child anyway. + out(f' {node.prefix!r}', fg='green', bold=False, nl=False) + out(f' {node.value!r}', fg='blue', bold=False) + + +KEYWORDS = set(keyword.kwlist) +WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE} +FLOW_CONTROL = {'return', 'raise', 'break', 'continue'} +STATEMENT = { + syms.if_stmt, + syms.while_stmt, + syms.for_stmt, + syms.try_stmt, + syms.except_clause, + syms.with_stmt, + syms.funcdef, + syms.classdef, +} +STANDALONE_COMMENT = 153 +LOGIC_OPERATORS = {'and', 'or'} +COMPARATORS = { + token.LESS, + token.GREATER, + token.EQEQUAL, + token.NOTEQUAL, + token.LESSEQUAL, + token.GREATEREQUAL, +} +MATH_OPERATORS = { + token.PLUS, + token.MINUS, + token.STAR, + token.SLASH, + token.VBAR, + token.AMPER, + token.PERCENT, + token.CIRCUMFLEX, + token.LEFTSHIFT, + token.RIGHTSHIFT, + token.DOUBLESTAR, + token.DOUBLESLASH, +} +COMPREHENSION_PRIORITY = 20 +COMMA_PRIORITY = 10 +LOGIC_PRIORITY = 5 +STRING_PRIORITY = 4 +COMPARATOR_PRIORITY = 3 +MATH_PRIORITY = 1 + + +@dataclass +class BracketTracker: + depth: int = attrib(default=0) + bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict)) + delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict)) + previous: Optional[Leaf] = attrib(default=None) + + def mark(self, leaf: Leaf) -> None: + if leaf.type == token.COMMENT: + return + + if leaf.type in CLOSING_BRACKETS: + self.depth -= 1 + opening_bracket = self.bracket_match.pop((self.depth, leaf.type)) + leaf.opening_bracket = opening_bracket # type: ignore + leaf.bracket_depth = self.depth # type: ignore + if self.depth == 0: + delim = is_delimiter(leaf) + if delim: + self.delimiters[id(leaf)] = delim + elif self.previous is not None: + if leaf.type == token.STRING and self.previous.type == token.STRING: + self.delimiters[id(self.previous)] = STRING_PRIORITY + elif ( + leaf.type == token.NAME and + leaf.value == 'for' and + leaf.parent and + leaf.parent.type in {syms.comp_for, syms.old_comp_for} + ): + self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY + elif ( + leaf.type == token.NAME and + leaf.value == 'if' and + leaf.parent and + leaf.parent.type in {syms.comp_if, syms.old_comp_if} + ): + self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY + if leaf.type in OPENING_BRACKETS: + self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf + self.depth += 1 + self.previous = leaf + + def any_open_brackets(self) -> bool: + """Returns True if there is an yet unmatched open bracket on the line.""" + return bool(self.bracket_match) + + def max_priority(self, exclude: Iterable[LeafID] = ()) -> int: + """Returns the highest priority of a delimiter found on the line. + + Values are consistent with what `is_delimiter()` returns. + """ + return max(v for k, v in self.delimiters.items() if k not in exclude) + + +@dataclass +class Line: + depth: int = attrib(default=0) + leaves: List[Leaf] = attrib(default=Factory(list)) + comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict)) + bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker)) + inside_brackets: bool = attrib(default=False) + + def append(self, leaf: Leaf, preformatted: bool = False) -> None: + has_value = leaf.value.strip() + if not has_value: + return + + if self.leaves and not preformatted: + # Note: at this point leaf.prefix should be empty except for + # imports, for which we only preserve newlines. 
+ leaf.prefix += whitespace(leaf) + if self.inside_brackets or not preformatted: + self.bracket_tracker.mark(leaf) + self.maybe_remove_trailing_comma(leaf) + if self.maybe_adapt_standalone_comment(leaf): + return + + if not self.append_comment(leaf): + self.leaves.append(leaf) + + @property + def is_comment(self) -> bool: + return bool(self) and self.leaves[0].type == STANDALONE_COMMENT + + @property + def is_decorator(self) -> bool: + return bool(self) and self.leaves[0].type == token.AT + + @property + def is_import(self) -> bool: + return bool(self) and is_import(self.leaves[0]) + + @property + def is_class(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value == 'class' + ) + + @property + def is_def(self) -> bool: + """Also returns True for async defs.""" + try: + first_leaf = self.leaves[0] + except IndexError: + return False + + try: + second_leaf: Optional[Leaf] = self.leaves[1] + except IndexError: + second_leaf = None + return ( + (first_leaf.type == token.NAME and first_leaf.value == 'def') or + ( + first_leaf.type == token.NAME and + first_leaf.value == 'async' and + second_leaf is not None and + second_leaf.type == token.NAME and + second_leaf.value == 'def' + ) + ) + + @property + def is_flow_control(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value in FLOW_CONTROL + ) + + @property + def is_yield(self) -> bool: + return ( + bool(self) and + self.leaves[0].type == token.NAME and + self.leaves[0].value == 'yield' + ) + + def maybe_remove_trailing_comma(self, closing: Leaf) -> bool: + if not ( + self.leaves and + self.leaves[-1].type == token.COMMA and + closing.type in CLOSING_BRACKETS + ): + return False + + if closing.type == token.RSQB or closing.type == token.RBRACE: + self.leaves.pop() + return True + + # For parens let's check if it's safe to remove the comma. If the + # trailing one is the only one, we might mistakenly change a tuple + # into a different type by removing the comma. + depth = closing.bracket_depth + 1 # type: ignore + commas = 0 + opening = closing.opening_bracket # type: ignore + for _opening_index, leaf in enumerate(self.leaves): + if leaf is opening: + break + + else: + return False + + for leaf in self.leaves[_opening_index + 1:]: + if leaf is closing: + break + + bracket_depth = leaf.bracket_depth # type: ignore + if bracket_depth == depth and leaf.type == token.COMMA: + commas += 1 + if commas > 1: + self.leaves.pop() + return True + + return False + + def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool: + """Hack a standalone comment to act as a trailing comment for line splitting. + + If this line has brackets and a standalone `comment`, we need to adapt + it to be able to still reformat the line. + + This is not perfect, the line to which the standalone comment gets + appended will appear "too long" when splitting. 
+ """ + if not ( + comment.type == STANDALONE_COMMENT and + self.bracket_tracker.any_open_brackets() + ): + return False + + comment.type = token.COMMENT + comment.prefix = '\n' + ' ' * (self.depth + 1) + return self.append_comment(comment) + + def append_comment(self, comment: Leaf) -> bool: + if comment.type != token.COMMENT: + return False + + try: + after = id(self.last_non_delimiter()) + except LookupError: + comment.type = STANDALONE_COMMENT + comment.prefix = '' + return False + + else: + if after in self.comments: + self.comments[after].value += str(comment) + else: + self.comments[after] = comment + return True + + def last_non_delimiter(self) -> Leaf: + for i in range(len(self.leaves)): + last = self.leaves[-i - 1] + if not is_delimiter(last): + return last + + raise LookupError("No non-delimiters found") + + def __str__(self) -> str: + if not self: + return '\n' + + indent = ' ' * self.depth + leaves = iter(self.leaves) + first = next(leaves) + res = f'{first.prefix}{indent}{first.value}' + for leaf in leaves: + res += str(leaf) + for comment in self.comments.values(): + res += str(comment) + return res + '\n' + + def __bool__(self) -> bool: + return bool(self.leaves or self.comments) + + +@dataclass +class EmptyLineTracker: + """Provides a stateful method that returns the number of potential extra + empty lines needed before and after the currently processed line. + + Note: this tracker works on lines that haven't been split yet. + """ + previous_line: Optional[Line] = attrib(default=None) + previous_after: int = attrib(default=0) + previous_defs: List[int] = attrib(default=Factory(list)) + + def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: + """Returns the number of extra empty lines before and after the `current_line`. + + This is for separating `def`, `async def` and `class` with extra empty lines + (two on module-level), as well as providing an extra empty line after flow + control keywords to make them more prominent. + """ + before, after = self._maybe_empty_lines(current_line) + self.previous_after = after + self.previous_line = current_line + return before, after + + def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: + before = 0 + depth = current_line.depth + while self.previous_defs and self.previous_defs[-1] >= depth: + self.previous_defs.pop() + before = (1 if depth else 2) - self.previous_after + is_decorator = current_line.is_decorator + if is_decorator or current_line.is_def or current_line.is_class: + if not is_decorator: + self.previous_defs.append(depth) + if self.previous_line is None: + # Don't insert empty lines before the first line in the file. + return 0, 0 + + if self.previous_line and self.previous_line.is_decorator: + # Don't insert empty lines between decorators. + return 0, 0 + + newlines = 2 + if current_line.depth: + newlines -= 1 + newlines -= self.previous_after + return newlines, 0 + + if current_line.is_flow_control: + return before, 1 + + if ( + self.previous_line and + self.previous_line.is_import and + not current_line.is_import and + depth == self.previous_line.depth + ): + return (before or 1), 0 + + if ( + self.previous_line and + self.previous_line.is_yield and + (not current_line.is_yield or depth != self.previous_line.depth) + ): + return (before or 1), 0 + + return before, 0 + + +@dataclass +class LineGenerator(Visitor[Line]): + """Generates reformatted Line objects. Empty lines are not emitted. 
+ + Note: destroys the tree it's visiting by mutating prefixes of its leaves + in ways that will no longer stringify to valid Python code on the tree. + """ + current_line: Line = attrib(default=Factory(Line)) + standalone_comments: List[Leaf] = attrib(default=Factory(list)) + + def line(self, indent: int = 0) -> Iterator[Line]: + """Generate a line. + + If the line is empty, only emit if it makes sense. + If the line is too long, split it first and then generate. + + If any lines were generated, set up a new current_line. + """ + if not self.current_line: + self.current_line.depth += indent + return # Line is empty, don't emit. Creating a new one unnecessary. + + complete_line = self.current_line + self.current_line = Line(depth=complete_line.depth + indent) + yield complete_line + + def visit_default(self, node: LN) -> Iterator[Line]: + if isinstance(node, Leaf): + for comment in generate_comments(node): + if self.current_line.bracket_tracker.any_open_brackets(): + # any comment within brackets is subject to splitting + self.current_line.append(comment) + elif comment.type == token.COMMENT: + # regular trailing comment + self.current_line.append(comment) + yield from self.line() + + else: + # regular standalone comment, to be processed later (see + # docstring in `generate_comments()` + self.standalone_comments.append(comment) + normalize_prefix(node) + if node.type not in WHITESPACE: + for comment in self.standalone_comments: + yield from self.line() + + self.current_line.append(comment) + yield from self.line() + + self.standalone_comments = [] + self.current_line.append(node) + yield from super().visit_default(node) + + def visit_suite(self, node: Node) -> Iterator[Line]: + """Body of a statement after a colon.""" + children = iter(node.children) + # Process newline before indenting. It might contain an inline + # comment that should go right after the colon. + newline = next(children) + yield from self.visit(newline) + yield from self.line(+1) + + for child in children: + yield from self.visit(child) + + yield from self.line(-1) + + def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]: + """Visit a statement. + + The relevant Python language keywords for this statement are NAME leaves + within it. 
+ """ + for child in node.children: + if child.type == token.NAME and child.value in keywords: # type: ignore + yield from self.line() + + yield from self.visit(child) + + def visit_simple_stmt(self, node: Node) -> Iterator[Line]: + """A statement without nested statements.""" + is_suite_like = node.parent and node.parent.type in STATEMENT + if is_suite_like: + yield from self.line(+1) + yield from self.visit_default(node) + yield from self.line(-1) + + else: + yield from self.line() + yield from self.visit_default(node) + + def visit_async_stmt(self, node: Node) -> Iterator[Line]: + yield from self.line() + + children = iter(node.children) + for child in children: + yield from self.visit(child) + + if child.type == token.NAME and child.value == 'async': # type: ignore + break + + internal_stmt = next(children) + for child in internal_stmt.children: + yield from self.visit(child) + + def visit_decorators(self, node: Node) -> Iterator[Line]: + for child in node.children: + yield from self.line() + yield from self.visit(child) + + def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]: + yield from self.line() + + def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]: + yield from self.visit_default(leaf) + yield from self.line() + + def __attrs_post_init__(self) -> None: + """You are in a twisty little maze of passages.""" + v = self.visit_stmt + self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'}) + self.visit_while_stmt = partial(v, keywords={'while', 'else'}) + self.visit_for_stmt = partial(v, keywords={'for', 'else'}) + self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'}) + self.visit_except_clause = partial(v, keywords={'except'}) + self.visit_funcdef = partial(v, keywords={'def'}) + self.visit_with_stmt = partial(v, keywords={'with'}) + self.visit_classdef = partial(v, keywords={'class'}) + self.visit_async_funcdef = self.visit_async_stmt + self.visit_decorated = self.visit_decorators + + +BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE} +OPENING_BRACKETS = set(BRACKET.keys()) +CLOSING_BRACKETS = set(BRACKET.values()) +BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS + + +def whitespace(leaf: Leaf) -> str: + """Return whitespace prefix if needed for the given `leaf`.""" + NO = '' + SPACE = ' ' + DOUBLESPACE = ' ' + t = leaf.type + p = leaf.parent + if t == token.COLON: + return NO + + if t == token.COMMA: + return NO + + if t == token.RPAR: + return NO + + if t == token.COMMENT: + return DOUBLESPACE + + if t == STANDALONE_COMMENT: + return NO + + assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}" + if p.type in {syms.parameters, syms.arglist}: + # untyped function signatures or calls + if t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if not prev or prev.type != token.COMMA: + return NO + + if p.type == syms.varargslist: + # lambdas + if t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if prev and prev.type != token.COMMA: + return NO + + elif p.type == syms.typedargslist: + # typed function signatures + prev = leaf.prev_sibling + if not prev: + return NO + + if t == token.EQUAL: + if prev.type != syms.tname: + return NO + + elif prev.type == token.EQUAL: + # A bit hacky: if the equal sign has whitespace, it means we + # previously found it's a typed argument. So, we're using that, too. 
+ return prev.prefix + + elif prev.type != token.COMMA: + return NO + + elif p.type == syms.tname: + # type names + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type != token.COMMA: + return NO + + elif p.type == syms.trailer: + # attributes and calls + if t == token.LPAR or t == token.RPAR: + return NO + + prev = leaf.prev_sibling + if not prev: + if t == token.DOT: + prevp = preceding_leaf(p) + if not prevp or prevp.type != token.NUMBER: + return NO + + elif t == token.LSQB: + return NO + + elif prev.type != token.COMMA: + return NO + + elif p.type == syms.argument: + # single argument + if t == token.EQUAL: + return NO + + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type == token.LPAR: + return NO + + elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR: + return NO + + elif p.type == syms.decorator: + # decorators + return NO + + elif p.type == syms.dotted_name: + prev = leaf.prev_sibling + if prev: + return NO + + prevp = preceding_leaf(p) + if not prevp or prevp.type == token.AT: + return NO + + elif p.type == syms.classdef: + if t == token.LPAR: + return NO + + prev = leaf.prev_sibling + if prev and prev.type == token.LPAR: + return NO + + elif p.type == syms.subscript: + # indexing + if t == token.COLON: + return NO + + prev = leaf.prev_sibling + if not prev or prev.type == token.COLON: + return NO + + elif p.type in { + syms.test, + syms.not_test, + syms.xor_expr, + syms.or_test, + syms.and_test, + syms.arith_expr, + syms.shift_expr, + syms.yield_expr, + syms.term, + syms.power, + syms.comparison, + }: + # various arithmetic and logic expressions + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type in OPENING_BRACKETS: + return NO + + if prevp.type == token.EQUAL: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.argument + }: + return NO + + return SPACE + + elif p.type == syms.atom: + if t in CLOSING_BRACKETS: + return NO + + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp: + return NO + + if prevp.type in OPENING_BRACKETS: + return NO + + if prevp.type == token.EQUAL: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.argument + }: + return NO + + if prevp.type == token.DOUBLESTAR: + if prevp.parent and prevp.parent.type in { + syms.varargslist, syms.parameters, syms.arglist, syms.dictsetmaker + }: + return NO + + elif prev.type in OPENING_BRACKETS: + return NO + + elif t == token.DOT: + # dots, but not the first one. 
+ return NO + + elif ( + p.type == syms.listmaker or + p.type == syms.testlist_gexp or + p.type == syms.subscriptlist + ): + # list interior, including unpacking + prev = leaf.prev_sibling + if not prev: + return NO + + elif p.type == syms.dictsetmaker: + # dict and set interior, including unpacking + prev = leaf.prev_sibling + if not prev: + return NO + + if prev.type == token.DOUBLESTAR: + return NO + + elif p.type == syms.factor or p.type == syms.star_expr: + # unary ops + prev = leaf.prev_sibling + if not prev: + prevp = preceding_leaf(p) + if not prevp or prevp.type in OPENING_BRACKETS: + return NO + + prevp_parent = prevp.parent + assert prevp_parent is not None + if prevp.type == token.COLON and prevp_parent.type in { + syms.subscript, syms.sliceop + }: + return NO + + elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: + return NO + + elif t == token.NAME or t == token.NUMBER: + return NO + + elif p.type == syms.import_from and t == token.NAME: + prev = leaf.prev_sibling + if prev and prev.type == token.DOT: + return NO + + elif p.type == syms.sliceop: + return NO + + return SPACE + + +def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: + """Returns the first leaf that precedes `node`, if any.""" + while node: + res = node.prev_sibling + if res: + if isinstance(res, Leaf): + return res + + try: + return list(res.leaves())[-1] + + except IndexError: + return None + + node = node.parent + return None + + +def is_delimiter(leaf: Leaf) -> int: + """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter. + + Higher numbers are higher priority. + """ + if leaf.type == token.COMMA: + return COMMA_PRIORITY + + if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS: + return LOGIC_PRIORITY + + if leaf.type in COMPARATORS: + return COMPARATOR_PRIORITY + + if ( + leaf.type in MATH_OPERATORS and + leaf.parent and + leaf.parent.type not in {syms.factor, syms.star_expr} + ): + return MATH_PRIORITY + + return 0 + + +def generate_comments(leaf: Leaf) -> Iterator[Leaf]: + """Cleans the prefix of the `leaf` and generates comments from it, if any. + + Comments in lib2to3 are shoved into the whitespace prefix. This happens + in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation + move because it does away with modifying the grammar to include all the + possible places in which comments can be placed. + + The sad consequence for us though is that comments don't "belong" anywhere. + This is why this function generates simple parentless Leaf objects for + comments. We simply don't know what the correct parent should be. + + No matter though, we can live without this. We really only need to + differentiate between inline and standalone comments. The latter don't + share the line with any code. + + Inline comments are emitted as regular token.COMMENT leaves. Standalone + are emitted with a fake STANDALONE_COMMENT token identifier. 
+ """ + if not leaf.prefix: + return + + if '#' not in leaf.prefix: + return + + before_comment, content = leaf.prefix.split('#', 1) + content = content.rstrip() + if content and (content[0] not in {' ', '!', '#'}): + content = ' ' + content + is_standalone_comment = ( + '\n' in before_comment or '\n' in content or leaf.type == token.DEDENT + ) + if not is_standalone_comment: + # simple trailing comment + yield Leaf(token.COMMENT, value='#' + content) + return + + for line in ('#' + content).split('\n'): + line = line.lstrip() + if not line.startswith('#'): + continue + + yield Leaf(STANDALONE_COMMENT, line) + + +def split_line(line: Line, line_length: int, inner: bool = False) -> Iterator[Line]: + """Splits a `line` into potentially many lines. + + They should fit in the allotted `line_length` but might not be able to. + `inner` signifies that there were a pair of brackets somewhere around the + current `line`, possibly transitively. This means we can fallback to splitting + by delimiters if the LHS/RHS don't yield any results. + """ + line_str = str(line).strip('\n') + if len(line_str) <= line_length and '\n' not in line_str: + yield line + return + + if line.is_def: + split_funcs = [left_hand_split] + elif line.inside_brackets: + split_funcs = [delimiter_split] + if '\n' not in line_str: + # Only attempt RHS if we don't have multiline strings or comments + # on this line. + split_funcs.append(right_hand_split) + else: + split_funcs = [right_hand_split] + for split_func in split_funcs: + # We are accumulating lines in `result` because we might want to abort + # mission and return the original line in the end, or attempt a different + # split altogether. + result: List[Line] = [] + try: + for l in split_func(line): + if str(l).strip('\n') == line_str: + raise CannotSplit("Split function returned an unchanged result") + + result.extend(split_line(l, line_length=line_length, inner=True)) + except CannotSplit as cs: + continue + + else: + yield from result + break + + else: + yield line + + +def left_hand_split(line: Line) -> Iterator[Line]: + """Split line into many lines, starting with the first matching bracket pair. + + Note: this usually looks weird, only use this for function definitions. + Prefer RHS otherwise. + """ + head = Line(depth=line.depth) + body = Line(depth=line.depth + 1, inside_brackets=True) + tail = Line(depth=line.depth) + tail_leaves: List[Leaf] = [] + body_leaves: List[Leaf] = [] + head_leaves: List[Leaf] = [] + current_leaves = head_leaves + matching_bracket = None + for leaf in line.leaves: + if ( + current_leaves is body_leaves and + leaf.type in CLOSING_BRACKETS and + leaf.opening_bracket is matching_bracket # type: ignore + ): + current_leaves = tail_leaves + current_leaves.append(leaf) + if current_leaves is head_leaves: + if leaf.type in OPENING_BRACKETS: + matching_bracket = leaf + current_leaves = body_leaves + # Since body is a new indent level, remove spurious leading whitespace. + if body_leaves: + normalize_prefix(body_leaves[0]) + # Build the new lines. + for result, leaves in ( + (head, head_leaves), (body, body_leaves), (tail, tail_leaves) + ): + for leaf in leaves: + result.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + result.append(comment_after, preformatted=True) + # Check if the split succeeded. 
+ tail_len = len(str(tail)) + if not body: + if tail_len == 0: + raise CannotSplit("Splitting brackets produced the same line") + + elif tail_len < 3: + raise CannotSplit( + f"Splitting brackets on an empty body to save " + f"{tail_len} characters is not worth it" + ) + + for result in (head, body, tail): + if result: + yield result + + +def right_hand_split(line: Line) -> Iterator[Line]: + """Split line into many lines, starting with the last matching bracket pair.""" + head = Line(depth=line.depth) + body = Line(depth=line.depth + 1, inside_brackets=True) + tail = Line(depth=line.depth) + tail_leaves: List[Leaf] = [] + body_leaves: List[Leaf] = [] + head_leaves: List[Leaf] = [] + current_leaves = tail_leaves + opening_bracket = None + for leaf in reversed(line.leaves): + if current_leaves is body_leaves: + if leaf is opening_bracket: + current_leaves = head_leaves + current_leaves.append(leaf) + if current_leaves is tail_leaves: + if leaf.type in CLOSING_BRACKETS: + opening_bracket = leaf.opening_bracket # type: ignore + current_leaves = body_leaves + tail_leaves.reverse() + body_leaves.reverse() + head_leaves.reverse() + # Since body is a new indent level, remove spurious leading whitespace. + if body_leaves: + normalize_prefix(body_leaves[0]) + # Build the new lines. + for result, leaves in ( + (head, head_leaves), (body, body_leaves), (tail, tail_leaves) + ): + for leaf in leaves: + result.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + result.append(comment_after, preformatted=True) + # Check if the split succeeded. + tail_len = len(str(tail).strip('\n')) + if not body: + if tail_len == 0: + raise CannotSplit("Splitting brackets produced the same line") + + elif tail_len < 3: + raise CannotSplit( + f"Splitting brackets on an empty body to save " + f"{tail_len} characters is not worth it" + ) + + for result in (head, body, tail): + if result: + yield result + + +def delimiter_split(line: Line) -> Iterator[Line]: + """Split according to delimiters of the highest priority. + + This kind of split doesn't increase indentation. 
+ """ + try: + last_leaf = line.leaves[-1] + except IndexError: + raise CannotSplit("Line empty") + + delimiters = line.bracket_tracker.delimiters + try: + delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)}) + except ValueError: + raise CannotSplit("No delimiters found") + + current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) + for leaf in line.leaves: + current_line.append(leaf, preformatted=True) + comment_after = line.comments.get(id(leaf)) + if comment_after: + current_line.append(comment_after, preformatted=True) + leaf_priority = delimiters.get(id(leaf)) + if leaf_priority == delimiter_priority: + normalize_prefix(current_line.leaves[0]) + yield current_line + + current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) + if current_line: + if ( + delimiter_priority == COMMA_PRIORITY and + current_line.leaves[-1].type != token.COMMA + ): + current_line.append(Leaf(token.COMMA, ',')) + normalize_prefix(current_line.leaves[0]) + yield current_line + + +def is_import(leaf: Leaf) -> bool: + """Returns True if the given leaf starts an import statement.""" + p = leaf.parent + t = leaf.type + v = leaf.value + return bool( + t == token.NAME and + ( + (v == 'import' and p and p.type == syms.import_name) or + (v == 'from' and p and p.type == syms.import_from) + ) + ) + + +def normalize_prefix(leaf: Leaf) -> None: + """Leave existing extra newlines for imports. Remove everything else.""" + if is_import(leaf): + spl = leaf.prefix.split('#', 1) + nl_count = spl[0].count('\n') + if len(spl) > 1: + # Skip one newline since it was for a standalone comment. + nl_count -= 1 + leaf.prefix = '\n' * nl_count + return + + leaf.prefix = '' + + +PYTHON_EXTENSIONS = {'.py'} +BLACKLISTED_DIRECTORIES = { + 'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv' +} + + +def gen_python_files_in_dir(path: Path) -> Iterator[Path]: + for child in path.iterdir(): + if child.is_dir(): + if child.name in BLACKLISTED_DIRECTORIES: + continue + + yield from gen_python_files_in_dir(child) + + elif child.suffix in PYTHON_EXTENSIONS: + yield child + + +@dataclass +class Report: + """Provides a reformatting counter.""" + change_count: int = attrib(default=0) + same_count: int = attrib(default=0) + failure_count: int = attrib(default=0) + + def done(self, src: Path, changed: bool) -> None: + """Increment the counter for successful reformatting. Write out a message.""" + if changed: + out(f'reformatted {src}') + self.change_count += 1 + else: + out(f'{src} already well formatted, good job.', bold=False) + self.same_count += 1 + + def failed(self, src: Path, message: str) -> None: + """Increment the counter for failed reformatting. Write out a message.""" + err(f'error: cannot format {src}: {message}') + self.failure_count += 1 + + @property + def return_code(self) -> int: + """Which return code should the app use considering the current state.""" + return 1 if self.failure_count else 0 + + def __str__(self) -> str: + """A color report of the current state. + + Use `click.unstyle` to remove colors. 
+ """ + report = [] + if self.change_count: + s = 's' if self.change_count > 1 else '' + report.append( + click.style(f'{self.change_count} file{s} reformatted', bold=True) + ) + if self.same_count: + s = 's' if self.same_count > 1 else '' + report.append(f'{self.same_count} file{s} left unchanged') + if self.failure_count: + s = 's' if self.failure_count > 1 else '' + report.append( + click.style( + f'{self.failure_count} file{s} failed to reformat', fg='red' + ) + ) + return ', '.join(report) + '.' + + +def assert_equivalent(src: str, dst: str) -> None: + """Raises AssertionError if `src` and `dst` aren't equivalent. + + This is a temporary sanity check until Black becomes stable. + """ + + import ast + import traceback + + def _v(node: ast.AST, depth: int = 0) -> Iterator[str]: + """Simple visitor generating strings to compare ASTs by content.""" + yield f"{' ' * depth}{node.__class__.__name__}(" + + for field in sorted(node._fields): + try: + value = getattr(node, field) + except AttributeError: + continue + + yield f"{' ' * (depth+1)}{field}=" + + if isinstance(value, list): + for item in value: + if isinstance(item, ast.AST): + yield from _v(item, depth + 2) + + elif isinstance(value, ast.AST): + yield from _v(value, depth + 2) + + else: + yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}" + + yield f"{' ' * depth}) # /{node.__class__.__name__}" + + try: + src_ast = ast.parse(src) + except Exception as exc: + raise AssertionError(f"cannot parse source: {exc}") from None + + try: + dst_ast = ast.parse(dst) + except Exception as exc: + log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst) + raise AssertionError( + f"INTERNAL ERROR: Black produced invalid code: {exc}. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This invalid output might be helpful: {log}", + ) from None + + src_ast_str = '\n'.join(_v(src_ast)) + dst_ast_str = '\n'.join(_v(dst_ast)) + if src_ast_str != dst_ast_str: + log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst')) + raise AssertionError( + f"INTERNAL ERROR: Black produced code that is not equivalent to " + f"the source. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This diff might be helpful: {log}", + ) from None + + +def assert_stable(src: str, dst: str, line_length: int) -> None: + """Raises AssertionError if `dst` reformats differently the second time. + + This is a temporary sanity check until Black becomes stable. + """ + newdst = format_str(dst, line_length=line_length) + if dst != newdst: + log = dump_to_file( + diff(src, dst, 'source', 'first pass'), + diff(dst, newdst, 'first pass', 'second pass'), + ) + raise AssertionError( + f"INTERNAL ERROR: Black produced different code on the second pass " + f"of the formatter. " + f"Please report a bug on https://github.com/ambv/black/issues. " + f"This diff might be helpful: {log}", + ) from None + + +def dump_to_file(*output: str) -> str: + """Dumps `output` to a temporary file. 
Returns path to the file.""" + import tempfile + + with tempfile.NamedTemporaryFile( + mode='w', prefix='blk_', suffix='.log', delete=False + ) as f: + for lines in output: + f.write(lines) + f.write('\n') + return f.name + + +def diff(a: str, b: str, a_name: str, b_name: str) -> str: + """Returns a udiff string between strings `a` and `b`.""" + import difflib + + a_lines = [line + '\n' for line in a.split('\n')] + b_lines = [line + '\n' for line in b.split('\n')] + return ''.join( + difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5) + ) + + +if __name__ == '__main__': + main() diff --git a/blib2to3/Grammar.txt b/blib2to3/Grammar.txt new file mode 100644 index 0000000..b19b4a2 --- /dev/null +++ b/blib2to3/Grammar.txt @@ -0,0 +1,173 @@ +# Grammar for 2to3. This grammar supports Python 2.x and 3.x. + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# file_input is a module or sequence of commands read from an input file; +# single_input is a single interactive statement; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +file_input: (NEWLINE | stmt)* ENDMARKER +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: ((tfpdef ['=' test] ',')* + ('*' [tname] (',' tname ['=' test])* [',' ['**' tname [',']]] | '**' tname [',']) + | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) +tname: NAME [':' test] +tfpdef: tname | '(' tfplist ')' +tfplist: tfpdef (',' tfpdef)* [','] +varargslist: ((vfpdef ['=' test] ',')* + ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]] | '**' vname [',']) + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) +vname: NAME +vfpdef: vname | '(' vfplist ')' +vfplist: vfpdef (',' vfpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: 
dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +with_var: 'as' expr +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [(',' | 'as') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: ['await'] atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+ | '.' '.' '.') +listmaker: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) +testlist_gexp: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test '=' test | + '**' expr | + star_expr ) + +comp_iter: comp_for | comp_if +comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +# As noted above, testlist_safe extends the syntax allowed in list +# comprehensions and generators. We can't use it indiscriminately in all +# derivations using a comp_for-like pattern because the testlist_safe derivation +# contains comma which clashes with trailing comma in arglist. +# +# This was an issue because the parser would not follow the correct derivation +# when parsing syntactically valid Python code. Since testlist_safe was created +# specifically to handle list comprehensions and generator expressions enclosed +# with parentheses, it's safe to only use it in those. That avoids the issue; we +# can parse code like set(x for x in [],). +# +# The syntax supported by this set of rules is not a valid Python 3 syntax, +# hence the prefix "old". +# +# See https://bugs.python.org/issue27494 +old_comp_iter: old_comp_for | old_comp_if +old_comp_for: ['async'] 'for' exprlist 'in' testlist_safe [old_comp_iter] +old_comp_if: 'if' old_test [old_comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/blib2to3/Grammar3.6.4.final.0.pickle b/blib2to3/Grammar3.6.4.final.0.pickle new file mode 100644 index 0000000..da22814 Binary files /dev/null and b/blib2to3/Grammar3.6.4.final.0.pickle differ diff --git a/blib2to3/PatternGrammar.txt b/blib2to3/PatternGrammar.txt new file mode 100644 index 0000000..36bf814 --- /dev/null +++ b/blib2to3/PatternGrammar.txt @@ -0,0 +1,28 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# A grammar to describe tree matching patterns. +# Not shown here: +# - 'TOKEN' stands for any token (leaf node) +# - 'any' stands for any node (leaf or interior) +# With 'any' we can still specify the sub-structure. + +# The start symbol is 'Matcher'. + +Matcher: Alternatives ENDMARKER + +Alternatives: Alternative ('|' Alternative)* + +Alternative: (Unit | NegatedUnit)+ + +Unit: [NAME '='] ( STRING [Repeater] + | NAME [Details] [Repeater] + | '(' Alternatives ')' [Repeater] + | '[' Alternatives ']' + ) + +NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')') + +Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}' + +Details: '<' Alternatives '>' diff --git a/blib2to3/PatternGrammar3.6.4.final.0.pickle b/blib2to3/PatternGrammar3.6.4.final.0.pickle new file mode 100644 index 0000000..e027504 Binary files /dev/null and b/blib2to3/PatternGrammar3.6.4.final.0.pickle differ diff --git a/blib2to3/README b/blib2to3/README new file mode 100644 index 0000000..2c12c62 --- /dev/null +++ b/blib2to3/README @@ -0,0 +1,7 @@ +A subset of lib2to3 taken from Python 3.7.0b2. 
+Commit hash: 9c17e3a1987004b8bcfbe423953aad84493a7984 + +Reasons for forking: +- consistent handling of f-strings for users of Python < 3.6.2 +- better ability to debug +- ability to Cythonize diff --git a/blib2to3/__init__.py b/blib2to3/__init__.py new file mode 100644 index 0000000..ea30561 --- /dev/null +++ b/blib2to3/__init__.py @@ -0,0 +1 @@ +#empty diff --git a/blib2to3/__init__.pyi b/blib2to3/__init__.pyi new file mode 100644 index 0000000..145e31b --- /dev/null +++ b/blib2to3/__init__.pyi @@ -0,0 +1 @@ +# Stubs for lib2to3 (Python 3.6) diff --git a/blib2to3/pgen2/__init__.py b/blib2to3/pgen2/__init__.py new file mode 100644 index 0000000..af39048 --- /dev/null +++ b/blib2to3/pgen2/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""The pgen2 package.""" diff --git a/blib2to3/pgen2/__init__.pyi b/blib2to3/pgen2/__init__.pyi new file mode 100644 index 0000000..1adc82a --- /dev/null +++ b/blib2to3/pgen2/__init__.pyi @@ -0,0 +1,10 @@ +# Stubs for lib2to3.pgen2 (Python 3.6) + +import os +import sys +from typing import Text, Union + +if sys.version_info >= (3, 6): + _Path = Union[Text, os.PathLike] +else: + _Path = Text diff --git a/blib2to3/pgen2/conv.py b/blib2to3/pgen2/conv.py new file mode 100644 index 0000000..ed0cac5 --- /dev/null +++ b/blib2to3/pgen2/conv.py @@ -0,0 +1,257 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Convert graminit.[ch] spit out by pgen to Python code. + +Pgen is the Python parser generator. It is useful to quickly create a +parser from a grammar file in Python's grammar notation. But I don't +want my parsers to be written in C (yet), so I'm translating the +parsing tables to Python data structures and writing a Python parse +engine. + +Note that the token numbers are constants determined by the standard +Python tokenizer. The standard token module defines these numbers and +their names (the names are not used much). The token numbers are +hardcoded into the Python tokenizer and into pgen. A Python +implementation of the Python tokenizer is also available, in the +standard tokenize module. + +On the other hand, symbol numbers (representing the grammar's +non-terminals) are assigned by pgen based on the actual grammar +input. + +Note: this module is pretty much obsolete; the pgen module generates +equivalent grammar tables directly from the Grammar.txt input file +without having to invoke the Python pgen C program. + +""" + +# Python imports +import re + +# Local imports +from pgen2 import grammar, token + + +class Converter(grammar.Grammar): + """Grammar subclass that reads classic pgen output files. + + The run() method reads the tables as produced by the pgen parser + generator, typically contained in two C files, graminit.h and + graminit.c. The other methods are for internal use only. + + See the base class for more documentation. + + """ + + def run(self, graminit_h, graminit_c): + """Load the grammar tables from the text files written by pgen.""" + self.parse_graminit_h(graminit_h) + self.parse_graminit_c(graminit_c) + self.finish_off() + + def parse_graminit_h(self, filename): + """Parse the .h file written by pgen. (Internal) + + This file is a sequence of #define statements defining the + nonterminals of the grammar as numbers. We build two tables + mapping the numbers to names and back. 
+ + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + self.symbol2number = {} + self.number2symbol = {} + lineno = 0 + for line in f: + lineno += 1 + mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line) + if not mo and line.strip(): + print("%s(%s): can't parse %s" % (filename, lineno, + line.strip())) + else: + symbol, number = mo.groups() + number = int(number) + assert symbol not in self.symbol2number + assert number not in self.number2symbol + self.symbol2number[symbol] = number + self.number2symbol[number] = symbol + return True + + def parse_graminit_c(self, filename): + """Parse the .c file written by pgen. (Internal) + + The file looks as follows. The first two lines are always this: + + #include "pgenheaders.h" + #include "grammar.h" + + After that come four blocks: + + 1) one or more state definitions + 2) a table defining dfas + 3) a table defining labels + 4) a struct defining the grammar + + A state definition has the following form: + - one or more arc arrays, each of the form: + static arc arcs__[] = { + {, }, + ... + }; + - followed by a state array, of the form: + static state states_[] = { + {, arcs__}, + ... + }; + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + # The code below essentially uses f's iterator-ness! + lineno = 0 + + # Expect the two #include lines + lineno, line = lineno+1, next(f) + assert line == '#include "pgenheaders.h"\n', (lineno, line) + lineno, line = lineno+1, next(f) + assert line == '#include "grammar.h"\n', (lineno, line) + + # Parse the state definitions + lineno, line = lineno+1, next(f) + allarcs = {} + states = [] + while line.startswith("static arc "): + while line.startswith("static arc "): + mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", + line) + assert mo, (lineno, line) + n, m, k = list(map(int, mo.groups())) + arcs = [] + for _ in range(k): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), (\d+)},$", line) + assert mo, (lineno, line) + i, j = list(map(int, mo.groups())) + arcs.append((i, j)) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + allarcs[(n, m)] = arcs + lineno, line = lineno+1, next(f) + mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line) + assert mo, (lineno, line) + s, t = list(map(int, mo.groups())) + assert s == len(states), (lineno, line) + state = [] + for _ in range(t): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line) + assert mo, (lineno, line) + k, n, m = list(map(int, mo.groups())) + arcs = allarcs[n, m] + assert k == len(arcs), (lineno, line) + state.append(arcs) + states.append(state) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + lineno, line = lineno+1, next(f) + self.states = states + + # Parse the dfas + dfas = {} + mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + for i in range(ndfas): + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', + line) + assert mo, (lineno, line) + symbol = mo.group(2) + number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) + assert self.symbol2number[symbol] == number, (lineno, line) + assert self.number2symbol[number] == symbol, (lineno, line) + assert x == 0, (lineno, line) + state = states[z] + assert y == len(state), (lineno, line) + lineno, line = lineno+1, next(f) + mo = 
re.match(r'\s+("(?:\\\d\d\d)*")},$', line) + assert mo, (lineno, line) + first = {} + rawbitset = eval(mo.group(1)) + for i, c in enumerate(rawbitset): + byte = ord(c) + for j in range(8): + if byte & (1<= os.path.getmtime(b) + + +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. + + """ + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + +def main(*args): + """Main program, when run as a script: produce grammar pickle files. + + Calls load_grammar for each argument, a path to a grammar text file. + """ + if not args: + args = sys.argv[1:] + logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(message)s') + for gt in args: + load_grammar(gt, save=True, force=True) + return True + +if __name__ == "__main__": + sys.exit(int(not main())) diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi new file mode 100644 index 0000000..f098bf5 --- /dev/null +++ b/blib2to3/pgen2/driver.pyi @@ -0,0 +1,24 @@ +# Stubs for lib2to3.pgen2.driver (Python 3.6) + +import os +import sys +from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union + +from logging import Logger +from blib2to3.pytree import _Convert, _NL +from blib2to3.pgen2 import _Path +from blib2to3.pgen2.grammar import Grammar + + +class Driver: + grammar: Grammar + logger: Logger + convert: _Convert + def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ... + def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ... + def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ... + def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ... + def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ... + def parse_string(self, text: Text, debug: bool = ...) -> _NL: ... + +def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ... diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py new file mode 100644 index 0000000..088c58b --- /dev/null +++ b/blib2to3/pgen2/grammar.py @@ -0,0 +1,211 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""This module defines the data structures used to represent a grammar. + +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. + +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +# Python imports +import collections +import pickle + +# Local imports +from . import token + + +class Grammar(object): + """Pgen parsing tables conversion class. 
+ + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. (This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. + + """ + + def __init__(self): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, "EMPTY")] + self.keywords = {} + self.tokens = {} + self.symbol2label = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file. + + dump() recursively changes all dict to OrderedDict, so the pickled file + is not exactly the same as what was passed in to dump(). load() uses the + pickled file to create the tables, but only changes OrderedDict to dict + at the top level; it does not recursively change OrderedDict to dict. + So, the loaded tables are different from the original tables that were + passed to load() in that some of the OrderedDict (from the pickled file) + are not changed back to dict. For parsing, this has no effect on + performance because OrderedDict uses dict's __getitem__ with nothing in + between. + """ + with open(filename, "wb") as f: + d = _make_deterministic(self.__dict__) + pickle.dump(d, f, 2) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, "rb") as f: + d = pickle.load(f) + self.__dict__.update(d) + + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + + def copy(self): + """ + Copy the grammar. 
+ """ + new = self.__class__() + for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", + "tokens", "symbol2label"): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print("s2n") + pprint(self.symbol2number) + print("n2s") + pprint(self.number2symbol) + print("states") + pprint(self.states) + print("dfas") + pprint(self.dfas) + print("labels") + pprint(self.labels) + print("start", self.start) + + +def _make_deterministic(top): + if isinstance(top, dict): + return collections.OrderedDict( + sorted(((k, _make_deterministic(v)) for k, v in top.items()))) + if isinstance(top, list): + return [_make_deterministic(e) for e in top] + if isinstance(top, tuple): + return tuple(_make_deterministic(e) for e in top) + return top + + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +@= ATEQUAL +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +""" + +opmap = {} +for line in opmap_raw.splitlines(): + if line: + op, name = line.split() + opmap[op] = getattr(token, name) diff --git a/blib2to3/pgen2/grammar.pyi b/blib2to3/pgen2/grammar.pyi new file mode 100644 index 0000000..353086d --- /dev/null +++ b/blib2to3/pgen2/grammar.pyi @@ -0,0 +1,29 @@ +# Stubs for lib2to3.pgen2.grammar (Python 3.6) + +from blib2to3.pgen2 import _Path + +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar + +_P = TypeVar('_P') +_Label = Tuple[int, Optional[Text]] +_DFA = List[List[Tuple[int, int]]] +_DFAS = Tuple[_DFA, Dict[int, int]] + +class Grammar: + symbol2number: Dict[Text, int] + number2symbol: Dict[int, Text] + states: List[_DFA] + dfas: Dict[int, _DFAS] + labels: List[_Label] + keywords: Dict[Text, int] + tokens: Dict[int, int] + symbol2label: Dict[Text, int] + start: int + def __init__(self) -> None: ... + def dump(self, filename: _Path) -> None: ... + def load(self, filename: _Path) -> None: ... + def copy(self: _P) -> _P: ... + def report(self) -> None: ... + +opmap_raw: Text +opmap: Dict[Text, Text] diff --git a/blib2to3/pgen2/literals.py b/blib2to3/pgen2/literals.py new file mode 100644 index 0000000..b9b63e6 --- /dev/null +++ b/blib2to3/pgen2/literals.py @@ -0,0 +1,60 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. 
+ +"""Safely evaluate Python string literals without using eval().""" + +import re + +simple_escapes = {"a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", + "'": "'", + '"': '"', + "\\": "\\"} + +def escape(m): + all, tail = m.group(0, 1) + assert all.startswith("\\") + esc = simple_escapes.get(tail) + if esc is not None: + return esc + if tail.startswith("x"): + hexes = tail[1:] + if len(hexes) < 2: + raise ValueError("invalid hex string escape ('\\%s')" % tail) + try: + i = int(hexes, 16) + except ValueError: + raise ValueError("invalid hex string escape ('\\%s')" % tail) from None + else: + try: + i = int(tail, 8) + except ValueError: + raise ValueError("invalid octal string escape ('\\%s')" % tail) from None + return chr(i) + +def evalString(s): + assert s.startswith("'") or s.startswith('"'), repr(s[:1]) + q = s[0] + if s[:3] == q*3: + q = q*3 + assert s.endswith(q), repr(s[-len(q):]) + assert len(s) >= 2*len(q) + s = s[len(q):-len(q)] + return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) + +def test(): + for i in range(256): + c = chr(i) + s = repr(c) + e = evalString(s) + if e != c: + print(i, c, s, e) + + +if __name__ == "__main__": + test() diff --git a/blib2to3/pgen2/literals.pyi b/blib2to3/pgen2/literals.pyi new file mode 100644 index 0000000..8719500 --- /dev/null +++ b/blib2to3/pgen2/literals.pyi @@ -0,0 +1,9 @@ +# Stubs for lib2to3.pgen2.literals (Python 3.6) + +from typing import Dict, Match, Text + +simple_escapes: Dict[Text, Text] + +def escape(m: Match) -> Text: ... +def evalString(s: Text) -> Text: ... +def test() -> None: ... diff --git a/blib2to3/pgen2/parse.py b/blib2to3/pgen2/parse.py new file mode 100644 index 0000000..6bebdbb --- /dev/null +++ b/blib2to3/pgen2/parse.py @@ -0,0 +1,201 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. + +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" + +# Local imports +from . import token + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % + (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + +class Parser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + : + if p.addtoken(...): # parse a token; may raise ParseError + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when addtoken() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, addtoken() raises + the ParseError exception. There is no error recovery; the parser + cannot be used after a syntax error was reported (but it can be + reinitialized by calling setup()). + + """ + + def __init__(self, grammar, convert=None): + """Constructor. 
+ + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. + + The parser is not ready yet for parsing; you must call the + setup() method to get it started. + + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, value, context, nodes) + tuple, where type is the node type (a token or symbol number), + value is None for symbols and a string for tokens, context is + None or an opaque value used for error reporting (typically a + (lineno, offset) pair), and nodes is a list of children for + symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. + + """ + self.grammar = grammar + self.convert = convert or (lambda grammar, node: node) + + def setup(self, start=None): + """Prepare for parsing. + + This *must* be called before starting to parse. + + The optional argument is an alternative start symbol; it + defaults to the grammar's start symbol. + + You can use a Parser instance to parse any number of programs; + each time you call setup() the parser is reset to an initial + state determined by the (implicit or explicit) start symbol. + + """ + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. + newnode = (start, None, None, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + # Map from token to label + ilabel = self.classify(type, value, context) + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = self.grammar.labels[i] + if ilabel == i: + # Look it up in the list of labels + assert t < 256 + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! 
+ return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, self.grammar.dfas[t], newstate, context) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError("too much input", + type, value, context) + else: + # No success finding a transition + raise ParseError("bad input", type, value, context) + + def classify(self, type, value, context): + """Turn a token into a label. (Internal)""" + if type == token.NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + ilabel = self.grammar.keywords.get(value) + if ilabel is not None: + return ilabel + ilabel = self.grammar.tokens.get(type) + if ilabel is None: + raise ParseError("bad token", type, value, context) + return ilabel + + def shift(self, type, value, newstate, context): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, value, context, None) + newnode = self.convert(self.grammar, newnode) + if newnode is not None: + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def push(self, type, newdfa, newstate, context): + """Push a nonterminal. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def pop(self): + """Pop a nonterminal. (Internal)""" + popdfa, popstate, popnode = self.stack.pop() + newnode = self.convert(self.grammar, popnode) + if newnode is not None: + if self.stack: + dfa, state, node = self.stack[-1] + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names diff --git a/blib2to3/pgen2/parse.pyi b/blib2to3/pgen2/parse.pyi new file mode 100644 index 0000000..cbcf941 --- /dev/null +++ b/blib2to3/pgen2/parse.pyi @@ -0,0 +1,29 @@ +# Stubs for lib2to3.pgen2.parse (Python 3.6) + +from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple + +from blib2to3.pgen2.grammar import Grammar, _DFAS +from blib2to3.pytree import _NL, _Convert, _RawNode + +_Context = Sequence[Any] + +class ParseError(Exception): + msg: Text + type: int + value: Optional[Text] + context: _Context + def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ... + +class Parser: + grammar: Grammar + convert: _Convert + stack: List[Tuple[_DFAS, int, _RawNode]] + rootnode: Optional[_NL] + used_names: Set[Text] + def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ... + def setup(self, start: Optional[int] = ...) -> None: ... + def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ... + def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ... + def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ... + def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ... + def pop(self) -> None: ... 
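The Parser docstring above spells out the setup()/addtoken()/rootnode protocol but never shows it end to end. As an illustration only (not part of this commit), a minimal driving loop might look like the sketch below; the `grammar` object and the `tokens` iterable of (type, value, context) triples are assumed to come from the pgen2 grammar loader and tokenizer, and the real Driver in this package does considerably more (prefix handling, comments, NEWLINE/INDENT bookkeeping).

# Illustrative sketch only, not part of the commit: drive a Parser by hand.
# Assumes `grammar` was produced by pgen.generate_grammar() and `tokens`
# yields (type, value, context) triples from the pgen2 tokenizer.
from blib2to3.pgen2 import parse

def parse_program(grammar, tokens):
    p = parse.Parser(grammar)      # no converter: keep the concrete syntax tree
    p.setup()                      # start at the grammar's start symbol
    for type_, value, context in tokens:
        if p.addtoken(type_, value, context):
            return p.rootnode      # addtoken() returns True at end of program
    raise parse.ParseError("incomplete input", None, None, None)

The same protocol is what Driver.parse_tokens implements for callers of this package; the sketch only makes the loop from the docstring concrete.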
diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py new file mode 100644 index 0000000..b0cbd16 --- /dev/null +++ b/blib2to3/pgen2/pgen.py @@ -0,0 +1,386 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Pgen imports +from . import grammar, token, tokenize + +class PgenGrammar(grammar.Grammar): + pass + +class ParserGenerator(object): + + def __init__(self, filename, stream=None): + close_stream = None + if stream is None: + stream = open(filename) + close_stream = stream.close + self.filename = filename + self.stream = stream + self.generator = tokenize.generate_tokens(stream.readline) + self.gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self.parse() + if close_stream is not None: + close_stream() + self.first = {} # map from symbol name to set of tokens + self.addfirstsets() + + def make_grammar(self): + c = PgenGrammar() + names = list(self.dfas.keys()) + names.sort() + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in sorted(state.arcs.items()): + arcs.append((self.make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in sorted(rawfirst): + ilabel = self.make_label(c, label) + ##assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? 
+ ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(token, label, None) + assert isinstance(itoken, int), label + assert itoken in token.tok_name, label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in c.keywords: + return c.keywords[value] + else: + c.labels.append((token.NAME, value)) + c.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = grammar.opmap[value] # Fails if unknown token + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self.calcfirst(name) + #print name, self.first[name].keys() + + def calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError("recursion for rule %r" % name) + else: + self.calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self.gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self.expect(token.NAME) + self.expect(token.OP, ":") + a, z = self.parse_rhs() + self.expect(token.NEWLINE) + #self.dump_nfa(name, a, z) + dfa = self.make_dfa(a, z) + #self.dump_dfa(name, dfa) + oldlen = len(dfa) + self.simplify_dfa(dfa) + newlen = len(dfa) + dfas[name] = dfa + #print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. 
+ assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + def closure(state): + base = {} + addclosure(state, base) + return base + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in sorted(arcs.items()): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def dump_nfa(self, name, start, finish): + print("Dump of NFA for", name) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + def dump_dfa(self, name, dfa): + print("Dump of DFA for", name) + for i, state in enumerate(dfa): + print(" State", i, state.isfinal and "(final)" or "") + for label, next in sorted(state.arcs.items()): + print(" %s -> %d" % (label, dfa.index(next))) + + def simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. + + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i+1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + #print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self.parse_alt() + if self.value != "|": + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == "|": + self.gettoken() + a, z = self.parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def parse_alt(self): + # ALT: ITEM+ + a, b = self.parse_item() + while (self.value in ("(", "[") or + self.type in (token.NAME, token.STRING)): + c, d = self.parse_item() + b.addarc(c) + b = d + return a, b + + def parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == "[": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, "]") + a.addarc(z) + return a, z + else: + a, z = self.parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self.gettoken() + z.addarc(a) + if value == "+": + return a, z + else: + return a, a + + def parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == "(": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ")") + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self.gettoken() + return a, z + else: + self.raise_error("expected (...) 
or NAME or STRING, got %s/%s", + self.type, self.value) + + def expect(self, type, value=None): + if self.type != type or (value is not None and self.value != value): + self.raise_error("expected %s/%s, got %s/%s", + type, value, self.type, self.value) + value = self.value + self.gettoken() + return value + + def gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + #print token.tok_name[self.type], repr(self.value) + + def raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + raise SyntaxError(msg, (self.filename, self.end[0], + self.end[1], self.line)) + +class NFAState(object): + + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + +class DFAState(object): + + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + +def generate_grammar(filename="Grammar.txt"): + p = ParserGenerator(filename) + return p.make_grammar() diff --git a/blib2to3/pgen2/pgen.pyi b/blib2to3/pgen2/pgen.pyi new file mode 100644 index 0000000..1529ad0 --- /dev/null +++ b/blib2to3/pgen2/pgen.pyi @@ -0,0 +1,49 @@ +# Stubs for lib2to3.pgen2.pgen (Python 3.6) + +from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple +from mypy_extensions import NoReturn + +from blib2to3.pgen2 import _Path, grammar +from blib2to3.pgen2.tokenize import _TokenInfo + +class PgenGrammar(grammar.Grammar): ... + +class ParserGenerator: + filename: _Path + stream: IO[Text] + generator: Iterator[_TokenInfo] + first: Dict[Text, Dict[Text, int]] + def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ... + def make_grammar(self) -> PgenGrammar: ... + def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ... + def make_label(self, c: PgenGrammar, label: Text) -> int: ... + def addfirstsets(self) -> None: ... + def calcfirst(self, name: Text) -> None: ... + def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ... + def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ... + def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> List[DFAState]: ... + def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ... + def simplify_dfa(self, dfa: List[DFAState]) -> None: ... 
+ def parse_rhs(self) -> Tuple[NFAState, NFAState]: ... + def parse_alt(self) -> Tuple[NFAState, NFAState]: ... + def parse_item(self) -> Tuple[NFAState, NFAState]: ... + def parse_atom(self) -> Tuple[NFAState, NFAState]: ... + def expect(self, type: int, value: Optional[Any] = ...) -> Text: ... + def gettoken(self) -> None: ... + def raise_error(self, msg: str, *args: Any) -> NoReturn: ... + +class NFAState: + arcs: List[Tuple[Optional[Text], NFAState]] + def __init__(self) -> None: ... + def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ... + +class DFAState: + nfaset: Dict[NFAState, Any] + isfinal: bool + arcs: Dict[Text, DFAState] + def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ... + def addarc(self, next: DFAState, label: Text) -> None: ... + def unifystate(self, old: DFAState, new: DFAState) -> None: ... + def __eq__(self, other: Any) -> bool: ... + +def generate_grammar(filename: _Path = ...) -> PgenGrammar: ... diff --git a/blib2to3/pgen2/token.py b/blib2to3/pgen2/token.py new file mode 100755 index 0000000..7599396 --- /dev/null +++ b/blib2to3/pgen2/token.py @@ -0,0 +1,83 @@ +#! /usr/bin/env python3 + +"""Token constants (from "token.h").""" + +# Taken from Python (r53757) and modified to include some tokens +# originally monkeypatched in by pgen2.tokenize + +#--start constants-- +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +BACKQUOTE = 25 +LBRACE = 26 +RBRACE = 27 +EQEQUAL = 28 +NOTEQUAL = 29 +LESSEQUAL = 30 +GREATEREQUAL = 31 +TILDE = 32 +CIRCUMFLEX = 33 +LEFTSHIFT = 34 +RIGHTSHIFT = 35 +DOUBLESTAR = 36 +PLUSEQUAL = 37 +MINEQUAL = 38 +STAREQUAL = 39 +SLASHEQUAL = 40 +PERCENTEQUAL = 41 +AMPEREQUAL = 42 +VBAREQUAL = 43 +CIRCUMFLEXEQUAL = 44 +LEFTSHIFTEQUAL = 45 +RIGHTSHIFTEQUAL = 46 +DOUBLESTAREQUAL = 47 +DOUBLESLASH = 48 +DOUBLESLASHEQUAL = 49 +AT = 50 +ATEQUAL = 51 +OP = 52 +COMMENT = 53 +NL = 54 +RARROW = 55 +ERRORTOKEN = 56 +N_TOKENS = 57 +NT_OFFSET = 256 +#--end constants-- + +tok_name = {} +for _name, _value in list(globals().items()): + if type(_value) is type(0): + tok_name[_value] = _name + + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER diff --git a/blib2to3/pgen2/token.pyi b/blib2to3/pgen2/token.pyi new file mode 100644 index 0000000..c256af8 --- /dev/null +++ b/blib2to3/pgen2/token.pyi @@ -0,0 +1,73 @@ +# Stubs for lib2to3.pgen2.token (Python 3.6) + +import sys +from typing import Dict, Text + +ENDMARKER: int +NAME: int +NUMBER: int +STRING: int +NEWLINE: int +INDENT: int +DEDENT: int +LPAR: int +RPAR: int +LSQB: int +RSQB: int +COLON: int +COMMA: int +SEMI: int +PLUS: int +MINUS: int +STAR: int +SLASH: int +VBAR: int +AMPER: int +LESS: int +GREATER: int +EQUAL: int +DOT: int +PERCENT: int +BACKQUOTE: int +LBRACE: int +RBRACE: int +EQEQUAL: int +NOTEQUAL: int +LESSEQUAL: int +GREATEREQUAL: int +TILDE: int +CIRCUMFLEX: int +LEFTSHIFT: int +RIGHTSHIFT: int +DOUBLESTAR: int +PLUSEQUAL: int +MINEQUAL: int +STAREQUAL: int +SLASHEQUAL: int +PERCENTEQUAL: int +AMPEREQUAL: int +VBAREQUAL: int +CIRCUMFLEXEQUAL: int +LEFTSHIFTEQUAL: int +RIGHTSHIFTEQUAL: int +DOUBLESTAREQUAL: int +DOUBLESLASH: int +DOUBLESLASHEQUAL: int +OP: int +COMMENT: int +NL: int +if sys.version_info >= (3,): + RARROW: int +if 
sys.version_info >= (3, 5): + AT: int + ATEQUAL: int + AWAIT: int + ASYNC: int +ERRORTOKEN: int +N_TOKENS: int +NT_OFFSET: int +tok_name: Dict[int, Text] + +def ISTERMINAL(x: int) -> bool: ... +def ISNONTERMINAL(x: int) -> bool: ... +def ISEOF(x: int) -> bool: ... diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py new file mode 100644 index 0000000..14560e4 --- /dev/null +++ b/blib2to3/pgen2/tokenize.py @@ -0,0 +1,518 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. +# All rights reserved. + +"""Tokenization help for Python programs. + +generate_tokens(readline) is a generator that breaks a stream of +text into Python tokens. It accepts a readline-like method which is called +repeatedly to get the next line of input (or "" for EOF). It generates +5-tuples with these members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators + +Older entry points + tokenize_loop(readline, tokeneater) + tokenize(readline, tokeneater=printtoken) +are the same, except instead of generating tokens, tokeneater is a callback +function to which the 5 fields described above are passed as 5 arguments, +each time a new token is found.""" + +__author__ = 'Ka-Ping Yee ' +__credits__ = \ + 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' + +import string, re +from codecs import BOM_UTF8, lookup +from lib2to3.pgen2.token import * + +from . import token +__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize", + "generate_tokens", "untokenize"] +del token + +try: + bytes +except NameError: + # Support bytes type in Python <= 2.5, so 2to3 turns itself into + # valid Python 3 code. + bytes = str + +def group(*choices): return '(' + '|'.join(choices) + ')' +def any(*choices): return group(*choices) + '*' +def maybe(*choices): return group(*choices) + '?' + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'[a-zA-Z_]\w*' + +Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' +Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' +Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' +Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') +Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+(?:_\d+)*' +Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent) +Expfloat = r'\d+(?:_\d+)*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?" +Triple = group(_litprefix + "'''", _litprefix + '"""') +# Single-line ' or " string. 
+String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). +Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", + r"//=?", r"->", + r"[+\-*/%&@|^=<>]=?", + r"~") + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = list(map( + re.compile, (Token, PseudoToken, Single3, Double3))) +endprogs = {"'": re.compile(Single), '"': re.compile(Double), + "'''": single3prog, '"""': double3prog, + "r'''": single3prog, 'r"""': double3prog, + "u'''": single3prog, 'u"""': double3prog, + "b'''": single3prog, 'b"""': double3prog, + "f'''": single3prog, 'f"""': double3prog, + "ur'''": single3prog, 'ur"""': double3prog, + "br'''": single3prog, 'br"""': double3prog, + "rb'''": single3prog, 'rb"""': double3prog, + "R'''": single3prog, 'R"""': double3prog, + "U'''": single3prog, 'U"""': double3prog, + "B'''": single3prog, 'B"""': double3prog, + "F'''": single3prog, 'F"""': double3prog, + "uR'''": single3prog, 'uR"""': double3prog, + "Ur'''": single3prog, 'Ur"""': double3prog, + "UR'''": single3prog, 'UR"""': double3prog, + "bR'''": single3prog, 'bR"""': double3prog, + "Br'''": single3prog, 'Br"""': double3prog, + "BR'''": single3prog, 'BR"""': double3prog, + "rB'''": single3prog, 'rB"""': double3prog, + "Rb'''": single3prog, 'Rb"""': double3prog, + "RB'''": single3prog, 'RB"""': double3prog, + 'r': None, 'R': None, + 'u': None, 'U': None, + 'f': None, 'F': None, + 'b': None, 'B': None} + +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "b'''", 'b"""', "B'''", 'B"""', + "f'''", 'f"""', "F'''", 'F"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""', + "br'''", 'br"""', "Br'''", 'Br"""', + "bR'''", 'bR"""', "BR'''", 'BR"""', + "rb'''", 'rb"""', "Rb'''", 'Rb"""', + "rB'''", 'rB"""', "RB'''", 'RB"""',): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "b'", 'b"', "B'", 'B"', + "f'", 'f"', "F'", 'F"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"', + "br'", 'br"', "Br'", 'Br"', + "bR'", 'bR"', "BR'", 'BR"', + "rb'", 'rb"', "Rb'", 'Rb"', + "rB'", 'rB"', "RB'", 'RB"',): + single_quoted[t] = t + +tabsize = 8 + +class TokenError(Exception): pass + +class StopTokenizing(Exception): pass + +def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing + (srow, scol) = xxx_todo_changeme + (erow, ecol) = xxx_todo_changeme1 + print("%d,%d-%d,%d:\t%s\t%s" % \ + (srow, scol, erow, ecol, tok_name[type], repr(token))) + +def tokenize(readline, tokeneater=printtoken): + """ + The tokenize() function accepts two parameters: one representing the + input stream, and one providing an output mechanism for tokenize(). 
+ + The first parameter, readline, must be a callable object which provides + the same interface as the readline() method of built-in file objects. + Each call to the function should return one line of input as a string. + + The second parameter, tokeneater, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the + tuples generated by generate_tokens(). + """ + try: + tokenize_loop(readline, tokeneater) + except StopTokenizing: + pass + +# backwards compatible interface +def tokenize_loop(readline, tokeneater): + for token_info in generate_tokens(readline): + tokeneater(*token_info) + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + + def add_whitespace(self, start): + row, col = start + assert row <= self.prev_row + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(" " * col_offset) + + def untokenize(self, iterable): + for t in iterable: + if len(t) == 2: + self.compat(t, iterable) + break + tok_type, token, start, end, line = t + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return "".join(self.tokens) + + def compat(self, token, iterable): + startline = False + indents = [] + toks_append = self.tokens.append + toknum, tokval = token + if toknum in (NAME, NUMBER): + tokval += ' ' + if toknum in (NEWLINE, NL): + startline = True + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum in (NAME, NUMBER): + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read + in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, but + disagree, a SyntaxError will be raised. If the encoding cookie is an invalid + charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. 
+ """ + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return bytes() + + def find_cookie(line): + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError("unknown encoding: " + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited intput: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + return ut.untokenize(iterable) + +def generate_tokens(readline): + """ + The generate_tokens() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + logical line; continuation lines are included. 
+ """ + lnum = parenlev = continued = 0 + namechars, numchars = string.ascii_letters + '_', '0123456789' + contstr, needcont = '', 0 + contline = None + indents = [0] + + while 1: # loop over lines in stream + try: + line = readline() + except StopIteration: + line = '' + lnum = lnum + 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError("EOF in multi-line string", strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield (STRING, contstr + line[:end], + strstart, (lnum, end), contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield (ERRORTOKEN, contstr + line, + strstart, (lnum, len(line)), contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + nl_pos = pos + len(comment_token) + yield (COMMENT, comment_token, + (lnum, pos), (lnum, pos + len(comment_token)), line) + yield (NL, line[nl_pos:], + (lnum, nl_pos), (lnum, len(line)), line) + else: + yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], + (lnum, pos), (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level", + ("", lnum, pos, line)) + indents = indents[:-1] + + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + + else: # continued statement + if not line: + raise TokenError("EOF in multi-line statement", (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = pseudoprog.match(line, pos) + if pseudomatch: # scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if initial in numchars or \ + (initial == '.' 
and token != '.'): # ordinary number + yield (NUMBER, token, spos, epos, line) + elif initial in '\r\n': + newline = NEWLINE + if parenlev > 0: + newline = NL + yield (newline, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith("\n") + yield (COMMENT, token, spos, epos, line) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield (STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + yield (STRING, token, spos, epos, line) + elif initial in namechars: # ordinary name + yield (NAME, token, spos, epos, line) + elif initial == '\\': # continued stmt + # This yield is new; needed for better idempotency: + yield (NL, token, spos, (lnum, pos), line) + continued = 1 + else: + if initial in '([{': parenlev = parenlev + 1 + elif initial in ')]}': parenlev = parenlev - 1 + yield (OP, token, spos, epos, line) + else: + yield (ERRORTOKEN, line[pos], + (lnum, pos), (lnum, pos+1), line) + pos = pos + 1 + + for indent in indents[1:]: # pop remaining indent levels + yield (DEDENT, '', (lnum, 0), (lnum, 0), '') + yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') + +if __name__ == '__main__': # testing + import sys + if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) + else: tokenize(sys.stdin.readline) diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi new file mode 100644 index 0000000..62352e9 --- /dev/null +++ b/blib2to3/pgen2/tokenize.pyi @@ -0,0 +1,30 @@ +# Stubs for lib2to3.pgen2.tokenize (Python 3.6) +# NOTE: Only elements from __all__ are present. + +from typing import Callable, Iterable, Iterator, List, Text, Tuple +from blib2to3.pgen2.token import * # noqa + + +_Coord = Tuple[int, int] +_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None] +_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text] + + +class TokenError(Exception): ... +class StopTokenizing(Exception): ... + +def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ... + +class Untokenizer: + tokens: List[Text] + prev_row: int + prev_col: int + def __init__(self) -> None: ... + def add_whitespace(self, start: _Coord) -> None: ... + def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ... + def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ... + +def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ... +def generate_tokens( + readline: Callable[[], Text] +) -> Iterator[_TokenInfo]: ... diff --git a/blib2to3/pygram.py b/blib2to3/pygram.py new file mode 100644 index 0000000..919624e --- /dev/null +++ b/blib2to3/pygram.py @@ -0,0 +1,40 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Export the Python grammar and symbols.""" + +# Python imports +import os + +# Local imports +from .pgen2 import token +from .pgen2 import driver +from . 
import pytree + +# The grammar file +_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") +_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), + "PatternGrammar.txt") + + +class Symbols(object): + + def __init__(self, grammar): + """Initializer. + + Creates an attribute for each grammar symbol (nonterminal), + whose value is the symbol's type (an int >= 256). + """ + for name, symbol in grammar.symbol2number.items(): + setattr(self, name, symbol) + + +python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) + +python_symbols = Symbols(python_grammar) + +python_grammar_no_print_statement = python_grammar.copy() +del python_grammar_no_print_statement.keywords["print"] + +pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) +pattern_symbols = Symbols(pattern_grammar) diff --git a/blib2to3/pygram.pyi b/blib2to3/pygram.pyi new file mode 100644 index 0000000..3dbc648 --- /dev/null +++ b/blib2to3/pygram.pyi @@ -0,0 +1,119 @@ +# Stubs for lib2to3.pygram (Python 3.6) + +from typing import Any +from blib2to3.pgen2.grammar import Grammar + +class Symbols: + def __init__(self, grammar: Grammar) -> None: ... + +class python_symbols(Symbols): + and_expr: int + and_test: int + annassign: int + arglist: int + argument: int + arith_expr: int + assert_stmt: int + async_funcdef: int + async_stmt: int + atom: int + augassign: int + break_stmt: int + classdef: int + comp_for: int + comp_if: int + comp_iter: int + comp_op: int + comparison: int + compound_stmt: int + continue_stmt: int + decorated: int + decorator: int + decorators: int + del_stmt: int + dictsetmaker: int + dotted_as_name: int + dotted_as_names: int + dotted_name: int + encoding_decl: int + eval_input: int + except_clause: int + exec_stmt: int + expr: int + expr_stmt: int + exprlist: int + factor: int + file_input: int + flow_stmt: int + for_stmt: int + funcdef: int + global_stmt: int + if_stmt: int + import_as_name: int + import_as_names: int + import_from: int + import_name: int + import_stmt: int + lambdef: int + listmaker: int + not_test: int + old_comp_for: int + old_comp_if: int + old_comp_iter: int + old_lambdef: int + old_test: int + or_test: int + parameters: int + pass_stmt: int + power: int + print_stmt: int + raise_stmt: int + return_stmt: int + shift_expr: int + simple_stmt: int + single_input: int + sliceop: int + small_stmt: int + star_expr: int + stmt: int + subscript: int + subscriptlist: int + suite: int + term: int + test: int + testlist: int + testlist1: int + testlist_gexp: int + testlist_safe: int + testlist_star_expr: int + tfpdef: int + tfplist: int + tname: int + trailer: int + try_stmt: int + typedargslist: int + varargslist: int + vfpdef: int + vfplist: int + vname: int + while_stmt: int + with_item: int + with_stmt: int + with_var: int + xor_expr: int + yield_arg: int + yield_expr: int + yield_stmt: int + +class pattern_symbols(Symbols): + Alternative: int + Alternatives: int + Details: int + Matcher: int + NegatedUnit: int + Repeater: int + Unit: int + +python_grammar: Grammar +python_grammar_no_print_statement: Grammar +pattern_grammar: Grammar diff --git a/blib2to3/pytree.py b/blib2to3/pytree.py new file mode 100644 index 0000000..693366f --- /dev/null +++ b/blib2to3/pytree.py @@ -0,0 +1,854 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +""" +Python parse tree definitions. 
+ +This is a very concrete parse tree; we need to keep every token and +even the comments and whitespace between tokens. + +There's also a pattern matching implementation here. +""" + +__author__ = "Guido van Rossum " + +import sys +from io import StringIO + +HUGE = 0x7FFFFFFF # maximum repeat count, default max + +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) + +class Base(object): + + """ + Abstract base class for Node and Leaf. + + This provides some default functionality and boilerplate using the + template pattern. + + A node may be a subnode of at most one parent. + """ + + # Default values for instance variables + type = None # int: token number (< 256) or symbol number (>= 256) + parent = None # Parent node pointer, or None + children = () # Tuple of subnodes + was_changed = False + was_checked = False + + def __new__(cls, *args, **kwds): + """Constructor that prevents Base from being instantiated.""" + assert cls is not Base, "Cannot instantiate Base" + return object.__new__(cls) + + def __eq__(self, other): + """ + Compare two nodes for equality. + + This calls the method _eq(). + """ + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + __hash__ = None # For Py3 compatibility. + + def _eq(self, other): + """ + Compare two nodes for equality. + + This is called by __eq__ and __ne__. It is only called if the two nodes + have the same type. This must be implemented by the concrete subclass. + Nodes should be considered equal if they have the same structure, + ignoring the prefix string and other context information. + """ + raise NotImplementedError + + def clone(self): + """ + Return a cloned (deep) copy of self. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def post_order(self): + """ + Return a post-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def pre_order(self): + """ + Return a pre-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def replace(self, new): + """Replace this node with a new one in the parent.""" + assert self.parent is not None, str(self) + assert new is not None + if not isinstance(new, list): + new = [new] + l_children = [] + found = False + for ch in self.parent.children: + if ch is self: + assert not found, (self.parent.children, self, new) + if new is not None: + l_children.extend(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, self, new) + self.parent.changed() + self.parent.children = l_children + for x in new: + x.parent = self.parent + self.parent = None + + def get_lineno(self): + """Return the line number which generated the invocant node.""" + node = self + while not isinstance(node, Leaf): + if not node.children: + return + node = node.children[0] + return node.lineno + + def changed(self): + if self.parent: + self.parent.changed() + self.was_changed = True + + def remove(self): + """ + Remove the node from the tree. Returns the position of the node in its + parent's children before it was removed. 
+ """ + if self.parent: + for i, node in enumerate(self.parent.children): + if node is self: + self.parent.changed() + del self.parent.children[i] + self.parent = None + return i + + @property + def next_sibling(self): + """ + The node immediately following the invocant in their parent's children + list. If the invocant does not have a next sibling, it is None + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i+1] + except IndexError: + return None + + @property + def prev_sibling(self): + """ + The node immediately preceding the invocant in their parent's children + list. If the invocant does not have a previous sibling, it is None. + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i-1] + + def leaves(self): + for child in self.children: + yield from child.leaves() + + def depth(self): + if self.parent is None: + return 0 + return 1 + self.parent.depth() + + def get_suffix(self): + """ + Return the string immediately following the invocant node. This is + effectively equivalent to node.next_sibling.prefix + """ + next_sib = self.next_sibling + if next_sib is None: + return "" + return next_sib.prefix + + if sys.version_info < (3, 0): + def __str__(self): + return str(self).encode("ascii") + +class Node(Base): + + """Concrete implementation for interior nodes.""" + + def __init__(self,type, children, + context=None, + prefix=None, + fixers_applied=None): + """ + Initializer. + + Takes a type constant (a symbol number >= 256), a sequence of + child nodes, and an optional context keyword argument. + + As a side effect, the parent pointers of the children are updated. + """ + assert type >= 256, type + self.type = type + self.children = list(children) + for ch in self.children: + assert ch.parent is None, repr(ch) + ch.parent = self + if prefix is not None: + self.prefix = prefix + if fixers_applied: + self.fixers_applied = fixers_applied[:] + else: + self.fixers_applied = None + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%s, %r)" % (self.__class__.__name__, + type_repr(self.type), + self.children) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return "".join(map(str, self.children)) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.children) == (other.type, other.children) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Node(self.type, [ch.clone() for ch in self.children], + fixers_applied=self.fixers_applied) + + def post_order(self): + """Return a post-order iterator for the tree.""" + for child in self.children: + yield from child.post_order() + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + for child in self.children: + yield from child.pre_order() + + @property + def prefix(self): + """ + The whitespace and comments preceding this node in the input. 
+ """ + if not self.children: + return "" + return self.children[0].prefix + + @prefix.setter + def prefix(self, prefix): + if self.children: + self.children[0].prefix = prefix + + def set_child(self, i, child): + """ + Equivalent to 'node.children[i] = child'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children[i].parent = None + self.children[i] = child + self.changed() + + def insert_child(self, i, child): + """ + Equivalent to 'node.children.insert(i, child)'. This method also sets + the child's parent attribute appropriately. + """ + child.parent = self + self.children.insert(i, child) + self.changed() + + def append_child(self, child): + """ + Equivalent to 'node.children.append(child)'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children.append(child) + self.changed() + + +class Leaf(Base): + + """Concrete implementation for leaf nodes.""" + + # Default values for instance variables + _prefix = "" # Whitespace and comments preceding this token in the input + lineno = 0 # Line where this token starts in the input + column = 0 # Column where this token tarts in the input + + def __init__(self, type, value, + context=None, + prefix=None, + fixers_applied=[]): + """ + Initializer. + + Takes a type constant (a token number < 256), a string value, and an + optional context keyword argument. + """ + assert 0 <= type < 256, type + if context is not None: + self._prefix, (self.lineno, self.column) = context + self.type = type + self.value = value + if prefix is not None: + self._prefix = prefix + self.fixers_applied = fixers_applied[:] + + def __repr__(self): + """Return a canonical string representation.""" + from .pgen2.token import tok_name + return "%s(%s, %r)" % (self.__class__.__name__, + tok_name.get(self.type, self.type), + self.value) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return self.prefix + str(self.value) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.value) == (other.type, other.value) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Leaf(self.type, self.value, + (self.prefix, (self.lineno, self.column)), + fixers_applied=self.fixers_applied) + + def leaves(self): + yield self + + def post_order(self): + """Return a post-order iterator for the tree.""" + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + + @property + def prefix(self): + """ + The whitespace and comments preceding this token in the input. + """ + return self._prefix + + @prefix.setter + def prefix(self, prefix): + self.changed() + self._prefix = prefix + +def convert(gr, raw_node): + """ + Convert raw node information to a Node or Leaf instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + type, value, context, children = raw_node + if children or type in gr.number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) + + +class BasePattern(object): + + """ + A pattern is a tree matching pattern. 
+ + It looks for a specific node type (token or symbol), and + optionally for a specific content. + + This is an abstract base class. There are three concrete + subclasses: + + - LeafPattern matches a single leaf node; + - NodePattern matches a single node (usually non-leaf); + - WildcardPattern matches a sequence of nodes of variable length. + """ + + # Defaults for instance variables + type = None # Node type (token if < 256, symbol if >= 256) + content = None # Optional content matching pattern + name = None # Optional name used to store match in results dict + + def __new__(cls, *args, **kwds): + """Constructor that prevents BasePattern from being instantiated.""" + assert cls is not BasePattern, "Cannot instantiate BasePattern" + return object.__new__(cls) + + def __repr__(self): + args = [type_repr(self.type), self.content, self.name] + while args and args[-1] is None: + del args[-1] + return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) + + def optimize(self): + """ + A subclass can define this as a hook for optimizations. + + Returns either self or another node with the same effect. + """ + return self + + def match(self, node, results=None): + """ + Does this pattern exactly match a node? + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + Default implementation for non-wildcard patterns. + """ + if self.type is not None and node.type != self.type: + return False + if self.content is not None: + r = None + if results is not None: + r = {} + if not self._submatch(node, r): + return False + if r: + results.update(r) + if results is not None and self.name: + results[self.name] = node + return True + + def match_seq(self, nodes, results=None): + """ + Does this pattern exactly match a sequence of nodes? + + Default implementation for non-wildcard patterns. + """ + if len(nodes) != 1: + return False + return self.match(nodes[0], results) + + def generate_matches(self, nodes): + """ + Generator yielding all matches for this pattern. + + Default implementation for non-wildcard patterns. + """ + r = {} + if nodes and self.match(nodes[0], r): + yield 1, r + + +class LeafPattern(BasePattern): + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given must be a token type (< 256). If not given, + this matches any *leaf* node; the content may still be required. + + The content, if given, must be a string. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert 0 <= type < 256, type + if content is not None: + assert isinstance(content, str), repr(content) + self.type = type + self.content = content + self.name = name + + def match(self, node, results=None): + """Override match() to insist on a leaf node.""" + if not isinstance(node, Leaf): + return False + return BasePattern.match(self, node, results) + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. 
+ """ + return self.content == node.value + + +class NodePattern(BasePattern): + + wildcards = False + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given, must be a symbol type (>= 256). If the + type is None this matches *any* single node (leaf or not), + except if content is not None, in which it only matches + non-leaf nodes that also match the content pattern. + + The content, if not None, must be a sequence of Patterns that + must match the node's children exactly. If the content is + given, the type must not be None. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert type >= 256, type + if content is not None: + assert not isinstance(content, str), repr(content) + content = list(content) + for i, item in enumerate(content): + assert isinstance(item, BasePattern), (i, item) + if isinstance(item, WildcardPattern): + self.wildcards = True + self.type = type + self.content = content + self.name = name + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + if self.wildcards: + for c, r in generate_matches(self.content, node.children): + if c == len(node.children): + if results is not None: + results.update(r) + return True + return False + if len(self.content) != len(node.children): + return False + for subpattern, child in zip(self.content, node.children): + if not subpattern.match(child, results): + return False + return True + + +class WildcardPattern(BasePattern): + + """ + A wildcard pattern can match zero or more nodes. + + This has all the flexibility needed to implement patterns like: + + .* .+ .? .{m,n} + (a b c | d e | f) + (...)* (...)+ (...)? (...){m,n} + + except it always uses non-greedy matching. + """ + + def __init__(self, content=None, min=0, max=HUGE, name=None): + """ + Initializer. + + Args: + content: optional sequence of subsequences of patterns; + if absent, matches one node; + if present, each subsequence is an alternative [*] + min: optional minimum number of times to match, default 0 + max: optional maximum number of times to match, default HUGE + name: optional name assigned to this match + + [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is + equivalent to (a b c | d e | f g h); if content is None, + this is equivalent to '.' in regular expression terms. + The min and max parameters work as follows: + min=0, max=maxint: .* + min=1, max=maxint: .+ + min=0, max=1: .? + min=1, max=1: . + If content is not None, replace the dot with the parenthesized + list of alternatives, e.g. 
(a b c | d e | f g h)* + """ + assert 0 <= min <= max <= HUGE, (min, max) + if content is not None: + content = tuple(map(tuple, content)) # Protect against alterations + # Check sanity of alternatives + assert len(content), repr(content) # Can't have zero alternatives + for alt in content: + assert len(alt), repr(alt) # Can have empty alternatives + self.content = content + self.min = min + self.max = max + self.name = name + + def optimize(self): + """Optimize certain stacked wildcard patterns.""" + subpattern = None + if (self.content is not None and + len(self.content) == 1 and len(self.content[0]) == 1): + subpattern = self.content[0][0] + if self.min == 1 and self.max == 1: + if self.content is None: + return NodePattern(name=self.name) + if subpattern is not None and self.name == subpattern.name: + return subpattern.optimize() + if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and + subpattern.min <= 1 and self.name == subpattern.name): + return WildcardPattern(subpattern.content, + self.min*subpattern.min, + self.max*subpattern.max, + subpattern.name) + return self + + def match(self, node, results=None): + """Does this pattern exactly match a node?""" + return self.match_seq([node], results) + + def match_seq(self, nodes, results=None): + """Does this pattern exactly match a sequence of nodes?""" + for c, r in self.generate_matches(nodes): + if c == len(nodes): + if results is not None: + results.update(r) + if self.name: + results[self.name] = list(nodes) + return True + return False + + def generate_matches(self, nodes): + """ + Generator yielding matches for a sequence of nodes. + + Args: + nodes: sequence of nodes + + Yields: + (count, results) tuples where: + count: the match comprises nodes[:count]; + results: dict containing named submatches. + """ + if self.content is None: + # Shortcut for special case (see __init__.__doc__) + for count in range(self.min, 1 + min(len(nodes), self.max)): + r = {} + if self.name: + r[self.name] = nodes[:count] + yield count, r + elif self.name == "bare_name": + yield self._bare_name_matches(nodes) + else: + # The reason for this is that hitting the recursion limit usually + # results in some ugly messages about how RuntimeErrors are being + # ignored. We only have to do this on CPython, though, because other + # implementations don't have this nasty bug in the first place. + if hasattr(sys, "getrefcount"): + save_stderr = sys.stderr + sys.stderr = StringIO() + try: + for count, r in self._recursive_matches(nodes, 0): + if self.name: + r[self.name] = nodes[:count] + yield count, r + except RuntimeError: + # We fall back to the iterative pattern matching scheme if the recursive + # scheme hits the recursion limit. 
+ for count, r in self._iterative_matches(nodes): + if self.name: + r[self.name] = nodes[:count] + yield count, r + finally: + if hasattr(sys, "getrefcount"): + sys.stderr = save_stderr + + def _iterative_matches(self, nodes): + """Helper to iteratively yield the matches.""" + nodelen = len(nodes) + if 0 >= self.min: + yield 0, {} + + results = [] + # generate matches that use just one alt from self.content + for alt in self.content: + for c, r in generate_matches(alt, nodes): + yield c, r + results.append((c, r)) + + # for each match, iterate down the nodes + while results: + new_results = [] + for c0, r0 in results: + # stop if the entire set of nodes has been matched + if c0 < nodelen and c0 <= self.max: + for alt in self.content: + for c1, r1 in generate_matches(alt, nodes[c0:]): + if c1 > 0: + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + new_results.append((c0 + c1, r)) + results = new_results + + def _bare_name_matches(self, nodes): + """Special optimized matcher for bare_name.""" + count = 0 + r = {} + done = False + max = len(nodes) + while not done and count < max: + done = True + for leaf in self.content: + if leaf[0].match(nodes[count], r): + count += 1 + done = False + break + r[self.name] = nodes[:count] + return count, r + + def _recursive_matches(self, nodes, count): + """Helper to recursively yield the matches.""" + assert self.content is not None + if count >= self.min: + yield 0, {} + if count < self.max: + for alt in self.content: + for c0, r0 in generate_matches(alt, nodes): + for c1, r1 in self._recursive_matches(nodes[c0:], count+1): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + + +class NegatedPattern(BasePattern): + + def __init__(self, content=None): + """ + Initializer. + + The argument is either a pattern or None. If it is None, this + only matches an empty sequence (effectively '$' in regex + lingo). If it is not None, this matches whenever the argument + pattern doesn't have any matches. + """ + if content is not None: + assert isinstance(content, BasePattern), repr(content) + self.content = content + + def match(self, node): + # We never match a node in its entirety + return False + + def match_seq(self, nodes): + # We only match an empty sequence of nodes in its entirety + return len(nodes) == 0 + + def generate_matches(self, nodes): + if self.content is None: + # Return a match if there is an empty sequence + if len(nodes) == 0: + yield 0, {} + else: + # Return a match if the argument pattern has no matches + for c, r in self.content.generate_matches(nodes): + return + yield 0, {} + + +def generate_matches(patterns, nodes): + """ + Generator yielding matches for a sequence of patterns and nodes. + + Args: + patterns: a sequence of patterns + nodes: a sequence of nodes + + Yields: + (count, results) tuples where: + count: the entire sequence of patterns matches nodes[:count]; + results: dict containing named submatches. 
+ """ + if not patterns: + yield 0, {} + else: + p, rest = patterns[0], patterns[1:] + for c0, r0 in p.generate_matches(nodes): + if not rest: + yield c0, r0 + else: + for c1, r1 in generate_matches(rest, nodes[c0:]): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r diff --git a/blib2to3/pytree.pyi b/blib2to3/pytree.pyi new file mode 100644 index 0000000..eb7320b --- /dev/null +++ b/blib2to3/pytree.pyi @@ -0,0 +1,86 @@ +# Stubs for lib2to3.pytree (Python 3.6) + +import sys +from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Tuple, TypeVar, Union + +from blib2to3.pgen2.grammar import Grammar + +_P = TypeVar('_P') +_NL = Union[Node, Leaf] +_Context = Tuple[Text, int, int] +_Results = Dict[Text, _NL] +_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]] +_Convert = Callable[[Grammar, _RawNode], Any] + +HUGE: int + +def type_repr(type_num: int) -> Text: ... + +class Base: + type: int + parent: Optional[Node] + prefix: Text + children: List[_NL] + was_changed: bool + was_checked: bool + def __eq__(self, other: Any) -> bool: ... + def _eq(self: _P, other: _P) -> bool: ... + def clone(self: _P) -> _P: ... + def post_order(self) -> Iterator[_NL]: ... + def pre_order(self) -> Iterator[_NL]: ... + def replace(self, new: Union[_NL, List[_NL]]) -> None: ... + def get_lineno(self) -> int: ... + def changed(self) -> None: ... + def remove(self) -> Optional[int]: ... + @property + def next_sibling(self) -> Optional[_NL]: ... + @property + def prev_sibling(self) -> Optional[_NL]: ... + def leaves(self) -> Iterator[Leaf]: ... + def depth(self) -> int: ... + def get_suffix(self) -> Text: ... + if sys.version_info < (3,): + def get_prefix(self) -> Text: ... + def set_prefix(self, prefix: Text) -> None: ... + +class Node(Base): + fixers_applied: List[Any] + def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ... + def set_child(self, i: int, child: _NL) -> None: ... + def insert_child(self, i: int, child: _NL) -> None: ... + def append_child(self, child: _NL) -> None: ... + +class Leaf(Base): + lineno: int + column: int + value: Text + fixers_applied: List[Any] + def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ... + +def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ... + +class BasePattern: + type: int + content: Optional[Text] + name: Optional[Text] + def optimize(self) -> BasePattern: ... # sic, subclasses are free to optimize themselves into different patterns + def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ... + def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ... + def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ... + +class LeafPattern(BasePattern): + def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... + +class NodePattern(BasePattern): + wildcards: bool + def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... + +class WildcardPattern(BasePattern): + min: int + max: int + def __init__(self, content: Optional[Text] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ... + +class NegatedPattern(BasePattern): + def __init__(self, content: Optional[Text] = ...) -> None: ... 
+ +def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ... diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..8fa7236 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,31 @@ +[mypy] +# Specify the target platform details in config, so your developers are +# free to run mypy on Windows, Linux, or macOS and get consistent +# results. +python_version=3.6 +platform=linux + +# flake8-mypy expects the two following for sensible formatting +show_column_numbers=True + +# show error messages from unrelated files +follow_imports=normal + +# suppress errors about unsatisfied imports +ignore_missing_imports=True + +# be strict +disallow_untyped_calls=True +warn_return_any=True +strict_optional=True +warn_no_return=True +warn_redundant_casts=True +warn_unused_ignores=True + +# The following are off by default. Flip them on if you feel +# adventurous. +disallow_untyped_defs=True +check_untyped_defs=True + +# No incremental mode +cache_dir=/dev/null diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a64482c --- /dev/null +++ b/setup.py @@ -0,0 +1,67 @@ +# Copyright (C) 2018 Łukasz Langa +import ast +import re +from setuptools import setup +import sys + +assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+" +from pathlib import Path # noqa E402 + +CURRENT_DIR = Path(__file__).parent + + +def get_long_description(): + readme_md = CURRENT_DIR / 'README.md' + try: + import pypandoc + return pypandoc.convert_file(str(readme_md), 'rst') + + except (IOError, ImportError): + print() + print( + '\x1b[31m\x1b[1mwarning:\x1b[0m\x1b[31m pandoc not found, ' + 'long description will be ugly (PyPI does not support .md).' + '\x1b[0m' + ) + print() + with open(readme_md, encoding='utf8') as ld_file: + return ld_file.read() + + +def get_version(): + black_py = CURRENT_DIR / 'black.py' + _version_re = re.compile(r'__version__\s+=\s+(?P.*)') + with open(black_py, 'r', encoding='utf8') as f: + version = _version_re.search(f.read()).group('version') + return str(ast.literal_eval(version)) + + +setup( + name='black', + version=get_version(), + description="The uncompromising code formatter.", + long_description=get_long_description(), + keywords='automation formatter yapf autopep8 pyfmt gofmt rustfmt', + author='Łukasz Langa', + author_email='lukasz@langa.pl', + url='https://github.com/ambv/black', + license='MIT', + py_modules=['black'], + packages=['blib2to3', 'blib2to3.pgen2'], + python_requires=">=3.6", + zip_safe=False, + install_requires=['click', 'attrs'], + test_suite='tests.test_black', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.6', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Software Development :: Quality Assurance', + ], + entry_points={'console_scripts': ['black=black:main']}, +) diff --git a/tests/.flake8 b/tests/.flake8 new file mode 100644 index 0000000..3528ac4 --- /dev/null +++ b/tests/.flake8 @@ -0,0 +1,8 @@ +# Like the base Black .flake8 but also ignores F811 which is used deliberately +# in test files. 
+ +[flake8] +ignore = E266, E501, F811 +max-line-length = 80 +max-complexity = 12 +select = B,C,E,F,W,T4,B9 diff --git a/tests/cantfit.py b/tests/cantfit.py new file mode 100644 index 0000000..99bcaa0 --- /dev/null +++ b/tests/cantfit.py @@ -0,0 +1,27 @@ +# long variable name +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 0 +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 1 # with a comment +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = [ + 1, 2, 3 +] +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function() +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function( + arg1, arg2, arg3 +) +this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function( + [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3 +) +# long function name +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying() +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying( + arg1, arg2, arg3 +) +normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying( + [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3 +) +# long arguments +normal_name = normal_function_name( + "but with super long string arguments that on their own exceed the line limit so there's no way it can ever fit", + "eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs", + this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it=0, +) diff --git a/tests/comments.py b/tests/comments.py new file mode 100644 index 0000000..e661ba6 --- /dev/null +++ b/tests/comments.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# Some license here. +# +# Has many lines. Many, many lines. +# Many, many, many lines. +"""Module docstring. + +Possibly also many, many lines. +""" + +import os.path +import sys + +import a +from b.c import X # some noqa comment + +try: + import fast +except ImportError: + import slow as fast + + +# Some comment before a function. +def function(default=None): + """Docstring comes first. + + Possibly many lines. + """ + # FIXME: Some comment about why this function is crap but still in production. + import inner_imports + + if inner_imports.are_evil(): + # Explains why we have this if. + # In great detail indeed. + x = X() + return x.method1() # type: ignore + + # This return is also commented for some reason. + return default + + +# Explains why we use global state. +GLOBAL_STATE = {'a': a(1), 'b': a(2), 'c': a(3)} + + +# Another comment +@fast(really=True) +async def wat(): + async with X.open_async() as x: # Some more comments + result = await x.method1() + # Comment after ending a block. + if result: + print('A OK', file=sys.stdout) + # Comment between things. + print() + + +# Some closing comments. +# Maybe Vim or Emacs directives for formatting. +# Who knows. diff --git a/tests/comments2.py b/tests/comments2.py new file mode 100644 index 0000000..7d5d3a3 --- /dev/null +++ b/tests/comments2.py @@ -0,0 +1,202 @@ +# Please keep __all__ alphabetized within each category. +__all__ = [ + # Super-special typing primitives. + 'Any', + 'Callable', + 'ClassVar', + + # ABCs (from collections.abc). + 'AbstractSet', # collections.abc.Set. + 'ByteString', + 'Container', + + # Concrete collection types. 
+ 'Counter', + 'Deque', + 'Dict', + 'DefaultDict', + 'List', + 'Set', + 'FrozenSet', + 'NamedTuple', # Not really a type. + 'Generator', +] + +def inline_comments_in_brackets_ruin_everything(): + if typedargslist: + parameters.children = [ + parameters.children[0], # (1 + body, + parameters.children[-1], # )1 + ] + else: + parameters.children = [ + parameters.children[0], # (2 what if this was actually long + body, + parameters.children[-1], # )2 + ] + if (self._proc is not None and + # has the child process finished? + self._returncode is None and + # the child process has finished, but the + # transport hasn't been notified yet? + self._proc.poll() is None): + pass + short = [ + # one + 1, + # two + 2] + call(arg1, arg2, """ +short +""", arg3=True) + + ############################################################################ + + call2( + #short + arg1, + #but + arg2, + #multiline + """ +short +""", + # yup + arg3=True) + lcomp = [ + element # yup + for element in collection # yup + if element is not None # right + ] + lcomp2 = [ + # hello + element + # yup + for element in collection + # right + if element is not None + ] + lcomp3 = [ + # This one is actually too long to fit in a single line. + element.split('\n', 1)[0] + # yup + for element in collection.select_elements() + # right + if element is not None + ] + return Node( + syms.simple_stmt, + [ + Node(statement, result), + Leaf(token.NEWLINE, '\n'), # FIXME: \r\n? + ], + ) + +instruction() + +# END COMMENTS +# MORE END COMMENTS + + +# output + + +# Please keep __all__ alphabetized within each category. +__all__ = [ + # Super-special typing primitives. + 'Any', + 'Callable', + 'ClassVar', + # ABCs (from collections.abc). + 'AbstractSet', # collections.abc.Set. + 'ByteString', + 'Container', + # Concrete collection types. + 'Counter', + 'Deque', + 'Dict', + 'DefaultDict', + 'List', + 'Set', + 'FrozenSet', + 'NamedTuple', # Not really a type. + 'Generator', +] + + +def inline_comments_in_brackets_ruin_everything(): + if typedargslist: + parameters.children = [ + parameters.children[0], body, parameters.children[-1] # (1 # )1 + ] + else: + parameters.children = [ + parameters.children[0], # (2 what if this was actually long + body, + parameters.children[-1], # )2 + ] + if ( + self._proc is not None and + # has the child process finished? + self._returncode is None and + # the child process has finished, but the + # transport hasn't been notified yet? + self._proc.poll() is None + ): + pass + short = [ + # one + 1, + # two + 2, + ] + call( + arg1, + arg2, + """ +short +""", + arg3=True, + ) + ############################################################################ + call2( + # short + arg1, + # but + arg2, + # multiline + """ +short +""", + # yup + arg3=True, + ) + lcomp = [ + element for element in collection if element is not None # yup # yup # right + ] + lcomp2 = [ + # hello + element + # yup + for element in collection + # right + if element is not None + ] + lcomp3 = [ + # This one is actually too long to fit in a single line. + element.split('\n', 1)[0] + # yup + for element in collection.select_elements() + # right + if element is not None + ] + return Node( + syms.simple_stmt, + [Node(statement, result), Leaf(token.NEWLINE, '\n')], # FIXME: \r\n? 
+ ) + + +instruction() +# END COMMENTS +# MORE END COMMENTS diff --git a/tests/composition.py b/tests/composition.py new file mode 100644 index 0000000..7b462ac --- /dev/null +++ b/tests/composition.py @@ -0,0 +1,21 @@ +class C: + + def test(self) -> None: + with patch("black.out", print): + self.assertEqual( + unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.' + ) + self.assertEqual( + unstyle(str(report)), + '1 file reformatted, 1 file left unchanged, 1 file failed to reformat.', + ) + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 2 files left unchanged, ' + '2 files failed to reformat.', + ) diff --git a/tests/expression.py b/tests/expression.py new file mode 100644 index 0000000..3291dc2 --- /dev/null +++ b/tests/expression.py @@ -0,0 +1,240 @@ +... +'some_string' +b'\\xa3' +Name +None +True +False +1 +1.0 +1j +True or False +True or False or None +True and False +True and False and None +(Name1 and Name2) or Name3 +Name1 and Name2 or Name3 +Name1 or (Name2 and Name3) +Name1 or Name2 and Name3 +(Name1 and Name2) or (Name3 and Name4) +Name1 and Name2 or Name3 and Name4 +Name1 or (Name2 and Name3) or Name4 +Name1 or Name2 and Name3 or Name4 +v1 << 2 +1 >> v2 +1 % finished +1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8 +((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8) +not great +~great ++value +-1 +~int and not v1 ^ 123 + v2 | True +(~int) and (not ((v1 ^ (123 + v2)) | True)) +lambda arg: None +lambda a=True: a +lambda a, b, c=True: a +lambda a, b, c=True, *, d=(1 << v2), e='str': a +lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b +1 if True else 2 +str or None if True else str or bytes or None +(str or None) if True else (str or bytes or None) +str or None if (1 if True else 2) else str or bytes or None +(str or None) if (1 if True else 2) else (str or bytes or None) +{'2.7': dead, '3.7': (long_live or die_hard)} +{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}} +{**a, **b, **c} +{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')} +({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None +() +(1,) +(1, 2) +(1, 2, 3) +[] +[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)] +{i for i in (1, 2, 3)} +{(i ** 2) for i in (1, 2, 3)} +{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))} +{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)} +[i for i in (1, 2, 3)] +[(i ** 2) for i in (1, 2, 3)] +[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))] +[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)] +{i: 0 for i in (1, 2, 3)} +{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))} +Python3 > Python2 > COBOL +Life is Life +call() +call(arg) +call(kwarg='hey') +call(arg, kwarg='hey') +call(arg, another, kwarg='hey', **kwargs) +lukasz.langa.pl +call.me(maybe) +1 .real +1.0 .real +....__class__ +list[str] +dict[str, int] +tuple[str, ...] 
+tuple[str, int, float, dict[str, int]] +slice[0] +slice[0:1] +slice[0:1:2] +slice[:] +slice[:-1] +slice[1:] +slice[::-1] +(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None) +f'f-string without formatted values is just a string' +f'{{NOT a formatted value}}' +f'some f-string with {a} {few():.2f} {formatted.values!r}' +f"{f'{nested} inner'} outer" +f'space between opening braces: { {a for a in (1, 2, 3)}}' +{'2.7': dead, '3.7': long_live or die_hard} +{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'} +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C] +(SomeName) +SomeName +(Good, Bad, Ugly) +(i for i in (1, 2, 3)) +((i ** 2) for i in (1, 2, 3)) +((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))) +(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)) +(*starred) +a = (1,) +b = 1, +c = 1 +d = (1,) + a + (2,) + + +def gen(): + yield from outside_of_generator + a = (yield) + + +async def f(): + await some.complicated[0].call(with_args=(True or (1 is not 1))) + + +# output + + +... +'some_string' +b'\\xa3' +Name +None +True +False +1 +1.0 +1j +True or False +True or False or None +True and False +True and False and None +(Name1 and Name2) or Name3 +Name1 and Name2 or Name3 +Name1 or (Name2 and Name3) +Name1 or Name2 and Name3 +(Name1 and Name2) or (Name3 and Name4) +Name1 and Name2 or Name3 and Name4 +Name1 or (Name2 and Name3) or Name4 +Name1 or Name2 and Name3 or Name4 +v1 << 2 +1 >> v2 +1 % finished +1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8 +((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8) +not great +~great ++value +-1 +~int and not v1 ^ 123 + v2 | True +(~int) and (not ((v1 ^ (123 + v2)) | True)) +lambda arg: None +lambda a=True: a +lambda a, b, c=True: a +lambda a, b, c=True, *, d=(1 << v2), e='str': a +lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b +1 if True else 2 +str or None if True else str or bytes or None +(str or None) if True else (str or bytes or None) +str or None if (1 if True else 2) else str or bytes or None +(str or None) if (1 if True else 2) else (str or bytes or None) +{'2.7': dead, '3.7': (long_live or die_hard)} +{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}} +{**a, **b, **c} +{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')} +({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None +() +(1,) +(1, 2) +(1, 2, 3) +[] +[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)] +{i for i in (1, 2, 3)} +{(i ** 2) for i in (1, 2, 3)} +{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))} +{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)} +[i for i in (1, 2, 3)] +[(i ** 2) for i in (1, 2, 3)] +[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))] +[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)] +{i: 0 for i in (1, 2, 3)} +{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))} +Python3 > Python2 > COBOL +Life is Life +call() +call(arg) +call(kwarg='hey') +call(arg, kwarg='hey') +call(arg, another, kwarg='hey', **kwargs) +lukasz.langa.pl +call.me(maybe) +1 .real +1.0 .real +....__class__ +list[str] +dict[str, int] +tuple[str, ...] 
+tuple[str, int, float, dict[str, int]]
+slice[0]
+slice[0:1]
+slice[0:1:2]
+slice[:]
+slice[:-1]
+slice[1:]
+slice[::-1]
+(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None)
+f'f-string without formatted values is just a string'
+f'{{NOT a formatted value}}'
+f'some f-string with {a} {few():.2f} {formatted.values!r}'
+f"{f'{nested} inner'} outer"
+f'space between opening braces: { {a for a in (1, 2, 3)}}'
+{'2.7': dead, '3.7': long_live or die_hard}
+{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'}
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C]
+(SomeName)
+SomeName
+(Good, Bad, Ugly)
+(i for i in (1, 2, 3))
+((i ** 2) for i in (1, 2, 3))
+((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c')))
+(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3))
+(*starred)
+a = (1,)
+b = 1,
+c = 1
+d = (1,) + a + (2,)
+
+
+def gen():
+    yield from outside_of_generator
+
+    a = (yield)
+
+
+async def f():
+    await some.complicated[0].call(with_args=(True or (1 is not 1)))
diff --git a/tests/function.py b/tests/function.py
new file mode 100644
index 0000000..85f7d40
--- /dev/null
+++ b/tests/function.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+import asyncio
+import sys
+
+from third_party import X, Y, Z
+
+from library import some_connection, \
+                    some_decorator
+
+def func_no_args():
+  a; b; c
+  if True: raise RuntimeError
+  if False: ...
+  for i in range(10):
+    print(i)
+    continue
+  return None
+async def coroutine(arg):
+ "Single-line docstring. Multiline is harder to reformat."
+ async with some_connection() as conn:
+     await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
+ await asyncio.sleep(1)
+@asyncio.coroutine
+@some_decorator(
+with_args=True,
+many_args=[1,2,3]
+)
+def function_signature_stress_test(number:int,no_annotation=None,text:str="default",* ,debug:bool=False,**kwargs) -> str:
+ return text[number:-1]
+
+def long_lines():
+    if True:
+        typedargslist.extend(
+            gen_annotated_params(ast_args.kwonlyargs, ast_args.kw_defaults, parameters, implicit_default=True)
+        )
+    _type_comment_re = re.compile(
+        r"""
+        ^
+        [\t ]*
+        \#[ ]type:[ ]*
+        (?P<type>
+            [^#\t\n]+?
+        )
+        (?<!ignore)     # note: this will force the non-greedy + in <type> to match
+                        # a trailing space which is why we need the silliness below
+        (?<!ignore[ ]*)
+        (?P<nl>
+            (?:\#[^\n]*)?
+            \n?
+        )
+        $
+        """, re.MULTILINE | re.VERBOSE
+    )
+
+# output
+
+
+#!/usr/bin/env python3
+import asyncio
+import sys
+
+from third_party import X, Y, Z
+
+from library import some_connection, some_decorator
+
+
+def func_no_args():
+    a
+    b
+    c
+    if True:
+        raise RuntimeError
+
+    if False:
+        ...
+    for i in range(10):
+        print(i)
+        continue
+
+    return None
+
+
+async def coroutine(arg):
+    "Single-line docstring. Multiline is harder to reformat."
+    async with some_connection() as conn:
+        await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
+    await asyncio.sleep(1)
+
+
+@asyncio.coroutine
+@some_decorator(with_args=True, many_args=[1, 2, 3])
+def function_signature_stress_test(
+    number: int,
+    no_annotation=None,
+    text: str = "default",
+    *,
+    debug: bool = False,
+    **kwargs,
+) -> str:
+    return text[number:-1]
+
+
+def long_lines():
+    if True:
+        typedargslist.extend(
+            gen_annotated_params(
+                ast_args.kwonlyargs,
+                ast_args.kw_defaults,
+                parameters,
+                implicit_default=True,
+            )
+        )
+    _type_comment_re = re.compile(
+        r"""
+        ^
+        [\t ]*
+        \#[ ]type:[ ]*
+        (?P<type>
+            [^#\t\n]+?
+        )
+        (?<!ignore)     # note: this will force the non-greedy + in <type> to match
+                        # a trailing space which is why we need the silliness below
+        (?<!ignore[ ]*)
+        (?P<nl>
+            (?:\#[^\n]*)?
+            \n?
+ ) + $ + """, + re.MULTILINE | re.VERBOSE, + ) diff --git a/tests/import_spacing.py b/tests/import_spacing.py new file mode 100644 index 0000000..0597b62 --- /dev/null +++ b/tests/import_spacing.py @@ -0,0 +1,77 @@ +"""The asyncio package, tracking PEP 3156.""" + +# flake8: noqa + +import sys + +# This relies on each of the submodules having an __all__ variable. +from .base_events import * +from .coroutines import * +from .events import * # comment here + +from .futures import * +from .locks import * # comment here +from .protocols import * + +from .runners import * # comment here +from .queues import * +from .streams import * + +from .subprocess import * +from .tasks import * +from .transports import * + +__all__ = ( + base_events.__all__ + + coroutines.__all__ + + events.__all__ + + futures.__all__ + + locks.__all__ + + protocols.__all__ + + runners.__all__ + + queues.__all__ + + streams.__all__ + + subprocess.__all__ + + tasks.__all__ + + transports.__all__ +) + + +# output + + +"""The asyncio package, tracking PEP 3156.""" +# flake8: noqa +import sys + +# This relies on each of the submodules having an __all__ variable. +from .base_events import * +from .coroutines import * +from .events import * # comment here + +from .futures import * +from .locks import * # comment here +from .protocols import * + +from .runners import * # comment here +from .queues import * +from .streams import * + +from .subprocess import * +from .tasks import * +from .transports import * + +__all__ = ( + base_events.__all__ + + coroutines.__all__ + + events.__all__ + + futures.__all__ + + locks.__all__ + + protocols.__all__ + + runners.__all__ + + queues.__all__ + + streams.__all__ + + subprocess.__all__ + + tasks.__all__ + + transports.__all__ +) diff --git a/tests/test_black.py b/tests/test_black.py new file mode 100644 index 0000000..d9c0c5e --- /dev/null +++ b/tests/test_black.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +from functools import partial +from pathlib import Path +from typing import List, Tuple +import unittest +from unittest.mock import patch + +from click import unstyle + +import black + +ll = 88 +ff = partial(black.format_file, line_length=ll, fast=True) +fs = partial(black.format_str, line_length=ll) +THIS_FILE = Path(__file__) +THIS_DIR = THIS_FILE.parent + + +def dump_to_stderr(*output: str) -> str: + return '\n' + '\n'.join(output) + '\n' + + +def read_data(name: str) -> Tuple[str, str]: + """read_data('test_name') -> 'input', 'output'""" + if not name.endswith('.py'): + name += '.py' + _input: List[str] = [] + _output: List[str] = [] + with open(THIS_DIR / name, 'r', encoding='utf8') as test: + lines = test.readlines() + result = _input + for line in lines: + if line.rstrip() == '# output': + result = _output + continue + + result.append(line) + if _input and not _output: + # If there's no output marker, treat the entire file as already pre-formatted. 
+ _output = _input[:] + return ''.join(_input).strip() + '\n', ''.join(_output).strip() + '\n' + + +class BlackTestCase(unittest.TestCase): + maxDiff = None + + def assertFormatEqual(self, expected: str, actual: str) -> None: + if actual != expected: + black.out('Expected tree:', fg='green') + try: + exp_node = black.lib2to3_parse(expected) + bdv = black.DebugVisitor() + list(bdv.visit(exp_node)) + except Exception as ve: + black.err(str(ve)) + black.out('Actual tree:', fg='red') + try: + exp_node = black.lib2to3_parse(actual) + bdv = black.DebugVisitor() + list(bdv.visit(exp_node)) + except Exception as ve: + black.err(str(ve)) + self.assertEqual(expected, actual) + + @patch("black.dump_to_file", dump_to_stderr) + def test_self(self) -> None: + source, expected = read_data('test_black') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_black(self) -> None: + source, expected = read_data('../black') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_setup(self) -> None: + source, expected = read_data('../setup') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + with self.assertRaises(black.NothingChanged): + ff(THIS_FILE) + + @patch("black.dump_to_file", dump_to_stderr) + def test_function(self) -> None: + source, expected = read_data('function') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_expression(self) -> None: + source, expected = read_data('expression') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_comments(self) -> None: + source, expected = read_data('comments') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_comments2(self) -> None: + source, expected = read_data('comments2') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_cantfit(self) -> None: + source, expected = read_data('cantfit') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_import_spacing(self) -> None: + source, expected = read_data('import_spacing') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + @patch("black.dump_to_file", dump_to_stderr) + def test_composition(self) -> None: + 
source, expected = read_data('composition') + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, line_length=ll) + + def test_report(self) -> None: + report = black.Report() + out_lines = [] + err_lines = [] + + def out(msg: str, **kwargs): + out_lines.append(msg) + + def err(msg: str, **kwargs): + err_lines.append(msg) + + with patch("black.out", out), patch("black.err", err): + report.done(Path('f1'), changed=True) + self.assertEqual(len(out_lines), 1) + self.assertEqual(len(err_lines), 0) + self.assertEqual(out_lines[-1], 'reformatted f1') + self.assertEqual(unstyle(str(report)), '1 file reformatted.') + self.assertEqual(report.return_code, 0) + report.failed(Path('e1'), 'boom') + self.assertEqual(len(out_lines), 1) + self.assertEqual(len(err_lines), 1) + self.assertEqual(err_lines[-1], 'error: cannot format e1: boom') + self.assertEqual( + unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.' + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f2'), changed=False) + self.assertEqual(len(out_lines), 2) + self.assertEqual(len(err_lines), 1) + self.assertEqual(out_lines[-1], 'f2 already well formatted, good job.') + self.assertEqual( + unstyle(str(report)), + '1 file reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f3'), changed=True) + self.assertEqual(len(out_lines), 3) + self.assertEqual(len(err_lines), 1) + self.assertEqual(out_lines[-1], 'reformatted f3') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '1 file failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.failed(Path('e2'), 'boom') + self.assertEqual(len(out_lines), 3) + self.assertEqual(len(err_lines), 2) + self.assertEqual(err_lines[-1], 'error: cannot format e2: boom') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 1 file left unchanged, ' + '2 files failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + report.done(Path('f4'), changed=False) + self.assertEqual(len(out_lines), 4) + self.assertEqual(len(err_lines), 2) + self.assertEqual(out_lines[-1], 'f4 already well formatted, good job.') + self.assertEqual( + unstyle(str(report)), + '2 files reformatted, 2 files left unchanged, ' + '2 files failed to reformat.', + ) + self.assertEqual(report.return_code, 1) + + +if __name__ == '__main__': + unittest.main()
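For context: every fixture that test_black.py loads through read_data() follows the same convention. The lines before the '# output' marker are the unformatted input, the lines after it are what Black is expected to produce, and a file without the marker is treated as already formatted. The short sketch below is not part of this commit; it shows the same helpers applied to an inline string instead of a fixture file. The input string is invented purely for illustration, while black.format_str, black.assert_equivalent and black.assert_stable are exactly the calls the tests above make.

# A minimal sketch, assuming Black is importable as it is in tests/test_black.py.
# The source string is a made-up example; the black.* calls mirror BlackTestCase above.
from functools import partial

import black

ll = 88
fs = partial(black.format_str, line_length=ll)   # same helper the tests define

source = "x = { 'a':37,'b':42}\n"                # deliberately mis-formatted input
actual = fs(source)                              # what Black produces for it
black.assert_equivalent(source, actual)          # formatting must not change the AST
black.assert_stable(source, actual, line_length=ll)  # running Black again changes nothing
print(actual, end='')                            # likely prints: x = {'a': 37, 'b': 42}

The three assertions are the same invariants the test suite checks for every fixture: the result matches the expected output, it parses to an equivalent tree, and it is a fixed point of the formatter.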