Initial commit

author Łukasz Langa <lukasz@langa.pl>

Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)

committer Łukasz Langa <lukasz@langa.pl>

Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)
author Łukasz Langa <lukasz@langa.pl>
Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)
committer Łukasz Langa <lukasz@langa.pl>
Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)
diff --git a/.flake8 b/.flake8

new file mode 100644 (file)

index 0000000..cf36923
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,8 @@
+# This is an example .flake8 config, used when developing *Black* itself.
+# Keep in sync with setup.cfg which is used for source packages.
+
+[flake8]
+ignore = E266, E501
+max-line-length = 80
+max-complexity = 12
+select = B,C,E,F,W,T4,B9
diff --git a/.gitignore b/.gitignore

new file mode 100644 (file)

index 0000000..6350e98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.coverage
diff --git a/.travis.yml b/.travis.yml

new file mode 100644 (file)

index 0000000..e434c44
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+sudo: false
+language: python
+before_script:
+- pip install -e .
+# test script
+script:  python setup.py test
+notifications:
+  on_success: change
+  on_failure: always
+matrix:
+  include:
+    - python: 3.6
+    - python: 3.6-dev
+    - python: 3.7-dev
+    - python: 3.8-dev
diff --git a/LICENSE b/LICENSE

new file mode 100644 (file)

index 0000000..7a9b891
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Łukasz Langa
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in

new file mode 100644 (file)

index 0000000..9ae6851
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include *.rst *.md LICENSE
+recursive-include tests *.txt *.py
diff --git a/Pipfile b/Pipfile

new file mode 100644 (file)

index 0000000..3c20aff
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,17 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+attrs = "*"
+click = "*"
+
+[dev-packages]
+coverage = "*"
+flake8 = "*"
+flake8-bugbear = "*"
+flake8-mypy = "*"
+mypy = "*"
+pypandoc = "*"
+twine = "*"
diff --git a/Pipfile.lock b/Pipfile.lock

new file mode 100644 (file)

index 0000000..7c173f4
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,243 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "e2dc877c2f32df83197fc3dc0f49e0a66d0d099aab106b99d64fdbe5b14cc91b"
+        },
+        "host-environment-markers": {
+            "implementation_name": "cpython",
+            "implementation_version": "3.6.4",
+            "os_name": "posix",
+            "platform_machine": "x86_64",
+            "platform_python_implementation": "CPython",
+            "platform_release": "17.4.0",
+            "platform_system": "Darwin",
+            "platform_version": "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64",
+            "python_full_version": "3.6.4",
+            "python_version": "3.6",
+            "sys_platform": "darwin"
+        },
+        "pipfile-spec": 6,
+        "requires": {},
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.python.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "attrs": {
+            "hashes": [
+                "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
+                "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
+            ],
+            "version": "==17.4.0"
+        },
+        "click": {
+            "hashes": [
+                "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d",
+                "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b"
+            ],
+            "version": "==6.7"
+        }
+    },
+    "develop": {
+        "attrs": {
+            "hashes": [
+                "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
+                "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
+            ],
+            "version": "==17.4.0"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
+                "sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
+            ],
+            "version": "==2018.1.18"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
+            ],
+            "version": "==3.0.4"
+        },
+        "coverage": {
+            "hashes": [
+                "sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc",
+                "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694",
+                "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80",
+                "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
+                "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249",
+                "sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1",
+                "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9",
+                "sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5",
+                "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508",
+                "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f",
+                "sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba",
+                "sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e",
+                "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
+                "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
+                "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162",
+                "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
+                "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558",
+                "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c",
+                "sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062",
+                "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
+                "sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99",
+                "sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287",
+                "sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000",
+                "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
+                "sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc",
+                "sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653",
+                "sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a",
+                "sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1",
+                "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91",
+                "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2",
+                "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d",
+                "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a",
+                "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4",
+                "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd",
+                "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77",
+                "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e"
+            ],
+            "version": "==4.5.1"
+        },
+        "flake8": {
+            "hashes": [
+                "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37",
+                "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0"
+            ],
+            "version": "==3.5.0"
+        },
+        "flake8-bugbear": {
+            "hashes": [
+                "sha256:541746f0f3b2f1a8d7278e1d2d218df298996b60b02677708560db7c7e620e3b",
+                "sha256:5f14a99d458e29cb92be9079c970030e0dd398b2decb179d76d39a5266ea1578"
+            ],
+            "version": "==18.2.0"
+        },
+        "flake8-mypy": {
+            "hashes": [
+                "sha256:cff009f4250e8391bf48990093cff85802778c345c8449d6498b62efefeebcbc",
+                "sha256:47120db63aff631ee1f84bac6fe8e64731dc66da3efc1c51f85e15ade4a3ba18"
+            ],
+            "version": "==17.8.0"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
+                "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f"
+            ],
+            "version": "==2.6"
+        },
+        "mccabe": {
+            "hashes": [
+                "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
+                "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
+            ],
+            "version": "==0.6.1"
+        },
+        "mypy": {
+            "hashes": [
+                "sha256:884f18f3a40cfcf24cdd5860b84958cfb35e6563e439c5adc1503878df221dc3",
+                "sha256:83d798f66323f2de6191d66d9ae5ab234e4ee5b400010e19c58d75d308049f25"
+            ],
+            "version": "==0.570"
+        },
+        "pkginfo": {
+            "hashes": [
+                "sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024",
+                "sha256:bb1a6aeabfc898f5df124e7e00303a5b3ec9a489535f346bfbddb081af93f89e"
+            ],
+            "version": "==1.4.1"
+        },
+        "pycodestyle": {
+            "hashes": [
+                "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9",
+                "sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766"
+            ],
+            "version": "==2.3.1"
+        },
+        "pyflakes": {
+            "hashes": [
+                "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f",
+                "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805"
+            ],
+            "version": "==1.6.0"
+        },
+        "pypandoc": {
+            "hashes": [
+                "sha256:e914e6d5f84a76764887e4d909b09d63308725f0cbb5293872c2c92f07c11a5b"
+            ],
+            "version": "==1.4"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
+                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+            ],
+            "version": "==2.18.4"
+        },
+        "requests-toolbelt": {
+            "hashes": [
+                "sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237",
+                "sha256:f6a531936c6fa4c6cfce1b9c10d5c4f498d16528d2a54a22ca00011205a187b5"
+            ],
+            "version": "==0.8.0"
+        },
+        "tqdm": {
+            "hashes": [
+                "sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9",
+                "sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df"
+            ],
+            "version": "==4.19.6"
+        },
+        "twine": {
+            "hashes": [
+                "sha256:d3ce5c480c22ccfb761cd358526e862b32546d2fe4bc93d46b5cf04ea3cc46ca",
+                "sha256:caa45b7987fc96321258cd7668e3be2ff34064f5c66d2d975b641adca659c1ab"
+            ],
+            "version": "==1.9.1"
+        },
+        "typed-ast": {
+            "hashes": [
+                "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
+                "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
+                "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
+                "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded",
+                "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
+                "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
+                "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
+                "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6",
+                "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559",
+                "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
+                "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
+                "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87",
+                "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe",
+                "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
+                "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
+                "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9",
+                "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd",
+                "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa"
+            ],
+            "version": "==1.1.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
+                "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
+            ],
+            "version": "==1.22"
+        },
+        "wheel": {
+            "hashes": [
+                "sha256:e721e53864f084f956f40f96124a74da0631ac13fbbd1ba99e8e2b5e9cafdf64",
+                "sha256:9515fe0a94e823fd90b08d22de45d7bde57c90edce705b22f5e1ecf7e1b653c8"
+            ],
+            "version": "==0.30.0"
+        }
+    }
+}
diff --git a/README.md b/README.md

new file mode 100644 (file)

index 0000000..d1724ca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,260 @@
+# black
+
+[![Build Status](https://travis-ci.org/ambv/black.svg?branch=master)](https://travis-ci.org/ambv/black)
+
+> Any color you like.
+
+
+*Black* is the uncompromising Python code formatter.  By using it, you
+agree to cede control over minutiae of hand-formatting.  In return,
+*Black* gives you speed, determinism, and freedom from `pycodestyle`
+nagging about formatting.  You will save time and mental energy for
+more important matters.
+
+Blackened code looks the same regardless of the project you're reading.
+Formatting becomes transparent after a while and you can focus on the
+content instead.
+
+*Black* makes code review faster by producing the smallest diffs
+possible.
+
+
+## NOTE: This is an early pre-release
+
+*Black* can already successfully format itself and the standard library.
+It also sports a decent test suite.  However, it is still very new.
+Things will probably be wonky for a while. This is made explicit by the
+"Alpha" trove classifier, as well as by the "a" in the version number.
+What this means for you is that **until the formatter becomes stable,
+you should expect some formatting to change in the future**.
+
+Also, as a temporary safety measure, *Black* will check that the
+reformatted code still produces a valid AST that is equivalent to the
+original.  This slows it down.  If you're feeling confident, use
+``--fast``.
+
+
+## Usage
+
+*Black* can be installed by running `pip install black`.
+
+```
+black [OPTIONS] [SRC]...
+
+Options:
+  -l, --line-length INTEGER   Where to wrap around.  [default: 88]
+  --fast / --safe             If --fast given, skip temporary sanity checks.
+                              [default: --safe]
+  --version                   Show the version and exit.
+  --help                      Show this message and exit.
+```
+
+
+## The philosophy behind *Black*
+
+*Black* reformats entire files in place.  It is not configurable.  It
+doesn't take previous formatting into account.  It doesn't reformat
+blocks that start with `# fmt: off` and end with `# fmt: on`.  It also
+recognizes [YAPF](https://github.com/google/yapf)'s block comments to
+the same effect, as a courtesy for straddling code.
+
+
+### How *Black* formats files
+
+*Black* ignores previous formatting and applies uniform horizontal
+and vertical whitespace to your code.  The rules for horizontal
+whitespace are pretty obvious and can be summarized as: do whatever
+makes `pycodestyle` happy.
+
+As for vertical whitespace, *Black* tries to render one full expression
+or simple statement per line.  If this fits the allotted line length,
+great.
+```!py3
+# in:
+l = [1,
+     2,
+     3,
+]
+
+# out:
+l = [1, 2, 3]
+```
+
+If not, *Black* will look at the contents of the first outer matching
+brackets and put that in a separate indented line.
+```!py3
+# in:
+l = [[n for n in list_bosses()], [n for n in list_employees()]]
+
+# out:
+l = [
+    [n for n in list_bosses()], [n for n in list_employees()]
+]
+```
+
+If that still doesn't fit the bill, it will decompose the internal
+expression further using the same rule, indenting matching brackets
+every time.  If the contents of the matching brackets pair are
+comma-separated (like an argument list, or a dict literal, and so on)
+then *Black* will first try to keep them on the same line with the
+matching brackets.  If that doesn't work, it will put all of them in
+separate lines.
+```!py3
+# in:
+def very_important_function(template: str, *variables, file: os.PathLike, debug: bool = False):
+    """Applies `variables` to the `template` and writes to `file`."""
+    with open(file, 'w') as f:
+        ...
+
+# out:
+def very_important_function(
+    template: str,
+    *variables,
+    file: os.PathLike,
+    debug: bool = False,
+):
+    """Applies `variables` to the `template` and writes to `file`."""
+    with open(file, 'w') as f:
+        ...
+```
+
+You might have noticed that closing brackets are always dedented and
+that a trailing comma is always added.  Such formatting produces smaller
+diffs; when you add or remove an element, it's always just one line.
+Also, having the closing bracket dedented provides a clear delimiter
+between two distinct sections of the code that otherwise share the same
+indentation level (like the arguments list and the docstring in the
+example above).
+
+Unnecessary trailing commas are removed if an expression fits in one
+line.  This makes it 1% more likely that your line won't exceed the
+allotted line length limit.
+
+*Black* avoids spurious vertical whitespace.  This is in the spirit of
+PEP 8 which says that in-function vertical whitespace should only be
+used sparingly.  One exception is control flow statements: *Black* will
+always emit an extra empty line after ``return``, ``raise``, ``break``,
+``continue``, and ``yield``.  This is to make changes in control flow
+more prominent to readers of your code.
+
+That's it.  The rest of the whitespace formatting rules follow PEP 8 and
+are designed to keep `pycodestyle` quiet.
+
+
+### Line length
+
+You probably noticed the peculiar default line length.  *Black* defaults
+to 88 characters per line, which happens to be 10% over 80.  This number
+was found to produce significantly shorter files than sticking with 80
+(the most popular), or even 79 (used by the standard library).  In
+general, [90-ish seems like the wise choice](https://youtu.be/wf-BqAjZb8M?t=260).
+
+If you're paid by the line of code you write, you can pass
+`--line-length` with a lower number.  *Black* will try to respect that.
+However, sometimes it won't be able to without breaking other rules.  In
+those rare cases, auto-formatted code will exceed your allotted limit.
+
+You can also increase it, but remember that people with sight disabilities
+find it harder to work with line lengths exceeding 100 characters.
+It also adversely affects side-by-side diff review on typical screen
+resolutions.  Long lines also make it harder to present code neatly
+in documentation or talk slides.
+
+If you're using Flake8, you can bump `max-line-length` to 88 and forget
+about it.  Alternatively, use [Bugbear](https://github.com/PyCQA/flake8-bugbear)'s
+B950 warning instead of E501 and keep the max line length at 80 which
+you are probably already using.  You'd do it like this:
+```!ini
+[flake8]
+max-line-length = 80
+...
+select = C,E,F,W,B,B950
+ignore = E501
+```
+
+You'll find *Black*'s own .flake8 config file is configured like this.
+If you're curious about the reasoning behind B950, Bugbear's documentation
+explains it.  The tl;dr is "it's like highway speed limits, we won't
+bother you if you overdo it by a few km/h".
+
+
+### Editor integration
+
+There is currently no integration with any text editors.  Vim and
+Atom/Nuclide integration is planned by the author; others will require
+external contributions.
+
+Patches welcome! ✨ 🍰 ✨
+
+
+## Testimonials
+
+**Dusty Phillips**, [writer](https://smile.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=dusty+phillips):
+
+> Black is opinionated so you don't have to be.
+
+**Hynek Schlawack**, [creator of `attrs`](http://www.attrs.org/), core
+developer of Twisted and CPython:
+
+> An auto-formatter that doesn't suck is all I want for Xmas!
+
+**Carl Meyer**, [Django](https://www.djangoproject.com/) core developer:
+
+> At least the name is good.
+
+
+## Tests
+
+Just run:
+
+```
+python setup.py test
+```
+
+## This tool requires Python 3.6.0+ to run
+
+But you can reformat Python 2 code with it, too.  *Black* is able to parse
+all of the new syntax supported on Python 3.6 but also *effectively all*
+the Python 2 syntax at the same time, as long as you're not using print
+statements.
+
+By making the code exclusively Python 3.6+, I'm able to focus on the
+quality of the formatting and re-use all the nice features of the new
+releases (check out [pathlib](https://docs.python.org/3/library/pathlib.html) or
+f-strings) instead of wasting cycles on Unicode compatibility, and so on.
+
+
+## License
+
+MIT
+
+
+## Contributing
+
+In terms of inspiration, *Black* is about as configurable as *gofmt* and
+*rustfmt* are.  This is deliberate.
+
+Bug reports and fixes are always welcome!  However, before you suggest a
+new feature or configuration knob, ask yourself why you want it.  If it
+enables better integration with some workflow, fixes an inconsistency,
+speeds things up, and so on - go for it!  On the other hand, if your
+answer is "because I don't like a particular formatting" then you're not
+ready to embrace *Black* yet. Such changes are unlikely to get accepted.
+You can still try but prepare to be disappointed.
+
+
+## Change Log
+
+### 18.3a0
+
+* first published version, Happy 🍰 Day 2018!
+
+* alpha quality
+
+* date-versioned (see: http://calver.org/)
+
+
+## Authors
+
+Glued together by [Łukasz Langa](mailto:lukasz@langa.pl).
diff --git a/black.py b/black.py

new file mode 100644 (file)

index 0000000..24c57ca
--- /dev/null
+++ b/black.py
@@ -0,0 +1,1478 @@
+#!/usr/bin/env python3
+import asyncio
+from asyncio.base_events import BaseEventLoop
+from concurrent.futures import Executor, ProcessPoolExecutor
+from functools import partial
+import keyword
+import os
+from pathlib import Path
+import tokenize
+from typing import (
+    Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
+)
+
+from attr import attrib, dataclass, Factory
+import click
+
+# lib2to3 fork
+from blib2to3.pytree import Node, Leaf, type_repr
+from blib2to3 import pygram, pytree
+from blib2to3.pgen2 import driver, token
+from blib2to3.pgen2.parse import ParseError
+
+# Version string; also handed to click's `--version` option on `main`.
+__version__ = "18.3a0"
+# Default value of the `--line-length` command-line option.
+DEFAULT_LINE_LENGTH = 88
+# types
+# Grammar symbols taken from the lib2to3 fork imported above.
+syms = pygram.python_symbols
+# The aliases below are plain `str`/`int`; they only make signatures more
+# descriptive.
+FileContent = str
+Encoding = str
+Depth = int
+NodeType = int
+LeafID = int
+Priority = int
+# Either kind of lib2to3 tree element.
+LN = Union[Leaf, Node]
+# Message helpers, both writing to stderr: `out` prints bold, `err` prints red.
+out = partial(click.secho, bold=True, err=True)
+err = partial(click.secho, fg='red', err=True)
+
+
+class NothingChanged(UserWarning):
+    """Raised by `format_file` when there is nothing to rewrite.
+
+    That is the case when the source is blank or when the reformatted code is
+    the same as the source.
+    """
+
+
+class CannotSplit(Exception):
+    """A readable split that fits the allotted line length is impossible.
+
+    Raised by `left_hand_split()` and `right_hand_split()`.
+    """
+    # NOTE(review): both functions named above are defined outside this
+    # excerpt -- confirm the list of raisers is complete.
+
+
+@click.command()
+@click.option(
+    '-l',
+    '--line-length',
+    type=int,
+    default=DEFAULT_LINE_LENGTH,
+    help='How many characters per line to allow.',
+    show_default=True,
+)
+@click.option(
+    '--fast/--safe',
+    is_flag=True,
+    help='If --fast given, skip temporary sanity checks. [default: --safe]',
+)
+@click.version_option(version=__version__)
+@click.argument(
+    'src',
+    nargs=-1,
+    type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True),
+)
+@click.pass_context
+def main(ctx: click.Context, line_length: int, fast: bool, src: List[str]) -> None:
+    """The uncompromising code formatter."""
+    # The docstring above is what click shows as the command's help text, so
+    # implementation notes are kept in comments.
+    #
+    # Collect the files to format: directories are expanded through
+    # `gen_python_files_in_dir()`, explicitly named files are taken as they are.
+    sources: List[Path] = []
+    for s in src:
+        p = Path(s)
+        if p.is_dir():
+            sources.extend(gen_python_files_in_dir(p))
+        elif p.is_file():
+            # if a file was explicitly given, we don't care about its extension
+            sources.append(p)
+        else:
+            err(f'invalid path: {s}')
+    if len(sources) == 0:
+        ctx.exit(0)
+    elif len(sources) == 1:
+        # A single file is formatted in this process; no worker pool needed.
+        p = sources[0]
+        report = Report()
+        try:
+            changed = format_file_in_place(p, line_length=line_length, fast=fast)
+            report.done(p, changed)
+        except Exception as exc:
+            report.failed(p, str(exc))
+        ctx.exit(report.return_code)
+    else:
+        # Several files are formatted in parallel, one job per file.
+        loop = asyncio.get_event_loop()
+        executor = ProcessPoolExecutor(max_workers=os.cpu_count())
+        try:
+            return_code = loop.run_until_complete(
+                schedule_formatting(sources, line_length, fast, loop, executor)
+            )
+        finally:
+            # Only clean-up belongs here.  Exiting from inside `finally` would
+            # replace an in-flight exception (including KeyboardInterrupt)
+            # with a silent exit and hide the actual failure.
+            executor.shutdown()
+            loop.close()
+        ctx.exit(return_code)
+
+
+async def schedule_formatting(
+    sources: List[Path],
+    line_length: int,
+    fast: bool,
+    loop: BaseEventLoop,
+    executor: Executor,
+) -> int:
+    """Format all `sources` on `executor` and return the resulting exit code.
+
+    One `format_file_in_place` job per source is submitted through
+    `loop.run_in_executor`.  Each outcome is recorded in a `Report`, which is
+    echoed once the jobs have been waited for; its `return_code` is returned.
+    """
+    jobs = {}
+    for path in sources:
+        jobs[path] = loop.run_in_executor(
+            executor, format_file_in_place, path, line_length, fast
+        )
+    await asyncio.wait(jobs.values())
+    report = Report()
+    still_pending = []
+    for path, job in jobs.items():
+        if not job.done():
+            report.failed(path, 'timed out, cancelling')
+            job.cancel()
+            still_pending.append(job)
+            continue
+
+        failure = job.exception()
+        if failure:
+            report.failed(path, str(failure))
+        else:
+            report.done(path, job.result())
+    if still_pending:
+        # Give the cancelled jobs a short grace period to wind down.
+        await asyncio.wait(still_pending, timeout=2)
+    out('All done! ✨ 🍰 ✨')
+    click.echo(str(report))
+    return report.return_code
+
+
+def format_file_in_place(src: Path, line_length: int, fast: bool) -> bool:
+    """Reformat `src` on disk.
+
+    Returns False when `format_file` reports `NothingChanged`; otherwise the
+    new contents are written back using the file's encoding and True is
+    returned.
+    """
+    try:
+        new_text, file_encoding = format_file(
+            src, line_length=line_length, fast=fast
+        )
+    except NothingChanged:
+        changed = False
+    else:
+        with open(src, "w", encoding=file_encoding) as dst:
+            dst.write(new_text)
+        changed = True
+    return changed
+
+
+def format_file(
+    src: Path, line_length: int, fast: bool
+) -> Tuple[FileContent, Encoding]:
+    """Read `src`, reformat it and return `(new contents, encoding)`.
+
+    Raises `NothingChanged` for blank files and for files whose formatted
+    form equals the source.  Unless `fast` is set, the result is also passed
+    through `assert_equivalent` and `assert_stable` before being returned.
+    """
+    with tokenize.open(src) as src_buffer:
+        original = src_buffer.read()
+        encoding = src_buffer.encoding
+    if not original.strip():
+        raise NothingChanged(src)
+
+    formatted = format_str(original, line_length=line_length)
+    if formatted == original:
+        raise NothingChanged(src)
+
+    if not fast:
+        assert_equivalent(original, formatted)
+        assert_stable(original, formatted, line_length=line_length)
+    return formatted, encoding
+
+
+def format_str(src_contents: str, line_length: int) -> FileContent:
+    """Reformat the source given as a string and return the new contents.
+
+    Comment lines are held back and emitted right before the next non-comment
+    line (or at the very end).  The number of empty lines around each line
+    comes from `EmptyLineTracker.maybe_empty_lines()`; non-comment lines are
+    passed through `split_line()`.
+    """
+    src_node = lib2to3_parse(src_contents)
+    # Collect the output fragments and join them once at the end instead of
+    # growing a string with `+=` for every emitted line.
+    dst: List[str] = []
+    comments: List[Line] = []
+    lines = LineGenerator()
+    elt = EmptyLineTracker()
+    empty_line = Line()
+    after = 0
+    for current_line in lines.visit(src_node):
+        # Empty lines requested after the previous line are emitted only now,
+        # so none are left trailing after the last line.
+        dst.extend(str(empty_line) for _ in range(after))
+        before, after = elt.maybe_empty_lines(current_line)
+        dst.extend(str(empty_line) for _ in range(before))
+        if not current_line.is_comment:
+            dst.extend(str(comment) for comment in comments)
+            comments = []
+            dst.extend(
+                str(line)
+                for line in split_line(current_line, line_length=line_length)
+            )
+        else:
+            comments.append(current_line)
+    dst.extend(str(comment) for comment in comments)
+    return ''.join(dst)
+
+
+def lib2to3_parse(src_txt: str) -> Node:
+    """Given a string with source, return the lib2to3 Node.
+
+    A missing trailing newline is appended first (CRLF if one occurs within
+    the first 1024 characters, LF otherwise); this also covers an empty
+    string.  A bare `Leaf` result is wrapped in a `file_input` node.
+
+    Raises ValueError with the position and text of the offending line when
+    the source cannot be parsed.
+    """
+    grammar = pygram.python_grammar_no_print_statement
+    drv = driver.Driver(grammar, pytree.convert)
+    # `endswith` instead of `src_txt[-1]`: indexing raises IndexError on "".
+    if not src_txt.endswith('\n'):
+        nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
+        src_txt += nl
+    try:
+        result = drv.parse_string(src_txt, True)
+    except ParseError as pe:
+        lineno, column = pe.context[1]
+        lines = src_txt.splitlines()
+        try:
+            faulty_line = lines[lineno - 1]
+        except IndexError:
+            faulty_line = "<line number missing in source>"
+        raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None
+
+    if isinstance(result, Leaf):
+        result = Node(syms.file_input, [result])
+    return result
+
+
+def lib2to3_unparse(node: Node) -> str:
+    """Turn a lib2to3 `node` back into text by calling `str()` on it."""
+    return str(node)
+
+
+T = TypeVar('T')
+
+
+class Visitor(Generic[T]):
+    """Basic lib2to3 visitor that yields things on visiting."""
+
+    def visit(self, node: LN) -> Iterator[T]:
+        """Dispatch `node` to `visit_<name>`, or to `visit_default` if absent.
+
+        `<name>` is looked up in `token.tok_name` for types below 256 and
+        produced by `type_repr()` for all other types.
+        """
+        kind = node.type
+        name = token.tok_name[kind] if kind < 256 else type_repr(kind)
+        handler = getattr(self, f'visit_{name}', self.visit_default)
+        yield from handler(node)
+
+    def visit_default(self, node: LN) -> Iterator[T]:
+        """Visit every child of a `Node`; yield nothing for anything else."""
+        if not isinstance(node, Node):
+            return
+
+        for child in node.children:
+            yield from self.visit(child)
+
+
+@dataclass
+class DebugVisitor(Visitor[T]):
+    """Visitor that prints an indented outline of the tree through `out()`."""
+    tree_depth: int = attrib(default=0)
+
+    def visit_default(self, node: LN) -> Iterator[T]:
+        """Print `node`; for a `Node`, also recurse into children one level deeper."""
+        padding = ' ' * (2 * self.tree_depth)
+        if not isinstance(node, Node):
+            # Leaf: type name, optional prefix and value all on one output line.
+            leaf_type = token.tok_name.get(node.type, str(node.type))
+            out(f'{padding}{leaf_type}', fg='blue', nl=False)
+            if node.prefix:
+                # We don't have to handle prefixes for `Node` objects since
+                # that delegates to the first child anyway.
+                out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
+            out(f' {node.value!r}', fg='blue', bold=False)
+            return
+
+        node_type = type_repr(node.type)
+        out(f'{padding}{node_type}', fg='yellow')
+        self.tree_depth += 1
+        for child in node.children:
+            yield from self.visit(child)
+
+        self.tree_depth -= 1
+        out(f'{padding}/{node_type}', fg='yellow', bold=False)
+
+
+KEYWORDS = set(keyword.kwlist)
+# Layout-only token types; `LineGenerator.visit_default()` never appends leaves
+# of these types to a line.
+WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
+# Keywords that start a flow control line (see `Line.is_flow_control`).
+FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
+# Parent node types for which a directly nested simple statement is emitted
+# one indentation level deeper (see `LineGenerator.visit_simple_stmt()`).
+STATEMENT = {
+    syms.if_stmt,
+    syms.while_stmt,
+    syms.for_stmt,
+    syms.try_stmt,
+    syms.except_clause,
+    syms.with_stmt,
+    syms.funcdef,
+    syms.classdef,
+}
+# Fake token type given to comments that don't share a line with code
+# (see `generate_comments()`).
+STANDALONE_COMMENT = 153
+LOGIC_OPERATORS = {'and', 'or'}
+COMPARATORS = {
+    token.LESS,
+    token.GREATER,
+    token.EQEQUAL,
+    token.NOTEQUAL,
+    token.LESSEQUAL,
+    token.GREATEREQUAL,
+}
+MATH_OPERATORS = {
+    token.PLUS,
+    token.MINUS,
+    token.STAR,
+    token.SLASH,
+    token.VBAR,
+    token.AMPER,
+    token.PERCENT,
+    token.CIRCUMFLEX,
+    token.LEFTSHIFT,
+    token.RIGHTSHIFT,
+    token.DOUBLESTAR,
+    token.DOUBLESLASH,
+}
+# Delimiter priorities.  Higher numbers are higher priority; 0 is reserved by
+# `is_delimiter()` for "not a delimiter".
+COMPREHENSION_PRIORITY = 20
+COMMA_PRIORITY = 10
+LOGIC_PRIORITY = 5
+STRING_PRIORITY = 4
+COMPARATOR_PRIORITY = 3
+MATH_PRIORITY = 1
+
+
+@dataclass
+class BracketTracker:
+    """Tracks bracket nesting and depth-0 delimiters for the leaves of one line."""
+    # Number of opening brackets seen so far that have not been closed yet.
+    depth: int = attrib(default=0)
+    # Maps (depth, closing bracket type) to the opening bracket leaf awaiting it.
+    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict))
+    # Maps id(leaf) to the delimiter priority recorded for that leaf.
+    delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict))
+    # The most recent non-comment leaf passed to `mark()`.
+    previous: Optional[Leaf] = attrib(default=None)
+
+    def mark(self, leaf: Leaf) -> None:
+        """Record `leaf`: tag its bracket depth, pair brackets, note delimiters.
+
+        Comment leaves are ignored.  Every other leaf gets a `bracket_depth`
+        attribute; closing brackets additionally get `opening_bracket` pointing
+        at the matching opening leaf.  Delimiters are only recorded at depth 0.
+        """
+        if leaf.type == token.COMMENT:
+            return
+
+        if leaf.type in CLOSING_BRACKETS:
+            # Decrement first so the closing bracket is tagged with the same
+            # depth as its opening counterpart.
+            self.depth -= 1
+            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
+            leaf.opening_bracket = opening_bracket  # type: ignore
+        leaf.bracket_depth = self.depth  # type: ignore
+        if self.depth == 0:
+            delim = is_delimiter(leaf)
+            if delim:
+                self.delimiters[id(leaf)] = delim
+            elif self.previous is not None:
+                # Implicit delimiters.  The priority is stored under the
+                # *previous* leaf, i.e. the one right before `leaf`.
+                if leaf.type == token.STRING and self.previous.type == token.STRING:
+                    self.delimiters[id(self.previous)] = STRING_PRIORITY
+                elif (
+                    leaf.type == token.NAME and
+                    leaf.value == 'for' and
+                    leaf.parent and
+                    leaf.parent.type in {syms.comp_for, syms.old_comp_for}
+                ):
+                    self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
+                elif (
+                    leaf.type == token.NAME and
+                    leaf.value == 'if' and
+                    leaf.parent and
+                    leaf.parent.type in {syms.comp_if, syms.old_comp_if}
+                ):
+                    self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
+        if leaf.type in OPENING_BRACKETS:
+            # Register after tagging so the opening bracket itself keeps the
+            # outer depth; its contents get depth + 1.
+            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
+            self.depth += 1
+        self.previous = leaf
+
+    def any_open_brackets(self) -> bool:
+        """Returns True if there is a yet unmatched open bracket on the line."""
+        return bool(self.bracket_match)
+
+    def max_priority(self, exclude: Iterable[LeafID] = ()) -> int:
+        """Returns the highest priority of a delimiter found on the line.
+
+        Values are consistent with what `is_delimiter()` returns.
+        Delimiters whose leaf id is in `exclude` are not considered.
+
+        Raises ValueError (from `max()` on an empty sequence) when no delimiter
+        is left to consider.
+        """
+        return max(v for k, v in self.delimiters.items() if k not in exclude)
+
+
+@dataclass
+class Line:
+    """A single logical line of output: leaves, attached comments and depth."""
+    # Indentation level; rendered as four spaces per level by `__str__()`.
+    depth: int = attrib(default=0)
+    # Non-comment leaves (and standalone comments) in output order.
+    leaves: List[Leaf] = attrib(default=Factory(list))
+    # Trailing comments keyed by id() of the leaf they are attached to.
+    comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict))
+    bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker))
+    # True for lines created by the split functions as the body of a bracket pair.
+    inside_brackets: bool = attrib(default=False)
+
+    def append(self, leaf: Leaf, preformatted: bool = False) -> None:
+        """Add `leaf` to the line; leaves with a blank value are dropped.
+
+        Unless `preformatted` is True, `whitespace(leaf)` is added to the prefix
+        of every leaf but the first.  Bracket tracking, trailing comma removal
+        and standalone comment adaptation run when the line is inside brackets
+        or the leaf is not preformatted.  COMMENT leaves end up in
+        `self.comments` when they can be attached to a preceding leaf.
+        """
+        has_value = leaf.value.strip()
+        if not has_value:
+            return
+
+        if self.leaves and not preformatted:
+            # Note: at this point leaf.prefix should be empty except for
+            # imports, for which we only preserve newlines.
+            leaf.prefix += whitespace(leaf)
+        if self.inside_brackets or not preformatted:
+            # `mark()` must run first: trailing comma removal relies on the
+            # `bracket_depth` / `opening_bracket` attributes it sets.
+            self.bracket_tracker.mark(leaf)
+            self.maybe_remove_trailing_comma(leaf)
+            if self.maybe_adapt_standalone_comment(leaf):
+                return
+
+        if not self.append_comment(leaf):
+            self.leaves.append(leaf)
+
+    @property
+    def is_comment(self) -> bool:
+        """True if the line starts with a standalone comment."""
+        return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
+
+    @property
+    def is_decorator(self) -> bool:
+        """True if the line starts with `@`."""
+        return bool(self) and self.leaves[0].type == token.AT
+
+    @property
+    def is_import(self) -> bool:
+        """True if `is_import()` accepts the first leaf of the line."""
+        return bool(self) and is_import(self.leaves[0])
+
+    @property
+    def is_class(self) -> bool:
+        """True if the line starts with the `class` keyword."""
+        return (
+            bool(self) and
+            self.leaves[0].type == token.NAME and
+            self.leaves[0].value == 'class'
+        )
+
+    @property
+    def is_def(self) -> bool:
+        """Also returns True for async defs."""
+        try:
+            first_leaf = self.leaves[0]
+        except IndexError:
+            return False
+
+        try:
+            second_leaf: Optional[Leaf] = self.leaves[1]
+        except IndexError:
+            second_leaf = None
+        return (
+            (first_leaf.type == token.NAME and first_leaf.value == 'def') or
+            (
+                first_leaf.type == token.NAME and
+                first_leaf.value == 'async' and
+                second_leaf is not None and
+                second_leaf.type == token.NAME and
+                second_leaf.value == 'def'
+            )
+        )
+
+    @property
+    def is_flow_control(self) -> bool:
+        """True if the line starts with one of the FLOW_CONTROL keywords."""
+        return (
+            bool(self) and
+            self.leaves[0].type == token.NAME and
+            self.leaves[0].value in FLOW_CONTROL
+        )
+
+    @property
+    def is_yield(self) -> bool:
+        """True if the line starts with the `yield` keyword."""
+        return (
+            bool(self) and
+            self.leaves[0].type == token.NAME and
+            self.leaves[0].value == 'yield'
+        )
+
+    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
+        """Drop the last leaf if it is a comma made redundant by `closing`.
+
+        `closing` is the leaf about to be appended.  Before `]` and `}` the
+        comma is always removed; before `)` only when the bracket pair holds
+        more than one comma at its own depth.  Returns True if a comma was
+        removed.
+        """
+        if not (
+            self.leaves and
+            self.leaves[-1].type == token.COMMA and
+            closing.type in CLOSING_BRACKETS
+        ):
+            return False
+
+        if closing.type == token.RSQB or closing.type == token.RBRACE:
+            self.leaves.pop()
+            return True
+
+        # For parens let's check if it's safe to remove the comma.  If the
+        # trailing one is the only one, we might mistakenly change a tuple
+        # into a different type by removing the comma.
+        depth = closing.bracket_depth + 1  # type: ignore
+        commas = 0
+        opening = closing.opening_bracket  # type: ignore
+        # Locate the opening bracket; `_opening_index` is used after the loop.
+        for _opening_index, leaf in enumerate(self.leaves):
+            if leaf is opening:
+                break
+
+        else:
+            # The opening bracket is not on this line.
+            return False
+
+        for leaf in self.leaves[_opening_index + 1:]:
+            if leaf is closing:
+                break
+
+            bracket_depth = leaf.bracket_depth  # type: ignore
+            if bracket_depth == depth and leaf.type == token.COMMA:
+                commas += 1
+        if commas > 1:
+            self.leaves.pop()
+            return True
+
+        return False
+
+    def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
+        """Hack a standalone comment to act as a trailing comment for line splitting.
+
+        If this line has brackets and a standalone `comment`, we need to adapt
+        it to be able to still reformat the line.
+
+        This is not perfect, the line to which the standalone comment gets
+        appended will appear "too long" when splitting.
+
+        Returns True if `comment` was stored as a trailing comment.
+        """
+        if not (
+            comment.type == STANDALONE_COMMENT and
+            self.bracket_tracker.any_open_brackets()
+        ):
+            return False
+
+        # Retype the leaf and give it a prefix that renders it on a line of
+        # its own, one indentation level deeper than this line.
+        comment.type = token.COMMENT
+        comment.prefix = '\n' + '    ' * (self.depth + 1)
+        return self.append_comment(comment)
+
+    def append_comment(self, comment: Leaf) -> bool:
+        """Attach a COMMENT leaf to the last non-delimiter leaf of the line.
+
+        Returns False for leaves that are not of type COMMENT.  Also returns
+        False when there is no leaf to attach to; the comment is then converted
+        into a STANDALONE_COMMENT with an empty prefix.  Otherwise the comment
+        is stored (or merged into an already stored one) and True is returned.
+        """
+        if comment.type != token.COMMENT:
+            return False
+
+        try:
+            after = id(self.last_non_delimiter())
+        except LookupError:
+            comment.type = STANDALONE_COMMENT
+            comment.prefix = ''
+            return False
+
+        else:
+            if after in self.comments:
+                # Several comments after the same leaf are merged into one.
+                self.comments[after].value += str(comment)
+            else:
+                self.comments[after] = comment
+            return True
+
+    def last_non_delimiter(self) -> Leaf:
+        """Return the last leaf that is not a delimiter, scanning from the end.
+
+        Raises LookupError if the line has no such leaf.
+        """
+        for i in range(len(self.leaves)):
+            last = self.leaves[-i - 1]
+            if not is_delimiter(last):
+                return last
+
+        raise LookupError("No non-delimiters found")
+
+    def __str__(self) -> str:
+        """Render the line with indentation, trailing comments and a newline.
+
+        An empty line renders as a single newline.
+        """
+        if not self:
+            return '\n'
+
+        indent = '    ' * self.depth
+        leaves = iter(self.leaves)
+        first = next(leaves)
+        # The indentation goes between the first leaf's prefix and its value.
+        res = f'{first.prefix}{indent}{first.value}'
+        for leaf in leaves:
+            res += str(leaf)
+        for comment in self.comments.values():
+            res += str(comment)
+        return res + '\n'
+
+    def __bool__(self) -> bool:
+        """A line is truthy if it holds any leaves or comments."""
+        return bool(self.leaves or self.comments)
+
+
+@dataclass
+class EmptyLineTracker:
+    """Provides a stateful method that returns the number of potential extra
+    empty lines needed before and after the currently processed line.
+
+    Note: this tracker works on lines that haven't been split yet.
+    """
+    # The line passed to the previous `maybe_empty_lines()` call, if any.
+    previous_line: Optional[Line] = attrib(default=None)
+    # The `after` count returned by the previous `maybe_empty_lines()` call.
+    previous_after: int = attrib(default=0)
+    # Depths of the `def` / `class` lines that are still open.
+    previous_defs: List[int] = attrib(default=Factory(list))
+
+    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
+        """Returns the number of extra empty lines before and after the `current_line`.
+
+        This is for separating `def`, `async def` and `class` with extra empty lines
+        (two on module-level), as well as providing an extra empty line after flow
+        control keywords to make them more prominent.
+        """
+        before, after = self._maybe_empty_lines(current_line)
+        # Remember the outcome; the next call subtracts `after` from its `before`.
+        self.previous_after = after
+        self.previous_line = current_line
+        return before, after
+
+    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
+        """Compute `(before, after)` for `current_line` without updating history.
+
+        `previous_defs` is still updated here: finished definitions are popped
+        and a new `def` / `class` line pushes its depth.
+        """
+        before = 0
+        depth = current_line.depth
+        # Leaving one or more definitions: one empty line when nested, two at
+        # module level, minus what the previous line already asked for.
+        while self.previous_defs and self.previous_defs[-1] >= depth:
+            self.previous_defs.pop()
+            before = (1 if depth else 2) - self.previous_after
+        is_decorator = current_line.is_decorator
+        if is_decorator or current_line.is_def or current_line.is_class:
+            if not is_decorator:
+                self.previous_defs.append(depth)
+            if self.previous_line is None:
+                # Don't insert empty lines before the first line in the file.
+                return 0, 0
+
+            if self.previous_line and self.previous_line.is_decorator:
+                # Don't insert empty lines between decorators.
+                return 0, 0
+
+            newlines = 2
+            if current_line.depth:
+                newlines -= 1
+            newlines -= self.previous_after
+            return newlines, 0
+
+        if current_line.is_flow_control:
+            return before, 1
+
+        # First non-import line after imports at the same depth.
+        if (
+            self.previous_line and
+            self.previous_line.is_import and
+            not current_line.is_import and
+            depth == self.previous_line.depth
+        ):
+            return (before or 1), 0
+
+        # First line after a `yield` line that is not a `yield` at the same depth.
+        if (
+            self.previous_line and
+            self.previous_line.is_yield and
+            (not current_line.is_yield or depth != self.previous_line.depth)
+        ):
+            return (before or 1), 0
+
+        return before, 0
+
+
+@dataclass
+class LineGenerator(Visitor[Line]):
+    """Generates reformatted Line objects.  Empty lines are not emitted.
+
+    Note: destroys the tree it's visiting by mutating prefixes of its leaves
+    in ways that will no longer stringify to valid Python code on the tree.
+    """
+    # The line currently being filled with leaves.
+    current_line: Line = attrib(default=Factory(Line))
+    # Standalone comments collected from prefixes, waiting to be emitted as
+    # lines of their own before the next non-whitespace leaf.
+    standalone_comments: List[Leaf] = attrib(default=Factory(list))
+
+    def line(self, indent: int = 0) -> Iterator[Line]:
+        """Generate a line.
+
+        If the current line is empty, nothing is emitted; only its depth is
+        adjusted by `indent`.
+
+        Otherwise the current line is yielded as is (no splitting happens
+        here) and a new current_line is set up at the old depth plus `indent`.
+        """
+        if not self.current_line:
+            self.current_line.depth += indent
+            return  # Line is empty, don't emit. Creating a new one unnecessary.
+
+        complete_line = self.current_line
+        self.current_line = Line(depth=complete_line.depth + indent)
+        yield complete_line
+
+    def visit_default(self, node: LN) -> Iterator[Line]:
+        """Append a leaf (and the comments from its prefix) to the current line.
+
+        For nodes that are not leaves this only recurses into the children.
+        """
+        if isinstance(node, Leaf):
+            for comment in generate_comments(node):
+                if self.current_line.bracket_tracker.any_open_brackets():
+                    # any comment within brackets is subject to splitting
+                    self.current_line.append(comment)
+                elif comment.type == token.COMMENT:
+                    # regular trailing comment
+                    self.current_line.append(comment)
+                    yield from self.line()
+
+                else:
+                    # regular standalone comment, to be processed later (see
+                    # docstring in `generate_comments()`)
+                    self.standalone_comments.append(comment)
+            normalize_prefix(node)
+            if node.type not in WHITESPACE:
+                # Flush pending standalone comments, each on its own line,
+                # before the leaf they precede.
+                for comment in self.standalone_comments:
+                    yield from self.line()
+
+                    self.current_line.append(comment)
+                    yield from self.line()
+
+                self.standalone_comments = []
+                self.current_line.append(node)
+        yield from super().visit_default(node)
+
+    def visit_suite(self, node: Node) -> Iterator[Line]:
+        """Body of a statement after a colon."""
+        children = iter(node.children)
+        # Process newline before indenting.  It might contain an inline
+        # comment that should go right after the colon.
+        newline = next(children)
+        yield from self.visit(newline)
+        yield from self.line(+1)
+
+        for child in children:
+            yield from self.visit(child)
+
+        yield from self.line(-1)
+
+    def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
+        """Visit a statement.
+
+        The relevant Python language keywords for this statement are NAME leaves
+        within it.
+
+        Each direct child that is one of `keywords` starts a new line.
+        """
+        for child in node.children:
+            if child.type == token.NAME and child.value in keywords:  # type: ignore
+                yield from self.line()
+
+            yield from self.visit(child)
+
+    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
+        """A statement without nested statements."""
+        # A simple statement directly under a compound statement acts as that
+        # statement's body, so it is emitted one level deeper.
+        is_suite_like = node.parent and node.parent.type in STATEMENT
+        if is_suite_like:
+            yield from self.line(+1)
+            yield from self.visit_default(node)
+            yield from self.line(-1)
+
+        else:
+            yield from self.line()
+            yield from self.visit_default(node)
+
+    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
+        """Visit an `async` statement, keeping `async` on the statement's line."""
+        yield from self.line()
+
+        children = iter(node.children)
+        for child in children:
+            yield from self.visit(child)
+
+            if child.type == token.NAME and child.value == 'async':  # type: ignore
+                break
+
+        # Visit the children of the wrapped statement directly rather than the
+        # statement node itself, bypassing that node's own visit method.
+        internal_stmt = next(children)
+        for child in internal_stmt.children:
+            yield from self.visit(child)
+
+    def visit_decorators(self, node: Node) -> Iterator[Line]:
+        """Visit each child, starting a new line before every one of them."""
+        for child in node.children:
+            yield from self.line()
+            yield from self.visit(child)
+
+    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
+        """End the current line; the semicolon leaf itself is not appended."""
+        yield from self.line()
+
+    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
+        """Process the end marker's prefix, then emit the final pending line."""
+        yield from self.visit_default(leaf)
+        yield from self.line()
+
+    def __attrs_post_init__(self) -> None:
+        """You are in a twisty little maze of passages.
+
+        Binds the per-statement `visit_*` handlers as instance attributes:
+        partials of `visit_stmt()` with the keywords of each statement type,
+        plus aliases for async function definitions and decorated definitions.
+        """
+        v = self.visit_stmt
+        self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
+        self.visit_while_stmt = partial(v, keywords={'while', 'else'})
+        self.visit_for_stmt = partial(v, keywords={'for', 'else'})
+        self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
+        self.visit_except_clause = partial(v, keywords={'except'})
+        self.visit_funcdef = partial(v, keywords={'def'})
+        self.visit_with_stmt = partial(v, keywords={'with'})
+        self.visit_classdef = partial(v, keywords={'class'})
+        self.visit_async_funcdef = self.visit_async_stmt
+        self.visit_decorated = self.visit_decorators
+
+
+# Maps each opening bracket token type to its closing counterpart.
+BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
+OPENING_BRACKETS = set(BRACKET.keys())
+CLOSING_BRACKETS = set(BRACKET.values())
+BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
+
+
+def whitespace(leaf: Leaf) -> str:
+    """Return whitespace prefix if needed for the given `leaf`.
+
+    The result is one of: no whitespace, a single space, two spaces (before
+    trailing comments) or, for the value after `=` in a typed signature, the
+    prefix already given to that `=`.  The decision is based on the leaf's
+    type, the type of its parent and the leaf or sibling preceding it.
+    """
+    NO = ''
+    SPACE = ' '
+    DOUBLESPACE = '  '
+    t = leaf.type
+    p = leaf.parent
+    # Decisions that don't depend on the parent.
+    if t == token.COLON:
+        return NO
+
+    if t == token.COMMA:
+        return NO
+
+    if t == token.RPAR:
+        return NO
+
+    if t == token.COMMENT:
+        return DOUBLESPACE
+
+    if t == STANDALONE_COMMENT:
+        return NO
+
+    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
+    if p.type in {syms.parameters, syms.arglist}:
+        # untyped function signatures or calls
+        # NOTE(review): this RPAR check cannot trigger, RPAR returned above.
+        if t == token.RPAR:
+            return NO
+
+        prev = leaf.prev_sibling
+        if not prev or prev.type != token.COMMA:
+            return NO
+
+    # NOTE(review): this starts a new `if` chain rather than continuing the one
+    # above; the parent types tested are disjoint from the ones above.
+    if p.type == syms.varargslist:
+        # lambdas
+        # NOTE(review): this RPAR check cannot trigger, RPAR returned above.
+        if t == token.RPAR:
+            return NO
+
+        prev = leaf.prev_sibling
+        if prev and prev.type != token.COMMA:
+            return NO
+
+    elif p.type == syms.typedargslist:
+        # typed function signatures
+        prev = leaf.prev_sibling
+        if not prev:
+            return NO
+
+        if t == token.EQUAL:
+            if prev.type != syms.tname:
+                return NO
+
+        elif prev.type == token.EQUAL:
+            # A bit hacky: if the equal sign has whitespace, it means we
+            # previously found it's a typed argument.  So, we're using that, too.
+            return prev.prefix
+
+        elif prev.type != token.COMMA:
+            return NO
+
+    elif p.type == syms.tname:
+        # type names
+        prev = leaf.prev_sibling
+        if not prev:
+            prevp = preceding_leaf(p)
+            if not prevp or prevp.type != token.COMMA:
+                return NO
+
+    elif p.type == syms.trailer:
+        # attributes and calls
+        # NOTE(review): only the LPAR half can trigger here, RPAR returned above.
+        if t == token.LPAR or t == token.RPAR:
+            return NO
+
+        prev = leaf.prev_sibling
+        if not prev:
+            if t == token.DOT:
+                # Keep a space only when the dot directly follows a number.
+                prevp = preceding_leaf(p)
+                if not prevp or prevp.type != token.NUMBER:
+                    return NO
+
+            elif t == token.LSQB:
+                return NO
+
+        elif prev.type != token.COMMA:
+            return NO
+
+    elif p.type == syms.argument:
+        # single argument
+        if t == token.EQUAL:
+            return NO
+
+        prev = leaf.prev_sibling
+        if not prev:
+            prevp = preceding_leaf(p)
+            if not prevp or prevp.type == token.LPAR:
+                return NO
+
+        elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
+            return NO
+
+    elif p.type == syms.decorator:
+        # decorators
+        return NO
+
+    elif p.type == syms.dotted_name:
+        # Only the first leaf of a dotted name can get a space.
+        prev = leaf.prev_sibling
+        if prev:
+            return NO
+
+        prevp = preceding_leaf(p)
+        if not prevp or prevp.type == token.AT:
+            return NO
+
+    elif p.type == syms.classdef:
+        if t == token.LPAR:
+            return NO
+
+        prev = leaf.prev_sibling
+        if prev and prev.type == token.LPAR:
+            return NO
+
+    elif p.type == syms.subscript:
+        # indexing
+        # NOTE(review): this COLON check cannot trigger, COLON returned above.
+        if t == token.COLON:
+            return NO
+
+        prev = leaf.prev_sibling
+        if not prev or prev.type == token.COLON:
+            return NO
+
+    elif p.type in {
+        syms.test,
+        syms.not_test,
+        syms.xor_expr,
+        syms.or_test,
+        syms.and_test,
+        syms.arith_expr,
+        syms.shift_expr,
+        syms.yield_expr,
+        syms.term,
+        syms.power,
+        syms.comparison,
+    }:
+        # various arithmetic and logic expressions
+        prev = leaf.prev_sibling
+        if not prev:
+            prevp = preceding_leaf(p)
+            if not prevp or prevp.type in OPENING_BRACKETS:
+                return NO
+
+            if prevp.type == token.EQUAL:
+                # No space after `=` in keyword arguments and defaults.
+                if prevp.parent and prevp.parent.type in {
+                    syms.varargslist, syms.parameters, syms.arglist, syms.argument
+                }:
+                    return NO
+
+        return SPACE
+
+    elif p.type == syms.atom:
+        if t in CLOSING_BRACKETS:
+            return NO
+
+        prev = leaf.prev_sibling
+        if not prev:
+            prevp = preceding_leaf(p)
+            if not prevp:
+                return NO
+
+            if prevp.type in OPENING_BRACKETS:
+                return NO
+
+            if prevp.type == token.EQUAL:
+                if prevp.parent and prevp.parent.type in {
+                    syms.varargslist, syms.parameters, syms.arglist, syms.argument
+                }:
+                    return NO
+
+            if prevp.type == token.DOUBLESTAR:
+                if prevp.parent and prevp.parent.type in {
+                    syms.varargslist, syms.parameters, syms.arglist, syms.dictsetmaker
+                }:
+                    return NO
+
+        elif prev.type in OPENING_BRACKETS:
+            return NO
+
+        elif t == token.DOT:
+            # dots, but not the first one.
+            return NO
+
+    elif (
+        p.type == syms.listmaker or
+        p.type == syms.testlist_gexp or
+        p.type == syms.subscriptlist
+    ):
+        # list interior, including unpacking
+        prev = leaf.prev_sibling
+        if not prev:
+            return NO
+
+    elif p.type == syms.dictsetmaker:
+        # dict and set interior, including unpacking
+        prev = leaf.prev_sibling
+        if not prev:
+            return NO
+
+        if prev.type == token.DOUBLESTAR:
+            return NO
+
+    elif p.type == syms.factor or p.type == syms.star_expr:
+        # unary ops
+        prev = leaf.prev_sibling
+        if not prev:
+            prevp = preceding_leaf(p)
+            if not prevp or prevp.type in OPENING_BRACKETS:
+                return NO
+
+            prevp_parent = prevp.parent
+            assert prevp_parent is not None
+            if prevp.type == token.COLON and prevp_parent.type in {
+                syms.subscript, syms.sliceop
+            }:
+                return NO
+
+            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
+                return NO
+
+        elif t == token.NAME or t == token.NUMBER:
+            # The operand right after the unary operator.
+            return NO
+
+    elif p.type == syms.import_from and t == token.NAME:
+        prev = leaf.prev_sibling
+        if prev and prev.type == token.DOT:
+            return NO
+
+    elif p.type == syms.sliceop:
+        return NO
+
+    # Anything not ruled out above is separated by a single space.
+    return SPACE
+
+
+def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
+    """Returns the leaf that directly precedes `node` in the tree, if any.
+
+    Walks up through the parents until one of them has a previous sibling,
+    then returns that sibling (if it is a leaf) or its last leaf.
+    """
+    while node:
+        sibling = node.prev_sibling
+        if not sibling:
+            # Nothing to the left on this level; look one level up.
+            node = node.parent
+            continue
+
+        if isinstance(sibling, Leaf):
+            return sibling
+
+        sibling_leaves = list(sibling.leaves())
+        return sibling_leaves[-1] if sibling_leaves else None
+
+    return None
+
+
+def is_delimiter(leaf: Leaf) -> int:
+    """Returns the delimiter priority of `leaf`, or 0 if it is not a delimiter.
+
+    Higher numbers are higher priority.  Math operators whose parent is a
+    `factor` or `star_expr` node (or that have no parent) don't count.
+    """
+    kind = leaf.type
+    if kind == token.COMMA:
+        return COMMA_PRIORITY
+
+    if kind == token.NAME and leaf.value in LOGIC_OPERATORS:
+        return LOGIC_PRIORITY
+
+    if kind in COMPARATORS:
+        return COMPARATOR_PRIORITY
+
+    if kind in MATH_OPERATORS:
+        parent = leaf.parent
+        if parent and parent.type not in {syms.factor, syms.star_expr}:
+            return MATH_PRIORITY
+
+    return 0
+
+
+def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
+    """Generates comment leaves from the prefix of `leaf`, if it holds any.
+
+    lib2to3 keeps comments inside the whitespace prefix of the following token
+    (see `pgen2/driver.py:Driver.parse_tokens()`), so the grammar does not
+    have to list every place a comment may appear.  The downside is that
+    comments don't "belong" to any node, which is why the leaves generated
+    here are parentless.
+
+    A comment that shares its line with code is yielded as a single
+    token.COMMENT leaf.  Otherwise (a newline before or inside the comment
+    text, or `leaf` being a DEDENT) every line that starts with `#` is yielded
+    as a separate leaf of the fake STANDALONE_COMMENT type.
+
+    A space is inserted after the first `#` unless it is already followed by
+    a space, `!` or another `#`.  The prefix itself is left untouched here.
+    """
+    prefix = leaf.prefix
+    if not prefix or '#' not in prefix:
+        return
+
+    leading, _, text = prefix.partition('#')
+    text = text.rstrip()
+    if text and text[0] not in {' ', '!', '#'}:
+        text = ' ' + text
+    standalone = '\n' in leading or '\n' in text or leaf.type == token.DEDENT
+    if standalone:
+        for raw_line in ('#' + text).split('\n'):
+            stripped = raw_line.lstrip()
+            if stripped.startswith('#'):
+                yield Leaf(STANDALONE_COMMENT, stripped)
+    else:
+        # simple trailing comment
+        yield Leaf(token.COMMENT, value='#' + text)
+
+
+def split_line(line: Line, line_length: int, inner: bool = False) -> Iterator[Line]:
+    """Splits a `line` into potentially many lines.
+
+    They should fit in the allotted `line_length` but might not be able to.
+
+    The strategies tried depend on the line: function definitions use the
+    left-hand split; lines inside brackets use the delimiter split and then,
+    unless the rendered line spans several physical lines, the right-hand
+    split; everything else uses the right-hand split.  Every line produced by
+    a strategy is split recursively.  If all strategies raise CannotSplit, the
+    original `line` is yielded unchanged.
+
+    `inner` signifies that there was a pair of brackets somewhere around the
+    current `line`, possibly transitively.  It is set on recursive calls but is
+    currently not consulted; `line.inside_brackets` drives the choice instead.
+    """
+    line_str = str(line).strip('\n')
+    if len(line_str) <= line_length and '\n' not in line_str:
+        yield line
+        return
+
+    if line.is_def:
+        split_funcs = [left_hand_split]
+    elif line.inside_brackets:
+        split_funcs = [delimiter_split]
+        if '\n' not in line_str:
+            # Only attempt RHS if we don't have multiline strings or comments
+            # on this line.
+            split_funcs.append(right_hand_split)
+    else:
+        split_funcs = [right_hand_split]
+    for split_func in split_funcs:
+        # We are accumulating lines in `result` because we might want to abort
+        # mission and return the original line in the end, or attempt a different
+        # split altogether.
+        result: List[Line] = []
+        try:
+            for split in split_func(line):
+                if str(split).strip('\n') == line_str:
+                    raise CannotSplit("Split function returned an unchanged result")
+
+                result.extend(split_line(split, line_length=line_length, inner=True))
+        except CannotSplit:
+            # This strategy failed; discard partial results, try the next one.
+            continue
+
+        else:
+            yield from result
+            break
+
+    else:
+        yield line
+
+
+def left_hand_split(line: Line) -> Iterator[Line]:
+    """Split line into many lines, starting with the first matching bracket pair.
+
+    Yields up to three lines: the head (everything up to and including the
+    first opening bracket), the body (the bracket contents, one level deeper)
+    and the tail (the matching closing bracket and the rest).  Empty parts are
+    not yielded.
+
+    Raises CannotSplit when the body is empty and the tail is empty or shorter
+    than three characters.
+
+    Note: this usually looks weird, only use this for function definitions.
+    Prefer RHS otherwise.
+    """
+    head = Line(depth=line.depth)
+    body = Line(depth=line.depth + 1, inside_brackets=True)
+    tail = Line(depth=line.depth)
+    tail_leaves: List[Leaf] = []
+    body_leaves: List[Leaf] = []
+    head_leaves: List[Leaf] = []
+    current_leaves = head_leaves
+    matching_bracket = None
+    for leaf in line.leaves:
+        if (
+            current_leaves is body_leaves and
+            leaf.type in CLOSING_BRACKETS and
+            leaf.opening_bracket is matching_bracket  # type: ignore
+        ):
+            # Switch before appending so the closing bracket lands in the tail.
+            current_leaves = tail_leaves
+        current_leaves.append(leaf)
+        if current_leaves is head_leaves:
+            # Switch after appending so the opening bracket stays in the head.
+            if leaf.type in OPENING_BRACKETS:
+                matching_bracket = leaf
+                current_leaves = body_leaves
+    # Since body is a new indent level, remove spurious leading whitespace.
+    if body_leaves:
+        normalize_prefix(body_leaves[0])
+    # Build the new lines.
+    for result, leaves in (
+        (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
+    ):
+        for leaf in leaves:
+            result.append(leaf, preformatted=True)
+            comment_after = line.comments.get(id(leaf))
+            if comment_after:
+                result.append(comment_after, preformatted=True)
+    # Check if the split succeeded.
+    # `str(tail)` always ends with a newline (an empty line renders as '\n'),
+    # so strip it before measuring, exactly as `right_hand_split()` does.
+    # Otherwise an empty tail is never detected and the threshold is off by one.
+    tail_len = len(str(tail).strip('\n'))
+    if not body:
+        if tail_len == 0:
+            raise CannotSplit("Splitting brackets produced the same line")
+
+        elif tail_len < 3:
+            raise CannotSplit(
+                f"Splitting brackets on an empty body to save "
+                f"{tail_len} characters is not worth it"
+            )
+
+    for result in (head, body, tail):
+        if result:
+            yield result
+
+
+def right_hand_split(line: Line) -> Iterator[Line]:
+    """Split line into many lines, starting with the last matching bracket pair.
+
+    Yields up to three lines: the head (everything up to and including the
+    opening bracket), the body (the bracket contents, one level deeper) and
+    the tail (the last closing bracket and whatever follows it).  Empty parts
+    are not yielded.
+
+    Raises CannotSplit when the body is empty and the tail is empty or shorter
+    than three characters.
+    """
+    head = Line(depth=line.depth)
+    body = Line(depth=line.depth + 1, inside_brackets=True)
+    tail = Line(depth=line.depth)
+    tail_leaves: List[Leaf] = []
+    body_leaves: List[Leaf] = []
+    head_leaves: List[Leaf] = []
+    current_leaves = tail_leaves
+    opening_bracket = None
+    # Scan from the end: tail first, then body, then head.
+    for leaf in reversed(line.leaves):
+        if current_leaves is body_leaves:
+            # Switch before appending so the opening bracket lands in the head.
+            if leaf is opening_bracket:
+                current_leaves = head_leaves
+        current_leaves.append(leaf)
+        if current_leaves is tail_leaves:
+            # Switch after appending so the closing bracket stays in the tail.
+            if leaf.type in CLOSING_BRACKETS:
+                opening_bracket = leaf.opening_bracket  # type: ignore
+                current_leaves = body_leaves
+    # The lists were filled back to front; restore source order.
+    tail_leaves.reverse()
+    body_leaves.reverse()
+    head_leaves.reverse()
+    # Since body is a new indent level, remove spurious leading whitespace.
+    if body_leaves:
+        normalize_prefix(body_leaves[0])
+    # Build the new lines.
+    for result, leaves in (
+        (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
+    ):
+        for leaf in leaves:
+            result.append(leaf, preformatted=True)
+            comment_after = line.comments.get(id(leaf))
+            if comment_after:
+                result.append(comment_after, preformatted=True)
+    # Check if the split succeeded.
+    # The trailing newline is stripped; the line's indentation still counts.
+    tail_len = len(str(tail).strip('\n'))
+    if not body:
+        if tail_len == 0:
+            raise CannotSplit("Splitting brackets produced the same line")
+
+        elif tail_len < 3:
+            raise CannotSplit(
+                f"Splitting brackets on an empty body to save "
+                f"{tail_len} characters is not worth it"
+            )
+
+    for result in (head, body, tail):
+        if result:
+            yield result
+
+
+def delimiter_split(line: Line) -> Iterator[Line]:
+    """Break `line` after every delimiter of the highest priority found in it.
+
+    The produced lines keep the depth of the original one, so indentation is
+    not increased.  When the split happens on commas and the last produced
+    line does not end with one, a trailing comma is appended to it.
+
+    Raises CannotSplit when the line has no leaves or when `max_priority()`
+    reports (through ValueError) that there is no delimiter to split on.
+    """
+    try:
+        final_leaf = line.leaves[-1]
+    except IndexError:
+        raise CannotSplit("Line empty")
+
+    tracker = line.bracket_tracker
+    priorities = tracker.delimiters
+    try:
+        # The last leaf is excluded when looking for the priority to split on.
+        split_priority = tracker.max_priority(exclude={id(final_leaf)})
+    except ValueError:
+        raise CannotSplit("No delimiters found")
+
+    def fresh_line() -> Line:
+        return Line(depth=line.depth, inside_brackets=line.inside_brackets)
+
+    chunk = fresh_line()
+    for leaf in line.leaves:
+        chunk.append(leaf, preformatted=True)
+        trailing_comment = line.comments.get(id(leaf))
+        if trailing_comment:
+            chunk.append(trailing_comment, preformatted=True)
+        if priorities.get(id(leaf)) == split_priority:
+            normalize_prefix(chunk.leaves[0])
+            yield chunk
+
+            chunk = fresh_line()
+    if chunk:
+        needs_comma = (
+            split_priority == COMMA_PRIORITY and
+            chunk.leaves[-1].type != token.COMMA
+        )
+        if needs_comma:
+            chunk.append(Leaf(token.COMMA, ','))
+        normalize_prefix(chunk.leaves[0])
+        yield chunk
+
+
+def is_import(leaf: Leaf) -> bool:
+    """Tell whether `leaf` is the keyword that opens an import statement.
+
+    That is either a NAME leaf spelled `import` whose parent is an
+    `import_name` node, or a NAME leaf spelled `from` whose parent is an
+    `import_from` node.
+    """
+    if leaf.type != token.NAME:
+        return False
+
+    parent = leaf.parent
+    if leaf.value == 'import':
+        return bool(parent and parent.type == syms.import_name)
+
+    if leaf.value == 'from':
+        return bool(parent and parent.type == syms.import_from)
+
+    return False
+
+
+def normalize_prefix(leaf: Leaf) -> None:
+    """Reset the prefix of `leaf`, keeping only blank lines before imports.
+
+    For a leaf that starts an import statement the new prefix consists of the
+    newlines found before the first `#` of the old prefix.  Any other leaf
+    gets an empty prefix.
+    """
+    if not is_import(leaf):
+        leaf.prefix = ''
+        return
+
+    before_comment, hash_sign, _ = leaf.prefix.partition('#')
+    newlines = before_comment.count('\n')
+    if hash_sign:
+        # Skip one newline since it was for a standalone comment.
+        newlines -= 1
+    leaf.prefix = '\n' * newlines
+
+
+# File suffixes that gen_python_files_in_dir() treats as Python sources.
+PYTHON_EXTENSIONS = {'.py'}
+# Directory names that gen_python_files_in_dir() never descends into.
+BLACKLISTED_DIRECTORIES = {
+    'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
+}
+
+
+def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
+    """Recursively yield the Python files found below the directory `path`.
+
+    Entries that are not directories are yielded when their suffix is listed
+    in PYTHON_EXTENSIONS.  Directories named in BLACKLISTED_DIRECTORIES are
+    not visited.
+    """
+    for entry in path.iterdir():
+        if not entry.is_dir():
+            if entry.suffix in PYTHON_EXTENSIONS:
+                yield entry
+
+            continue
+
+        if entry.name not in BLACKLISTED_DIRECTORIES:
+            yield from gen_python_files_in_dir(entry)
+
+
+@dataclass
+class Report:
+    """Keeps a tally of reformatting outcomes and renders a summary of them."""
+    change_count: int = attrib(default=0)
+    same_count: int = attrib(default=0)
+    failure_count: int = attrib(default=0)
+
+    def done(self, src: Path, changed: bool) -> None:
+        """Count `src` as processed successfully and print what happened to it."""
+        if not changed:
+            out(f'{src} already well formatted, good job.', bold=False)
+            self.same_count += 1
+            return
+
+        out(f'reformatted {src}')
+        self.change_count += 1
+
+    def failed(self, src: Path, message: str) -> None:
+        """Count `src` as a failure and print `message` as the reason."""
+        err(f'error: cannot format {src}: {message}')
+        self.failure_count += 1
+
+    @property
+    def return_code(self) -> int:
+        """Exit status for the app: 1 if anything failed so far, else 0."""
+        if self.failure_count:
+            return 1
+
+        return 0
+
+    def __str__(self) -> str:
+        """Render the counters as a colored, comma-separated summary.
+
+        Counters that are zero are left out.  Use `click.unstyle` to remove
+        colors.
+        """
+
+        def plural(count: int) -> str:
+            return 's' if count > 1 else ''
+
+        changed = self.change_count
+        same = self.same_count
+        failures = self.failure_count
+        parts = []
+        if changed:
+            parts.append(
+                click.style(f'{changed} file{plural(changed)} reformatted', bold=True)
+            )
+        if same:
+            parts.append(f'{same} file{plural(same)} left unchanged')
+        if failures:
+            parts.append(
+                click.style(
+                    f'{failures} file{plural(failures)} failed to reformat', fg='red'
+                )
+            )
+        return ', '.join(parts) + '.'
+
+
+def assert_equivalent(src: str, dst: str) -> None:
+    """Raises AssertionError if `src` and `dst` aren't equivalent.
+
+    Both strings are parsed with `ast` and the resulting trees are compared
+    through a textual dump that lists every field of every node, including
+    plain values stored in list fields.  On a mismatch, or when `dst` does not
+    parse, details are written to a temporary file whose path is included in
+    the error message.
+
+    This is a temporary sanity check until Black becomes stable.
+    """
+
+    import ast
+    import traceback
+
+    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
+        """Simple visitor generating strings to compare ASTs by content."""
+        yield f"{'  ' * depth}{node.__class__.__name__}("
+
+        for field in sorted(node._fields):
+            try:
+                value = getattr(node, field)
+            except AttributeError:
+                # Fields listed in `_fields` but absent on the node are skipped.
+                continue
+
+            yield f"{'  ' * (depth+1)}{field}="
+
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, ast.AST):
+                        yield from _v(item, depth + 2)
+
+                    else:
+                        # Lists may also hold plain values (for example the
+                        # names of a `global` statement).  Dump them as well so
+                        # that a difference there does not go unnoticed.
+                        yield (
+                            f"{'  ' * (depth+2)}{item!r},  "
+                            f"# {item.__class__.__name__}"
+                        )
+
+            elif isinstance(value, ast.AST):
+                yield from _v(value, depth + 2)
+
+            else:
+                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
+
+        yield f"{'  ' * depth})  # /{node.__class__.__name__}"
+
+    try:
+        src_ast = ast.parse(src)
+    except Exception as exc:
+        raise AssertionError(f"cannot parse source: {exc}") from None
+
+    try:
+        dst_ast = ast.parse(dst)
+    except Exception as exc:
+        # Keep both the traceback and the offending output for the bug report.
+        log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
+        raise AssertionError(
+            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
+            f"Please report a bug on https://github.com/ambv/black/issues.  "
+            f"This invalid output might be helpful: {log}",
+        ) from None
+
+    src_ast_str = '\n'.join(_v(src_ast))
+    dst_ast_str = '\n'.join(_v(dst_ast))
+    if src_ast_str != dst_ast_str:
+        log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
+        raise AssertionError(
+            f"INTERNAL ERROR: Black produced code that is not equivalent to "
+            f"the source.  "
+            f"Please report a bug on https://github.com/ambv/black/issues.  "
+            f"This diff might be helpful: {log}",
+        ) from None
+
+
+def assert_stable(src: str, dst: str, line_length: int) -> None:
+    """Raises AssertionError if formatting `dst` again changes it.
+
+    `dst` is run through `format_str()` with the given `line_length`.  If the
+    result differs from `dst`, the diffs source -> first pass and first pass ->
+    second pass are dumped to a temporary file named in the error message.
+
+    This is a temporary sanity check until Black becomes stable.
+    """
+    second_pass = format_str(dst, line_length=line_length)
+    if second_pass == dst:
+        return
+
+    log = dump_to_file(
+        diff(src, dst, 'source', 'first pass'),
+        diff(dst, second_pass, 'first pass', 'second pass'),
+    )
+    raise AssertionError(
+        f"INTERNAL ERROR: Black produced different code on the second pass "
+        f"of the formatter.  "
+        f"Please report a bug on https://github.com/ambv/black/issues.  "
+        f"This diff might be helpful: {log}",
+    ) from None
+
+
+def dump_to_file(*output: str) -> str:
+    """Write each item of `output`, newline-terminated, to a new temporary file.
+
+    The file is named `blk_*.log`, is not deleted on close, and its path is
+    returned.
+    """
+    from tempfile import NamedTemporaryFile
+
+    log = NamedTemporaryFile(mode='w', prefix='blk_', suffix='.log', delete=False)
+    with log:
+        for chunk in output:
+            log.write(chunk + '\n')
+    return log.name
+
+
+def diff(a: str, b: str, a_name: str, b_name: str) -> str:
+    """Return a unified diff, with five lines of context, between `a` and `b`.
+
+    `a_name` and `b_name` label the two sides in the diff header.
+    """
+    from difflib import unified_diff
+
+    # Split on newlines and terminate every piece again, the last one included.
+    old, new = ([f'{row}\n' for row in text.split('\n')] for text in (a, b))
+    hunks = unified_diff(old, new, fromfile=a_name, tofile=b_name, n=5)
+    return ''.join(hunks)
+
+
+# Call main() only when this file is executed directly, not when imported.
+if __name__ == '__main__':
+    main()
diff --git a/blib2to3/Grammar.txt b/blib2to3/Grammar.txt

new file mode 100644 (file)

index 0000000..b19b4a2
--- /dev/null
+++ b/blib2to3/Grammar.txt
@@ -0,0 +1,173 @@
+# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
+
+# NOTE WELL: You should also follow all the steps listed at
+# https://devguide.python.org/grammar/
+
+# Start symbols for the grammar:
+#      file_input is a module or sequence of commands read from an input file;
+#      single_input is a single interactive statement;
+#      eval_input is the input for the eval() and input() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef)
+async_funcdef: 'async' funcdef
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+                ('*' [tname] (',' tname ['=' test])* [',' ['**' tname [',']]] | '**' tname [','])
+                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tname: NAME [':' test]
+tfpdef: tname | '(' tfplist ')'
+tfplist: tfpdef (',' tfpdef)* [',']
+varargslist: ((vfpdef ['=' test] ',')*
+              ('*' [vname] (',' vname ['=' test])*  [',' ['**' vname [',']]] | '**' vname [','])
+              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vname: NAME
+vfpdef: vname | '(' vfplist ')'
+vfplist: vfpdef (',' vfpdef)* [',']
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | exec_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+annassign: ':' test ['=' test]
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal and annotated assignments (expr_stmt and annassign above),
+# additional restrictions are enforced by the interpreter.
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+                      '>>' test [ (',' test)+ [','] ] )
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+import_from: ('from' ('.'* dotted_name | '.'+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+async_stmt: 'async' (funcdef | with_stmt | for_stmt)
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+           ['else' ':' suite]
+           ['finally' ':' suite] |
+          'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+with_var: 'as' expr
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [(',' | 'as') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+# Backward compatibility cruft to support:
+# [ x for x in lambda: True, lambda: False if x() ]
+# even while also allowing:
+# lambda x: 5 if x else 2
+# (But not a mix of the two)
+testlist_safe: old_test [(',' old_test)+ [',']]
+old_test: or_test | old_lambdef
+old_lambdef: 'lambda' [varargslist] ':' old_test
+
+test: or_test ['if' or_test 'else' test] | lambdef
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: ['await'] atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_gexp] ')' |
+       '[' [listmaker] ']' |
+       '{' [dictsetmaker] '}' |
+       '`' testlist1 '`' |
+       NAME | NUMBER | STRING+ | '.' '.' '.')
+listmaker: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] )
+testlist_gexp: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] )
+lambdef: 'lambda' [varargslist] ':' test
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictsetmaker: ( ((test ':' test | '**' expr)
+                 (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                ((test | star_expr)
+                (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)* [',']
+
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+            test '=' test |
+           '**' expr |
+           star_expr )
+
+comp_iter: comp_for | comp_if
+comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' old_test [comp_iter]
+
+# As noted above, testlist_safe extends the syntax allowed in list
+# comprehensions and generators. We can't use it indiscriminately in all
+# derivations using a comp_for-like pattern because the testlist_safe derivation
+# contains a comma, which clashes with the trailing comma in arglist.
+#
+# This was an issue because the parser would not follow the correct derivation
+# when parsing syntactically valid Python code. Since testlist_safe was created
+# specifically to handle list comprehensions and generator expressions enclosed
+# with parentheses, it's safe to only use it in those. That avoids the issue; we
+# can parse code like set(x for x in [],).
+#
+# The syntax supported by this set of rules is not valid Python 3 syntax,
+# hence the prefix "old".
+#
+# See https://bugs.python.org/issue27494
+old_comp_iter: old_comp_for | old_comp_if
+old_comp_for: ['async'] 'for' exprlist 'in' testlist_safe [old_comp_iter]
+old_comp_if: 'if' old_test [old_comp_iter]
+
+testlist1: test (',' test)*
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
diff --git a/blib2to3/Grammar3.6.4.final.0.pickle b/blib2to3/Grammar3.6.4.final.0.pickle

new file mode 100644 (file)

index 0000000..da22814

Binary files /dev/null and b/blib2to3/Grammar3.6.4.final.0.pickle differ
diff --git a/blib2to3/PatternGrammar.txt b/blib2to3/PatternGrammar.txt

new file mode 100644 (file)

index 0000000..36bf814
--- /dev/null
+++ b/blib2to3/PatternGrammar.txt
@@ -0,0 +1,28 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# A grammar to describe tree matching patterns.
+# Not shown here:
+# - 'TOKEN' stands for any token (leaf node)
+# - 'any' stands for any node (leaf or interior)
+# With 'any' we can still specify the sub-structure.
+
+# The start symbol is 'Matcher'.
+
+Matcher: Alternatives ENDMARKER
+
+Alternatives: Alternative ('|' Alternative)*
+
+Alternative: (Unit | NegatedUnit)+
+
+Unit: [NAME '='] ( STRING [Repeater]
+                 | NAME [Details] [Repeater]
+                 | '(' Alternatives ')' [Repeater]
+                 | '[' Alternatives ']'
+                )
+
+NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')')
+
+Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}'
+
+Details: '<' Alternatives '>'
diff --git a/blib2to3/PatternGrammar3.6.4.final.0.pickle b/blib2to3/PatternGrammar3.6.4.final.0.pickle

new file mode 100644 (file)

index 0000000..e027504

Binary files /dev/null and b/blib2to3/PatternGrammar3.6.4.final.0.pickle differ
diff --git a/blib2to3/README b/blib2to3/README

new file mode 100644 (file)

index 0000000..2c12c62
--- /dev/null
+++ b/blib2to3/README
@@ -0,0 +1,7 @@
+A subset of lib2to3 taken from Python 3.7.0b2.
+Commit hash: 9c17e3a1987004b8bcfbe423953aad84493a7984
+
+Reasons for forking:
+- consistent handling of f-strings for users of Python < 3.6.2
+- better ability to debug
+- ability to Cythonize
diff --git a/blib2to3/__init__.py b/blib2to3/__init__.py

new file mode 100644 (file)

index 0000000..ea30561
--- /dev/null
+++ b/blib2to3/__init__.py
@@ -0,0 +1 @@
+#empty
diff --git a/blib2to3/__init__.pyi b/blib2to3/__init__.pyi

new file mode 100644 (file)

index 0000000..145e31b
--- /dev/null
+++ b/blib2to3/__init__.pyi
@@ -0,0 +1 @@
+# Stubs for lib2to3 (Python 3.6)
diff --git a/blib2to3/pgen2/__init__.py b/blib2to3/pgen2/__init__.py

new file mode 100644 (file)

index 0000000..af39048
--- /dev/null
+++ b/blib2to3/pgen2/__init__.py
@@ -0,0 +1,4 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""The pgen2 package."""
diff --git a/blib2to3/pgen2/__init__.pyi b/blib2to3/pgen2/__init__.pyi

new file mode 100644 (file)

index 0000000..1adc82a
--- /dev/null
+++ b/blib2to3/pgen2/__init__.pyi
@@ -0,0 +1,10 @@
+# Stubs for lib2to3.pgen2 (Python 3.6)
+
+import os
+import sys
+from typing import Text, Union
+
+# Path-like alias: os.PathLike is accepted only on Python 3.6 and newer,
+# older versions are limited to text paths.
+if sys.version_info >= (3, 6):
+    _Path = Union[Text, os.PathLike]
+else:
+    _Path = Text
diff --git a/blib2to3/pgen2/conv.py b/blib2to3/pgen2/conv.py

new file mode 100644 (file)

index 0000000..ed0cac5
--- /dev/null
+++ b/blib2to3/pgen2/conv.py
@@ -0,0 +1,257 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Convert graminit.[ch] spit out by pgen to Python code.
+
+Pgen is the Python parser generator.  It is useful to quickly create a
+parser from a grammar file in Python's grammar notation.  But I don't
+want my parsers to be written in C (yet), so I'm translating the
+parsing tables to Python data structures and writing a Python parse
+engine.
+
+Note that the token numbers are constants determined by the standard
+Python tokenizer.  The standard token module defines these numbers and
+their names (the names are not used much).  The token numbers are
+hardcoded into the Python tokenizer and into pgen.  A Python
+implementation of the Python tokenizer is also available, in the
+standard tokenize module.
+
+On the other hand, symbol numbers (representing the grammar's
+non-terminals) are assigned by pgen based on the actual grammar
+input.
+
+Note: this module is pretty much obsolete; the pgen module generates
+equivalent grammar tables directly from the Grammar.txt input file
+without having to invoke the Python pgen C program.
+
+"""
+
+# Python imports
+import re
+
+# Local imports
+from pgen2 import grammar, token
+
+
+class Converter(grammar.Grammar):
+    """Grammar subclass that reads classic pgen output files.
+
+    The run() method reads the tables as produced by the pgen parser
+    generator, typically contained in two C files, graminit.h and
+    graminit.c.  The other methods are for internal use only.
+
+    See the base class for more documentation.
+
+    """
+
+    def run(self, graminit_h, graminit_c):
+        """Load the grammar tables from the text files written by pgen.
+
+        `graminit_h` and `graminit_c` are the paths of the two files.  The
+        header is parsed first, then the C file, then finish_off() builds
+        the derived tables.  The values returned by the two parsing steps
+        are not checked here.
+        """
+        self.parse_graminit_h(graminit_h)
+        self.parse_graminit_c(graminit_c)
+        self.finish_off()
+
+    def parse_graminit_h(self, filename):
+        """Parse the .h file written by pgen.  (Internal)
+
+        This file is a sequence of #define statements defining the
+        nonterminals of the grammar as numbers.  We build two tables
+        mapping the numbers to names and back.
+
+        Blank lines are skipped silently; any other line that is not a
+        #define is reported and skipped.  Returns False when the file
+        cannot be opened and True otherwise.
+
+        """
+        try:
+            f = open(filename)
+        except OSError as err:
+            print("Can't open %s: %s" % (filename, err))
+            return False
+        self.symbol2number = {}
+        self.number2symbol = {}
+        # Make sure the file is closed, also when an assertion below fails.
+        with f:
+            for lineno, line in enumerate(f, 1):
+                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
+                if not mo:
+                    # Only lines with actual content deserve a complaint.
+                    # Blank lines used to fall through to mo.groups() and
+                    # crash with AttributeError.
+                    if line.strip():
+                        print("%s(%s): can't parse %s" % (filename, lineno,
+                                                          line.strip()))
+                    continue
+                symbol, number = mo.groups()
+                number = int(number)
+                assert symbol not in self.symbol2number
+                assert number not in self.number2symbol
+                self.symbol2number[symbol] = number
+                self.number2symbol[number] = symbol
+        return True
+
+    def parse_graminit_c(self, filename):
+        """Parse the .c file written by pgen.  (Internal)
+
+        The file looks as follows.  The first two lines are always this:
+
+        #include "pgenheaders.h"
+        #include "grammar.h"
+
+        After that come four blocks:
+
+        1) one or more state definitions
+        2) a table defining dfas
+        3) a table defining labels
+        4) a struct defining the grammar
+
+        A state definition has the following form:
+        - one or more arc arrays, each of the form:
+          static arc arcs_<n>_<m>[<k>] = {
+                  {<i>, <j>},
+                  ...
+          };
+        - followed by a state array, of the form:
+          static state states_<s>[<t>] = {
+                  {<k>, arcs_<n>_<m>},
+                  ...
+          };
+
+        The results are stored in self.states, self.dfas, self.labels and
+        self.start.  The symbol tables built by parse_graminit_h() must
+        already be in place, since the dfa names are checked against them.
+        Any deviation from the expected layout trips an assertion.
+
+        Returns False when the file cannot be opened; nothing is returned
+        explicitly on success.
+
+        """
+        try:
+            stream = open(filename)
+        except OSError as err:
+            print("Can't open %s: %s" % (filename, err))
+            return False
+        # Read all lines up front and close the file right away: the parser
+        # below may bail out through a failed assertion at any point, which
+        # used to leave the file open.
+        with stream:
+            f = iter(stream.readlines())
+        # The code below essentially uses f's iterator-ness!
+        lineno = 0
+
+        # Expect the two #include lines
+        lineno, line = lineno+1, next(f)
+        assert line == '#include "pgenheaders.h"\n', (lineno, line)
+        lineno, line = lineno+1, next(f)
+        assert line == '#include "grammar.h"\n', (lineno, line)
+
+        # Parse the state definitions
+        lineno, line = lineno+1, next(f)
+        allarcs = {}
+        states = []
+        while line.startswith("static arc "):
+            while line.startswith("static arc "):
+                mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
+                              line)
+                assert mo, (lineno, line)
+                n, m, k = list(map(int, mo.groups()))
+                arcs = []
+                for _ in range(k):
+                    lineno, line = lineno+1, next(f)
+                    mo = re.match(r"\s+{(\d+), (\d+)},$", line)
+                    assert mo, (lineno, line)
+                    i, j = list(map(int, mo.groups()))
+                    arcs.append((i, j))
+                lineno, line = lineno+1, next(f)
+                assert line == "};\n", (lineno, line)
+                allarcs[(n, m)] = arcs
+                lineno, line = lineno+1, next(f)
+            mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
+            assert mo, (lineno, line)
+            s, t = list(map(int, mo.groups()))
+            assert s == len(states), (lineno, line)
+            state = []
+            for _ in range(t):
+                lineno, line = lineno+1, next(f)
+                mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
+                assert mo, (lineno, line)
+                k, n, m = list(map(int, mo.groups()))
+                arcs = allarcs[n, m]
+                assert k == len(arcs), (lineno, line)
+                state.append(arcs)
+            states.append(state)
+            lineno, line = lineno+1, next(f)
+            assert line == "};\n", (lineno, line)
+            lineno, line = lineno+1, next(f)
+        self.states = states
+
+        # Parse the dfas
+        dfas = {}
+        mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
+        assert mo, (lineno, line)
+        ndfas = int(mo.group(1))
+        for i in range(ndfas):
+            lineno, line = lineno+1, next(f)
+            mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
+                          line)
+            assert mo, (lineno, line)
+            symbol = mo.group(2)
+            number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
+            assert self.symbol2number[symbol] == number, (lineno, line)
+            assert self.number2symbol[number] == symbol, (lineno, line)
+            assert x == 0, (lineno, line)
+            state = states[z]
+            assert y == len(state), (lineno, line)
+            lineno, line = lineno+1, next(f)
+            mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
+            assert mo, (lineno, line)
+            first = {}
+            # NOTE(review): eval() on file content.  The regex above limits
+            # the text to a quoted run of three-digit escapes, but do not
+            # loosen it without replacing eval().
+            rawbitset = eval(mo.group(1))
+            # Every set bit of the bitset marks one label number.
+            for i, c in enumerate(rawbitset):
+                byte = ord(c)
+                for j in range(8):
+                    if byte & (1<<j):
+                        first[i*8 + j] = 1
+            dfas[number] = (state, first)
+        lineno, line = lineno+1, next(f)
+        assert line == "};\n", (lineno, line)
+        self.dfas = dfas
+
+        # Parse the labels
+        labels = []
+        lineno, line = lineno+1, next(f)
+        mo = re.match(r"static label labels\[(\d+)\] = {$", line)
+        assert mo, (lineno, line)
+        nlabels = int(mo.group(1))
+        for i in range(nlabels):
+            lineno, line = lineno+1, next(f)
+            mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
+            assert mo, (lineno, line)
+            x, y = mo.groups()
+            x = int(x)
+            if y == "0":
+                y = None
+            else:
+                # NOTE(review): eval() again; the regex above only lets a
+                # quoted word through.
+                y = eval(y)
+            labels.append((x, y))
+        lineno, line = lineno+1, next(f)
+        assert line == "};\n", (lineno, line)
+        self.labels = labels
+
+        # Parse the grammar struct
+        lineno, line = lineno+1, next(f)
+        assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
+        lineno, line = lineno+1, next(f)
+        mo = re.match(r"\s+(\d+),$", line)
+        assert mo, (lineno, line)
+        ndfas = int(mo.group(1))
+        assert ndfas == len(self.dfas)
+        lineno, line = lineno+1, next(f)
+        assert line == "\tdfas,\n", (lineno, line)
+        lineno, line = lineno+1, next(f)
+        mo = re.match(r"\s+{(\d+), labels},$", line)
+        assert mo, (lineno, line)
+        nlabels = int(mo.group(1))
+        assert nlabels == len(self.labels), (lineno, line)
+        lineno, line = lineno+1, next(f)
+        mo = re.match(r"\s+(\d+)$", line)
+        assert mo, (lineno, line)
+        start = int(mo.group(1))
+        assert start in self.number2symbol, (lineno, line)
+        self.start = start
+        lineno, line = lineno+1, next(f)
+        assert line == "};\n", (lineno, line)
+        # Nothing may follow the grammar struct.
+        try:
+            lineno, line = lineno+1, next(f)
+        except StopIteration:
+            pass
+        else:
+            assert 0, (lineno, line)
+
+    def finish_off(self):
+        """Derive the keyword and token lookup tables from self.labels.  (Internal).
+
+        Labels without a value are indexed by their type in self.tokens;
+        NAME labels that carry a value are indexed by that value in
+        self.keywords.  All other labels are left out.
+        """
+        self.keywords = keywords = {}  # map from keyword strings to arc labels
+        self.tokens = tokens = {}      # map from numeric token values to arc labels
+        for index, (kind, text) in enumerate(self.labels):
+            if text is None:
+                tokens[kind] = index
+            elif kind == token.NAME:
+                keywords[text] = index
diff --git a/blib2to3/pgen2/driver.py b/blib2to3/pgen2/driver.py

new file mode 100644 (file)

index 0000000..cbc58e7
--- /dev/null
+++ b/blib2to3/pgen2/driver.py
@@ -0,0 +1,178 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser driver.
+
+This provides a high-level interface to parse a file into a syntax tree.
+
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+__all__ = ["Driver", "load_grammar"]
+
+# Python imports
+import codecs
+import io
+import os
+import logging
+import pkgutil
+import sys
+
+# Pgen imports
+from . import grammar, parse, token, tokenize, pgen
+
+
+class Driver(object):
+    """Front end that feeds tokens to a parse.Parser and returns the tree.
+
+    The instance only stores the grammar, the optional converter and a
+    logger; a fresh parse.Parser is created on every parse_tokens() call.
+    """
+
+    def __init__(self, grammar, convert=None, logger=None):
+        """Store the grammar, converter and logger.
+
+        When no logger is given, logging.getLogger() is used.
+        """
+        self.grammar = grammar
+        if logger is None:
+            logger = logging.getLogger()
+        self.logger = logger
+        self.convert = convert
+
+    def parse_tokens(self, tokens, debug=False):
+        """Parse a series of tokens and return the syntax tree."""
+        # XXX Move the prefix computation into a wrapper around tokenize.
+        p = parse.Parser(self.grammar, self.convert)
+        p.setup()
+        # (lineno, column) is the position just past the last token consumed.
+        lineno = 1
+        column = 0
+        type = value = start = end = line_text = None
+        # Text (gaps, comments, NL tokens) seen since the last token that was
+        # handed to the parser; it is passed along with the next such token.
+        prefix = ""
+        for quintuple in tokens:
+            type, value, start, end, line_text = quintuple
+            if start != (lineno, column):
+                # There is a gap between the previous token and this one;
+                # rebuild the skipped text and add it to the prefix.
+                assert (lineno, column) <= start, ((lineno, column), start)
+                s_lineno, s_column = start
+                if lineno < s_lineno:
+                    prefix += "\n" * (s_lineno - lineno)
+                    lineno = s_lineno
+                    column = 0
+                if column < s_column:
+                    prefix += line_text[column:s_column]
+                    column = s_column
+            if type in (tokenize.COMMENT, tokenize.NL):
+                # Comments and NL tokens never reach the parser; they only
+                # extend the prefix of the next token.
+                prefix += value
+                lineno, column = end
+                if value.endswith("\n"):
+                    lineno += 1
+                    column = 0
+                continue
+            if type == token.OP:
+                # Replace the generic OP code by the operator's own code.
+                type = grammar.opmap[value]
+            if debug:
+                self.logger.debug("%s %r (prefix=%r)",
+                                  token.tok_name[type], value, prefix)
+            # addtoken() returning a true value means parsing is complete.
+            if p.addtoken(type, value, (prefix, start)):
+                if debug:
+                    self.logger.debug("Stop.")
+                break
+            prefix = ""
+            lineno, column = end
+            if value.endswith("\n"):
+                lineno += 1
+                column = 0
+        else:
+            # for-else: reached only when the token stream ran out without
+            # the parser signalling completion.
+            # We never broke out -- EOF is too soon (how can this happen???)
+            raise parse.ParseError("incomplete input",
+                                   type, value, (prefix, start))
+        return p.rootnode
+
+    def parse_stream_raw(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        # The stream only needs a readline method for the tokenizer.
+        tokens = tokenize.generate_tokens(stream.readline)
+        return self.parse_tokens(tokens, debug)
+
+    def parse_stream(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        # Plain delegation to parse_stream_raw().
+        return self.parse_stream_raw(stream, debug)
+
+    def parse_file(self, filename, encoding=None, debug=False):
+        """Parse a file and return the syntax tree."""
+        # Opened in text mode; the file is closed when the parse finishes
+        # or raises.
+        with io.open(filename, "r", encoding=encoding) as stream:
+            return self.parse_stream(stream, debug)
+
+    def parse_string(self, text, debug=False):
+        """Parse a string and return the syntax tree."""
+        # Wrap the text in an in-memory stream to obtain a readline callable.
+        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
+        return self.parse_tokens(tokens, debug)
+
+
+def _generate_pickle_name(gt):
+    """Return the pickle file name that belongs to grammar file *gt*.
+
+    A ``.txt`` extension is dropped (any other extension is kept), then the
+    dot-joined fields of sys.version_info and ``.pickle`` are appended.
+    """
+    stem, extension = os.path.splitext(gt)
+    kept_extension = "" if extension == ".txt" else extension
+    version_tag = ".".join(str(field) for field in sys.version_info)
+    return stem + kept_extension + version_tag + ".pickle"
+
+
+def load_grammar(gt="Grammar.txt", gp=None,
+                 save=True, force=False, logger=None):
+    """Return the grammar for *gt*, reusing the pickle *gp* when it is current.
+
+    *gp* defaults to the name computed by _generate_pickle_name(gt).  The
+    tables are regenerated from *gt* when *force* is true or when _newer()
+    does not report the pickle as current; with *save* true they are then
+    written back to *gp*.  An OSError while writing is logged and otherwise
+    ignored.
+    """
+    log = logging.getLogger() if logger is None else logger
+    if gp is None:
+        gp = _generate_pickle_name(gt)
+
+    # Fast path: the pickle exists and is at least as recent as the source.
+    if not force and _newer(gp, gt):
+        cached = grammar.Grammar()
+        cached.load(gp)
+        return cached
+
+    log.info("Generating grammar tables from %s", gt)
+    generated = pgen.generate_grammar(gt)
+    if not save:
+        return generated
+    log.info("Writing grammar tables to %s", gp)
+    try:
+        generated.dump(gp)
+    except OSError as exc:
+        # Best effort only: an unwritable cache must not break parsing.
+        log.info("Writing failed: %s", exc)
+    return generated
+
+
+def _newer(a, b):
+    """Tell whether file *a* is at least as recent as file *b*.
+
+    Returns False when *a* does not exist, True when *a* exists but *b* does
+    not, and otherwise compares the two modification times (ties count as
+    newer).
+    """
+    if os.path.exists(a):
+        if os.path.exists(b):
+            mtime_a = os.path.getmtime(a)
+            mtime_b = os.path.getmtime(b)
+            return mtime_a >= mtime_b
+        return True
+    return False
+
+
+def load_packaged_grammar(package, grammar_source):
+    """Load a grammar from its source file or, failing that, from package data.
+
+    If *grammar_source* is an existing file, load_grammar(grammar_source) is
+    called, which keeps load_grammar's automatic regeneration behavior.
+
+    Otherwise the pickled grammar is read with
+        pkgutil.get_data(package, pickled_grammar)
+    where *pickled_grammar* is derived from the base name of *grammar_source*
+    by adding the Python version and a ``.pickle`` extension.
+
+    """
+    if not os.path.isfile(grammar_source):
+        resource_name = _generate_pickle_name(os.path.basename(grammar_source))
+        payload = pkgutil.get_data(package, resource_name)
+        packaged = grammar.Grammar()
+        packaged.loads(payload)
+        return packaged
+    return load_grammar(grammar_source)
+
+
+def main(*args):
+    """Script entry point: (re)generate the pickle for each grammar file.
+
+    The arguments are paths to grammar text files; when none are passed,
+    sys.argv[1:] is used.  load_grammar is called for every path with
+    regeneration forced and saving enabled.  Always returns True.
+    """
+    grammar_files = args or sys.argv[1:]
+    # Plain messages on stdout, at INFO level so load_grammar's progress shows.
+    logging.basicConfig(format='%(message)s', stream=sys.stdout,
+                        level=logging.INFO)
+    for grammar_file in grammar_files:
+        load_grammar(grammar_file, save=True, force=True)
+    return True
+
+if __name__ == "__main__":
+    succeeded = main()
+    sys.exit(int(not succeeded))
diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi

new file mode 100644 (file)

index 0000000..f098bf5
--- /dev/null
+++ b/blib2to3/pgen2/driver.pyi
@@ -0,0 +1,24 @@
+# Stubs for lib2to3.pgen2.driver (Python 3.6)
+
+import os
+import sys
+from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union
+
+from logging import Logger
+from blib2to3.pytree import _Convert, _NL
+from blib2to3.pgen2 import _Path
+from blib2to3.pgen2.grammar import Grammar
+
+
+# Type stub for the parser front end defined in driver.py.
+class Driver:
+    # Attributes assigned in __init__.
+    grammar: Grammar
+    logger: Logger
+    # NOTE(review): driver.py stores the convert argument as given, so this
+    # can be None when no converter was passed -- confirm whether Optional
+    # is intended here.
+    convert: _Convert
+    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
+    # All parse_* methods return the root of the resulting syntax tree.
+    def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
+    def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
+    def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
+    def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ...
+    def parse_string(self, text: Text, debug: bool = ...) -> _NL: ...
+
+# gt is the grammar text file, gp the pickle file (computed from gt when
+# omitted); save/force control writing and regenerating the pickle.
+def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ...
diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py

new file mode 100644 (file)

index 0000000..088c58b
--- /dev/null
+++ b/blib2to3/pgen2/grammar.py
@@ -0,0 +1,211 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+# Python imports
+import collections
+import pickle
+
+# Local imports
+from . import token
+
+
+class Grammar(object):
+    """Container for the parsing tables produced by pgen.
+
+    The parsing engine in parse.py reads the instance variables of this
+    class directly.  The class itself only creates empty tables; filling
+    them in is the job of subclasses (see the conv and pgen modules).
+
+    dump() writes the tables to a pickle file and load() reads them back,
+    which is much faster than the other ways offered by subclasses.
+    report() prints a readable representation of the tables to stdout,
+    for debugging.
+
+    Instance variables:
+
+    symbol2number -- dict: symbol name -> symbol number.  Symbol numbers
+                     are always 256 or higher so they cannot be confused
+                     with token numbers, which lie between 0 and 255
+                     (inclusive).
+
+    number2symbol -- dict: symbol number -> symbol name; the inverse of
+                     symbol2number.
+
+    states        -- list of DFAs, indexed by DFA number.  A DFA is a
+                     list of states, a state is a list of arcs, and an
+                     arc is an (i, j) pair of a label and a state number.
+                     (The name is slightly confusing.)  A final state
+                     carries a special arc (0, j) where j is its own
+                     state number.
+
+    dfas          -- dict: symbol number -> (DFA, first) pair, where DFA
+                     is an item of the states list and first is the set
+                     of tokens that can begin this grammar rule (stored
+                     as a dict whose values are always 1).
+
+    labels        -- list of (x, y) pairs, indexed by label number.  x is
+                     a token number or a symbol number; y is None or a
+                     keyword string.  Label numbers mark the state
+                     transitions (arcs) in the DFAs.
+
+    start         -- the number of the grammar's start symbol.
+
+    keywords      -- dict: keyword string -> arc label.
+
+    tokens        -- dict: token number -> arc label.
+
+    symbol2label  -- dict; this class only creates it empty and copies it.
+
+    """
+
+    def __init__(self):
+        """Create empty tables; labels starts with the (0, "EMPTY") entry."""
+        self.symbol2number = {}
+        self.number2symbol = {}
+        self.states = []
+        self.dfas = {}
+        self.labels = [(0, "EMPTY")]
+        self.keywords = {}
+        self.tokens = {}
+        self.symbol2label = {}
+        self.start = 256
+
+    def dump(self, filename):
+        """Write the grammar tables to the pickle file *filename*.
+
+        Before pickling, every dict in the tables is recursively replaced
+        by a key-sorted OrderedDict, so the pickle does not hold exactly the
+        objects this instance holds.  load() copies the unpickled top-level
+        entries into the instance dict without converting nested values
+        back, so tables loaded from the file still contain OrderedDict
+        objects where the dumped instance had plain dicts.  For parsing this
+        has no effect on performance because OrderedDict uses dict's
+        __getitem__ with nothing in between.
+        """
+        with open(filename, "wb") as out:
+            pickle.dump(_make_deterministic(self.__dict__), out, 2)
+
+    def load(self, filename):
+        """Replace the grammar tables with those pickled in *filename*."""
+        with open(filename, "rb") as source:
+            tables = pickle.load(source)
+        vars(self).update(tables)
+
+    def loads(self, pkl):
+        """Replace the grammar tables with those in the pickle bytes *pkl*."""
+        tables = pickle.loads(pkl)
+        vars(self).update(tables)
+
+    def copy(self):
+        """
+        Return a new grammar of the same class with shallow copies of the tables.
+        """
+        clone = self.__class__()
+        # Dict-valued tables.
+        clone.symbol2number = self.symbol2number.copy()
+        clone.number2symbol = self.number2symbol.copy()
+        clone.dfas = self.dfas.copy()
+        clone.keywords = self.keywords.copy()
+        clone.tokens = self.tokens.copy()
+        clone.symbol2label = self.symbol2label.copy()
+        # List-valued tables and the start symbol.
+        clone.labels = list(self.labels)
+        clone.states = list(self.states)
+        clone.start = self.start
+        return clone
+
+    def report(self):
+        """Pretty-print the grammar tables to standard output, for debugging."""
+        from pprint import pprint
+        sections = (
+            ("s2n", "symbol2number"),
+            ("n2s", "number2symbol"),
+            ("states", "states"),
+            ("dfas", "dfas"),
+            ("labels", "labels"),
+        )
+        for heading, attribute in sections:
+            print(heading)
+            pprint(getattr(self, attribute))
+        print("start", self.start)
+
+
+def _make_deterministic(top):
+    """Return a copy of *top* in which every dict is a key-sorted OrderedDict.
+
+    Dicts, lists and tuples are rebuilt recursively; any other object is
+    returned unchanged.
+    """
+    if isinstance(top, dict):
+        pairs = [(key, _make_deterministic(val)) for key, val in top.items()]
+        pairs.sort()
+        return collections.OrderedDict(pairs)
+    if isinstance(top, list):
+        return list(map(_make_deterministic, top))
+    if isinstance(top, tuple):
+        return tuple(map(_make_deterministic, top))
+    return top
+
+
+# Map from operator to number (since tokenize doesn't do this)
+#
+# opmap_raw holds one "<operator> <token name>" pair per line.  Note that
+# both "!=" and "<>" are listed with the name NOTEQUAL.
+
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+@= ATEQUAL
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+"""
+
+# Build opmap: operator string -> the attribute of the token module that
+# carries the listed name.  Empty lines (such as the first one produced by
+# the leading newline of opmap_raw) are skipped.
+opmap = {}
+for line in opmap_raw.splitlines():
+    if line:
+        op, name = line.split()
+        opmap[op] = getattr(token, name)
diff --git a/blib2to3/pgen2/grammar.pyi b/blib2to3/pgen2/grammar.pyi

new file mode 100644 (file)

index 0000000..353086d
--- /dev/null
+++ b/blib2to3/pgen2/grammar.pyi
@@ -0,0 +1,29 @@
+# Stubs for lib2to3.pgen2.grammar (Python 3.6)
+
+from blib2to3.pgen2 import _Path
+
+from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar
+
+# Type variable used so that Grammar.copy() returns the caller's own type.
+_P = TypeVar('_P')
+# A label: (token or symbol number, keyword string or None).
+_Label = Tuple[int, Optional[Text]]
+# A DFA: list of states; each state is a list of (label, state number) arcs.
+_DFA = List[List[Tuple[int, int]]]
+# Value type of Grammar.dfas: (DFA, first set stored as a dict).
+_DFAS = Tuple[_DFA, Dict[int, int]]
+
+# Type stub for the grammar tables container defined in grammar.py.
+class Grammar:
+    # Parsing tables; see the Grammar docstring in grammar.py for details.
+    symbol2number: Dict[Text, int]
+    number2symbol: Dict[int, Text]
+    states: List[_DFA]
+    dfas: Dict[int, _DFAS]
+    labels: List[_Label]
+    keywords: Dict[Text, int]
+    tokens: Dict[int, int]
+    symbol2label: Dict[Text, int]
+    start: int
+    def __init__(self) -> None: ...
+    def dump(self, filename: _Path) -> None: ...
+    def load(self, filename: _Path) -> None: ...
+    # grammar.py defines loads() for loading from a pickle bytes object and
+    # driver.load_packaged_grammar calls it, so it must be declared here.
+    def loads(self, pkl: bytes) -> None: ...
+    def copy(self: _P) -> _P: ...
+    def report(self) -> None: ...
+
+# Raw "<operator> <token name>" table, one pair per line.
+opmap_raw: Text
+# Operator string -> token number.  grammar.py fills this with
+# getattr(token, name), and the driver uses the looked-up value as a token
+# type, so the values are ints rather than text.
+opmap: Dict[Text, int]
diff --git a/blib2to3/pgen2/literals.py b/blib2to3/pgen2/literals.py

new file mode 100644 (file)

index 0000000..b9b63e6
--- /dev/null
+++ b/blib2to3/pgen2/literals.py
@@ -0,0 +1,60 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Safely evaluate Python string literals without using eval()."""
+
+import re
+
+# Single-character escapes: the character after the backslash -> its value.
+simple_escapes = {
+    "a": "\a",
+    "b": "\b",
+    "f": "\f",
+    "n": "\n",
+    "r": "\r",
+    "t": "\t",
+    "v": "\v",
+    "'": "'",
+    '"': '"',
+    "\\": "\\",
+}
+
+def escape(m):
+    """Return the character denoted by one matched backslash escape.
+
+    *m* is a match object whose group 0 is the whole escape (including the
+    backslash) and whose group 1 is the part after the backslash.  Simple
+    escapes are looked up in simple_escapes; ``x`` escapes are read as
+    hexadecimal and everything else as octal.
+
+    Raises ValueError for a hex escape with fewer than two digits and for
+    digits that cannot be converted.
+    """
+    whole, code = m.group(0, 1)
+    assert whole.startswith("\\")
+    simple = simple_escapes.get(code)
+    if simple is not None:
+        return simple
+    if not code.startswith("x"):
+        try:
+            codepoint = int(code, 8)
+        except ValueError:
+            raise ValueError("invalid octal string escape ('\\%s')" % code) from None
+        return chr(codepoint)
+    digits = code[1:]
+    if len(digits) < 2:
+        raise ValueError("invalid hex string escape ('\\%s')" % code)
+    try:
+        codepoint = int(digits, 16)
+    except ValueError:
+        raise ValueError("invalid hex string escape ('\\%s')" % code) from None
+    return chr(codepoint)
+
+def evalString(s):
+    """Return the value of the quoted string literal *s*.
+
+    *s* must start with a single or double quote and end with the matching
+    delimiter (one quote, or three when the literal is triple-quoted).  The
+    delimiters are stripped and every backslash escape in the remaining text
+    is replaced through escape().
+    """
+    assert s.startswith(("'", '"')), repr(s[:1])
+    quote = s[0]
+    triple = quote * 3
+    delimiter = triple if s[:3] == triple else quote
+    width = len(delimiter)
+    assert s.endswith(delimiter), repr(s[-width:])
+    assert len(s) >= 2 * width
+    body = s[width:-width]
+    return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, body)
+
+def test():
+    """Round-trip chr(0)..chr(255) through repr() and evalString().
+
+    Prints a line for every character that does not survive the round trip.
+    """
+    for code in range(256):
+        char = chr(code)
+        literal = repr(char)
+        decoded = evalString(literal)
+        if decoded == char:
+            continue
+        print(code, char, literal, decoded)
+
+
+if __name__ == "__main__":
+    test()
diff --git a/blib2to3/pgen2/literals.pyi b/blib2to3/pgen2/literals.pyi

new file mode 100644 (file)

index 0000000..8719500
--- /dev/null
+++ b/blib2to3/pgen2/literals.pyi
@@ -0,0 +1,9 @@
+# Stubs for lib2to3.pgen2.literals (Python 3.6)
+
+from typing import Dict, Match, Text
+
+# Escape character (the part after the backslash) -> replacement text.
+simple_escapes: Dict[Text, Text]
+
+# escape() converts one regex match of a backslash escape to its character;
+# evalString() evaluates a whole quoted string literal; test() is the
+# module's self-check.
+def escape(m: Match) -> Text: ...
+def evalString(s: Text) -> Text: ...
+def test() -> None: ...
diff --git a/blib2to3/pgen2/parse.py b/blib2to3/pgen2/parse.py

new file mode 100644 (file)

index 0000000..6bebdbb
--- /dev/null
+++ b/blib2to3/pgen2/parse.py
@@ -0,0 +1,201 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+# Local imports
+from . import token
+
+class ParseError(Exception):
+    """Raised when the parser cannot continue with the given token.
+
+    The message, token type, token value and context passed to the
+    constructor are kept as the attributes msg, type, value and context.
+    """
+
+    def __init__(self, msg, type, value, context):
+        details = (msg, type, value, context)
+        description = "%s: type=%r, value=%r, context=%r" % details
+        Exception.__init__(self, description)
+        self.msg, self.type, self.value, self.context = details
+
+class Parser(object):
+    """Parser engine.
+
+    The proper usage sequence is:
+
+    p = Parser(grammar, [converter])  # create instance
+    p.setup([start])                  # prepare for parsing
+    <for each input token>:
+        if p.addtoken(...):           # parse a token; may raise ParseError
+            break
+    root = p.rootnode                 # root of abstract syntax tree
+
+    A Parser instance may be reused by calling setup() repeatedly.
+
+    A Parser instance contains state pertaining to the current token
+    sequence, and should not be used concurrently by different threads
+    to parse separate token sequences.
+
+    See driver.py for how to get input tokens by tokenizing a file or
+    string.
+
+    Parsing is complete when addtoken() returns True; the root of the
+    abstract syntax tree can then be retrieved from the rootnode
+    instance variable.  When a syntax error occurs, addtoken() raises
+    the ParseError exception.  There is no error recovery; the parser
+    cannot be used after a syntax error was reported (but it can be
+    reinitialized by calling setup()).
+
+    """
+
+    def __init__(self, grammar, convert=None):
+        """Constructor.
+
+        The grammar argument is a grammar.Grammar instance; see the
+        grammar module for more information.
+
+        The parser is not ready yet for parsing; you must call the
+        setup() method to get it started.
+
+        The optional convert argument is a function mapping concrete
+        syntax tree nodes to abstract syntax tree nodes.  If not
+        given, no conversion is done and the syntax tree produced is
+        the concrete syntax tree.  If given, it must be a function of
+        two arguments, the first being the grammar (a grammar.Grammar
+        instance), and the second being the concrete syntax tree node
+        to be converted.  The syntax tree is converted from the bottom
+        up.
+
+        A concrete syntax tree node is a (type, value, context, nodes)
+        tuple, where type is the node type (a token or symbol number),
+        value is None for symbols and a string for tokens, context is
+        None or an opaque value used for error reporting (typically a
+        (lineno, offset) pair), and nodes is a list of children for
+        symbols, and None for tokens.
+
+        An abstract syntax tree node may be anything; this is entirely
+        up to the converter function.
+
+        """
+        self.grammar = grammar
+        # Without a converter, nodes are passed through unchanged.
+        self.convert = convert or (lambda grammar, node: node)
+
+    def setup(self, start=None):
+        """Prepare for parsing.
+
+        This *must* be called before starting to parse.
+
+        The optional argument is an alternative start symbol; it
+        defaults to the grammar's start symbol.
+
+        You can use a Parser instance to parse any number of programs;
+        each time you call setup() the parser is reset to an initial
+        state determined by the (implicit or explicit) start symbol.
+
+        """
+        if start is None:
+            start = self.grammar.start
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, value, context, children),
+        # where children is a list of nodes or None, and context may be None.
+        newnode = (start, None, None, [])
+        stackentry = (self.grammar.dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+    def addtoken(self, type, value, context):
+        """Add a token; return True iff this is the end of the program."""
+        # Map from token to label
+        ilabel = self.classify(type, value, context)
+        # Loop until the token is shifted; may raise exceptions
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t, v = self.grammar.labels[i]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    assert t < 256
+                    # Shift a token; we're done with it
+                    self.shift(type, value, newstate, context)
+                    # Pop while we are in an accept-only state
+                    # (a state whose only arc is its own final-state marker)
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        self.pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = self.grammar.dfas[t]
+                    itsstates, itsfirst = itsdfa
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        self.push(t, self.grammar.dfas[t], newstate, context)
+                        break # To continue the outer while loop
+            else:
+                # for-else: no arc matched, so nothing was shifted or pushed.
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    self.pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise ParseError("too much input",
+                                         type, value, context)
+                else:
+                    # No success finding a transition
+                    raise ParseError("bad input", type, value, context)
+
+    def classify(self, type, value, context):
+        """Turn a token into a label.  (Internal)"""
+        if type == token.NAME:
+            # Keep a listing of all used names
+            self.used_names.add(value)
+            # Check for reserved words
+            ilabel = self.grammar.keywords.get(value)
+            if ilabel is not None:
+                return ilabel
+        # Not a keyword: fall back to the label registered for the token type.
+        ilabel = self.grammar.tokens.get(type)
+        if ilabel is None:
+            raise ParseError("bad token", type, value, context)
+        return ilabel
+
+    def shift(self, type, value, newstate, context):
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, value, context, None)
+        newnode = self.convert(self.grammar, newnode)
+        # A converter may return None to drop the node from the tree.
+        if newnode is not None:
+            node[-1].append(newnode)
+        # Stay in the same DFA but move on to newstate.
+        self.stack[-1] = (dfa, newstate, node)
+
+    def push(self, type, newdfa, newstate, context):
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, None, context, [])
+        # Record where the current DFA continues once the new one is popped.
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    def pop(self):
+        """Pop a nonterminal.  (Internal)"""
+        popdfa, popstate, popnode = self.stack.pop()
+        newnode = self.convert(self.grammar, popnode)
+        if newnode is not None:
+            if self.stack:
+                # Attach the finished node to its parent's children list.
+                dfa, state, node = self.stack[-1]
+                node[-1].append(newnode)
+            else:
+                # The stack is empty: this node is the root of the tree.
+                # NOTE(review): the attribute assignment below needs a root
+                # object that accepts attributes; with the default pass-through
+                # converter the root is a plain tuple -- confirm that callers
+                # always supply a converter.
+                self.rootnode = newnode
+                self.rootnode.used_names = self.used_names
diff --git a/blib2to3/pgen2/parse.pyi b/blib2to3/pgen2/parse.pyi

new file mode 100644 (file)

index 0000000..cbcf941
--- /dev/null
+++ b/blib2to3/pgen2/parse.pyi
@@ -0,0 +1,29 @@
+# Stubs for lib2to3.pgen2.parse (Python 3.6)
+
+from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple
+
+from blib2to3.pgen2.grammar import Grammar, _DFAS
+from blib2to3.pytree import _NL, _Convert, _RawNode
+
+# Opaque per-token context handed to the parser; driver.py passes a
+# (prefix, start) pair.
+_Context = Sequence[Any]
+
+# Exception raised by the parser; it keeps the constructor arguments as
+# attributes of the same names.
+class ParseError(Exception):
+    msg: Text
+    # NOTE(review): driver.parse_tokens can raise this with type None when
+    # the token stream is empty -- confirm whether Optional[int] is wanted.
+    type: int
+    value: Optional[Text]
+    context: _Context
+    def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ...
+
+# Type stub for the parser engine defined in parse.py.
+class Parser:
+    grammar: Grammar
+    convert: _Convert
+    # Stack of (dfa, state, node) entries; created by setup().
+    stack: List[Tuple[_DFAS, int, _RawNode]]
+    # None until the parse has completed.
+    rootnode: Optional[_NL]
+    # Every NAME token value seen by classify().
+    used_names: Set[Text]
+    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ...
+    def setup(self, start: Optional[int] = ...) -> None: ...
+    # Returns True once the parse is complete.
+    def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ...
+    # The remaining methods are marked "(Internal)" in parse.py.
+    def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ...
+    def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ...
+    def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ...
+    def pop(self) -> None: ...
diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py

new file mode 100644 (file)

index 0000000..b0cbd16
--- /dev/null
+++ b/blib2to3/pgen2/pgen.py
@@ -0,0 +1,386 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Pgen imports
+from . import grammar, token, tokenize
+
+class PgenGrammar(grammar.Grammar):
+    """Grammar subclass instantiated by ParserGenerator.make_grammar().
+
+    Adds nothing to grammar.Grammar.
+    """
+    pass
+
+class ParserGenerator(object):
+
+    def __init__(self, filename, stream=None):
+        """Parse a grammar description and compute its first sets.
+
+        filename -- used to open the grammar when no stream is given, and
+                    reported in SyntaxError raised by raise_error().
+        stream   -- optional object with a readline method; a stream
+                    passed in by the caller is never closed here.
+
+        A file opened here is closed once parsing is over, including when
+        gettoken() or parse() raises.
+        """
+        close_stream = None
+        if stream is None:
+            stream = open(filename)
+            close_stream = stream.close
+        self.filename = filename
+        self.stream = stream
+        self.generator = tokenize.generate_tokens(stream.readline)
+        try:
+            self.gettoken() # Initialize lookahead
+            self.dfas, self.startsymbol = self.parse()
+        finally:
+            # Close on the error path too, so a malformed grammar does
+            # not leak the file handle.
+            if close_stream is not None:
+                close_stream()
+        self.first = {} # map from symbol name to set of tokens
+        self.addfirstsets()
+
+    def make_grammar(self):
+        """Build and return a PgenGrammar from the parsed DFAs.
+
+        Symbols are numbered from 256 upwards, the start symbol first and
+        the remaining rule names in sorted order.  Every DFA state becomes
+        a list of (label index, target state index) arcs; a final state
+        gets an extra (0, own index) arc.
+        """
+        c = PgenGrammar()
+        names = sorted(self.dfas.keys())
+        names.remove(self.startsymbol)
+        names.insert(0, self.startsymbol)
+        for name in names:
+            number = 256 + len(c.symbol2number)
+            c.symbol2number[name] = number
+            c.number2symbol[number] = name
+        for name in names:
+            dfa = self.dfas[name]
+            states = []
+            for state in dfa:
+                arcs = [(self.make_label(c, label), dfa.index(target))
+                        for label, target in sorted(state.arcs.items())]
+                if state.isfinal:
+                    arcs.append((0, dfa.index(state)))
+                states.append(arcs)
+            c.states.append(states)
+            c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
+        c.start = c.symbol2number[self.startsymbol]
+        return c
+
+    def make_first(self, c, name):
+        """Return the first set of rule `name` keyed by label index.
+
+        Each raw label in self.first[name] is converted, in sorted order,
+        with make_label(); the result maps every label index to 1.
+        """
+        rawfirst = self.first[name]
+        ##an "ilabel not in first" assertion failed on <> ... != (XXX)
+        return {self.make_label(c, label): 1 for label in sorted(rawfirst)}
+
+    def make_label(self, c, label):
+        """Return the index in c.labels for `label`, adding it if new.
+
+        `label` is a rule name, a named token (looked up as an attribute
+        of the token module) or a quoted keyword/operator.  Known labels
+        are served from c.symbol2label, c.tokens or c.keywords; new ones
+        are appended to c.labels and recorded in the matching table.
+        """
+        # XXX Maybe this should be a method on a subclass of converter?
+        ilabel = len(c.labels)
+
+        def intern(table, key, entry):
+            # Reuse the recorded index, or register `entry` under ilabel.
+            if key in table:
+                return table[key]
+            c.labels.append(entry)
+            table[key] = ilabel
+            return ilabel
+
+        if label[0].isalpha():
+            # Either a symbol name or a named token
+            if label in c.symbol2number:
+                # A symbol name (a non-terminal)
+                return intern(c.symbol2label, label,
+                              (c.symbol2number[label], None))
+            # A named token (NAME, NUMBER, STRING)
+            itoken = getattr(token, label, None)
+            assert isinstance(itoken, int), label
+            assert itoken in token.tok_name, label
+            return intern(c.tokens, itoken, (itoken, None))
+
+        # Either a keyword or an operator
+        assert label[0] in ('"', "'"), label
+        # NOTE(review): eval() runs on text taken from the grammar file;
+        # only feed this trusted grammars.
+        value = eval(label)
+        if value[0].isalpha():
+            # A keyword
+            return intern(c.keywords, value, (token.NAME, value))
+        # An operator (any non-numeric token)
+        itoken = grammar.opmap[value] # Fails if unknown token
+        return intern(c.tokens, itoken, (itoken, None))
+
+    def addfirstsets(self):
+        """Compute the first set of every rule that does not have one yet.
+
+        Rules are visited in sorted name order; calcfirst() may fill in
+        other rules along the way, which are then skipped.
+        """
+        for name in sorted(self.dfas.keys()):
+            if name in self.first:
+                continue
+            self.calcfirst(name)
+
+    def calcfirst(self, name):
+        """Compute self.first[name] from the arcs of the rule's first state.
+
+        A label that names another rule contributes that rule's first set
+        (computed recursively on demand); any other label contributes
+        itself.  Raises ValueError when a rule is reached again while its
+        own first set is still being computed, or when two labels of the
+        start state share a symbol in their first sets.
+        """
+        dfa = self.dfas[name]
+        self.first[name] = None # dummy to detect left recursion
+        totalset = {}
+        overlapcheck = {}
+        for label in dfa[0].arcs:
+            if label not in self.dfas:
+                # Not a rule name: the label stands for itself.
+                totalset[label] = 1
+                overlapcheck[label] = {label: 1}
+                continue
+            if label not in self.first:
+                self.calcfirst(label)
+            fset = self.first[label]
+            if fset is None:
+                raise ValueError("recursion for rule %r" % name)
+            totalset.update(fset)
+            overlapcheck[label] = fset
+        # Each symbol may be reachable through one label only.
+        inverse = {}
+        for label, itsfirst in overlapcheck.items():
+            for symbol in itsfirst:
+                if symbol in inverse:
+                    raise ValueError("rule %s is ambiguous; %s is in the"
+                                     " first sets of %s as well as %s" %
+                                     (name, symbol, label, inverse[symbol]))
+                inverse[symbol] = label
+        self.first[name] = totalset
+
+    def parse(self):
+        """Parse the whole grammar and return (dfas, startsymbol).
+
+        dfas maps each rule name to its simplified DFA (a list of
+        DFAState); startsymbol is the name of the first rule read, or
+        None when the input holds no rule.
+        """
+        dfas = {}
+        startsymbol = None
+        # MSTART: (NEWLINE | RULE)* ENDMARKER
+        while self.type != token.ENDMARKER:
+            while self.type == token.NEWLINE:
+                self.gettoken()
+            # RULE: NAME ':' RHS NEWLINE
+            name = self.expect(token.NAME)
+            self.expect(token.OP, ":")
+            nfa_start, nfa_finish = self.parse_rhs()
+            self.expect(token.NEWLINE)
+            # Debug aid: self.dump_nfa(name, nfa_start, nfa_finish)
+            dfa = self.make_dfa(nfa_start, nfa_finish)
+            # Debug aid: self.dump_dfa(name, dfa)
+            self.simplify_dfa(dfa)
+            dfas[name] = dfa
+            if startsymbol is None:
+                startsymbol = name
+        return dfas, startsymbol
+
+    def make_dfa(self, start, finish):
+        """Convert the NFA delimited by `start` and `finish` into a DFA.
+
+        Returns a list of DFAState objects whose first element is the
+        start state.  A DFA state is final when its NFA set contains
+        `finish` (see DFAState.__init__).
+        """
+        # To turn an NFA into a DFA, we define the states of the DFA
+        # to correspond to *sets* of states of the NFA.  Then do some
+        # state reduction.  Let's represent sets as dicts with 1 for
+        # values.
+        assert isinstance(start, NFAState)
+        assert isinstance(finish, NFAState)
+        def closure(state):
+            # Set of NFA states reachable from `state` through unlabeled arcs.
+            base = {}
+            addclosure(state, base)
+            return base
+        def addclosure(state, base):
+            # Add `state` and everything reachable through unlabeled
+            # (label is None) arcs to `base`; already-seen states stop
+            # the recursion.
+            assert isinstance(state, NFAState)
+            if state in base:
+                return
+            base[state] = 1
+            for label, next in state.arcs:
+                if label is None:
+                    addclosure(next, base)
+        states = [DFAState(closure(start), finish)]
+        for state in states: # NB states grows while we're iterating
+            # Group the labeled arcs leaving this state's NFA set by label.
+            arcs = {}
+            for nfastate in state.nfaset:
+                for label, next in nfastate.arcs:
+                    if label is not None:
+                        addclosure(next, arcs.setdefault(label, {}))
+            for label, nfaset in sorted(arcs.items()):
+                # Reuse the DFA state with the same NFA set, if any
+                # (for/else: the else branch runs when none matched).
+                for st in states:
+                    if st.nfaset == nfaset:
+                        break
+                else:
+                    st = DFAState(nfaset, finish)
+                    states.append(st)
+                state.addarc(st, label)
+        return states # List of DFAState instances; first one is start
+
+    def dump_nfa(self, name, start, finish):
+        """Print every NFA state reachable from `start`, with its arcs.
+
+        States are numbered in discovery order; `finish` is tagged
+        "(final)".  Debugging aid only; returns nothing.
+        """
+        print("Dump of NFA for", name)
+        todo = [start]
+        for number, state in enumerate(todo):
+            print("  State", number, "(final)" if state is finish else "")
+            for label, target in state.arcs:
+                try:
+                    index = todo.index(target)
+                except ValueError:
+                    # First time we meet this state: give it the next number.
+                    index = len(todo)
+                    todo.append(target)
+                if label is None:
+                    print("    -> %d" % index)
+                else:
+                    print("    %s -> %d" % (label, index))
+
+    def dump_dfa(self, name, dfa):
+        """Print every state of `dfa` with its arcs in sorted label order.
+
+        Final states are tagged "(final)".  Debugging aid only.
+        """
+        print("Dump of DFA for", name)
+        for number, state in enumerate(dfa):
+            tag = "(final)" if state.isfinal else ""
+            print("  State", number, tag)
+            for label in sorted(state.arcs):
+                target = state.arcs[label]
+                print("    %s -> %d" % (label, dfa.index(target)))
+
+    def simplify_dfa(self, dfa):
+        """Merge equal states of `dfa` in place.
+
+        `dfa` is a list of DFAState; states are compared with
+        DFAState.__eq__.  The later of two equal states is deleted from
+        the list and arcs that pointed at it are redirected to the
+        earlier one.
+        """
+        # This is not theoretically optimal, but works well enough.
+        # Algorithm: repeatedly look for two states that have the same
+        # set of arcs (same labels pointing to the same nodes) and
+        # unify them, until things stop changing.
+
+        # dfa is a list of DFAState instances
+        changes = True
+        while changes:
+            changes = False
+            for i, state_i in enumerate(dfa):
+                for j in range(i+1, len(dfa)):
+                    state_j = dfa[j]
+                    if state_i == state_j:
+                        #print "  unify", i, j
+                        del dfa[j]
+                        for state in dfa:
+                            state.unifystate(state_j, state_i)
+                        changes = True
+                        # The list just shrank, so the range for j is
+                        # stale: leave the inner loop.
+                        break
+
+    def parse_rhs(self):
+        """Parse alternatives; return the (start, end) NFA states.
+
+        A single alternative is returned unchanged.  Several alternatives
+        are joined by a fresh start state with an unlabeled arc to each
+        alternative and a fresh end state reached from each of them.
+        """
+        # RHS: ALT ('|' ALT)*
+        alt_start, alt_end = self.parse_alt()
+        if self.value != "|":
+            return alt_start, alt_end
+        fork = NFAState()
+        join = NFAState()
+        fork.addarc(alt_start)
+        alt_end.addarc(join)
+        while self.value == "|":
+            self.gettoken()
+            alt_start, alt_end = self.parse_alt()
+            fork.addarc(alt_start)
+            alt_end.addarc(join)
+        return fork, join
+
+    def parse_alt(self):
+        """Parse a sequence of items; return the (start, end) NFA states.
+
+        Consecutive items are chained with an unlabeled arc from the end
+        of one to the start of the next.
+        """
+        # ALT: ITEM+
+        first, last = self.parse_item()
+        while True:
+            starts_item = (self.value in ("(", "[") or
+                           self.type in (token.NAME, token.STRING))
+            if not starts_item:
+                break
+            item_start, item_end = self.parse_item()
+            last.addarc(item_start)
+            last = item_end
+        return first, last
+
+    def parse_item(self):
+        """Parse an optional group or a possibly repeated atom.
+
+        Returns the (start, end) NFA states.  '[...]' adds an unlabeled
+        arc from start to end; '+' and '*' add an unlabeled arc from the
+        end back to the start, and '*' additionally returns the start
+        state as the end state.
+        """
+        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
+        if self.value == "[":
+            self.gettoken()
+            start, end = self.parse_rhs()
+            self.expect(token.OP, "]")
+            start.addarc(end)
+            return start, end
+        start, end = self.parse_atom()
+        repeat = self.value
+        if repeat not in ("+", "*"):
+            return start, end
+        self.gettoken()
+        end.addarc(start)
+        return (start, end) if repeat == "+" else (start, start)
+
+    def parse_atom(self):
+        """Parse a parenthesized RHS, a NAME or a STRING.
+
+        Returns the (start, end) NFA states; for NAME and STRING these
+        are two new states joined by an arc labeled with the token text.
+        Any other token is reported through raise_error().
+        """
+        # ATOM: '(' RHS ')' | NAME | STRING
+        if self.value == "(":
+            self.gettoken()
+            start, end = self.parse_rhs()
+            self.expect(token.OP, ")")
+            return start, end
+        if self.type in (token.NAME, token.STRING):
+            start = NFAState()
+            end = NFAState()
+            start.addarc(end, self.value)
+            self.gettoken()
+            return start, end
+        self.raise_error("expected (...) or NAME or STRING, got %s/%s",
+                         self.type, self.value)
+
+    def expect(self, type, value=None):
+        """Check the current token, advance, and return its text.
+
+        The token type must equal `type`; when `value` is not None the
+        token text must equal it as well.  A mismatch is reported through
+        raise_error().
+        """
+        mismatch = (self.type != type or
+                    (value is not None and self.value != value))
+        if mismatch:
+            self.raise_error("expected %s/%s, got %s/%s",
+                             type, value, self.type, self.value)
+        current = self.value
+        self.gettoken()
+        return current
+
+    def gettoken(self):
+        """Advance to the next token that is neither COMMENT nor NL.
+
+        Stores the token in self.type, self.value, self.begin, self.end
+        and self.line.
+        """
+        while True:
+            tup = next(self.generator)
+            if tup[0] not in (tokenize.COMMENT, tokenize.NL):
+                break
+        self.type, self.value, self.begin, self.end, self.line = tup
+        #print token.tok_name[self.type], repr(self.value)
+
+    def raise_error(self, msg, *args):
+        """Raise SyntaxError located at the end of the current token.
+
+        `msg` is %-formatted with `args`; when the arguments do not fit
+        the format, the message and the stringified arguments are joined
+        with spaces instead.  The error carries self.filename, the row
+        and column of self.end, and self.line.
+        """
+        if args:
+            try:
+                msg = msg % args
+            except Exception:
+                # A formatting failure must not hide the syntax error, but
+                # KeyboardInterrupt/SystemExit are allowed to propagate.
+                msg = " ".join([msg] + list(map(str, args)))
+        raise SyntaxError(msg, (self.filename, self.end[0],
+                                self.end[1], self.line))
+
+class NFAState(object):
+    """One NFA state: an ordered list of outgoing arcs."""
+
+    def __init__(self):
+        # (label, NFAState) pairs; a label of None marks an unlabeled arc.
+        self.arcs = []
+
+    def addarc(self, next, label=None):
+        """Append an arc to `next`, labeled with a string or unlabeled."""
+        assert isinstance(next, NFAState)
+        assert label is None or isinstance(label, str)
+        arc = (label, next)
+        self.arcs.append(arc)
+
+class DFAState(object):
+    """One DFA state, built from a set of NFA states."""
+
+    def __init__(self, nfaset, final):
+        """Store `nfaset` (a dict keyed by NFAState); the state is final
+        when `final` is one of its keys."""
+        assert isinstance(nfaset, dict)
+        assert isinstance(next(iter(nfaset)), NFAState)
+        assert isinstance(final, NFAState)
+        self.nfaset = nfaset
+        self.isfinal = final in nfaset
+        self.arcs = {} # map from label to DFAState
+
+    def addarc(self, next, label):
+        """Add the arc `label` -> `next`; a label can be used only once."""
+        assert isinstance(label, str)
+        assert label not in self.arcs
+        assert isinstance(next, DFAState)
+        self.arcs[label] = next
+
+    def unifystate(self, old, new):
+        """Point every arc that targets `old` at `new` instead."""
+        stale = [label for label, target in self.arcs.items()
+                 if target is old]
+        for label in stale:
+            self.arcs[label] = new
+
+    def __eq__(self, other):
+        # Equality test -- ignore the nfaset instance variable
+        assert isinstance(other, DFAState)
+        if self.isfinal != other.isfinal or len(self.arcs) != len(other.arcs):
+            return False
+        # Can't just return self.arcs == other.arcs, because that
+        # would invoke this method recursively, with cycles...
+        # Targets are compared by identity instead.
+        return all(target is other.arcs.get(label)
+                   for label, target in self.arcs.items())
+
+    __hash__ = None # For Py3 compatibility.
+
+def generate_grammar(filename="Grammar.txt"):
+    """Parse the grammar file `filename` and return the resulting grammar."""
+    return ParserGenerator(filename).make_grammar()
diff --git a/blib2to3/pgen2/pgen.pyi b/blib2to3/pgen2/pgen.pyi

new file mode 100644 (file)

index 0000000..1529ad0
--- /dev/null
+++ b/blib2to3/pgen2/pgen.pyi
@@ -0,0 +1,49 @@
+# Stubs for lib2to3.pgen2.pgen (Python 3.6)
+
+from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple
+from mypy_extensions import NoReturn
+
+from blib2to3.pgen2 import _Path, grammar
+from blib2to3.pgen2.tokenize import _TokenInfo
+
+# The runtime class in pgen.py is an empty subclass of grammar.Grammar.
+class PgenGrammar(grammar.Grammar): ...
+
+# Type declarations for pgen.ParserGenerator.
+class ParserGenerator:
+    filename: _Path
+    stream: IO[Text]
+    generator: Iterator[_TokenInfo]
+    # Both assigned in __init__ from the result of parse().
+    dfas: Dict[Text, List[DFAState]]
+    startsymbol: Text
+    first: Dict[Text, Dict[Text, int]]
+    def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ...
+    def make_grammar(self) -> PgenGrammar: ...
+    def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ...
+    def make_label(self, c: PgenGrammar, label: Text) -> int: ...
+    def addfirstsets(self) -> None: ...
+    def calcfirst(self, name: Text) -> None: ...
+    def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ...
+    def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ...
+    # dump_nfa only prints; the implementation has no return statement.
+    def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> None: ...
+    def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ...
+    def simplify_dfa(self, dfa: List[DFAState]) -> None: ...
+    def parse_rhs(self) -> Tuple[NFAState, NFAState]: ...
+    def parse_alt(self) -> Tuple[NFAState, NFAState]: ...
+    def parse_item(self) -> Tuple[NFAState, NFAState]: ...
+    def parse_atom(self) -> Tuple[NFAState, NFAState]: ...
+    def expect(self, type: int, value: Optional[Any] = ...) -> Text: ...
+    def gettoken(self) -> None: ...
+    # Always raises SyntaxError.
+    def raise_error(self, msg: str, *args: Any) -> NoReturn: ...
+
+# Type declarations for pgen.NFAState.
+class NFAState:
+    # (label, target) pairs; the label is None for an unlabeled arc.
+    arcs: List[Tuple[Optional[Text], NFAState]]
+    def __init__(self) -> None: ...
+    def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ...
+
+# Type declarations for pgen.DFAState.
+# NOTE: the runtime class sets __hash__ = None, so instances are unhashable.
+class DFAState:
+    nfaset: Dict[NFAState, Any]
+    isfinal: bool
+    # Maps an arc label to its target state.
+    arcs: Dict[Text, DFAState]
+    def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ...
+    def addarc(self, next: DFAState, label: Text) -> None: ...
+    def unifystate(self, old: DFAState, new: DFAState) -> None: ...
+    def __eq__(self, other: Any) -> bool: ...
+
+# The runtime default for filename is "Grammar.txt".
+def generate_grammar(filename: _Path = ...) -> PgenGrammar: ...
diff --git a/blib2to3/pgen2/token.py b/blib2to3/pgen2/token.py

new file mode 100755 (executable)

index 0000000..7599396
--- /dev/null
+++ b/blib2to3/pgen2/token.py
@@ -0,0 +1,83 @@
+#! /usr/bin/env python3
+
+"""Token constants (from "token.h")."""
+
+#  Taken from Python (r53757) and modified to include some tokens
+#   originally monkeypatched in by pgen2.tokenize
+
+# Numeric token types.  Every int assigned at module level here is also
+# picked up by the tok_name reverse mapping built below.
+#--start constants--
+ENDMARKER = 0
+NAME = 1
+NUMBER = 2
+STRING = 3
+NEWLINE = 4
+INDENT = 5
+DEDENT = 6
+LPAR = 7
+RPAR = 8
+LSQB = 9
+RSQB = 10
+COLON = 11
+COMMA = 12
+SEMI = 13
+PLUS = 14
+MINUS = 15
+STAR = 16
+SLASH = 17
+VBAR = 18
+AMPER = 19
+LESS = 20
+GREATER = 21
+EQUAL = 22
+DOT = 23
+PERCENT = 24
+BACKQUOTE = 25
+LBRACE = 26
+RBRACE = 27
+EQEQUAL = 28
+NOTEQUAL = 29
+LESSEQUAL = 30
+GREATEREQUAL = 31
+TILDE = 32
+CIRCUMFLEX = 33
+LEFTSHIFT = 34
+RIGHTSHIFT = 35
+DOUBLESTAR = 36
+PLUSEQUAL = 37
+MINEQUAL = 38
+STAREQUAL = 39
+SLASHEQUAL = 40
+PERCENTEQUAL = 41
+AMPEREQUAL = 42
+VBAREQUAL = 43
+CIRCUMFLEXEQUAL = 44
+LEFTSHIFTEQUAL = 45
+RIGHTSHIFTEQUAL = 46
+DOUBLESTAREQUAL = 47
+DOUBLESLASH = 48
+DOUBLESLASHEQUAL = 49
+AT = 50
+ATEQUAL = 51
+OP = 52
+COMMENT = 53
+NL = 54
+RARROW = 55
+ERRORTOKEN = 56
+# One more than the highest token type above.
+N_TOKENS = 57
+# Threshold used by ISTERMINAL/ISNONTERMINAL: values below it are
+# terminals, values from it upwards are nonterminals.
+NT_OFFSET = 256
+#--end constants--
+
+# Reverse mapping from token number to constant name, built from every
+# module-level global whose value is exactly an int.
+tok_name = {}
+# Iterate over a snapshot: the loop variables themselves become globals.
+for _name, _value in list(globals().items()):
+    if type(_value) is type(0):
+        tok_name[_value] = _name
+
+
+def ISTERMINAL(x):
+    """Return True when x is below NT_OFFSET."""
+    return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+    """Return True when x is NT_OFFSET or greater."""
+    return x >= NT_OFFSET
+
+def ISEOF(x):
+    """Return True when x equals ENDMARKER."""
+    return x == ENDMARKER
diff --git a/blib2to3/pgen2/token.pyi b/blib2to3/pgen2/token.pyi

new file mode 100644 (file)

index 0000000..c256af8
--- /dev/null
+++ b/blib2to3/pgen2/token.pyi
@@ -0,0 +1,73 @@
+# Stubs for lib2to3.pgen2.token (Python 3.6)
+
+import sys
+from typing import Dict, Text
+
+# Declarations mirror the constants that token.py defines, in the same
+# order.  token.py defines AT, ATEQUAL and RARROW unconditionally and has
+# no AWAIT or ASYNC, so neither version checks nor those two names
+# appear here.
+ENDMARKER: int
+NAME: int
+NUMBER: int
+STRING: int
+NEWLINE: int
+INDENT: int
+DEDENT: int
+LPAR: int
+RPAR: int
+LSQB: int
+RSQB: int
+COLON: int
+COMMA: int
+SEMI: int
+PLUS: int
+MINUS: int
+STAR: int
+SLASH: int
+VBAR: int
+AMPER: int
+LESS: int
+GREATER: int
+EQUAL: int
+DOT: int
+PERCENT: int
+BACKQUOTE: int
+LBRACE: int
+RBRACE: int
+EQEQUAL: int
+NOTEQUAL: int
+LESSEQUAL: int
+GREATEREQUAL: int
+TILDE: int
+CIRCUMFLEX: int
+LEFTSHIFT: int
+RIGHTSHIFT: int
+DOUBLESTAR: int
+PLUSEQUAL: int
+MINEQUAL: int
+STAREQUAL: int
+SLASHEQUAL: int
+PERCENTEQUAL: int
+AMPEREQUAL: int
+VBAREQUAL: int
+CIRCUMFLEXEQUAL: int
+LEFTSHIFTEQUAL: int
+RIGHTSHIFTEQUAL: int
+DOUBLESTAREQUAL: int
+DOUBLESLASH: int
+DOUBLESLASHEQUAL: int
+AT: int
+ATEQUAL: int
+OP: int
+COMMENT: int
+NL: int
+RARROW: int
+ERRORTOKEN: int
+N_TOKENS: int
+NT_OFFSET: int
+# Maps a token number back to its constant name.
+tok_name: Dict[int, Text]
+
+# Predicates implemented in token.py as comparisons against NT_OFFSET
+# (ISTERMINAL, ISNONTERMINAL) and ENDMARKER (ISEOF).
+def ISTERMINAL(x: int) -> bool: ...
+def ISNONTERMINAL(x: int) -> bool: ...
+def ISEOF(x: int) -> bool: ...
diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py

new file mode 100644 (file)

index 0000000..14560e4
--- /dev/null
+++ b/blib2to3/pgen2/tokenize.py
@@ -0,0 +1,518 @@
+# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
+# All rights reserved.
+
+"""Tokenization help for Python programs.
+
+generate_tokens(readline) is a generator that breaks a stream of
+text into Python tokens.  It accepts a readline-like method which is called
+repeatedly to get the next line of input (or "" for EOF).  It generates
+5-tuples with these members:
+
+    the token type (see token.py)
+    the token (a string)
+    the starting (row, column) indices of the token (a 2-tuple of ints)
+    the ending (row, column) indices of the token (a 2-tuple of ints)
+    the original line (string)
+
+It is designed to match the working of the Python tokenizer exactly, except
+that it produces COMMENT tokens for comments and gives type OP for all
+operators.
+
+Older entry points
+    tokenize_loop(readline, tokeneater)
+    tokenize(readline, tokeneater=printtoken)
+are the same, except instead of generating tokens, tokeneater is a callback
+function to which the 5 fields described above are passed as 5 arguments,
+each time a new token is found."""
+
+__author__ = 'Ka-Ping Yee <ping@lfw.org>'
+__credits__ = \
+    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
+
+import string, re
+from codecs import BOM_UTF8, lookup
+# Take the token constants from the sibling token module -- the same one
+# used for __all__ below -- rather than from the standard library's
+# lib2to3 copy, whose numbering is not guaranteed to match.
+from .token import *
+
+from . import token
+# Export every public name of the token module plus the entry points
+# defined in this file.
+__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
+           "generate_tokens", "untokenize"]
+del token
+
+try:
+    bytes
+except NameError:
+    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
+    # valid Python 3 code.
+    bytes = str
+
+def group(*choices):
+    """Return a regex group matching any one of `choices`."""
+    alternatives = '|'.join(choices)
+    return '(' + alternatives + ')'
+
+def any(*choices):
+    """Return a regex matching zero or more repetitions of the choices."""
+    return group(*choices) + '*'
+
+def maybe(*choices):
+    """Return a regex matching at most one occurrence of the choices."""
+    return group(*choices) + '?'
+
+# Regular-expression source strings, combined bottom-up into the Token and
+# PseudoToken patterns that are compiled further down.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+# Whitespace, any number of backslash-newline continuations, then an
+# optional comment.
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'[a-zA-Z_]\w*'
+
+# Numeric literals; digits may be separated by single underscores and
+# some integer forms accept a trailing l/L.
+Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
+Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
+Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
+Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
+Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
+Exponent = r'[eE][-+]?\d+(?:_\d+)*'
+Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
+Expfloat = r'\d+(?:_\d+)*' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+# Optional string prefix: one of u/r/b/f, or rb, br, ur, in any letter case.
+_litprefix = r"(?:[uUrRbBfF]|[rR][bB]|[bBuU][rR])?"
+# Opening of a triple-quoted string, with optional prefix.
+Triple = group(_litprefix + "'''", _litprefix + '"""')
+# Single-line ' or " string.
+String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Because of leftmost-then-longest match semantics, be sure to put the
+# longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
+                 r"//=?", r"->",
+                 r"[+\-*/%&@|^=<>]=?",
+                 r"~")
+
+Bracket = '[][(){}]'
+Special = group(r'\r?\n', r'[:;.,`@]')
+Funny = group(Operator, Bracket, Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n', Comment, Triple)
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+# Compiled forms of the patterns defined above.
+tokenprog, pseudoprog, single3prog, double3prog = list(map(
+    re.compile, (Token, PseudoToken, Single3, Double3)))
+# Maps a string opener (quote characters, optionally with a prefix) to the
+# compiled pattern for the rest of that string.  Bare single-letter
+# prefixes map to None.
+endprogs = {"'": re.compile(Single), '"': re.compile(Double),
+            "'''": single3prog, '"""': double3prog,
+            "r'''": single3prog, 'r"""': double3prog,
+            "u'''": single3prog, 'u"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "f'''": single3prog, 'f"""': double3prog,
+            "ur'''": single3prog, 'ur"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
+            "rb'''": single3prog, 'rb"""': double3prog,
+            "R'''": single3prog, 'R"""': double3prog,
+            "U'''": single3prog, 'U"""': double3prog,
+            "B'''": single3prog, 'B"""': double3prog,
+            "F'''": single3prog, 'F"""': double3prog,
+            "uR'''": single3prog, 'uR"""': double3prog,
+            "Ur'''": single3prog, 'Ur"""': double3prog,
+            "UR'''": single3prog, 'UR"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            "rB'''": single3prog, 'rB"""': double3prog,
+            "Rb'''": single3prog, 'Rb"""': double3prog,
+            "RB'''": single3prog, 'RB"""': double3prog,
+            'r': None, 'R': None,
+            'u': None, 'U': None,
+            'f': None, 'F': None,
+            'b': None, 'B': None}
+
+# Triple-quoted string openers; every key maps to itself, so the dict
+# serves as a membership table.
+triple_quoted = {}
+for t in ("'''", '"""',
+          "r'''", 'r"""', "R'''", 'R"""',
+          "u'''", 'u"""', "U'''", 'U"""',
+          "b'''", 'b"""', "B'''", 'B"""',
+          "f'''", 'f"""', "F'''", 'F"""',
+          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+          "uR'''", 'uR"""', "UR'''", 'UR"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""',
+          "rb'''", 'rb"""', "Rb'''", 'Rb"""',
+          "rB'''", 'rB"""', "RB'''", 'RB"""',):
+    triple_quoted[t] = t
+# Single-quoted string openers, stored the same way.
+single_quoted = {}
+for t in ("'", '"',
+          "r'", 'r"', "R'", 'R"',
+          "u'", 'u"', "U'", 'U"',
+          "b'", 'b"', "B'", 'B"',
+          "f'", 'f"', "F'", 'F"',
+          "ur'", 'ur"', "Ur'", 'Ur"',
+          "uR'", 'uR"', "UR'", 'UR"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"',
+          "rb'", 'rb"', "Rb'", 'Rb"',
+          "rB'", 'rB"', "RB'", 'RB"',):
+    single_quoted[t] = t
+
+# NOTE(review): presumably the tab stop used when measuring indentation;
+# the code that reads it is outside this excerpt -- confirm.
+tabsize = 8
+
+class TokenError(Exception):
+    """Error type defined by the tokenizer module."""
+
+class StopTokenizing(Exception):
+    """Exception that tokenize() catches and swallows to end the run."""
+
+def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing
+    """Print one token: start and end positions, type name, and repr.
+
+    The two position arguments are (row, column) pairs; `line` is
+    accepted but not printed.
+    """
+    srow, scol = xxx_todo_changeme
+    erow, ecol = xxx_todo_changeme1
+    fields = (srow, scol, erow, ecol, tok_name[type], repr(token))
+    print("%d,%d-%d,%d:\t%s\t%s" % fields)
+
+def tokenize(readline, tokeneater=printtoken):
+    """
+    Tokenize the input and hand every token to a callback.
+
+    readline must be a callable with the same interface as the readline()
+    method of built-in file objects: each call returns one line of input
+    as a string.
+
+    tokeneater must be a callable as well.  It is called once per token
+    with five arguments, matching the tuples produced by
+    generate_tokens().  Raising StopTokenizing from the callback ends
+    the run quietly.
+    """
+    try:
+        tokenize_loop(readline, tokeneater)
+    except StopTokenizing:
+        return
+
+# backwards compatible interface
+def tokenize_loop(readline, tokeneater):
+    """Call tokeneater(*token) for each token from generate_tokens()."""
+    for fields in generate_tokens(readline):
+        tokeneater(*fields)
+
+class Untokenizer:
+    """Rebuild source text from a stream of tokens.
+
+    Full 5-tuples are placed using their column positions.  From the
+    first 2-tuple onwards the remaining tokens are emitted in
+    "compat" mode, which only has token types and strings to work with.
+    """
+
+    def __init__(self):
+        self.tokens = []
+        self.prev_row = 1
+        self.prev_col = 0
+
+    def add_whitespace(self, start):
+        """Pad with spaces from the previous token's end column to `start`."""
+        row, col = start
+        assert row <= self.prev_row
+        col_offset = col - self.prev_col
+        if col_offset:
+            self.tokens.append(" " * col_offset)
+
+    def untokenize(self, iterable):
+        """Return the source text for the tokens in `iterable`."""
+        # Walk one shared iterator so that compat() continues after the
+        # current token instead of re-reading a list from its start.
+        stream = iter(iterable)
+        for t in stream:
+            if len(t) == 2:
+                self.compat(t, stream)
+                break
+            tok_type, token, start, end, line = t
+            self.add_whitespace(start)
+            self.tokens.append(token)
+            self.prev_row, self.prev_col = end
+            if tok_type in (NEWLINE, NL):
+                self.prev_row += 1
+                self.prev_col = 0
+        return "".join(self.tokens)
+
+    def compat(self, token, iterable):
+        """Emit `token` and then the rest of `iterable` without positions.
+
+        NAME and NUMBER tokens get a trailing space; INDENT and DEDENT
+        tokens are not emitted but maintain the indentation that is
+        written at the start of each subsequent line.
+        """
+        startline = False
+        indents = []
+        toks_append = self.tokens.append
+        # The token that triggered compat mode goes through the same
+        # handling as the rest, so it is emitted rather than dropped.
+        for source in ((token,), iterable):
+            for tok in source:
+                toknum, tokval = tok[:2]
+
+                if toknum in (NAME, NUMBER):
+                    tokval += ' '
+
+                if toknum == INDENT:
+                    indents.append(tokval)
+                    continue
+                elif toknum == DEDENT:
+                    indents.pop()
+                    continue
+                elif toknum in (NEWLINE, NL):
+                    startline = True
+                elif startline and indents:
+                    toks_append(indents[-1])
+                    startline = False
+                toks_append(tokval)
+
+# Encoding declaration inside a comment; group 1 is the encoding name.
+# This pattern is applied to decoded text.
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
+# Line that is empty, whitespace only, or a comment.  This pattern is a
+# bytes pattern and is applied to undecoded lines.
+blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
+
+def _get_normal_name(orig_enc):
+    """Imitates get_normal_name in tokenizer.c.
+
+    Returns "utf-8" or "iso-8859-1" for the recognized spellings of
+    those encodings (case-insensitive, '_' treated as '-', optionally
+    followed by a '-' suffix); any other name is returned unchanged.
+    """
+    # Only care about the first 12 characters.
+    head = orig_enc[:12].lower().replace("_", "-")
+    families = (
+        ("utf-8", ("utf-8",)),
+        ("iso-8859-1", ("latin-1", "iso-8859-1", "iso-latin-1")),
+    )
+    for canonical, spellings in families:
+        for spelling in spellings:
+            if head == spelling or head.startswith(spelling + "-"):
+                return canonical
+    return orig_enc
+
+def detect_encoding(readline):
+    """
+    Work out which encoding should be used to decode a Python source file.
+
+    readline is a callable returning one line of bytes per call, as for
+    the tokenize() generator.  It is called at most twice.  The result is
+    a pair: the encoding name (a string) and the list of lines (still
+    bytes) that were read.
+
+    The encoding comes from a utf-8 bom or from an encoding cookie as
+    specified in pep-0263.  If a bom and a cookie are both present but
+    disagree, SyntaxError is raised; an unknown charset in the cookie
+    raises SyntaxError too.  When a utf-8 bom is found, 'utf-8-sig' is
+    returned.
+
+    Without any declaration the default, 'utf-8', is returned.
+    """
+    bom_found = False
+    default = 'utf-8'
+
+    def read_or_stop():
+        # A readline that signals the end with StopIteration yields b''.
+        try:
+            return readline()
+        except StopIteration:
+            return bytes()
+
+    def find_cookie(line):
+        # Return the encoding declared on `line`, or None if there is none.
+        try:
+            text = line.decode('ascii')
+        except UnicodeDecodeError:
+            return None
+        found = cookie_re.match(text)
+        if found is None:
+            return None
+        name = _get_normal_name(found.group(1))
+        try:
+            codec = lookup(name)
+        except LookupError:
+            # This behaviour mimics the Python interpreter
+            raise SyntaxError("unknown encoding: " + name)
+
+        if not bom_found:
+            return name
+        if codec.name != 'utf-8':
+            # This behaviour mimics the Python interpreter
+            raise SyntaxError('encoding problem: utf-8')
+        return name + '-sig'
+
+    first = read_or_stop()
+    if first.startswith(BOM_UTF8):
+        bom_found = True
+        first = first[3:]
+        default = 'utf-8-sig'
+    if not first:
+        return default, []
+
+    declared = find_cookie(first)
+    if declared:
+        return declared, [first]
+    if not blank_re.match(first):
+        # Only a blank or comment-only first line lets the cookie sit on
+        # the second line.
+        return default, [first]
+
+    second = read_or_stop()
+    if not second:
+        return default, [first]
+
+    declared = find_cookie(second)
+    if declared:
+        return declared, [first, second]
+    return default, [first, second]
+
+def untokenize(iterable):
+    """Rebuild Python source text from a stream of tokens.
+
+    Every item produced by `iterable` must be a token sequence holding at
+    least two elements: the token number and the token value.  When only
+    those two elements are supplied, the resulting output is poor.
+
+    Round-trip invariant for full input:
+        Untokenized source will match input source exactly
+
+    Round-trip invariant for limited input:
+        # Output text will tokenize back to the input
+        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
+        newcode = untokenize(t1)
+        readline = iter(newcode.splitlines(1)).__next__
+        t2 = [tok[:2] for tok in generate_tokens(readline)]
+        assert t1 == t2
+    """
+    # A fresh Untokenizer is used per call, so no state leaks between calls.
+    return Untokenizer().untokenize(iterable)
+
+def generate_tokens(readline):
+    """
+    Tokenize Python source read line by line, yielding token 5-tuples.
+
+    The generate_tokens() generator requires one argument, readline, which
+    must be a callable object which provides the same interface as the
+    readline() method of built-in file objects. Each call to the function
+    should return one line of input as a string.  Alternately, readline
+    can be a callable function terminating with StopIteration:
+        readline = open(myfile).__next__    # Example of alternate readline
+
+    The generator produces 5-tuples with these members: the token type; the
+    token string; a 2-tuple (srow, scol) of ints specifying the row and
+    column where the token begins in the source; a 2-tuple (erow, ecol) of
+    ints specifying the row and column where the token ends in the source;
+    and the line on which the token was found. The line passed is the
+    logical line; continuation lines are included.
+
+    Raises TokenError when the input ends inside a multi-line string or a
+    multi-line statement, and IndentationError when a dedent does not
+    return to any enclosing indentation level.
+    """
+    # lnum: number of the line being scanned (incremented before use, so the
+    # first line is 1); parenlev: depth of currently open ([{ brackets;
+    # continued: set to 1 after a backslash continuation token.
+    lnum = parenlev = continued = 0
+    namechars, numchars = string.ascii_letters + '_', '0123456789'
+    # contstr: text collected so far for a string literal that spans lines
+    # (empty when not inside one); needcont: 1 when that string was opened
+    # with a single quote, so every line must end in a backslash-newline.
+    contstr, needcont = '', 0
+    # contline: the source lines accumulated for the string held in contstr.
+    contline = None
+    # Stack of indentation columns that are currently open.
+    indents = [0]
+
+    while 1:                                   # loop over lines in stream
+        try:
+            line = readline()
+        except StopIteration:
+            # An exhausted iterator-style readline is treated like EOF.
+            line = ''
+        lnum = lnum + 1
+        pos, max = 0, len(line)
+
+        if contstr:                            # continued string
+            if not line:
+                raise TokenError("EOF in multi-line string", strstart)
+            endmatch = endprog.match(line)
+            if endmatch:
+                # The closing quote is on this line: emit the whole string
+                # and fall through to tokenize the remainder of the line.
+                pos = end = endmatch.end(0)
+                yield (STRING, contstr + line[:end],
+                       strstart, (lnum, end), contline + line)
+                contstr, needcont = '', 0
+                contline = None
+            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
+                # Single-quoted string whose line lacks the trailing
+                # backslash-newline: report what was collected as an error.
+                yield (ERRORTOKEN, contstr + line,
+                           strstart, (lnum, len(line)), contline)
+                contstr = ''
+                contline = None
+                continue
+            else:
+                # Still inside the string: keep accumulating.
+                contstr = contstr + line
+                contline = contline + line
+                continue
+
+        elif parenlev == 0 and not continued:  # new statement
+            if not line: break
+            column = 0
+            # `tabsize` is not defined in this function; it is expected to be
+            # a module-level name.  A tab advances to the next multiple of
+            # it, and a form feed resets the column to 0.
+            while pos < max:                   # measure leading whitespace
+                if line[pos] == ' ': column = column + 1
+                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
+                elif line[pos] == '\f': column = 0
+                else: break
+                pos = pos + 1
+            # Only whitespace and no line terminator: stop tokenizing.
+            if pos == max: break
+
+            if line[pos] in '#\r\n':           # skip comments or blank lines
+                if line[pos] == '#':
+                    # Comment-only line: emit the comment without its line
+                    # ending, then the line ending as a separate NL token.
+                    comment_token = line[pos:].rstrip('\r\n')
+                    nl_pos = pos + len(comment_token)
+                    yield (COMMENT, comment_token,
+                           (lnum, pos), (lnum, pos + len(comment_token)), line)
+                    yield (NL, line[nl_pos:],
+                           (lnum, nl_pos), (lnum, len(line)), line)
+                else:
+                    # line[pos] is '\r' or '\n' in this branch, so the index
+                    # expression below is always False (0) and selects NL.
+                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
+                           (lnum, pos), (lnum, len(line)), line)
+                continue
+
+            if column > indents[-1]:           # count indents or dedents
+                indents.append(column)
+                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+            # Pop one level per DEDENT until the stack top matches `column`.
+            while column < indents[-1]:
+                if column not in indents:
+                    raise IndentationError(
+                        "unindent does not match any outer indentation level",
+                        ("<tokenize>", lnum, pos, line))
+                indents = indents[:-1]
+
+                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+
+        else:                                  # continued statement
+            if not line:
+                raise TokenError("EOF in multi-line statement", (lnum, 0))
+            continued = 0
+
+        # Scan the rest of the line token by token.  `pseudoprog`,
+        # `triple_quoted`, `single_quoted` and `endprogs` are not defined in
+        # this function; they are expected to be module-level names.
+        while pos < max:
+            pseudomatch = pseudoprog.match(line, pos)
+            if pseudomatch:                                # scan for tokens
+                start, end = pseudomatch.span(1)
+                spos, epos, pos = (lnum, start), (lnum, end), end
+                token, initial = line[start:end], line[start]
+
+                if initial in numchars or \
+                   (initial == '.' and token != '.'):      # ordinary number
+                    yield (NUMBER, token, spos, epos, line)
+                elif initial in '\r\n':
+                    # A line ending inside brackets does not end the
+                    # statement, so it is reported as NL instead of NEWLINE.
+                    newline = NEWLINE
+                    if parenlev > 0:
+                        newline = NL
+                    yield (newline, token, spos, epos, line)
+
+                elif initial == '#':
+                    assert not token.endswith("\n")
+                    yield (COMMENT, token, spos, epos, line)
+                elif token in triple_quoted:
+                    endprog = endprogs[token]
+                    endmatch = endprog.match(line, pos)
+                    if endmatch:                           # all on one line
+                        pos = endmatch.end(0)
+                        token = line[start:pos]
+                        yield (STRING, token, spos, (lnum, pos), line)
+                    else:
+                        # Remember where the string began and continue it on
+                        # the following lines (handled at the top of the loop).
+                        strstart = (lnum, start)           # multiple lines
+                        contstr = line[start:]
+                        contline = line
+                        break
+                elif initial in single_quoted or \
+                    token[:2] in single_quoted or \
+                    token[:3] in single_quoted:
+                    if token[-1] == '\n':                  # continued string
+                        strstart = (lnum, start)
+                        # NOTE(review): the `or` chain picks the first truthy
+                        # entry; presumably string-prefix characters map to a
+                        # falsy value in `endprogs` -- confirm at its definition.
+                        endprog = (endprogs[initial] or endprogs[token[1]] or
+                                   endprogs[token[2]])
+                        contstr, needcont = line[start:], 1
+                        contline = line
+                        break
+                    else:                                  # ordinary string
+                        yield (STRING, token, spos, epos, line)
+                elif initial in namechars:                 # ordinary name
+                    yield (NAME, token, spos, epos, line)
+                elif initial == '\\':                      # continued stmt
+                    # This yield is new; needed for better idempotency:
+                    yield (NL, token, spos, (lnum, pos), line)
+                    continued = 1
+                else:
+                    # Everything else is an operator; track bracket depth.
+                    if initial in '([{': parenlev = parenlev + 1
+                    elif initial in ')]}': parenlev = parenlev - 1
+                    yield (OP, token, spos, epos, line)
+            else:
+                # No token pattern matched here: emit the single character
+                # as an error token and resume scanning after it.
+                yield (ERRORTOKEN, line[pos],
+                           (lnum, pos), (lnum, pos+1), line)
+                pos = pos + 1
+
+    # End of input: close every indentation level that is still open.
+    for indent in indents[1:]:                 # pop remaining indent levels
+        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
+    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+if __name__ == '__main__':                     # testing
+    # Tokenize the file named by the first command-line argument, or stdin
+    # when no argument is given.
+    import sys
+    if len(sys.argv) > 1:
+        # The context manager closes the source file even if tokenize()
+        # raises part-way through.
+        with open(sys.argv[1]) as source_file:
+            tokenize(source_file.readline)
+    else:
+        tokenize(sys.stdin.readline)
diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi

new file mode 100644 (file)

index 0000000..62352e9
--- /dev/null
+++ b/blib2to3/pgen2/tokenize.pyi
@@ -0,0 +1,30 @@
+# Stubs for lib2to3.pgen2.tokenize (Python 3.6)
+# NOTE: Only elements from __all__ are present.
+
+from typing import Callable, Iterable, Iterator, List, Text, Tuple
+from blib2to3.pgen2.token import *  # noqa
+
+
+# (row, column) position of a token boundary in the source.
+_Coord = Tuple[int, int]
+# Signature of the `tokeneater` callback accepted by tokenize(): it takes the
+# five token fields as separate arguments.
+_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
+# Token 5-tuple as produced by generate_tokens():
+# (token type, token string, start position, end position, source line).
+_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
+
+
+class TokenError(Exception): ...
+class StopTokenizing(Exception): ...
+
+def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ...
+
+class Untokenizer:
+    tokens: List[Text]
+    prev_row: int
+    prev_col: int
+    def __init__(self) -> None: ...
+    def add_whitespace(self, start: _Coord) -> None: ...
+    def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ...
+    def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ...
+
+def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
+def generate_tokens(
+    readline: Callable[[], Text]
+) -> Iterator[_TokenInfo]: ...
diff --git a/blib2to3/pygram.py b/blib2to3/pygram.py

new file mode 100644 (file)

index 0000000..919624e
--- /dev/null
+++ b/blib2to3/pygram.py
@@ -0,0 +1,40 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Export the Python grammar and symbols."""
+
+# Python imports
+import os
+
+# Local imports
+from .pgen2 import token
+from .pgen2 import driver
+from . import pytree
+
+# The grammar file
+_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
+_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
+                                     "PatternGrammar.txt")
+
+
+class Symbols(object):
+    """Namespace object exposing a grammar's symbols as attributes."""
+
+    def __init__(self, grammar):
+        """Initializer.
+
+        For every grammar symbol (nonterminal) listed in
+        `grammar.symbol2number`, sets an attribute of that name whose value
+        is the symbol's type (an int >= 256).
+        """
+        symbol_table = grammar.symbol2number
+        for symbol_name in symbol_table:
+            setattr(self, symbol_name, symbol_table[symbol_name])
+
+
+# Grammar loaded from Grammar.txt next to this module.
+# NOTE(review): the package argument is "lib2to3" although this module lives
+# in the blib2to3 package -- confirm how load_packaged_grammar() uses it.
+python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE)
+
+# Attribute-style access to the symbol numbers of the Python grammar.
+python_symbols = Symbols(python_grammar)
+
+# Copy of the Python grammar in which "print" is no longer a keyword.
+python_grammar_no_print_statement = python_grammar.copy()
+del python_grammar_no_print_statement.keywords["print"]
+
+# Grammar loaded from PatternGrammar.txt, with its symbol numbers.
+pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE)
+pattern_symbols = Symbols(pattern_grammar)
diff --git a/blib2to3/pygram.pyi b/blib2to3/pygram.pyi

new file mode 100644 (file)

index 0000000..3dbc648
--- /dev/null
+++ b/blib2to3/pygram.pyi
@@ -0,0 +1,119 @@
+# Stubs for lib2to3.pygram (Python 3.6)
+
+from typing import Any
+from blib2to3.pgen2.grammar import Grammar
+
+class Symbols:
+    def __init__(self, grammar: Grammar) -> None: ...
+
+class python_symbols(Symbols):
+    and_expr: int
+    and_test: int
+    annassign: int
+    arglist: int
+    argument: int
+    arith_expr: int
+    assert_stmt: int
+    async_funcdef: int
+    async_stmt: int
+    atom: int
+    augassign: int
+    break_stmt: int
+    classdef: int
+    comp_for: int
+    comp_if: int
+    comp_iter: int
+    comp_op: int
+    comparison: int
+    compound_stmt: int
+    continue_stmt: int
+    decorated: int
+    decorator: int
+    decorators: int
+    del_stmt: int
+    dictsetmaker: int
+    dotted_as_name: int
+    dotted_as_names: int
+    dotted_name: int
+    encoding_decl: int
+    eval_input: int
+    except_clause: int
+    exec_stmt: int
+    expr: int
+    expr_stmt: int
+    exprlist: int
+    factor: int
+    file_input: int
+    flow_stmt: int
+    for_stmt: int
+    funcdef: int
+    global_stmt: int
+    if_stmt: int
+    import_as_name: int
+    import_as_names: int
+    import_from: int
+    import_name: int
+    import_stmt: int
+    lambdef: int
+    listmaker: int
+    not_test: int
+    old_comp_for: int
+    old_comp_if: int
+    old_comp_iter: int
+    old_lambdef: int
+    old_test: int
+    or_test: int
+    parameters: int
+    pass_stmt: int
+    power: int
+    print_stmt: int
+    raise_stmt: int
+    return_stmt: int
+    shift_expr: int
+    simple_stmt: int
+    single_input: int
+    sliceop: int
+    small_stmt: int
+    star_expr: int
+    stmt: int
+    subscript: int
+    subscriptlist: int
+    suite: int
+    term: int
+    test: int
+    testlist: int
+    testlist1: int
+    testlist_gexp: int
+    testlist_safe: int
+    testlist_star_expr: int
+    tfpdef: int
+    tfplist: int
+    tname: int
+    trailer: int
+    try_stmt: int
+    typedargslist: int
+    varargslist: int
+    vfpdef: int
+    vfplist: int
+    vname: int
+    while_stmt: int
+    with_item: int
+    with_stmt: int
+    with_var: int
+    xor_expr: int
+    yield_arg: int
+    yield_expr: int
+    yield_stmt: int
+
+class pattern_symbols(Symbols):
+    Alternative: int
+    Alternatives: int
+    Details: int
+    Matcher: int
+    NegatedUnit: int
+    Repeater: int
+    Unit: int
+
+python_grammar: Grammar
+python_grammar_no_print_statement: Grammar
+pattern_grammar: Grammar
diff --git a/blib2to3/pytree.py b/blib2to3/pytree.py

new file mode 100644 (file)

index 0000000..693366f
--- /dev/null
+++ b/blib2to3/pytree.py
@@ -0,0 +1,854 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""
+Python parse tree definitions.
+
+This is a very concrete parse tree; we need to keep every token and
+even the comments and whitespace between tokens.
+
+There's also a pattern matching implementation here.
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+import sys
+from io import StringIO
+
+HUGE = 0x7FFFFFFF  # maximum repeat count, default max
+
+_type_reprs = {}
+def type_repr(type_num):
+    """Return the symbol name for `type_num`, or `type_num` itself if unknown.
+
+    The number-to-name table is built on first use from the attributes of
+    `python_symbols`; the import is local to this function.  Unknown numbers
+    are stored in the table mapped to themselves.
+    """
+    global _type_reprs
+    if not _type_reprs:
+        from .pygram import python_symbols
+        # printing tokens is possible but not as useful
+        # from .pgen2 import token // token.__dict__.items():
+        _type_reprs.update(
+            (number, symbol_name)
+            for symbol_name, number in vars(python_symbols).items()
+            if type(number) is int
+        )
+    return _type_reprs.setdefault(type_num, type_num)
+
+class Base(object):
+
+    """
+    Abstract base class for Node and Leaf.
+
+    This provides some default functionality and boilerplate using the
+    template pattern.
+
+    A node may be a subnode of at most one parent.
+    """
+
+    # Default values for instance variables
+    type = None    # int: token number (< 256) or symbol number (>= 256)
+    parent = None  # Parent node pointer, or None
+    children = ()  # Tuple of subnodes
+    was_changed = False
+    was_checked = False
+
+    def __new__(cls, *args, **kwds):
+        """Constructor that prevents Base from being instantiated."""
+        assert cls is not Base, "Cannot instantiate Base"
+        return object.__new__(cls)
+
+    def __eq__(self, other):
+        """
+        Compare two nodes for equality.
+
+        Returns NotImplemented when the two objects are not of exactly the
+        same class; otherwise delegates to the method _eq().
+        """
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return self._eq(other)
+
+    # Defining __eq__ without __hash__ makes instances unhashable; this
+    # states that explicitly.
+    __hash__ = None # For Py3 compatibility.
+
+    def _eq(self, other):
+        """
+        Compare two nodes for equality.
+
+        This is called by __eq__ (and therefore also by the default `!=`).
+        It is only called if the two nodes have the same class.  This must
+        be implemented by the concrete subclass.
+        Nodes should be considered equal if they have the same structure,
+        ignoring the prefix string and other context information.
+        """
+        raise NotImplementedError
+
+    def clone(self):
+        """
+        Return a cloned (deep) copy of self.
+
+        This must be implemented by the concrete subclass.
+        """
+        raise NotImplementedError
+
+    def post_order(self):
+        """
+        Return a post-order iterator for the tree.
+
+        This must be implemented by the concrete subclass.
+        """
+        raise NotImplementedError
+
+    def pre_order(self):
+        """
+        Return a pre-order iterator for the tree.
+
+        This must be implemented by the concrete subclass.
+        """
+        raise NotImplementedError
+
+    def replace(self, new):
+        """
+        Replace this node with a new one in the parent.
+
+        `new` may be a single node or a list of nodes; a list is spliced
+        into the parent's children at this node's position.  Afterwards the
+        replacement nodes point at the parent and this node has no parent.
+        """
+        assert self.parent is not None, str(self)
+        assert new is not None
+        if not isinstance(new, list):
+            new = [new]
+        l_children = []
+        found = False
+        for ch in self.parent.children:
+            if ch is self:
+                # This node must occur exactly once among its siblings.
+                assert not found, (self.parent.children, self, new)
+                l_children.extend(new)
+                found = True
+            else:
+                l_children.append(ch)
+        assert found, (self.parent.children, self, new)
+        self.parent.changed()
+        self.parent.children = l_children
+        for x in new:
+            x.parent = self.parent
+        self.parent = None
+
+    def get_lineno(self):
+        """
+        Return the line number which generated the invocant node.
+
+        Follows first children down to a Leaf and returns its lineno;
+        returns None if a node without children is reached first.
+        """
+        node = self
+        while not isinstance(node, Leaf):
+            if not node.children:
+                return None
+            node = node.children[0]
+        return node.lineno
+
+    def changed(self):
+        """Mark this node and all of its ancestors as changed."""
+        if self.parent:
+            self.parent.changed()
+        self.was_changed = True
+
+    def remove(self):
+        """
+        Remove the node from the tree. Returns the position of the node in its
+        parent's children before it was removed, or None if the node has no
+        parent (or is not found among the parent's children).
+        """
+        if self.parent:
+            # Search by identity; == would compare structure instead.
+            for i, node in enumerate(self.parent.children):
+                if node is self:
+                    self.parent.changed()
+                    del self.parent.children[i]
+                    self.parent = None
+                    return i
+        return None
+
+    @property
+    def next_sibling(self):
+        """
+        The node immediately following the invocant in their parent's children
+        list. If the invocant does not have a next sibling, it is None
+        """
+        if self.parent is None:
+            return None
+
+        # Can't use index(); we need to test by identity
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                try:
+                    return self.parent.children[i+1]
+                except IndexError:
+                    return None
+        return None
+
+    @property
+    def prev_sibling(self):
+        """
+        The node immediately preceding the invocant in their parent's children
+        list. If the invocant does not have a previous sibling, it is None.
+        """
+        if self.parent is None:
+            return None
+
+        # Can't use index(); we need to test by identity
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                if i == 0:
+                    return None
+                return self.parent.children[i-1]
+        return None
+
+    def leaves(self):
+        """Yield the leaves below this node, in child order."""
+        for child in self.children:
+            yield from child.leaves()
+
+    def depth(self):
+        """Return the number of ancestors of this node (0 for a root)."""
+        if self.parent is None:
+            return 0
+        return 1 + self.parent.depth()
+
+    def get_suffix(self):
+        """
+        Return the string immediately following the invocant node. This is
+        effectively equivalent to node.next_sibling.prefix
+        """
+        next_sib = self.next_sibling
+        if next_sib is None:
+            return ""
+        return next_sib.prefix
+
+
+class Node(Base):
+
+    """Concrete implementation for interior nodes."""
+
+    def __init__(self, type, children,
+                 context=None,
+                 prefix=None,
+                 fixers_applied=None):
+        """
+        Initializer.
+
+        Takes a type constant (a symbol number >= 256), a sequence of
+        child nodes, and an optional context keyword argument (accepted but
+        not used by this initializer).  If `prefix` is given it is assigned
+        through the `prefix` property; `fixers_applied`, if non-empty, is
+        stored as a copy.
+
+        As a side effect, the parent pointers of the children are updated.
+        """
+        assert type >= 256, type
+        self.type = type
+        self.children = list(children)
+        for ch in self.children:
+            assert ch.parent is None, repr(ch)
+            ch.parent = self
+        if prefix is not None:
+            self.prefix = prefix
+        if fixers_applied:
+            self.fixers_applied = fixers_applied[:]
+        else:
+            self.fixers_applied = None
+
+    def __repr__(self):
+        """Return a canonical string representation."""
+        return "%s(%s, %r)" % (self.__class__.__name__,
+                               type_repr(self.type),
+                               self.children)
+
+    def __unicode__(self):
+        """
+        Return a pretty string representation.
+
+        This reproduces the input source exactly.
+        """
+        return "".join(map(str, self.children))
+
+    if sys.version_info > (3, 0):
+        __str__ = __unicode__
+
+    def _eq(self, other):
+        """Compare two nodes for equality."""
+        return (self.type, self.children) == (other.type, other.children)
+
+    def clone(self):
+        """Return a cloned (deep) copy of self."""
+        return Node(self.type, [ch.clone() for ch in self.children],
+                    fixers_applied=self.fixers_applied)
+
+    def post_order(self):
+        """Return a post-order iterator for the tree."""
+        for child in self.children:
+            yield from child.post_order()
+        yield self
+
+    def pre_order(self):
+        """Return a pre-order iterator for the tree."""
+        yield self
+        for child in self.children:
+            yield from child.pre_order()
+
+    @property
+    def prefix(self):
+        """
+        The whitespace and comments preceding this node in the input.
+        """
+        if not self.children:
+            return ""
+        return self.children[0].prefix
+
+    @prefix.setter
+    def prefix(self, prefix):
+        # The prefix lives on the first child; with no children there is
+        # nowhere to store it and the assignment is ignored.
+        if self.children:
+            self.children[0].prefix = prefix
+
+    def set_child(self, i, child):
+        """
+        Equivalent to 'node.children[i] = child'. This method also sets the
+        child's parent attribute appropriately.
+
+        Raises IndexError for an invalid index without modifying `child`.
+        """
+        # Detach the old child first.  Doing it after attaching the new one
+        # would orphan `child` whenever it is already the child at index i.
+        self.children[i].parent = None
+        child.parent = self
+        self.children[i] = child
+        self.changed()
+
+    def insert_child(self, i, child):
+        """
+        Equivalent to 'node.children.insert(i, child)'. This method also sets
+        the child's parent attribute appropriately.
+        """
+        child.parent = self
+        self.children.insert(i, child)
+        self.changed()
+
+    def append_child(self, child):
+        """
+        Equivalent to 'node.children.append(child)'. This method also sets the
+        child's parent attribute appropriately.
+        """
+        child.parent = self
+        self.children.append(child)
+        self.changed()
+
+
+
+
+class Leaf(Base):
+
+    """Concrete implementation for leaf nodes."""
+
+    # Default values for instance variables
+    _prefix = ""  # Whitespace and comments preceding this token in the input
+    lineno = 0    # Line where this token starts in the input
+    column = 0    # Column where this token starts in the input
+
+    def __init__(self, type, value,
+                 context=None,
+                 prefix=None,
+                 fixers_applied=None):
+        """
+        Initializer.
+
+        Takes a type constant (a token number < 256), a string value, and an
+        optional context keyword argument of the form
+        (prefix, (lineno, column)).  An explicit `prefix` overrides the one
+        from the context.  `fixers_applied` is stored as a copy; None (the
+        default) means an empty list.
+        """
+        assert 0 <= type < 256, type
+        if context is not None:
+            self._prefix, (self.lineno, self.column) = context
+        self.type = type
+        self.value = value
+        if prefix is not None:
+            # Assign the backing field directly: the property setter would
+            # call changed() on a leaf that is still being constructed.
+            self._prefix = prefix
+        # Copy so that this leaf never shares the caller's sequence.
+        if fixers_applied is None:
+            self.fixers_applied = []
+        else:
+            self.fixers_applied = fixers_applied[:]
+
+    def __repr__(self):
+        """Return a canonical string representation."""
+        from .pgen2.token import tok_name
+        return "%s(%s, %r)" % (self.__class__.__name__,
+                               tok_name.get(self.type, self.type),
+                               self.value)
+
+    def __unicode__(self):
+        """
+        Return a pretty string representation.
+
+        This reproduces the input source exactly.
+        """
+        return self.prefix + str(self.value)
+
+    if sys.version_info > (3, 0):
+        __str__ = __unicode__
+
+    def _eq(self, other):
+        """Compare two nodes for equality."""
+        return (self.type, self.value) == (other.type, other.value)
+
+    def clone(self):
+        """Return a cloned (deep) copy of self."""
+        return Leaf(self.type, self.value,
+                    (self.prefix, (self.lineno, self.column)),
+                    fixers_applied=self.fixers_applied)
+
+    def leaves(self):
+        """Yield this leaf itself."""
+        yield self
+
+    def post_order(self):
+        """Return a post-order iterator for the tree."""
+        yield self
+
+    def pre_order(self):
+        """Return a pre-order iterator for the tree."""
+        yield self
+
+    @property
+    def prefix(self):
+        """
+        The whitespace and comments preceding this token in the input.
+        """
+        return self._prefix
+
+    @prefix.setter
+    def prefix(self, prefix):
+        # Changing the prefix counts as a modification of the tree.
+        self.changed()
+        self._prefix = prefix
+
+
+def convert(gr, raw_node):
+    """
+    Turn the parser's raw node tuple into a Node or Leaf instance.
+
+    The parser driver is given this function and invokes it each time the
+    reduction of a grammar rule yields a new complete node, so that the tree
+    is built strictly bottom-up.  `raw_node` is unpacked as
+    (type, value, context, children).
+    """
+    type, value, context, children = raw_node
+    is_interior = children or type in gr.number2symbol
+    if not is_interior:
+        return Leaf(type, value, context=context)
+    # A node with exactly one child is collapsed into that child instead of
+    # being wrapped in a new Node.
+    if len(children) == 1:
+        return children[0]
+    return Node(type, children, context=context)
+
+
+class BasePattern(object):
+
+    """
+    A tree matching pattern.
+
+    A pattern looks for a specific node type (token or symbol) and,
+    optionally, for specific content.
+
+    This class is abstract.  The three concrete subclasses are:
+
+    - LeafPattern matches a single leaf node;
+    - NodePattern matches a single node (usually non-leaf);
+    - WildcardPattern matches a sequence of nodes of variable length.
+    """
+
+    # Defaults for instance variables
+    type = None     # Node type (token if < 256, symbol if >= 256)
+    content = None  # Optional content matching pattern
+    name = None     # Optional name used to store match in results dict
+
+    def __new__(cls, *args, **kwds):
+        """Constructor that refuses to instantiate BasePattern itself."""
+        assert cls is not BasePattern, "Cannot instantiate BasePattern"
+        return object.__new__(cls)
+
+    def __repr__(self):
+        # Show type, content and name, dropping trailing None values.
+        fields = [type_repr(self.type), self.content, self.name]
+        while fields and fields[-1] is None:
+            fields.pop()
+        rendered = ", ".join(repr(field) for field in fields)
+        return "%s(%s)" % (self.__class__.__name__, rendered)
+
+    def optimize(self):
+        """
+        Hook that subclasses may override to perform optimizations.
+
+        Returns either self or another node with the same effect.
+        """
+        return self
+
+    def match(self, node, results=None):
+        """
+        Does this pattern exactly match a node?
+
+        Returns True on a match and False otherwise.
+
+        When results is not None it must be a dict; it is updated with the
+        nodes matching named subpatterns.
+
+        Default implementation for non-wildcard patterns.
+        """
+        type_mismatch = self.type is not None and node.type != self.type
+        if type_mismatch:
+            return False
+        if self.content is not None:
+            # Collect sub-results in a scratch dict so that the caller's
+            # dict is only touched once the content has matched.
+            scratch = {} if results is not None else None
+            if not self._submatch(node, scratch):
+                return False
+            if scratch:
+                results.update(scratch)
+        if results is not None and self.name:
+            results[self.name] = node
+        return True
+
+    def match_seq(self, nodes, results=None):
+        """
+        Does this pattern exactly match a sequence of nodes?
+
+        Default implementation for non-wildcard patterns: only a sequence of
+        exactly one node can match.
+        """
+        return len(nodes) == 1 and self.match(nodes[0], results)
+
+    def generate_matches(self, nodes):
+        """
+        Generator yielding all matches for this pattern.
+
+        Default implementation for non-wildcard patterns: yields at most one
+        (count, results) pair, with count 1, for the first node.
+        """
+        if not nodes:
+            return
+        captured = {}
+        if self.match(nodes[0], captured):
+            yield 1, captured
+
+
+
+
+class LeafPattern(BasePattern):
+
+    def __init__(self, type=None, content=None, name=None):
+        """
+        Initializer.  Type, content, and name are all optional.
+
+        A given type must be a token type (< 256).  Without a type the
+        pattern matches any *leaf* node; the content may still be required.
+
+        A given content must be a string.
+
+        When a name is given, the matching node is stored in the results
+        dict under that key.
+        """
+        assert type is None or 0 <= type < 256, type
+        assert content is None or isinstance(content, str), repr(content)
+        self.type = type
+        self.content = content
+        self.name = name
+
+    def match(self, node, results=None):
+        """Override match() so that only leaf nodes can match."""
+        return isinstance(node, Leaf) and BasePattern.match(self, node, results)
+
+    def _submatch(self, node, results=None):
+        """
+        Match the pattern's content against the leaf's value.
+
+        Assumes the node type already matches and self.content is not None.
+
+        Returns True on a match and False otherwise.  The results argument
+        is accepted for interface compatibility and is not modified here.
+        """
+        return self.content == node.value
+
+
+
+
+class NodePattern(BasePattern):
+
+    # Set to True on instances whose content contains a WildcardPattern.
+    wildcards = False
+
+    def __init__(self, type=None, content=None, name=None):
+        """
+        Initializer.  Type, content, and name are all optional.
+
+        A given type must be a symbol type (>= 256).  With type None the
+        pattern matches *any* single node (leaf or not), except when
+        content is not None, in which case it only matches non-leaf nodes
+        that also match the content pattern.
+
+        Content, if not None, must be a sequence of Patterns that must
+        match the node's children exactly.  If the content is given, the
+        type must not be None.
+
+        When a name is given, the matching node is stored in the results
+        dict under that key.
+        """
+        assert type is None or type >= 256, type
+        if content is not None:
+            assert not isinstance(content, str), repr(content)
+            content = list(content)
+            for index, subpattern in enumerate(content):
+                assert isinstance(subpattern, BasePattern), (index, subpattern)
+            if any(isinstance(sub, WildcardPattern) for sub in content):
+                self.wildcards = True
+        self.type = type
+        self.content = content
+        self.name = name
+
+    def _submatch(self, node, results=None):
+        """
+        Match the pattern's content against the node's children.
+
+        Assumes the node type already matches and self.content is not None.
+
+        Returns True on a match and False otherwise.
+
+        When results is not None it must be a dict; it is updated with the
+        nodes matching named subpatterns.
+
+        The results dict may have been updated even when False is returned.
+        """
+        kids = node.children
+        if not self.wildcards:
+            # Without wildcards the subpatterns pair up one-to-one with the
+            # children, so the counts must agree.
+            if len(self.content) != len(kids):
+                return False
+            return all(subpattern.match(kid, results)
+                       for subpattern, kid in zip(self.content, kids))
+        # With wildcards, accept the first generated match that consumes
+        # every child.
+        total = len(kids)
+        for consumed, captured in generate_matches(self.content, kids):
+            if consumed == total:
+                if results is not None:
+                    results.update(captured)
+                return True
+        return False
+
+
+
+
+class WildcardPattern(BasePattern):
+
+    """
+    A wildcard pattern can match zero or more nodes.
+
+    This has all the flexibility needed to implement patterns like:
+
+    .*      .+      .?      .{m,n}
+    (a b c | d e | f)
+    (...)*  (...)+  (...)?  (...){m,n}
+
+    except it always uses non-greedy matching.
+    """
+
+    def __init__(self, content=None, min=0, max=HUGE, name=None):
+        """
+        Initializer.
+
+        Args:
+            content: optional sequence of subsequences of patterns;
+                     if absent, matches one node;
+                     if present, each subsequence is an alternative [*]
+            min: optional minimum number of times to match, default 0
+            max: optional maximum number of times to match, default HUGE
+            name: optional name assigned to this match
+
+        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
+            equivalent to (a b c | d e | f g h); if content is None,
+            this is equivalent to '.' in regular expression terms.
+            The min and max parameters work as follows:
+                min=0, max=HUGE: .*
+                min=1, max=HUGE: .+
+                min=0, max=1: .?
+                min=1, max=1: .
+            If content is not None, replace the dot with the parenthesized
+            list of alternatives, e.g. (a b c | d e | f g h)*
+        """
+        assert 0 <= min <= max <= HUGE, (min, max)
+        if content is not None:
+            content = tuple(map(tuple, content))  # Protect against alterations
+            # Check sanity of alternatives
+            assert len(content), repr(content)  # Can't have zero alternatives
+            for alt in content:
+                assert len(alt), repr(alt)  # Can't have empty alternatives
+        self.content = content
+        self.min = min
+        self.max = max
+        self.name = name
+
+    def optimize(self):
+        """
+        Optimize certain stacked wildcard patterns.
+
+        Returns a simpler equivalent pattern when one of these cases
+        applies, otherwise returns self:
+
+        - no content and min == max == 1: an unnamed-or-named NodePattern;
+        - min == max == 1 around a single subpattern with the same name:
+          that subpattern, itself optimized;
+        - a WildcardPattern directly wrapping a single WildcardPattern with
+          the same name, both with min <= 1: one WildcardPattern whose
+          bounds are the products of the two sets of bounds.
+        """
+        subpattern = None
+        if (self.content is not None and
+            len(self.content) == 1 and len(self.content[0]) == 1):
+            subpattern = self.content[0][0]
+        if self.min == 1 and self.max == 1:
+            if self.content is None:
+                return NodePattern(name=self.name)
+            if subpattern is not None and self.name == subpattern.name:
+                return subpattern.optimize()
+        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
+            subpattern.min <= 1 and self.name == subpattern.name):
+            # Clamp the product: multiplying two large bounds (e.g. the
+            # default max=HUGE on both levels) would otherwise exceed HUGE
+            # and trip the "max <= HUGE" assertion in __init__.
+            return WildcardPattern(subpattern.content,
+                                   self.min*subpattern.min,
+                                   min(self.max*subpattern.max, HUGE),
+                                   subpattern.name)
+        return self
+
+    def match(self, node, results=None):
+        """Does this pattern exactly match a node?"""
+        return self.match_seq([node], results)
+
+    def match_seq(self, nodes, results=None):
+        """
+        Does this pattern exactly match a sequence of nodes?
+
+        Returns True as soon as generate_matches() yields a match that
+        covers all of nodes.  In that case, if results is not None, it is
+        updated with that match's named submatches and, when this pattern
+        has a name, with a list copy of nodes stored under that name.
+        """
+        for c, r in self.generate_matches(nodes):
+            if c == len(nodes):
+                if results is not None:
+                    results.update(r)
+                    if self.name:
+                        results[self.name] = list(nodes)
+                return True
+        return False
+
+    def generate_matches(self, nodes):
+        """
+        Generator yielding matches for a sequence of nodes.
+
+        Args:
+            nodes: sequence of nodes
+
+        Yields:
+            (count, results) tuples where:
+            count: the match comprises nodes[:count];
+            results: dict containing named submatches.
+        """
+        if self.content is None:
+            # Shortcut for special case (see __init__.__doc__)
+            for count in range(self.min, 1 + min(len(nodes), self.max)):
+                r = {}
+                if self.name:
+                    r[self.name] = nodes[:count]
+                yield count, r
+        elif self.name == "bare_name":
+            # Dedicated matcher; it produces exactly one (count, results) pair.
+            yield self._bare_name_matches(nodes)
+        else:
+            # Silence stderr while the recursive matcher runs: hitting the
+            # recursion limit usually results in some ugly messages about how
+            # RuntimeErrors are being ignored. We only have to do this on
+            # CPython, though, because other implementations don't have this
+            # nasty bug in the first place.
+            # Since this is a generator, sys.stderr stays replaced while it is
+            # suspended at a yield; the finally clause puts it back.
+            if hasattr(sys, "getrefcount"):
+                save_stderr = sys.stderr
+                sys.stderr = StringIO()
+            try:
+                for count, r in self._recursive_matches(nodes, 0):
+                    if self.name:
+                        r[self.name] = nodes[:count]
+                    yield count, r
+            except RuntimeError:
+                # We fall back to the iterative pattern matching scheme if the recursive
+                # scheme hits the recursion limit.  The fallback starts over
+                # from the beginning of nodes.
+                for count, r in self._iterative_matches(nodes):
+                    if self.name:
+                        r[self.name] = nodes[:count]
+                    yield count, r
+            finally:
+                if hasattr(sys, "getrefcount"):
+                    sys.stderr = save_stderr
+
+    def _iterative_matches(self, nodes):
+        """
+        Helper to iteratively yield the matches.
+
+        Yields (count, results) tuples breadth-first: first the empty match
+        (if min allows it), then matches using one alternative, then each
+        of those extended by one more alternative, and so on.
+        """
+        nodelen = len(nodes)
+        if 0 >= self.min:
+            yield 0, {}
+
+        results = []
+        # generate matches that use just one alt from self.content
+        for alt in self.content:
+            for c, r in generate_matches(alt, nodes):
+                yield c, r
+                results.append((c, r))
+
+        # for each match, iterate down the nodes
+        while results:
+            new_results = []
+            for c0, r0 in results:
+                # stop if the entire set of nodes has been matched
+                # NOTE(review): c0 is a count of matched nodes, whereas
+                # self.max is documented as a repetition bound -- confirm
+                # that comparing the two is intended.
+                if c0 < nodelen and c0 <= self.max:
+                    for alt in self.content:
+                        for c1, r1 in generate_matches(alt, nodes[c0:]):
+                            # Skip empty extensions so every round makes progress.
+                            if c1 > 0:
+                                r = {}
+                                r.update(r0)
+                                r.update(r1)
+                                yield c0 + c1, r
+                                new_results.append((c0 + c1, r))
+            results = new_results
+
+    def _bare_name_matches(self, nodes):
+        """
+        Special optimized matcher for bare_name.
+
+        Consumes nodes from the front for as long as the first pattern of
+        some alternative matches the next node.  Returns a single
+        (count, results) tuple; results also maps self.name to nodes[:count].
+        """
+        count = 0
+        r = {}
+        done = False
+        limit = len(nodes)
+        while not done and count < limit:
+            done = True
+            for leaf in self.content:
+                if leaf[0].match(nodes[count], r):
+                    count += 1
+                    done = False
+                    break
+        r[self.name] = nodes[:count]
+        return count, r
+
+    def _recursive_matches(self, nodes, count):
+        """
+        Helper to recursively yield the matches.
+
+        count is the number of repetitions already matched by the callers;
+        the yielded (count, results) tuples are relative to nodes.
+        """
+        assert self.content is not None
+        if count >= self.min:
+            yield 0, {}
+        if count < self.max:
+            for alt in self.content:
+                for c0, r0 in generate_matches(alt, nodes):
+                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
+                        r = {}
+                        r.update(r0)
+                        r.update(r1)
+                        yield c0 + c1, r
+
+
+class NegatedPattern(BasePattern):
+
+    def __init__(self, content=None):
+        """
+        Initializer.
+
+        The argument is either a pattern or None.  If it is None, this
+        only matches an empty sequence (effectively '$' in regex
+        lingo).  If it is not None, this matches whenever the argument
+        pattern doesn't have any matches.
+        """
+        if content is not None:
+            assert isinstance(content, BasePattern), repr(content)
+        self.content = content
+
+    def match(self, node, results=None):
+        """
+        Always returns False.
+
+        The results argument is accepted (and ignored) so that this method
+        can be called as match(node, results) like the other patterns'
+        match() methods.
+        """
+        # We never match a node in its entirety
+        return False
+
+    def match_seq(self, nodes, results=None):
+        """
+        Returns True only for an empty sequence of nodes.
+
+        The results argument is accepted for call compatibility and is
+        never updated.
+        """
+        # We only match an empty sequence of nodes in its entirety
+        return len(nodes) == 0
+
+    def generate_matches(self, nodes):
+        """
+        Yield a single zero-length match (0, {}) when the negation holds.
+
+        With no content this holds only for an empty nodes sequence;
+        otherwise it holds when the content pattern yields no match at all.
+        """
+        if self.content is None:
+            # Return a match if there is an empty sequence
+            if len(nodes) == 0:
+                yield 0, {}
+        else:
+            # Return a match if the argument pattern has no matches
+            for _match in self.content.generate_matches(nodes):
+                return
+            yield 0, {}
+
+
+def generate_matches(patterns, nodes):
+    """
+    Generate every way a sequence of patterns can match a prefix of nodes.
+
+    Args:
+        patterns: a sequence of patterns
+        nodes: a sequence of nodes
+
+    Yields:
+        (count, results) tuples where:
+        count: all of the patterns, in order, match nodes[:count];
+        results: dict containing named submatches.
+    """
+    if not patterns:
+        # No patterns left: the empty prefix matches trivially.
+        yield 0, {}
+        return
+    head, tail = patterns[0], patterns[1:]
+    for head_count, head_results in head.generate_matches(nodes):
+        if not tail:
+            yield head_count, head_results
+            continue
+        # Match the remaining patterns against whatever the head left over.
+        for tail_count, tail_results in generate_matches(tail, nodes[head_count:]):
+            merged = {}
+            merged.update(head_results)
+            merged.update(tail_results)
+            yield head_count + tail_count, merged
diff --git a/blib2to3/pytree.pyi b/blib2to3/pytree.pyi

new file mode 100644 (file)

index 0000000..eb7320b
--- /dev/null
+++ b/blib2to3/pytree.pyi
@@ -0,0 +1,86 @@
+# Stubs for lib2to3.pytree (Python 3.6)
+
+import sys
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Text, Tuple, TypeVar, Union
+
+from blib2to3.pgen2.grammar import Grammar
+
+_P = TypeVar('_P')
+# A tree element: either an interior Node or a Leaf.
+_NL = Union[Node, Leaf]
+_Context = Tuple[Text, int, int]
+# NOTE(review): pytree.py's WildcardPattern stores lists of nodes under a
+# pattern's name, so values are not always a single node -- confirm whether
+# this alias should be widened.
+_Results = Dict[Text, _NL]
+_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]]
+# Signature of a converter callable taking a grammar and a raw node.
+_Convert = Callable[[Grammar, _RawNode], Any]
+
+# Default upper bound of WildcardPattern's max argument in pytree.py.
+HUGE: int
+
+# Declared as taking a numeric node type and returning text; the
+# implementation lives in pytree.py.
+def type_repr(type_num: int) -> Text: ...
+
+# Attributes and methods declared for both Node and Leaf, which subclass Base
+# in this stub.
+class Base:
+    type: int
+    parent: Optional[Node]
+    prefix: Text
+    children: List[_NL]
+    was_changed: bool
+    was_checked: bool
+    def __eq__(self, other: Any) -> bool: ...
+    # _P ties the argument and return types to the type of self.
+    def _eq(self: _P, other: _P) -> bool: ...
+    def clone(self: _P) -> _P: ...
+    def post_order(self) -> Iterator[_NL]: ...
+    def pre_order(self) -> Iterator[_NL]: ...
+    def replace(self, new: Union[_NL, List[_NL]]) -> None: ...
+    def get_lineno(self) -> int: ...
+    def changed(self) -> None: ...
+    def remove(self) -> Optional[int]: ...
+    @property
+    def next_sibling(self) -> Optional[_NL]: ...
+    @property
+    def prev_sibling(self) -> Optional[_NL]: ...
+    def leaves(self) -> Iterator[Leaf]: ...
+    def depth(self) -> int: ...
+    def get_suffix(self) -> Text: ...
+    # Only declared when type checking against Python 2.
+    if sys.version_info < (3,):
+        def get_prefix(self) -> Text: ...
+        def set_prefix(self, prefix: Text) -> None: ...
+
+# Interior tree element: constructed from a type and a list of children.
+class Node(Base):
+    fixers_applied: List[Any]
+    def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ...
+    def set_child(self, i: int, child: _NL) -> None: ...
+    def insert_child(self, i: int, child: _NL) -> None: ...
+    def append_child(self, child: _NL) -> None: ...
+
+# Terminal tree element: constructed from a type and a text value.
+class Leaf(Base):
+    lineno: int
+    column: int
+    value: Text
+    fixers_applied: List[Any]
+    # NOTE(review): fixers_applied is not Optional here, unlike in
+    # Node.__init__ -- confirm the difference is intentional.
+    def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ...
+
+# Takes the same arguments as the _Convert callable type (a grammar and a raw
+# node) and is declared to return a Node or a Leaf.
+def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ...
+
+# Common interface of the pattern classes declared in this stub.
+class BasePattern:
+    # NOTE(review): pytree.py's NodePattern stores whatever type it was given,
+    # including None -- confirm whether this should be Optional[int].
+    type: int
+    # NOTE(review): in pytree.py only LeafPattern compares content with text;
+    # NodePattern stores a list of patterns, WildcardPattern a tuple of tuples
+    # and NegatedPattern a single pattern -- confirm whether Text is too narrow.
+    content: Optional[Text]
+    name: Optional[Text]
+    def optimize(self) -> BasePattern: ...  # sic, subclasses are free to optimize themselves into different patterns
+    def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ...
+    def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ...
+    # Yields (count, results) pairs; count is how many leading nodes matched.
+    def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
+
+# Pattern for a single Leaf; pytree.py compares content with the leaf's value.
+class LeafPattern(BasePattern):
+    def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ...
+
+class NodePattern(BasePattern):
+    # True on instances whose content contains a WildcardPattern.
+    wildcards: bool
+    # content is a collection of sub-patterns matched against the node's
+    # children; pytree.py asserts that it is not a string and that every item
+    # is a BasePattern.
+    def __init__(self, type: Optional[int] = ..., content: Optional[Iterable[BasePattern]] = ..., name: Optional[Text] = ...) -> None: ...
+
+class WildcardPattern(BasePattern):
+    # Bounds on the number of repetitions; pytree.py asserts
+    # 0 <= min <= max <= HUGE.
+    min: int
+    max: int
+    # content is a collection of alternatives, each of which is itself a
+    # collection of patterns (pytree.py converts it to a tuple of tuples).
+    def __init__(self, content: Optional[Iterable[Iterable[BasePattern]]] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ...
+
+class NegatedPattern(BasePattern):
+    # content is the pattern being negated, or None; pytree.py asserts that a
+    # non-None content is a BasePattern.
+    def __init__(self, content: Optional[BasePattern] = ...) -> None: ...
+
+# Module-level helper: yields (count, results) pairs where the whole sequence
+# of patterns matches the first `count` nodes.
+def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ...
diff --git a/mypy.ini b/mypy.ini

new file mode 100644 (file)

index 0000000..8fa7236
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,31 @@
+[mypy]
+# Specify the target platform details in config, so your developers are
+# free to run mypy on Windows, Linux, or macOS and get consistent
+# results.
+python_version=3.6
+platform=linux
+
+# flake8-mypy expects the following for sensible formatting
+show_column_numbers=True
+
+# show error messages from unrelated files
+follow_imports=normal
+
+# suppress errors about unsatisfied imports
+ignore_missing_imports=True
+
+# be strict
+disallow_untyped_calls=True
+warn_return_any=True
+strict_optional=True
+warn_no_return=True
+warn_redundant_casts=True
+warn_unused_ignores=True
+
+# The following are off by default.  Flip them on if you feel
+# adventurous.
+disallow_untyped_defs=True
+check_untyped_defs=True
+
+# No incremental mode
+cache_dir=/dev/null
diff --git a/setup.py b/setup.py

new file mode 100644 (file)

index 0000000..a64482c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2018 Łukasz Langa
+import ast
+import re
+from setuptools import setup
+import sys
+
+assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+"
+from pathlib import Path  # noqa E402
+
+CURRENT_DIR = Path(__file__).parent
+
+
+def get_long_description():
+    """
+    Return the text to use as the package's long description.
+
+    README.md, located next to this file, is converted to reStructuredText
+    with pypandoc when that works.  If pypandoc cannot be imported or the
+    conversion raises IOError, a warning is printed and the raw Markdown
+    is returned instead.
+    """
+    readme_path = CURRENT_DIR / 'README.md'
+    try:
+        import pypandoc
+        return pypandoc.convert_file(str(readme_path), 'rst')
+
+    except (IOError, ImportError):
+        # Red, partly bold warning surrounded by blank lines.
+        warning = (
+            '\x1b[31m\x1b[1mwarning:\x1b[0m\x1b[31m pandoc not found, '
+            'long description will be ugly (PyPI does not support .md).'
+            '\x1b[0m'
+        )
+        for line in ('', warning, ''):
+            print(line)
+        return readme_path.read_text(encoding='utf8')
+
+
+def get_version():
+    """
+    Return the version string declared in black.py.
+
+    black.py, located next to this file, is searched for the first
+    ``__version__ = <literal>`` assignment and the literal is evaluated
+    with ast.literal_eval(), so the module itself is never imported.
+
+    Raises:
+        RuntimeError: if no ``__version__`` assignment is found.
+    """
+    black_py = CURRENT_DIR / 'black.py'
+    _version_re = re.compile(r'__version__\s+=\s+(?P<version>.*)')
+    with open(black_py, 'r', encoding='utf8') as f:
+        match = _version_re.search(f.read())
+    if match is None:
+        # Fail with a clear message instead of an AttributeError on None.
+        raise RuntimeError(f'Unable to find a __version__ assignment in {black_py}')
+    return str(ast.literal_eval(match.group('version')))
+
+
+# Package metadata.  The version and the long description are computed by
+# get_version() and get_long_description().
+setup(
+    name='black',
+    version=get_version(),
+    description="The uncompromising code formatter.",
+    long_description=get_long_description(),
+    keywords='automation formatter yapf autopep8 pyfmt gofmt rustfmt',
+    author='Łukasz Langa',
+    author_email='lukasz@langa.pl',
+    url='https://github.com/ambv/black',
+    license='MIT',
+    # One top-level module plus the blib2to3 packages are distributed.
+    py_modules=['black'],
+    packages=['blib2to3', 'blib2to3.pgen2'],
+    # Keep in line with the sys.version_info assertion at the top of this file.
+    python_requires=">=3.6",
+    zip_safe=False,
+    install_requires=['click', 'attrs'],
+    test_suite='tests.test_black',
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Console',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+        'Topic :: Software Development :: Quality Assurance',
+    ],
+    # Installs a `black` command that calls main() in the black module.
+    entry_points={'console_scripts': ['black=black:main']},
+)
diff --git a/tests/.flake8 b/tests/.flake8

new file mode 100644 (file)

index 0000000..3528ac4
--- /dev/null
+++ b/tests/.flake8
@@ -0,0 +1,8 @@
+# Like the base Black .flake8 but also ignores F811 which is used deliberately
+# in test files.
+
+[flake8]
+ignore = E266, E501, F811
+max-line-length = 80
+max-complexity = 12
+select = B,C,E,F,W,T4,B9
diff --git a/tests/cantfit.py b/tests/cantfit.py

new file mode 100644 (file)

index 0000000..99bcaa0
--- /dev/null
+++ b/tests/cantfit.py
@@ -0,0 +1,27 @@
+# long variable name
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 0
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = 1  # with a comment
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = [
+    1, 2, 3
+]
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function()
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function(
+    arg1, arg2, arg3
+)
+this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it = function(
+    [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
+)
+# long function name
+normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying()
+normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
+    arg1, arg2, arg3
+)
+normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
+    [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
+)
+# long arguments
+normal_name = normal_function_name(
+    "but with super long string arguments that on their own exceed the line limit so there's no way it can ever fit",
+    "eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs with spam and eggs and spam with eggs",
+    this_is_a_ridiculously_long_name_and_nobody_in_their_right_mind_would_use_one_like_it=0,
+)
diff --git a/tests/comments.py b/tests/comments.py

new file mode 100644 (file)

index 0000000..e661ba6
--- /dev/null
+++ b/tests/comments.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# Some license here.
+#
+# Has many lines. Many, many lines.
+# Many, many, many lines.
+"""Module docstring.
+
+Possibly also many, many lines.
+"""
+
+import os.path
+import sys
+
+import a
+from b.c import X  # some noqa comment
+
+try:
+    import fast
+except ImportError:
+    import slow as fast
+
+
+# Some comment before a function.
+def function(default=None):
+    """Docstring comes first.
+
+    Possibly many lines.
+    """
+    # FIXME: Some comment about why this function is crap but still in production.
+    import inner_imports
+
+    if inner_imports.are_evil():
+        # Explains why we have this if.
+        # In great detail indeed.
+        x = X()
+        return x.method1()  # type: ignore
+
+    # This return is also commented for some reason.
+    return default
+
+
+# Explains why we use global state.
+GLOBAL_STATE = {'a': a(1), 'b': a(2), 'c': a(3)}
+
+
+# Another comment
+@fast(really=True)
+async def wat():
+    async with X.open_async() as x:  # Some more comments
+        result = await x.method1()
+    # Comment after ending a block.
+    if result:
+        print('A OK', file=sys.stdout)
+        # Comment between things.
+        print()
+
+
+# Some closing comments.
+# Maybe Vim or Emacs directives for formatting.
+# Who knows.
diff --git a/tests/comments2.py b/tests/comments2.py

new file mode 100644 (file)

index 0000000..7d5d3a3
--- /dev/null
+++ b/tests/comments2.py
@@ -0,0 +1,202 @@
+# Please keep __all__ alphabetized within each category.
+__all__ = [
+    # Super-special typing primitives.
+    'Any',
+    'Callable',
+    'ClassVar',
+
+    # ABCs (from collections.abc).
+    'AbstractSet',  # collections.abc.Set.
+    'ByteString',
+    'Container',
+
+    # Concrete collection types.
+    'Counter',
+    'Deque',
+    'Dict',
+    'DefaultDict',
+    'List',
+    'Set',
+    'FrozenSet',
+    'NamedTuple',  # Not really a type.
+    'Generator',
+]
+
+def inline_comments_in_brackets_ruin_everything():
+    if typedargslist:
+        parameters.children = [
+            parameters.children[0],  # (1
+            body,
+            parameters.children[-1],  # )1
+        ]
+    else:
+        parameters.children = [
+            parameters.children[0],  # (2 what if this was actually long
+            body,
+            parameters.children[-1],  # )2
+        ]
+    if (self._proc is not None and
+            # has the child process finished?
+            self._returncode is None and
+            # the child process has finished, but the
+            # transport hasn't been notified yet?
+            self._proc.poll() is None):
+        pass
+    short = [
+     # one
+     1,
+     # two
+     2]
+    call(arg1, arg2, """
+short
+""", arg3=True)
+
+    ############################################################################
+
+    call2(
+    #short
+    arg1,
+    #but
+    arg2,
+    #multiline
+    """
+short
+""",
+    # yup
+    arg3=True)
+    lcomp = [
+        element  # yup
+        for element in collection  # yup
+        if element is not None  # right
+    ]
+    lcomp2 = [
+        # hello
+        element
+        # yup
+        for element in collection
+        # right
+        if element is not None
+    ]
+    lcomp3 = [
+        # This one is actually too long to fit in a single line.
+        element.split('\n', 1)[0]
+        # yup
+        for element in collection.select_elements()
+        # right
+        if element is not None
+    ]
+    return Node(
+        syms.simple_stmt,
+        [
+            Node(statement, result),
+            Leaf(token.NEWLINE, '\n'),  # FIXME: \r\n?
+        ],
+    )
+
+instruction()
+
+# END COMMENTS
+# MORE END COMMENTS
+
+
+# output
+
+
+# Please keep __all__ alphabetized within each category.
+__all__ = [
+    # Super-special typing primitives.
+    'Any',
+    'Callable',
+    'ClassVar',
+    # ABCs (from collections.abc).
+    'AbstractSet',  # collections.abc.Set.
+    'ByteString',
+    'Container',
+    # Concrete collection types.
+    'Counter',
+    'Deque',
+    'Dict',
+    'DefaultDict',
+    'List',
+    'Set',
+    'FrozenSet',
+    'NamedTuple',  # Not really a type.
+    'Generator',
+]
+
+
+def inline_comments_in_brackets_ruin_everything():
+    if typedargslist:
+        parameters.children = [
+            parameters.children[0], body, parameters.children[-1]  # (1  # )1
+        ]
+    else:
+        parameters.children = [
+            parameters.children[0],  # (2 what if this was actually long
+            body,
+            parameters.children[-1],  # )2
+        ]
+    if (
+        self._proc is not None and
+        # has the child process finished?
+        self._returncode is None and
+        # the child process has finished, but the
+        # transport hasn't been notified yet?
+        self._proc.poll() is None
+    ):
+        pass
+    short = [
+        # one
+        1,
+        # two
+        2,
+    ]
+    call(
+        arg1,
+        arg2,
+        """
+short
+""",
+        arg3=True,
+    )
+    ############################################################################
+    call2(
+        # short
+        arg1,
+        # but
+        arg2,
+        # multiline
+        """
+short
+""",
+        # yup
+        arg3=True,
+    )
+    lcomp = [
+        element for element in collection if element is not None  # yup  # yup  # right
+    ]
+    lcomp2 = [
+        # hello
+        element
+        # yup
+        for element in collection
+        # right
+        if element is not None
+    ]
+    lcomp3 = [
+        # This one is actually too long to fit in a single line.
+        element.split('\n', 1)[0]
+        # yup
+        for element in collection.select_elements()
+        # right
+        if element is not None
+    ]
+    return Node(
+        syms.simple_stmt,
+        [Node(statement, result), Leaf(token.NEWLINE, '\n')],  # FIXME: \r\n?
+    )
+
+
+instruction()
+# END COMMENTS
+# MORE END COMMENTS
diff --git a/tests/composition.py b/tests/composition.py

new file mode 100644 (file)

index 0000000..7b462ac
--- /dev/null
+++ b/tests/composition.py
@@ -0,0 +1,21 @@
+class C:
+
+    def test(self) -> None:
+        with patch("black.out", print):
+            self.assertEqual(
+                unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.'
+            )
+            self.assertEqual(
+                unstyle(str(report)),
+                '1 file reformatted, 1 file left unchanged, 1 file failed to reformat.',
+            )
+            self.assertEqual(
+                unstyle(str(report)),
+                '2 files reformatted, 1 file left unchanged, '
+                '1 file failed to reformat.',
+            )
+            self.assertEqual(
+                unstyle(str(report)),
+                '2 files reformatted, 2 files left unchanged, '
+                '2 files failed to reformat.',
+            )
diff --git a/tests/expression.py b/tests/expression.py

new file mode 100644 (file)

index 0000000..3291dc2
--- /dev/null
+++ b/tests/expression.py
@@ -0,0 +1,240 @@
+...
+'some_string'
+b'\\xa3'
+Name
+None
+True
+False
+1
+1.0
+1j
+True or False
+True or False or None
+True and False
+True and False and None
+(Name1 and Name2) or Name3
+Name1 and Name2 or Name3
+Name1 or (Name2 and Name3)
+Name1 or Name2 and Name3
+(Name1 and Name2) or (Name3 and Name4)
+Name1 and Name2 or Name3 and Name4
+Name1 or (Name2 and Name3) or Name4
+Name1 or Name2 and Name3 or Name4
+v1 << 2
+1 >> v2
+1 % finished
+1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8
+((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8)
+not great
+~great
++value
+-1
+~int and not v1 ^ 123 + v2 | True
+(~int) and (not ((v1 ^ (123 + v2)) | True))
+lambda arg: None
+lambda a=True: a
+lambda a, b, c=True: a
+lambda a, b, c=True, *, d=(1 << v2), e='str': a
+lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b
+1 if True else 2
+str or None if True else str or bytes or None
+(str or None) if True else (str or bytes or None)
+str or None if (1 if True else 2) else str or bytes or None
+(str or None) if (1 if True else 2) else (str or bytes or None)
+{'2.7': dead, '3.7': (long_live or die_hard)}
+{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}}
+{**a, **b, **c}
+{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')}
+({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None
+()
+(1,)
+(1, 2)
+(1, 2, 3)
+[]
+[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)]
+{i for i in (1, 2, 3)}
+{(i ** 2) for i in (1, 2, 3)}
+{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))}
+{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)}
+[i for i in (1, 2, 3)]
+[(i ** 2) for i in (1, 2, 3)]
+[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))]
+[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)]
+{i: 0 for i in (1, 2, 3)}
+{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))}
+Python3 > Python2 > COBOL
+Life is Life
+call()
+call(arg)
+call(kwarg='hey')
+call(arg, kwarg='hey')
+call(arg, another, kwarg='hey', **kwargs)
+lukasz.langa.pl
+call.me(maybe)
+1 .real
+1.0 .real
+....__class__
+list[str]
+dict[str, int]
+tuple[str, ...]
+tuple[str, int, float, dict[str, int]]
+slice[0]
+slice[0:1]
+slice[0:1:2]
+slice[:]
+slice[:-1]
+slice[1:]
+slice[::-1]
+(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None)
+f'f-string without formatted values is just a string'
+f'{{NOT a formatted value}}'
+f'some f-string with {a} {few():.2f} {formatted.values!r}'
+f"{f'{nested} inner'} outer"
+f'space between opening braces: { {a for a in (1, 2, 3)}}'
+{'2.7': dead, '3.7': long_live or die_hard}
+{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'}
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C]
+(SomeName)
+SomeName
+(Good, Bad, Ugly)
+(i for i in (1, 2, 3))
+((i ** 2) for i in (1, 2, 3))
+((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c')))
+(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3))
+(*starred)
+a = (1,)
+b = 1,
+c = 1
+d = (1,) + a + (2,)
+
+
+def gen():
+    yield from outside_of_generator
+    a = (yield)
+
+
+async def f():
+    await some.complicated[0].call(with_args=(True or (1 is not 1)))
+
+
+# output
+
+
+...
+'some_string'
+b'\\xa3'
+Name
+None
+True
+False
+1
+1.0
+1j
+True or False
+True or False or None
+True and False
+True and False and None
+(Name1 and Name2) or Name3
+Name1 and Name2 or Name3
+Name1 or (Name2 and Name3)
+Name1 or Name2 and Name3
+(Name1 and Name2) or (Name3 and Name4)
+Name1 and Name2 or Name3 and Name4
+Name1 or (Name2 and Name3) or Name4
+Name1 or Name2 and Name3 or Name4
+v1 << 2
+1 >> v2
+1 % finished
+1 + v2 - v3 * 4 ^ 5 ** v6 / 7 // 8
+((1 + v2) - (v3 * 4)) ^ (((5 ** v6) / 7) // 8)
+not great
+~great
++value
+-1
+~int and not v1 ^ 123 + v2 | True
+(~int) and (not ((v1 ^ (123 + v2)) | True))
+lambda arg: None
+lambda a=True: a
+lambda a, b, c=True: a
+lambda a, b, c=True, *, d=(1 << v2), e='str': a
+lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs: a + b
+1 if True else 2
+str or None if True else str or bytes or None
+(str or None) if True else (str or bytes or None)
+str or None if (1 if True else 2) else str or bytes or None
+(str or None) if (1 if True else 2) else (str or bytes or None)
+{'2.7': dead, '3.7': (long_live or die_hard)}
+{'2.7': dead, '3.7': (long_live or die_hard), **{'3.6': verygood}}
+{**a, **b, **c}
+{'2.7', '3.6', '3.7', '3.8', '3.9', ('4.0' if gilectomy else '3.10')}
+({'a': 'b'}, (True or False), (+value), 'string', b'bytes') or None
+()
+(1,)
+(1, 2)
+(1, 2, 3)
+[]
+[1, 2, 3, 4, 5, 6, 7, 8, 9, (10 or A), (11 or B), (12 or C)]
+{i for i in (1, 2, 3)}
+{(i ** 2) for i in (1, 2, 3)}
+{(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))}
+{((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)}
+[i for i in (1, 2, 3)]
+[(i ** 2) for i in (1, 2, 3)]
+[(i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c'))]
+[((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3)]
+{i: 0 for i in (1, 2, 3)}
+{i: j for i, j in ((1, 'a'), (2, 'b'), (3, 'c'))}
+Python3 > Python2 > COBOL
+Life is Life
+call()
+call(arg)
+call(kwarg='hey')
+call(arg, kwarg='hey')
+call(arg, another, kwarg='hey', **kwargs)
+lukasz.langa.pl
+call.me(maybe)
+1 .real
+1.0 .real
+....__class__
+list[str]
+dict[str, int]
+tuple[str, ...]
+tuple[str, int, float, dict[str, int]]
+slice[0]
+slice[0:1]
+slice[0:1:2]
+slice[:]
+slice[:-1]
+slice[1:]
+slice[::-1]
+(str or None) if (sys.version_info[0] > (3,)) else (str or bytes or None)
+f'f-string without formatted values is just a string'
+f'{{NOT a formatted value}}'
+f'some f-string with {a} {few():.2f} {formatted.values!r}'
+f"{f'{nested} inner'} outer"
+f'space between opening braces: { {a for a in (1, 2, 3)}}'
+{'2.7': dead, '3.7': long_live or die_hard}
+{'2.7', '3.6', '3.7', '3.8', '3.9', '4.0' if gilectomy else '3.10'}
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10 or A, 11 or B, 12 or C]
+(SomeName)
+SomeName
+(Good, Bad, Ugly)
+(i for i in (1, 2, 3))
+((i ** 2) for i in (1, 2, 3))
+((i ** 2) for i, _ in ((1, 'a'), (2, 'b'), (3, 'c')))
+(((i ** 2) + j) for i in (1, 2, 3) for j in (1, 2, 3))
+(*starred)
+a = (1,)
+b = 1,
+c = 1
+d = (1,) + a + (2,)
+
+
+def gen():
+    yield from outside_of_generator
+
+    a = (yield)
+
+
+async def f():
+    await some.complicated[0].call(with_args=(True or (1 is not 1)))
diff --git a/tests/function.py b/tests/function.py

new file mode 100644 (file)

index 0000000..85f7d40
--- /dev/null
+++ b/tests/function.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+import asyncio
+import sys
+
+from third_party import X, Y, Z
+
+from library import some_connection, \
+                    some_decorator
+
+def func_no_args():
+  a; b; c
+  if True: raise RuntimeError
+  if False: ...
+  for i in range(10):
+    print(i)
+    continue
+  return None
+async def coroutine(arg):
+ "Single-line docstring. Multiline is harder to reformat."
+ async with some_connection() as conn:
+     await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
+ await asyncio.sleep(1)
+@asyncio.coroutine
+@some_decorator(
+with_args=True,
+many_args=[1,2,3]
+)
+def function_signature_stress_test(number:int,no_annotation=None,text:str="default",* ,debug:bool=False,**kwargs) -> str:
+ return text[number:-1]
+
+def long_lines():
+    if True:
+        typedargslist.extend(
+            gen_annotated_params(ast_args.kwonlyargs, ast_args.kw_defaults, parameters, implicit_default=True)
+        )
+    _type_comment_re = re.compile(
+        r"""
+        ^
+        [\t ]*
+        \#[ ]type:[ ]*
+        (?P<type>
+            [^#\t\n]+?
+        )
+        (?<!ignore)     # note: this will force the non-greedy + in <type> to match
+                        # a trailing space which is why we need the silliness below
+        (?<!ignore[ ]{1})(?<!ignore[ ]{2})(?<!ignore[ ]{3})(?<!ignore[ ]{4})
+        (?<!ignore[ ]{5})(?<!ignore[ ]{6})(?<!ignore[ ]{7})(?<!ignore[ ]{8})
+        (?<!ignore[ ]{9})(?<!ignore[ ]{10})
+        [\t ]*
+        (?P<nl>
+            (?:\#[^\n]*)?
+            \n?
+        )
+        $
+        """, re.MULTILINE | re.VERBOSE
+    )
+
+# output
+
+
+#!/usr/bin/env python3
+import asyncio
+import sys
+
+from third_party import X, Y, Z
+
+from library import some_connection, some_decorator
+
+
+def func_no_args():
+    a
+    b
+    c
+    if True:
+        raise RuntimeError
+
+    if False:
+        ...
+    for i in range(10):
+        print(i)
+        continue
+
+    return None
+
+
+async def coroutine(arg):
+    "Single-line docstring. Multiline is harder to reformat."
+    async with some_connection() as conn:
+        await conn.do_what_i_mean('SELECT bobby, tables FROM xkcd', timeout=2)
+    await asyncio.sleep(1)
+
+
+@asyncio.coroutine
+@some_decorator(with_args=True, many_args=[1, 2, 3])
+def function_signature_stress_test(
+    number: int,
+    no_annotation=None,
+    text: str = "default",
+    *,
+    debug: bool = False,
+    **kwargs,
+) -> str:
+    return text[number:-1]
+
+
+def long_lines():
+    if True:
+        typedargslist.extend(
+            gen_annotated_params(
+                ast_args.kwonlyargs,
+                ast_args.kw_defaults,
+                parameters,
+                implicit_default=True,
+            )
+        )
+    _type_comment_re = re.compile(
+        r"""
+        ^
+        [\t ]*
+        \#[ ]type:[ ]*
+        (?P<type>
+            [^#\t\n]+?
+        )
+        (?<!ignore)     # note: this will force the non-greedy + in <type> to match
+                        # a trailing space which is why we need the silliness below
+        (?<!ignore[ ]{1})(?<!ignore[ ]{2})(?<!ignore[ ]{3})(?<!ignore[ ]{4})
+        (?<!ignore[ ]{5})(?<!ignore[ ]{6})(?<!ignore[ ]{7})(?<!ignore[ ]{8})
+        (?<!ignore[ ]{9})(?<!ignore[ ]{10})
+        [\t ]*
+        (?P<nl>
+            (?:\#[^\n]*)?
+            \n?
+        )
+        $
+        """,
+        re.MULTILINE | re.VERBOSE,
+    )
diff --git a/tests/import_spacing.py b/tests/import_spacing.py

new file mode 100644 (file)

index 0000000..0597b62
--- /dev/null
+++ b/tests/import_spacing.py
@@ -0,0 +1,77 @@
+"""The asyncio package, tracking PEP 3156."""
+
+# flake8: noqa
+
+import sys
+
+# This relies on each of the submodules having an __all__ variable.
+from .base_events import *
+from .coroutines import *
+from .events import *  # comment here
+
+from .futures import *
+from .locks import *  # comment here
+from .protocols import *
+
+from .runners import *  # comment here
+from .queues import *
+from .streams import *
+
+from .subprocess import *
+from .tasks import *
+from .transports import *
+
+__all__ = (
+    base_events.__all__ +
+    coroutines.__all__ +
+    events.__all__ +
+    futures.__all__ +
+    locks.__all__ +
+    protocols.__all__ +
+    runners.__all__ +
+    queues.__all__ +
+    streams.__all__ +
+    subprocess.__all__ +
+    tasks.__all__ +
+    transports.__all__
+)
+
+
+# output
+
+
+"""The asyncio package, tracking PEP 3156."""
+# flake8: noqa
+import sys
+
+# This relies on each of the submodules having an __all__ variable.
+from .base_events import *
+from .coroutines import *
+from .events import *  # comment here
+
+from .futures import *
+from .locks import *  # comment here
+from .protocols import *
+
+from .runners import *  # comment here
+from .queues import *
+from .streams import *
+
+from .subprocess import *
+from .tasks import *
+from .transports import *
+
+__all__ = (
+    base_events.__all__ +
+    coroutines.__all__ +
+    events.__all__ +
+    futures.__all__ +
+    locks.__all__ +
+    protocols.__all__ +
+    runners.__all__ +
+    queues.__all__ +
+    streams.__all__ +
+    subprocess.__all__ +
+    tasks.__all__ +
+    transports.__all__
+)
diff --git a/tests/test_black.py b/tests/test_black.py

new file mode 100644 (file)

index 0000000..d9c0c5e
--- /dev/null
+++ b/tests/test_black.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+from functools import partial
+from pathlib import Path
+from typing import List, Tuple
+import unittest
+from unittest.mock import patch
+
+from click import unstyle
+
+import black
+
+ll = 88
+ff = partial(black.format_file, line_length=ll, fast=True)
+fs = partial(black.format_str, line_length=ll)
+THIS_FILE = Path(__file__)
+THIS_DIR = THIS_FILE.parent
+
+
+def dump_to_stderr(*output: str) -> str:
+    return '\n' + '\n'.join(output) + '\n'
+
+
+def read_data(name: str) -> Tuple[str, str]:
+    """read_data('test_name') -> 'input', 'output'"""
+    if not name.endswith('.py'):
+        name += '.py'
+    _input: List[str] = []
+    _output: List[str] = []
+    with open(THIS_DIR / name, 'r', encoding='utf8') as test:
+        lines = test.readlines()
+    result = _input
+    for line in lines:
+        if line.rstrip() == '# output':
+            result = _output
+            continue
+
+        result.append(line)
+    if _input and not _output:
+        # If there's no output marker, treat the entire file as already pre-formatted.
+        _output = _input[:]
+    return ''.join(_input).strip() + '\n', ''.join(_output).strip() + '\n'
+
+
+class BlackTestCase(unittest.TestCase):
+    maxDiff = None
+
+    def assertFormatEqual(self, expected: str, actual: str) -> None:
+        if actual != expected:
+            black.out('Expected tree:', fg='green')
+            try:
+                exp_node = black.lib2to3_parse(expected)
+                bdv = black.DebugVisitor()
+                list(bdv.visit(exp_node))
+            except Exception as ve:
+                black.err(str(ve))
+            black.out('Actual tree:', fg='red')
+            try:
+                exp_node = black.lib2to3_parse(actual)
+                bdv = black.DebugVisitor()
+                list(bdv.visit(exp_node))
+            except Exception as ve:
+                black.err(str(ve))
+        self.assertEqual(expected, actual)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_self(self) -> None:
+        source, expected = read_data('test_black')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+        with self.assertRaises(black.NothingChanged):
+            ff(THIS_FILE)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_black(self) -> None:
+        source, expected = read_data('../black')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+        with self.assertRaises(black.NothingChanged):
+            ff(THIS_DIR / '..' / 'black.py')  # same file read_data() loaded above
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_setup(self) -> None:
+        source, expected = read_data('../setup')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+        with self.assertRaises(black.NothingChanged):
+            ff(THIS_DIR / '..' / 'setup.py')  # same file read_data() loaded above
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_function(self) -> None:
+        source, expected = read_data('function')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_expression(self) -> None:
+        source, expected = read_data('expression')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_comments(self) -> None:
+        source, expected = read_data('comments')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_comments2(self) -> None:
+        source, expected = read_data('comments2')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_cantfit(self) -> None:
+        source, expected = read_data('cantfit')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_import_spacing(self) -> None:
+        source, expected = read_data('import_spacing')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_composition(self) -> None:
+        source, expected = read_data('composition')
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, line_length=ll)
+
+    def test_report(self) -> None:
+        report = black.Report()
+        out_lines = []
+        err_lines = []
+
+        def out(msg: str, **kwargs):
+            out_lines.append(msg)
+
+        def err(msg: str, **kwargs):
+            err_lines.append(msg)
+
+        with patch("black.out", out), patch("black.err", err):
+            report.done(Path('f1'), changed=True)
+            self.assertEqual(len(out_lines), 1)
+            self.assertEqual(len(err_lines), 0)
+            self.assertEqual(out_lines[-1], 'reformatted f1')
+            self.assertEqual(unstyle(str(report)), '1 file reformatted.')
+            self.assertEqual(report.return_code, 0)
+            report.failed(Path('e1'), 'boom')
+            self.assertEqual(len(out_lines), 1)
+            self.assertEqual(len(err_lines), 1)
+            self.assertEqual(err_lines[-1], 'error: cannot format e1: boom')
+            self.assertEqual(
+                unstyle(str(report)), '1 file reformatted, 1 file failed to reformat.'
+            )
+            self.assertEqual(report.return_code, 1)
+            report.done(Path('f2'), changed=False)
+            self.assertEqual(len(out_lines), 2)
+            self.assertEqual(len(err_lines), 1)
+            self.assertEqual(out_lines[-1], 'f2 already well formatted, good job.')
+            self.assertEqual(
+                unstyle(str(report)),
+                '1 file reformatted, 1 file left unchanged, '
+                '1 file failed to reformat.',
+            )
+            self.assertEqual(report.return_code, 1)
+            report.done(Path('f3'), changed=True)
+            self.assertEqual(len(out_lines), 3)
+            self.assertEqual(len(err_lines), 1)
+            self.assertEqual(out_lines[-1], 'reformatted f3')
+            self.assertEqual(
+                unstyle(str(report)),
+                '2 files reformatted, 1 file left unchanged, '
+                '1 file failed to reformat.',
+            )
+            self.assertEqual(report.return_code, 1)
+            report.failed(Path('e2'), 'boom')
+            self.assertEqual(len(out_lines), 3)
+            self.assertEqual(len(err_lines), 2)
+            self.assertEqual(err_lines[-1], 'error: cannot format e2: boom')
+            self.assertEqual(
+                unstyle(str(report)),
+                '2 files reformatted, 1 file left unchanged, '
+                '2 files failed to reformat.',
+            )
+            self.assertEqual(report.return_code, 1)
+            report.done(Path('f4'), changed=False)
+            self.assertEqual(len(out_lines), 4)
+            self.assertEqual(len(err_lines), 2)
+            self.assertEqual(out_lines[-1], 'f4 already well formatted, good job.')
+            self.assertEqual(
+                unstyle(str(report)),
+                '2 files reformatted, 2 files left unchanged, '
+                '2 files failed to reformat.',
+            )
+            self.assertEqual(report.return_code, 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
author	Łukasz Langa <lukasz@langa.pl>
	Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)
committer	Łukasz Langa <lukasz@langa.pl>
	Wed, 14 Mar 2018 19:55:32 +0000 (12:55 -0700)
.flake8	[new file with mode: 0644]	patch \| blob
.gitignore	[new file with mode: 0644]	patch \| blob
.travis.yml	[new file with mode: 0644]	patch \| blob
LICENSE	[new file with mode: 0644]	patch \| blob
MANIFEST.in	[new file with mode: 0644]	patch \| blob
Pipfile	[new file with mode: 0644]	patch \| blob
Pipfile.lock	[new file with mode: 0644]	patch \| blob
README.md	[new file with mode: 0644]	patch \| blob
black.py	[new file with mode: 0644]	patch \| blob
blib2to3/Grammar.txt	[new file with mode: 0644]	patch \| blob
blib2to3/Grammar3.6.4.final.0.pickle	[new file with mode: 0644]	patch \| blob
blib2to3/PatternGrammar.txt	[new file with mode: 0644]	patch \| blob
blib2to3/PatternGrammar3.6.4.final.0.pickle	[new file with mode: 0644]	patch \| blob
blib2to3/README	[new file with mode: 0644]	patch \| blob
blib2to3/__init__.py	[new file with mode: 0644]	patch \| blob
blib2to3/__init__.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/__init__.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/__init__.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/conv.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/driver.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/driver.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/grammar.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/grammar.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/literals.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/literals.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/parse.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/parse.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/pgen.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/pgen.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/token.py	[new file with mode: 0755]	patch \| blob
blib2to3/pgen2/token.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/tokenize.py	[new file with mode: 0644]	patch \| blob
blib2to3/pgen2/tokenize.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pygram.py	[new file with mode: 0644]	patch \| blob
blib2to3/pygram.pyi	[new file with mode: 0644]	patch \| blob
blib2to3/pytree.py	[new file with mode: 0644]	patch \| blob
blib2to3/pytree.pyi	[new file with mode: 0644]	patch \| blob
mypy.ini	[new file with mode: 0644]	patch \| blob
setup.py	[new file with mode: 0644]	patch \| blob
tests/.flake8	[new file with mode: 0644]	patch \| blob
tests/cantfit.py	[new file with mode: 0644]	patch \| blob
tests/comments.py	[new file with mode: 0644]	patch \| blob
tests/comments2.py	[new file with mode: 0644]	patch \| blob
tests/composition.py	[new file with mode: 0644]	patch \| blob
tests/expression.py	[new file with mode: 0644]	patch \| blob
tests/function.py	[new file with mode: 0644]	patch \| blob
tests/import_spacing.py	[new file with mode: 0644]	patch \| blob
tests/test_black.py	[new file with mode: 0644]	patch \| blob