git.madduck.net Git - etc/vim.git/blobdiff - black.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be arranged individually.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Update pypi.python.org URL to pypi.org (#488)
[etc/vim.git] / black.py
index 1d2f1a304ee768bac626d50e8fef60721e868b34..3a51f21f4cd16b22e63a76918e4e46709f680316 100644 (file)
--- a/black.py
+++ b/black.py
@@ -20,6 +20,7 @@ from typing import (
     Callable,
     Collection,
     Dict,
+    Generator,
     Generic,
     Iterable,
     Iterator,
@@ -29,7 +30,6 @@ from typing import (
     Sequence,
     Set,
     Tuple,
-    Type,
     TypeVar,
     Union,
     cast,
@@ -47,7 +47,7 @@ from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.parse import ParseError
 
 
-__version__ = "18.6b1"
+__version__ = "18.6b4"
 DEFAULT_LINE_LENGTH = 88
 DEFAULT_EXCLUDES = (
     r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
@@ -90,43 +90,16 @@ class CannotSplit(Exception):
     """
 
 
-class FormatError(Exception):
-    """Base exception for `# fmt: on` and `# fmt: off` handling.
-
-    It holds the number of bytes of the prefix consumed before the format
-    control comment appeared.
-    """
-
-    def __init__(self, consumed: int) -> None:
-        super().__init__(consumed)
-        self.consumed = consumed
-
-    def trim_prefix(self, leaf: Leaf) -> None:
-        leaf.prefix = leaf.prefix[self.consumed :]
-
-    def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
-        """Returns a new Leaf from the consumed part of the prefix."""
-        unformatted_prefix = leaf.prefix[: self.consumed]
-        return Leaf(token.NEWLINE, unformatted_prefix)
-
-
-class FormatOn(FormatError):
-    """Found a comment like `# fmt: on` in the file."""
-
-
-class FormatOff(FormatError):
-    """Found a comment like `# fmt: off` in the file."""
-
-
 class WriteBack(Enum):
     NO = 0
     YES = 1
     DIFF = 2
+    CHECK = 3
 
     @classmethod
     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
         if check and not diff:
-            return cls.NO
+            return cls.CHECK
 
         return cls.DIFF if diff else cls.YES
 
@@ -191,13 +164,13 @@ def read_pyproject_toml(
     return value
 
 
-@click.command()
+@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 @click.option(
     "-l",
     "--line-length",
     type=int,
     default=DEFAULT_LINE_LENGTH,
-    help="How many character per line to allow.",
+    help="How many characters per line to allow.",
     show_default=True,
 )
 @click.option(
@@ -426,7 +399,9 @@ def reformat_one(
                 mode=mode,
             ):
                 changed = Changed.YES
-            if write_back == WriteBack.YES and changed is not Changed.NO:
+            if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
+                write_back is WriteBack.CHECK and changed is Changed.NO
+            ):
                 write_cache(cache, [src], line_length, mode)
         report.done(src, changed)
     except Exception as exc:
@@ -456,50 +431,58 @@ async def schedule_formatting(
         sources, cached = filter_cached(cache, sources)
         for src in sorted(cached):
             report.done(src, Changed.CACHED)
+    if not sources:
+        return
+
     cancelled = []
-    formatted = []
-    if sources:
-        lock = None
-        if write_back == WriteBack.DIFF:
-            # For diff output, we need locks to ensure we don't interleave output
-            # from different processes.
-            manager = Manager()
-            lock = manager.Lock()
-        tasks = {
-            loop.run_in_executor(
-                executor,
-                format_file_in_place,
-                src,
-                line_length,
-                fast,
-                write_back,
-                mode,
-                lock,
-            ): src
-            for src in sorted(sources)
-        }
-        pending: Iterable[asyncio.Task] = tasks.keys()
-        try:
-            loop.add_signal_handler(signal.SIGINT, cancel, pending)
-            loop.add_signal_handler(signal.SIGTERM, cancel, pending)
-        except NotImplementedError:
-            # There are no good alternatives for these on Windows
-            pass
-        while pending:
-            done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-            for task in done:
-                src = tasks.pop(task)
-                if task.cancelled():
-                    cancelled.append(task)
-                elif task.exception():
-                    report.failed(src, str(task.exception()))
-                else:
-                    formatted.append(src)
-                    report.done(src, Changed.YES if task.result() else Changed.NO)
+    sources_to_cache = []
+    lock = None
+    if write_back == WriteBack.DIFF:
+        # For diff output, we need locks to ensure we don't interleave output
+        # from different processes.
+        manager = Manager()
+        lock = manager.Lock()
+    tasks = {
+        loop.run_in_executor(
+            executor,
+            format_file_in_place,
+            src,
+            line_length,
+            fast,
+            write_back,
+            mode,
+            lock,
+        ): src
+        for src in sorted(sources)
+    }
+    pending: Iterable[asyncio.Task] = tasks.keys()
+    try:
+        loop.add_signal_handler(signal.SIGINT, cancel, pending)
+        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
+    except NotImplementedError:
+        # There are no good alternatives for these on Windows.
+        pass
+    while pending:
+        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        for task in done:
+            src = tasks.pop(task)
+            if task.cancelled():
+                cancelled.append(task)
+            elif task.exception():
+                report.failed(src, str(task.exception()))
+            else:
+                changed = Changed.YES if task.result() else Changed.NO
+                # If the file was written back or was successfully checked as
+                # well-formatted, store this information in the cache.
+                if write_back is WriteBack.YES or (
+                    write_back is WriteBack.CHECK and changed is Changed.NO
+                ):
+                    sources_to_cache.append(src)
+                report.done(src, changed)
     if cancelled:
         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
-    if write_back == WriteBack.YES and formatted:
-        write_cache(cache, formatted, line_length, mode)
+    if sources_to_cache:
+        write_cache(cache, sources_to_cache, line_length, mode)
 
 
 def format_file_in_place(
@@ -512,7 +495,8 @@ def format_file_in_place(
 ) -> bool:
     """Format file under `src` path. Return True if changed.
 
-    If `write_back` is True, write reformatted code back to stdout.
+    If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
+    code to the file.
     `line_length` and `fast` options are passed to :func:`format_file_contents`.
     """
     if src.suffix == ".pyi":
@@ -561,7 +545,8 @@ def format_stdin_to_stdout(
 ) -> bool:
     """Format file on stdin. Return True if changed.
 
-    If `write_back` is True, write reformatted code back to stdout.
+    If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
+    write a diff to stdout.
     `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
     :func:`format_file_contents`.
     """
@@ -628,10 +613,12 @@ def format_str(
     is_pyi = bool(mode & FileMode.PYI)
     py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
     normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
+    normalize_fmt_off(src_node)
     lines = LineGenerator(
         remove_u_prefix=py36 or "unicode_literals" in future_imports,
         is_pyi=is_pyi,
         normalize_strings=normalize_strings,
+        allow_underscores=py36,
     )
     elt = EmptyLineTracker(is_pyi=is_pyi)
     empty_line = Line()
@@ -758,13 +745,15 @@ class DebugVisitor(Visitor[T]):
             out(f" {node.value!r}", fg="blue", bold=False)
 
     @classmethod
-    def show(cls, code: str) -> None:
+    def show(cls, code: Union[str, Leaf, Node]) -> None:
         """Pretty-print the lib2to3 AST of a given string of `code`.
 
         Convenience method for debugging.
         """
         v: DebugVisitor[None] = DebugVisitor()
-        list(v.visit(lib2to3_parse(code)))
+        if isinstance(code, str):
+            code = lib2to3_parse(code)
+        list(v.visit(code))
 
 
 KEYWORDS = set(keyword.kwlist)
@@ -781,6 +770,7 @@ STATEMENT = {
     syms.classdef,
 }
 STANDALONE_COMMENT = 153
+token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
 LOGIC_OPERATORS = {"and", "or"}
 COMPARATORS = {
     token.LESS,
@@ -887,8 +877,8 @@ class BracketTracker:
     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
     delimiters: Dict[LeafID, Priority] = Factory(dict)
     previous: Optional[Leaf] = None
-    _for_loop_variable: int = 0
-    _lambda_arguments: int = 0
+    _for_loop_depths: List[int] = Factory(list)
+    _lambda_argument_depths: List[int] = Factory(list)
 
     def mark(self, leaf: Leaf) -> None:
         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
@@ -961,16 +951,21 @@ class BracketTracker:
         """
         if leaf.type == token.NAME and leaf.value == "for":
             self.depth += 1
-            self._for_loop_variable += 1
+            self._for_loop_depths.append(self.depth)
             return True
 
         return False
 
     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
         """See `maybe_increment_for_loop_variable` above for explanation."""
-        if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
+        if (
+            self._for_loop_depths
+            and self._for_loop_depths[-1] == self.depth
+            and leaf.type == token.NAME
+            and leaf.value == "in"
+        ):
             self.depth -= 1
-            self._for_loop_variable -= 1
+            self._for_loop_depths.pop()
             return True
 
         return False
@@ -983,16 +978,20 @@ class BracketTracker:
         """
         if leaf.type == token.NAME and leaf.value == "lambda":
             self.depth += 1
-            self._lambda_arguments += 1
+            self._lambda_argument_depths.append(self.depth)
             return True
 
         return False
 
     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
         """See `maybe_increment_lambda_arguments` above for explanation."""
-        if self._lambda_arguments and leaf.type == token.COLON:
+        if (
+            self._lambda_argument_depths
+            and self._lambda_argument_depths[-1] == self.depth
+            and leaf.type == token.COLON
+        ):
             self.depth -= 1
-            self._lambda_arguments -= 1
+            self._lambda_argument_depths.pop()
             return True
 
         return False
@@ -1174,7 +1173,7 @@ class Line:
             self.remove_trailing_comma()
             return True
 
-        # Otheriwsse, if the trailing one is the only one, we might mistakenly
+        # Otherwise, if the trailing one is the only one, we might mistakenly
         # change a tuple into a different type by removing the comma.
         depth = closing.bracket_depth + 1
         commas = 0
@@ -1230,6 +1229,9 @@ class Line:
 
         Provide a non-negative leaf `_index` to speed up the function.
         """
+        if not self.comments:
+            return
+
         if _index == -1:
             for _index, _leaf in enumerate(self.leaves):
                 if leaf is _leaf:
@@ -1253,18 +1255,18 @@ class Line:
 
     def is_complex_subscript(self, leaf: Leaf) -> bool:
         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
-        open_lsqb = (
-            leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
-        )
+        open_lsqb = self.bracket_tracker.get_open_lsqb()
         if open_lsqb is None:
             return False
 
         subscript_start = open_lsqb.next_sibling
-        if (
-            isinstance(subscript_start, Node)
-            and subscript_start.type == syms.subscriptlist
-        ):
-            subscript_start = child_towards(subscript_start, leaf)
+
+        if isinstance(subscript_start, Node):
+            if subscript_start.type == syms.listmaker:
+                return False
+
+            if subscript_start.type == syms.subscriptlist:
+                subscript_start = child_towards(subscript_start, leaf)
         return subscript_start is not None and any(
             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
         )
@@ -1289,55 +1291,6 @@ class Line:
         return bool(self.leaves or self.comments)
 
 
-class UnformattedLines(Line):
-    """Just like :class:`Line` but stores lines which aren't reformatted."""
-
-    def append(self, leaf: Leaf, preformatted: bool = True) -> None:
-        """Just add a new `leaf` to the end of the lines.
-
-        The `preformatted` argument is ignored.
-
-        Keeps track of indentation `depth`, which is useful when the user
-        says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
-        """
-        try:
-            list(generate_comments(leaf))
-        except FormatOn as f_on:
-            self.leaves.append(f_on.leaf_from_consumed(leaf))
-            raise
-
-        self.leaves.append(leaf)
-        if leaf.type == token.INDENT:
-            self.depth += 1
-        elif leaf.type == token.DEDENT:
-            self.depth -= 1
-
-    def __str__(self) -> str:
-        """Render unformatted lines from leaves which were added with `append()`.
-
-        `depth` is not used for indentation in this case.
-        """
-        if not self:
-            return "\n"
-
-        res = ""
-        for leaf in self.leaves:
-            res += str(leaf)
-        return res
-
-    def append_comment(self, comment: Leaf) -> bool:
-        """Not implemented in this class. Raises `NotImplementedError`."""
-        raise NotImplementedError("Unformatted lines don't store comments separately.")
-
-    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
-        """Does nothing and returns False."""
-        return False
-
-    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
-        """Does nothing and returns False."""
-        return False
-
-
 @dataclass
 class EmptyLineTracker:
     """Provides a stateful method that returns the number of potential extra
@@ -1359,9 +1312,6 @@ class EmptyLineTracker:
         This is for separating `def`, `async def` and `class` with extra empty
         lines (two on module-level).
         """
-        if isinstance(current_line, UnformattedLines):
-            return 0, 0
-
         before, after = self._maybe_empty_lines(current_line)
         before -= self.previous_after
         self.previous_after = after
@@ -1387,44 +1337,8 @@ class EmptyLineTracker:
                 before = 0 if depth else 1
             else:
                 before = 1 if depth else 2
-        is_decorator = current_line.is_decorator
-        if is_decorator or current_line.is_def or current_line.is_class:
-            if not is_decorator:
-                self.previous_defs.append(depth)
-            if self.previous_line is None:
-                # Don't insert empty lines before the first line in the file.
-                return 0, 0
-
-            if self.previous_line.is_decorator:
-                return 0, 0
-
-            if self.previous_line.depth < current_line.depth and (
-                self.previous_line.is_class or self.previous_line.is_def
-            ):
-                return 0, 0
-
-            if (
-                self.previous_line.is_comment
-                and self.previous_line.depth == current_line.depth
-                and before == 0
-            ):
-                return 0, 0
-
-            if self.is_pyi:
-                if self.previous_line.depth > current_line.depth:
-                    newlines = 1
-                elif current_line.is_class or self.previous_line.is_class:
-                    if current_line.is_stub_class and self.previous_line.is_stub_class:
-                        newlines = 0
-                    else:
-                        newlines = 1
-                else:
-                    newlines = 0
-            else:
-                newlines = 2
-            if current_line.depth and newlines:
-                newlines -= 1
-            return newlines, 0
+        if current_line.is_decorator or current_line.is_def or current_line.is_class:
+            return self._maybe_empty_lines_for_class_or_def(current_line, before)
 
         if (
             self.previous_line
@@ -1443,6 +1357,50 @@ class EmptyLineTracker:
 
         return before, 0
 
+    def _maybe_empty_lines_for_class_or_def(
+        self, current_line: Line, before: int
+    ) -> Tuple[int, int]:
+        if not current_line.is_decorator:
+            self.previous_defs.append(current_line.depth)
+        if self.previous_line is None:
+            # Don't insert empty lines before the first line in the file.
+            return 0, 0
+
+        if self.previous_line.is_decorator:
+            return 0, 0
+
+        if self.previous_line.depth < current_line.depth and (
+            self.previous_line.is_class or self.previous_line.is_def
+        ):
+            return 0, 0
+
+        if (
+            self.previous_line.is_comment
+            and self.previous_line.depth == current_line.depth
+            and before == 0
+        ):
+            return 0, 0
+
+        if self.is_pyi:
+            if self.previous_line.depth > current_line.depth:
+                newlines = 1
+            elif current_line.is_class or self.previous_line.is_class:
+                if current_line.is_stub_class and self.previous_line.is_stub_class:
+                    # No blank line between classes with an empty body
+                    newlines = 0
+                else:
+                    newlines = 1
+            elif current_line.is_def and not self.previous_line.is_def:
+                # Blank line between a block of functions and a block of non-functions
+                newlines = 1
+            else:
+                newlines = 0
+        else:
+            newlines = 2
+        if current_line.depth and newlines:
+            newlines -= 1
+        return newlines, 0
+
 
 @dataclass
 class LineGenerator(Visitor[Line]):
@@ -1456,8 +1414,9 @@ class LineGenerator(Visitor[Line]):
     normalize_strings: bool = True
     current_line: Line = Factory(Line)
     remove_u_prefix: bool = False
+    allow_underscores: bool = False
 
-    def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
+    def line(self, indent: int = 0) -> Iterator[Line]:
         """Generate a line.
 
         If the line is empty, only emit if it makes sense.
@@ -1466,67 +1425,41 @@ class LineGenerator(Visitor[Line]):
         If any lines were generated, set up a new current_line.
         """
         if not self.current_line:
-            if self.current_line.__class__ == type:
-                self.current_line.depth += indent
-            else:
-                self.current_line = type(depth=self.current_line.depth + indent)
+            self.current_line.depth += indent
             return  # Line is empty, don't emit. Creating a new one unnecessary.
 
         complete_line = self.current_line
-        self.current_line = type(depth=complete_line.depth + indent)
+        self.current_line = Line(depth=complete_line.depth + indent)
         yield complete_line
 
-    def visit(self, node: LN) -> Iterator[Line]:
-        """Main method to visit `node` and its children.
-
-        Yields :class:`Line` objects.
-        """
-        if isinstance(self.current_line, UnformattedLines):
-            # File contained `# fmt: off`
-            yield from self.visit_unformatted(node)
-
-        else:
-            yield from super().visit(node)
-
     def visit_default(self, node: LN) -> Iterator[Line]:
         """Default `visit_*()` implementation. Recurses to children of `node`."""
         if isinstance(node, Leaf):
             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
-            try:
-                for comment in generate_comments(node):
-                    if any_open_brackets:
-                        # any comment within brackets is subject to splitting
-                        self.current_line.append(comment)
-                    elif comment.type == token.COMMENT:
-                        # regular trailing comment
-                        self.current_line.append(comment)
-                        yield from self.line()
-
-                    else:
-                        # regular standalone comment
-                        yield from self.line()
-
-                        self.current_line.append(comment)
-                        yield from self.line()
-
-            except FormatOff as f_off:
-                f_off.trim_prefix(node)
-                yield from self.line(type=UnformattedLines)
-                yield from self.visit(node)
-
-            except FormatOn as f_on:
-                # This only happens here if somebody says "fmt: on" multiple
-                # times in a row.
-                f_on.trim_prefix(node)
-                yield from self.visit_default(node)
+            for comment in generate_comments(node):
+                if any_open_brackets:
+                    # any comment within brackets is subject to splitting
+                    self.current_line.append(comment)
+                elif comment.type == token.COMMENT:
+                    # regular trailing comment
+                    self.current_line.append(comment)
+                    yield from self.line()
 
-            else:
-                normalize_prefix(node, inside_brackets=any_open_brackets)
-                if self.normalize_strings and node.type == token.STRING:
-                    normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
-                    normalize_string_quotes(node)
-                if node.type not in WHITESPACE:
-                    self.current_line.append(node)
+                else:
+                    # regular standalone comment
+                    yield from self.line()
+
+                    self.current_line.append(comment)
+                    yield from self.line()
+
+            normalize_prefix(node, inside_brackets=any_open_brackets)
+            if self.normalize_strings and node.type == token.STRING:
+                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
+                normalize_string_quotes(node)
+            if node.type == token.NUMBER:
+                normalize_numeric_literal(node, self.allow_underscores)
+            if node.type not in WHITESPACE:
+                self.current_line.append(node)
         yield from super().visit_default(node)
 
     def visit_INDENT(self, node: Node) -> Iterator[Line]:
@@ -1623,23 +1556,10 @@ class LineGenerator(Visitor[Line]):
         yield from self.visit_default(leaf)
         yield from self.line()
 
-    def visit_unformatted(self, node: LN) -> Iterator[Line]:
-        """Used when file contained a `# fmt: off`."""
-        if isinstance(node, Node):
-            for child in node.children:
-                yield from self.visit(child)
-
-        else:
-            try:
-                self.current_line.append(node)
-            except FormatOn as f_on:
-                f_on.trim_prefix(node)
-                yield from self.line()
-                yield from self.visit(node)
-
-            if node.type == token.ENDMARKER:
-                # somebody decided not to put a final `# fmt: on`
-                yield from self.line()
+    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
+        if not self.current_line.bracket_tracker.any_open_brackets():
+            yield from self.line()
+        yield from self.visit_default(leaf)
 
     def __attrs_post_init__(self) -> None:
         """You are in a twisty little maze of passages."""
@@ -1929,6 +1849,31 @@ def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
     return node
 
 
+def container_of(leaf: Leaf) -> LN:
+    """Return `leaf` or one of its ancestors that is the topmost container of it.
+
+    By "container" we mean a node where `leaf` is the very first child.
+    """
+    same_prefix = leaf.prefix
+    container: LN = leaf
+    while container:
+        parent = container.parent
+        if parent is None:
+            break
+
+        if parent.children[0].prefix != same_prefix:
+            break
+
+        if parent.type == syms.file_input:
+            break
+
+        if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
+            break
+
+        container = parent
+    return container
+
+
 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
     """Return the priority of the `leaf` delimiter, given a line break after it.
 
@@ -2034,6 +1979,10 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
     return 0
 
 
+FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
+FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
+
+
 def generate_comments(leaf: LN) -> Iterator[Leaf]:
     """Clean the prefix of the `leaf` and generate comments from it, if any.
 
@@ -2053,16 +2002,27 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
     are emitted with a fake STANDALONE_COMMENT token identifier.
     """
-    p = leaf.prefix
-    if not p:
-        return
+    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
+        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
 
-    if "#" not in p:
-        return
+
+@dataclass
+class ProtoComment:
+    type: int  # token.COMMENT or STANDALONE_COMMENT
+    value: str  # content of the comment
+    newlines: int  # how many newlines before the comment
+    consumed: int  # how many characters of the original leaf's prefix did we consume
+
+
+@lru_cache(maxsize=4096)
+def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
+    result: List[ProtoComment] = []
+    if not prefix or "#" not in prefix:
+        return result
 
     consumed = 0
     nlines = 0
-    for index, line in enumerate(p.split("\n")):
+    for index, line in enumerate(prefix.split("\n")):
         consumed += len(line) + 1  # adding the length of the split '\n'
         line = line.lstrip()
         if not line:
@@ -2070,25 +2030,18 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
         if not line.startswith("#"):
             continue
 
-        if index == 0 and leaf.type != token.ENDMARKER:
+        if index == 0 and not is_endmarker:
             comment_type = token.COMMENT  # simple trailing comment
         else:
             comment_type = STANDALONE_COMMENT
         comment = make_comment(line)
-        yield Leaf(comment_type, comment, prefix="\n" * nlines)
-
-        if comment in {"# fmt: on", "# yapf: enable"}:
-            raise FormatOn(consumed)
-
-        if comment in {"# fmt: off", "# yapf: disable"}:
-            if comment_type == STANDALONE_COMMENT:
-                raise FormatOff(consumed)
-
-            prev = preceding_leaf(leaf)
-            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
-                raise FormatOff(consumed)
-
+        result.append(
+            ProtoComment(
+                type=comment_type, value=comment, newlines=nlines, consumed=consumed
+            )
+        )
         nlines = 0
+    return result
 
 
 def make_comment(content: str) -> str:
@@ -2123,7 +2076,7 @@ def split_line(
     If `py36` is True, splitting may generate syntax that is only compatible
     with Python 3.6 and later.
     """
-    if isinstance(line, UnformattedLines) or line.is_comment:
+    if line.is_comment:
         yield line
         return
 
@@ -2526,8 +2479,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
 
     prefix = leaf.value[:first_quote_pos]
     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
-    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
-    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
+    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
+    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
     if "r" in prefix.casefold():
         if unescaped_new_quote.search(body):
@@ -2538,14 +2491,20 @@ def normalize_string_quotes(leaf: Leaf) -> None:
         # Do not introduce or remove backslashes in raw strings
         new_body = body
     else:
-        # remove unnecessary quotes
+        # remove unnecessary escapes
         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
         if body != new_body:
-            # Consider the string without unnecessary quotes as the original
+            # Consider the string without unnecessary escapes as the original
             body = new_body
             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
+    if "f" in prefix.casefold():
+        matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
+        for m in matches:
+            if "\\" in str(m):
+                # Do not introduce backslashes in interpolated expressions
+                return
     if new_quote == '"""' and new_body[-1:] == '"':
         # edge case:
         new_body = new_body[:-1] + '\\"'
@@ -2560,6 +2519,79 @@ def normalize_string_quotes(leaf: Leaf) -> None:
     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
 
 
+def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
+    """Normalizes numeric (float, int, and complex) literals.
+
+    All letters used in the representation are normalized to lowercase (except
+    in Python 2 long literals), and long number literals are split using underscores.
+    """
+    text = leaf.value.lower()
+    if text.startswith(("0o", "0x", "0b")):
+        # Leave octal, hex, and binary literals alone.
+        pass
+    elif "e" in text:
+        before, after = text.split("e")
+        sign = ""
+        if after.startswith("-"):
+            after = after[1:]
+            sign = "-"
+        elif after.startswith("+"):
+            after = after[1:]
+        before = format_float_or_int_string(before, allow_underscores)
+        after = format_int_string(after, allow_underscores)
+        text = f"{before}e{sign}{after}"
+    elif text.endswith(("j", "l")):
+        number = text[:-1]
+        suffix = text[-1]
+        # Capitalize in "2L" because "l" looks too similar to "1".
+        if suffix == "l":
+            suffix = "L"
+        text = f"{format_float_or_int_string(number, allow_underscores)}{suffix}"
+    else:
+        text = format_float_or_int_string(text, allow_underscores)
+    leaf.value = text
+
+
+def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
+    """Formats a float or int string like "1.0" or "1000"."""
+    if "." not in text:
+        return format_int_string(text, allow_underscores)
+
+    before, after = text.split(".")
+    before = format_int_string(before, allow_underscores) if before else "0"
+    if after:
+        after = format_int_string(after, allow_underscores, count_from_end=False)
+    else:
+        after = "0"
+    return f"{before}.{after}"
+
+
+def format_int_string(
+    text: str, allow_underscores: bool, count_from_end: bool = True
+) -> str:
+    """Normalizes underscores in a string to e.g. 1_000_000.
+
+    Input must be a string of digits and optional underscores.
+    If count_from_end is False, we add underscores after groups of three digits
+    counting from the beginning instead of the end of the string. This is used
+    for the fractional part of float literals.
+    """
+    if not allow_underscores:
+        return text
+
+    text = text.replace("_", "")
+    if len(text) <= 6:
+        # No underscores for numbers <= 6 digits long.
+        return text
+
+    if count_from_end:
+        # Avoid removing leading zeros, which are important if we're formatting
+        # part of a number like "0.001".
+        return format(int("1" + text), "3_")[1:].lstrip("_")
+    else:
+        return "_".join(text[i : i + 3] for i in range(0, len(text), 3))
+
+
 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
     """Make existing optional parentheses invisible or create new ones.
 
@@ -2569,16 +2601,20 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
     Standardizes on visible parentheses for single-element tuples, and keeps
     existing visible parentheses for other tuples and generator expressions.
     """
-    try:
-        list(generate_comments(node))
-    except FormatOff:
-        return  # This `node` has a prefix with `# fmt: off`, don't mess with parens.
+    for pc in list_comments(node.prefix, is_endmarker=False):
+        if pc.value in FMT_OFF:
+            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
+            return
 
     check_lpar = False
     for index, child in enumerate(list(node.children)):
         if check_lpar:
             if child.type == syms.atom:
-                maybe_make_parens_invisible_in_atom(child)
+                if maybe_make_parens_invisible_in_atom(child):
+                    lpar = Leaf(token.LPAR, "")
+                    rpar = Leaf(token.RPAR, "")
+                    index = child.remove() or 0
+                    node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
             elif is_one_tuple(child):
                 # wrap child in visible parentheses
                 lpar = Leaf(token.LPAR, "(")
@@ -2608,8 +2644,89 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
         check_lpar = isinstance(child, Leaf) and child.value in parens_after
 
 
+def normalize_fmt_off(node: Node) -> None:
+    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
+    try_again = True
+    while try_again:
+        try_again = convert_one_fmt_off_pair(node)
+
+
+def convert_one_fmt_off_pair(node: Node) -> bool:
+    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
+
+    Returns True if a pair was converted.
+    """
+    for leaf in node.leaves():
+        previous_consumed = 0
+        for comment in list_comments(leaf.prefix, is_endmarker=False):
+            if comment.value in FMT_OFF:
+                # We only want standalone comments. If there's no previous leaf or
+                # the previous leaf is indentation, it's a standalone comment in
+                # disguise.
+                if comment.type != STANDALONE_COMMENT:
+                    prev = preceding_leaf(leaf)
+                    if prev and prev.type not in WHITESPACE:
+                        continue
+
+                ignored_nodes = list(generate_ignored_nodes(leaf))
+                if not ignored_nodes:
+                    continue
+
+                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
+                parent = first.parent
+                prefix = first.prefix
+                first.prefix = prefix[comment.consumed :]
+                hidden_value = (
+                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
+                )
+                if hidden_value.endswith("\n"):
+                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
+                    # leaf (possibly followed by a DEDENT).
+                    hidden_value = hidden_value[:-1]
+                first_idx = None
+                for ignored in ignored_nodes:
+                    index = ignored.remove()
+                    if first_idx is None:
+                        first_idx = index
+                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
+                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
+                parent.insert_child(
+                    first_idx,
+                    Leaf(
+                        STANDALONE_COMMENT,
+                        hidden_value,
+                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
+                    ),
+                )
+                return True
+
+            previous_consumed = comment.consumed
+
+    return False
+
+
+def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
+    """Starting from the container of `leaf`, generate all nodes until `# fmt: on`.
+
+    Stops at the end of the block.
+    """
+    container: Optional[LN] = container_of(leaf)
+    while container is not None and container.type != token.ENDMARKER:
+        for comment in list_comments(container.prefix, is_endmarker=False):
+            if comment.value in FMT_ON:
+                return
+
+        yield container
+
+        container = container.next_sibling
+
+
 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
-    """If it's safe, make the parens in the atom `node` invisible, recursively."""
+    """If it's safe, make the parens in the atom `node` invisible, recursively.
+
+    Returns whether the node should itself be wrapped in invisible parentheses.
+
+    """
     if (
         node.type != syms.atom
         or is_empty_tuple(node)
@@ -2627,9 +2744,9 @@ def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
         last.value = ""  # type: ignore
         if len(node.children) > 1:
             maybe_make_parens_invisible_in_atom(node.children[1])
-        return True
+        return False
 
-    return False
+    return True
 
 
 def is_empty_tuple(node: LN) -> bool:
@@ -2807,7 +2924,8 @@ def is_python36(node: Node) -> bool:
     """Return True if the current file is using Python 3.6+ features.
 
     Currently looking for:
-    - f-strings; and
+    - f-strings;
+    - underscores in numeric literals; and
     - trailing commas after * or ** in function signatures and calls.
     """
     for n in node.pre_order():
@@ -2816,6 +2934,10 @@ def is_python36(node: Node) -> bool:
             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                 return True
 
+        elif n.type == token.NUMBER:
+            if "_" in n.value:  # type: ignore
+                return True
+
         elif (
             n.type in {syms.typedargslist, syms.arglist}
             and n.children
@@ -2889,7 +3011,23 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
 
 def get_future_imports(node: Node) -> Set[str]:
     """Return a set of __future__ imports in the file."""
-    imports = set()
+    imports: Set[str] = set()
+
+    def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
+        for child in children:
+            if isinstance(child, Leaf):
+                if child.type == token.NAME:
+                    yield child.value
+            elif child.type == syms.import_as_name:
+                orig_name = child.children[0]
+                assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
+                assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
+                yield orig_name.value
+            elif child.type == syms.import_as_names:
+                yield from get_imports_from_children(child.children)
+            else:
+                assert False, "Invalid syntax parsing imports"
+
     for child in node.children:
         if child.type != syms.simple_stmt:
             break
@@ -2908,15 +3046,7 @@ def get_future_imports(node: Node) -> Set[str]:
             module_name = first_child.children[1]
             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                 break
-            for import_from_child in first_child.children[3:]:
-                if isinstance(import_from_child, Leaf):
-                    if import_from_child.type == token.NAME:
-                        imports.add(import_from_child.value)
-                else:
-                    assert import_from_child.type == syms.import_as_names
-                    for leaf in import_from_child.children:
-                        if isinstance(leaf, Leaf) and leaf.type == token.NAME:
-                            imports.add(leaf.value)
+            imports |= set(get_imports_from_children(first_child.children[3:]))
         else:
             break
     return imports
@@ -2932,11 +3062,23 @@ def gen_python_files_in_dir(
     """Generate all files under `path` whose paths are not excluded by the
     `exclude` regex, but are included by the `include` regex.
 
+    Symbolic links pointing outside of the `root` directory are ignored.
+
     `report` is where output about exclusions goes.
     """
     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
     for child in path.iterdir():
-        normalized_path = "/" + child.resolve().relative_to(root).as_posix()
+        try:
+            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
+        except ValueError:
+            if child.is_symlink():
+                report.path_ignored(
+                    child, f"is a symbolic link that points outside {root}"
+                )
+                continue
+
+            raise
+
         if child.is_dir():
             normalized_path += "/"
         exclude_match = exclude.search(normalized_path)
@@ -3029,7 +3171,7 @@ class Report:
         - otherwise return 0.
         """
         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
-        # 126 we have special returncodes reserved by the shell.
+        # 126 we have special return codes reserved by the shell.
         if self.failure_count:
             return 123
 
@@ -3451,5 +3593,28 @@ def write_cache(
         pass
 
 
+def patch_click() -> None:
+    """Make Click not crash.
+
+    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
+    default which restricts paths that it can access during the lifetime of the
+    application.  Click refuses to work in this scenario by raising a RuntimeError.
+
+    In case of Black the likelihood that non-ASCII characters are going to be used in
+    file paths is minimal since it's Python source code.  Moreover, this crash was
+    spurious on Python 3.7 thanks to PEP 538 and PEP 540.
+    """
+    try:
+        from click import core
+        from click import _unicodefun  # type: ignore
+    except ModuleNotFoundError:
+        return
+
+    for module in (core, _unicodefun):
+        if hasattr(module, "_verify_python3_env"):
+            module._verify_python3_env = lambda: None
+
+
 if __name__ == "__main__":
+    patch_click()
     main()