from blib2to3.pgen2 import driver, token
from blib2to3.pgen2.parse import ParseError
-__version__ = "18.3a3"
+__version__ = "18.3a4"
DEFAULT_LINE_LENGTH = 88
# types
syms = pygram.python_symbols
class NothingChanged(UserWarning):
- """Raised by `format_file` when the reformatted code is the same as source."""
+ """Raised by :func:`format_file` when reformatted code is the same as source."""
class CannotSplit(Exception):
"""A readable split that fits the allotted line length is impossible.
- Raised by `left_hand_split()`, `right_hand_split()`, and `delimiter_split()`.
+ Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
+ :func:`delimiter_split`.
"""
class FormatError(Exception):
- """Base fmt: on/off error.
+ """Base exception for `# fmt: on` and `# fmt: off` handling.
It holds the number of bytes of the prefix consumed before the format
control comment appeared.
loop: BaseEventLoop,
executor: Executor,
) -> int:
+ """Run formatting of `sources` in parallel using the provided `executor`.
+
+ (Use ProcessPoolExecutors for actual parallelism.)
+
+ `line_length`, `write_back`, and `fast` options are passed to
+ :func:`format_file_in_place`.
+ """
tasks = {
src: loop.run_in_executor(
executor, format_file_in_place, src, line_length, fast, write_back
}
await asyncio.wait(tasks.values())
cancelled = []
- report = Report()
+ report = Report(check=not write_back)
for src, task in tasks.items():
if not task.done():
report.failed(src, 'timed out, cancelling')
def format_file_in_place(
src: Path, line_length: int, fast: bool, write_back: bool = False
) -> bool:
- """Format the file and rewrite if changed. Return True if changed."""
+ """Format file under `src` path. Return True if changed.
+
+ If `write_back` is True, write reformatted code back to the file under `src`.
+ `line_length` and `fast` options are passed to :func:`format_file_contents`.
+ """
with tokenize.open(src) as src_buffer:
src_contents = src_buffer.read()
try:
def format_stdin_to_stdout(
line_length: int, fast: bool, write_back: bool = False
) -> bool:
- """Format file on stdin and pipe output to stdout. Return True if changed."""
+ """Format file on stdin. Return True if changed.
+
+ If `write_back` is True, write reformatted code back to stdout.
+ `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
+ """
contents = sys.stdin.read()
try:
contents = format_file_contents(contents, line_length=line_length, fast=fast)
def format_file_contents(
src_contents: str, line_length: int, fast: bool
) -> FileContent:
- """Reformats a file and returns its contents and encoding."""
+ """Reformat contents of a file and return new contents.
+
+ If `fast` is False, additionally confirm that the reformatted code is
+ valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
+ `line_length` is passed to :func:`format_str`.
+ """
if src_contents.strip() == '':
raise NothingChanged
def format_str(src_contents: str, line_length: int) -> FileContent:
- """Reformats a string and returns new contents."""
+ """Reformat a string and return new contents.
+
+ `line_length` determines how many characters per line are allowed.
+ """
src_node = lib2to3_parse(src_contents)
dst_contents = ""
lines = LineGenerator()
class Visitor(Generic[T]):
- """Basic lib2to3 visitor that yields things on visiting."""
+ """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
def visit(self, node: LN) -> Iterator[T]:
+ """Main method to visit `node` and its children.
+
+ It tries to find a `visit_*()` method for the given `node.type`, like
+ `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
+ If no dedicated `visit_*()` method is found, chooses `visit_default()`
+ instead.
+
+ Then yields objects of type `T` from the selected visitor.
+ """
if node.type < 256:
name = token.tok_name[node.type]
else:
yield from getattr(self, f'visit_{name}', self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
+ """Default `visit_*()` implementation. Recurses to children of `node`."""
if isinstance(node, Node):
for child in node.children:
yield from self.visit(child)
@classmethod
def show(cls, code: str) -> None:
- """Pretty-prints a given string of `code`.
+ """Pretty-print the lib2to3 AST of a given string of `code`.
Convenience method for debugging.
"""
@dataclass
class BracketTracker:
+ """Keeps track of brackets on a line."""
+
depth: int = 0
bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
delimiters: Dict[LeafID, Priority] = Factory(dict)
previous: Optional[Leaf] = None
def mark(self, leaf: Leaf) -> None:
+ """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
+
+ All leaves receive an int `bracket_depth` field that stores how deep
+ within brackets a given leaf is. 0 means there are no enclosing brackets
+ that started on this line.
+
+ If a leaf is itself a closing bracket, it receives an `opening_bracket`
+ field that points to the opening bracket it forms a pair with. This is a
+ one-directional link to avoid reference cycles.
+
+ If a leaf is a delimiter (a token on which Black can split the line if
+ needed) and it's on depth 0, its `id()` is stored in the tracker's
+ `delimiters` field.
+ """
if leaf.type == token.COMMENT:
return
self.previous = leaf
def any_open_brackets(self) -> bool:
- """Returns True if there is an yet unmatched open bracket on the line."""
+ """Return True if there is a yet unmatched open bracket on the line."""
return bool(self.bracket_match)
- def max_priority(self, exclude: Iterable[LeafID] = ()) -> int:
- """Returns the highest priority of a delimiter found on the line.
+ def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
+ """Return the highest priority of a delimiter found on the line.
Values are consistent with what `is_delimiter()` returns.
"""
@dataclass
class Line:
+ """Holds leaves and comments. Can be printed with `str(line)`."""
+
depth: int = 0
leaves: List[Leaf] = Factory(list)
comments: Dict[LeafID, Leaf] = Factory(dict)
_for_loop_variable: bool = False
def append(self, leaf: Leaf, preformatted: bool = False) -> None:
+ """Add a new `leaf` to the end of the line.
+
+ Unless `preformatted` is True, the `leaf` will receive a new consistent
+ whitespace prefix and metadata applied by :class:`BracketTracker`.
+ Trailing commas may be removed, and unpacked for loop variables are
+ demoted from being delimiters.
+
+ Inline comments are put aside.
+ """
has_value = leaf.value.strip()
if not has_value:
return
@property
def is_comment(self) -> bool:
+ """Is this line a standalone comment?"""
return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
@property
def is_decorator(self) -> bool:
+ """Is this line a decorator?"""
return bool(self) and self.leaves[0].type == token.AT
@property
def is_import(self) -> bool:
+ """Is this an import line?"""
return bool(self) and is_import(self.leaves[0])
@property
def is_class(self) -> bool:
+ """Is this line a class definition?"""
return (
bool(self)
and self.leaves[0].type == token.NAME
@property
def is_def(self) -> bool:
- """Also returns True for async defs."""
+ """Is this a function definition? (Also returns True for async defs.)"""
try:
first_leaf = self.leaves[0]
except IndexError:
@property
def is_flow_control(self) -> bool:
+ """Is this line a flow control statement?
+
+ Those are `return`, `raise`, `break`, and `continue`.
+ """
return (
bool(self)
and self.leaves[0].type == token.NAME
@property
def is_yield(self) -> bool:
+ """Is this line a yield statement?"""
return (
bool(self)
and self.leaves[0].type == token.NAME
)
def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
+ """Remove trailing comma if there is one and it's safe."""
if not (
self.leaves
and self.leaves[-1].type == token.COMMA
def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
"""In a for loop, or comprehension, the variables are often unpacks.
- To avoid splitting on the comma in this situation, we will increase
- the depth of tokens between `for` and `in`.
+ To avoid splitting on the comma in this situation, increase the depth of
+ tokens between `for` and `in`.
"""
if leaf.type == token.NAME and leaf.value == 'for':
self.has_for = True
return False
def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
- # See `maybe_increment_for_loop_variable` above for explanation.
+ """See `maybe_increment_for_loop_variable` above for explanation."""
if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
self.bracket_tracker.depth -= 1
self._for_loop_variable = False
return self.append_comment(comment)
def append_comment(self, comment: Leaf) -> bool:
+ """Add an inline comment to the line."""
if comment.type != token.COMMENT:
return False
return True
def last_non_delimiter(self) -> Leaf:
+ """Return the last non-delimiter on the line. Raise LookupError if none found."""
for i in range(len(self.leaves)):
last = self.leaves[-i - 1]
if not is_delimiter(last):
raise LookupError("No non-delimiters found")
def __str__(self) -> str:
+ """Render the line."""
if not self:
return '\n'
return res + '\n'
def __bool__(self) -> bool:
+ """Return True if the line has leaves or comments."""
return bool(self.leaves or self.comments)
class UnformattedLines(Line):
+ """Just like :class:`Line` but stores lines which aren't reformatted."""
- def append(self, leaf: Leaf, preformatted: bool = False) -> None:
+ def append(self, leaf: Leaf, preformatted: bool = True) -> None:
+ """Just add a new `leaf` to the end of the lines.
+
+ The `preformatted` argument is ignored.
+
+ Keeps track of indentation `depth`, which is useful when the user
+ says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
+ """
try:
list(generate_comments(leaf))
except FormatOn as f_on:
elif leaf.type == token.DEDENT:
self.depth -= 1
+ def __str__(self) -> str:
+ """Render unformatted lines from leaves which were added with `append()`.
+
+ `depth` is not used for indentation in this case.
+ """
+ if not self:
+ return '\n'
+
+ res = ''
+ for leaf in self.leaves:
+ res += str(leaf)
+ return res
+
def append_comment(self, comment: Leaf) -> bool:
+ """Not implemented in this class. Raises `NotImplementedError`."""
raise NotImplementedError("Unformatted lines don't store comments separately.")
def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
+ """Does nothing and returns False."""
return False
def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
+ """Does nothing and returns False."""
return False
def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
+ """Does nothing and returns False."""
return False
- def __str__(self) -> str:
- if not self:
- return '\n'
-
- res = ''
- for leaf in self.leaves:
- res += str(leaf)
- return res
-
@dataclass
class EmptyLineTracker:
previous_defs: List[int] = Factory(list)
def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
- """Returns the number of extra empty lines before and after the `current_line`.
+ """Return the number of extra empty lines before and after the `current_line`.
- This is for separating `def`, `async def` and `class` with extra empty lines
- (two on module-level), as well as providing an extra empty line after flow
- control keywords to make them more prominent.
+ This is for separating `def`, `async def` and `class` with extra empty
+ lines (two on module-level), as well as providing an extra empty line
+ after flow control keywords to make them more prominent.
"""
if isinstance(current_line, UnformattedLines):
return 0, 0
def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
max_allowed = 1
- if current_line.is_comment and current_line.depth == 0:
+ if current_line.depth == 0:
max_allowed = 2
if current_line.leaves:
# Consume the first leaf's extra newlines.
first_leaf = current_line.leaves[0]
before = first_leaf.prefix.count('\n')
- before = min(before, max(before, max_allowed))
+ before = min(before, max_allowed)
first_leaf.prefix = ''
else:
before = 0
yield complete_line
def visit(self, node: LN) -> Iterator[Line]:
- """High-level entry point to the visitor."""
+ """Main method to visit `node` and its children.
+
+ Yields :class:`Line` objects.
+ """
if isinstance(self.current_line, UnformattedLines):
# File contained `# fmt: off`
yield from self.visit_unformatted(node)
yield from super().visit(node)
def visit_default(self, node: LN) -> Iterator[Line]:
+ """Default `visit_*()` implementation. Recurses to children of `node`."""
if isinstance(node, Leaf):
any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
try:
yield from super().visit_default(node)
def visit_INDENT(self, node: Node) -> Iterator[Line]:
+ """Increase indentation level, maybe yield a line."""
+ # In blib2to3 INDENT never holds comments.
yield from self.line(+1)
yield from self.visit_default(node)
def visit_DEDENT(self, node: Node) -> Iterator[Line]:
+ """Decrease indentation level, maybe yield a line."""
# DEDENT has no value. Additionally, in blib2to3 it never holds comments.
yield from self.line(-1)
def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
"""Visit a statement.
- The relevant Python language keywords for this statement are NAME leaves
- within it.
+ This implementation is shared for `if`, `while`, `for`, `try`, `except`,
+ `def`, `with`, and `class`.
+
+ The relevant Python language `keywords` for a given statement will be NAME
+ leaves within it. This method puts those on a separate line.
"""
for child in node.children:
if child.type == token.NAME and child.value in keywords: # type: ignore
yield from self.visit(child)
def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
- """A statement without nested statements."""
+ """Visit a statement without nested statements."""
is_suite_like = node.parent and node.parent.type in STATEMENT
if is_suite_like:
yield from self.line(+1)
yield from self.visit_default(node)
def visit_async_stmt(self, node: Node) -> Iterator[Line]:
+ """Visit `async def`, `async for`, `async with`."""
yield from self.line()
children = iter(node.children)
yield from self.visit(child)
def visit_decorators(self, node: Node) -> Iterator[Line]:
+ """Visit decorators."""
for child in node.children:
yield from self.line()
yield from self.visit(child)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
+ """Remove a semicolon and put the other statement on a separate line."""
yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
+ """End of file. Process outstanding comments and end with a newline."""
yield from self.visit_default(leaf)
yield from self.line()
def visit_unformatted(self, node: LN) -> Iterator[Line]:
+ """Used when file contained a `# fmt: off`."""
if isinstance(node, Node):
for child in node.children:
yield from self.visit(child)
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
- """Returns the first leaf that precedes `node`, if any."""
+ """Return the first leaf that precedes `node`, if any."""
while node:
res = node.prev_sibling
if res:
def is_delimiter(leaf: Leaf) -> int:
- """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.
+ """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.
Higher numbers are higher priority.
"""
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
- """Cleans the prefix of the `leaf` and generates comments from it, if any.
+ """Clean the prefix of the `leaf` and generate comments from it, if any.
Comments in lib2to3 are shoved into the whitespace prefix. This happens
in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
def make_comment(content: str) -> str:
+ """Return a consistently formatted comment from the given `content` string.
+
+ All comments (except for "##", "#!", "#:") should have a single space between
+ the hash sign and the content.
+
+ If `content` didn't start with a hash sign, one is provided.
+ """
content = content.rstrip()
if not content:
return '#'
def split_line(
line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
- """Splits a `line` into potentially many lines.
+ """Split a `line` into potentially many lines.
They should fit in the allotted `line_length` but might not be able to.
`inner` signifies that there were a pair of brackets somewhere around the
comment_after = line.comments.get(id(leaf))
if comment_after:
result.append(comment_after, preformatted=True)
- split_succeeded_or_raise(head, body, tail)
+ bracket_split_succeeded_or_raise(head, body, tail)
for result in (head, body, tail):
if result:
yield result
comment_after = line.comments.get(id(leaf))
if comment_after:
result.append(comment_after, preformatted=True)
- split_succeeded_or_raise(head, body, tail)
+ bracket_split_succeeded_or_raise(head, body, tail)
for result in (head, body, tail):
if result:
yield result
-def split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
+def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
+ """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
+
+ Do nothing otherwise.
+
+ A left- or right-hand split is based on a pair of brackets. Content before
+ (and including) the opening bracket is left on one line, content inside the
+ brackets is put on a separate line, and finally content starting with and
+ following the closing bracket is put on a separate line.
+
+ Those are called `head`, `body`, and `tail`, respectively. If the split
+ produced the same line (all content in `head`) or ended up with an empty `body`
+ and the `tail` is just the closing bracket, then it's considered failed.
+ """
tail_len = len(str(tail).strip())
if not body:
if tail_len == 0:
This kind of split doesn't increase indentation.
If `py36` is True, the split will add trailing commas also in function
- signatures that contain * and **.
+ signatures that contain `*` and `**`.
"""
try:
last_leaf = line.leaves[-1]
delimiters = line.bracket_tracker.delimiters
try:
- delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
+ delimiter_priority = line.bracket_tracker.max_delimiter_priority(
+ exclude={id(last_leaf)}
+ )
except ValueError:
raise CannotSplit("No delimiters found")
def is_import(leaf: Leaf) -> bool:
- """Returns True if the given leaf starts an import statement."""
+ """Return True if the given leaf starts an import statement."""
p = leaf.parent
t = leaf.type
v = leaf.value
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
- """Leave existing extra newlines if not `inside_brackets`.
+ """Leave existing extra newlines if not `inside_brackets`. Remove everything
+ else.
- Remove everything else. Note: don't use backslashes for formatting or
- you'll lose your voting rights.
+ Note: don't use backslashes for formatting or you'll lose your voting rights.
"""
if not inside_brackets:
spl = leaf.prefix.split('#')
def is_python36(node: Node) -> bool:
- """Returns True if the current file is using Python 3.6+ features.
+ """Return True if the current file is using Python 3.6+ features.
Currently looking for:
- f-strings; and
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
+ """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
+ and have one of the PYTHON_EXTENSIONS.
+ """
for child in path.iterdir():
if child.is_dir():
if child.name in BLACKLISTED_DIRECTORIES:
@dataclass
class Report:
- """Provides a reformatting counter."""
+ """Provides a reformatting counter. Can be rendered with `str(report)`."""
check: bool = False
change_count: int = 0
same_count: int = 0
@property
def return_code(self) -> int:
- """Which return code should the app use considering the current state."""
+ """Return the exit code that the app should use.
+
+ This considers the current state of changed files and failures:
+ - if there were any failures, return 123;
+ - if any files were changed and --check is being used, return 1;
+ - otherwise return 0.
+ """
# According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
# 126 we have special returncodes reserved by the shell.
if self.failure_count:
return 0
def __str__(self) -> str:
- """A color report of the current state.
+ """Render a color report of the current state.
Use `click.unstyle` to remove colors.
"""
def assert_equivalent(src: str, dst: str) -> None:
- """Raises AssertionError if `src` and `dst` aren't equivalent.
-
- This is a temporary sanity check until Black becomes stable.
- """
+ """Raise AssertionError if `src` and `dst` aren't equivalent."""
import ast
import traceback
def assert_stable(src: str, dst: str, line_length: int) -> None:
- """Raises AssertionError if `dst` reformats differently the second time.
-
- This is a temporary sanity check until Black becomes stable.
- """
+ """Raise AssertionError if `dst` reformats differently the second time."""
newdst = format_str(dst, line_length=line_length)
if dst != newdst:
log = dump_to_file(
def dump_to_file(*output: str) -> str:
- """Dumps `output` to a temporary file. Returns path to the file."""
+ """Dump `output` to a temporary file. Return path to the file."""
import tempfile
with tempfile.NamedTemporaryFile(
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
- """Returns a udiff string between strings `a` and `b`."""
+ """Return a unified diff string between strings `a` and `b`."""
import difflib
a_lines = [line + '\n' for line in a.split('\n')]