import colorama # noqa: F401
DEFAULT_LINE_LENGTH = 88
-DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950
+DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/" # noqa: B950
DEFAULT_INCLUDES = r"\.pyi?$"
CACHE_DIR = Path(user_cache_dir("black", version=__version__))
target_versions: Set[TargetVersion] = field(default_factory=set)
line_length: int = DEFAULT_LINE_LENGTH
string_normalization: bool = True
+ experimental_string_processing: bool = False
is_pyi: bool = False
def get_cache_key(self) -> str:
return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
-def find_pyproject_toml(path_search_start: str) -> Optional[str]:
+def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
"""Find the absolute filepath to a pyproject.toml if it exists"""
path_project_root = find_project_root(path_search_start)
path_pyproject_toml = path_project_root / "pyproject.toml"
if not config:
return None
+ else:
+ # Sanitize the values to be Click friendly. For more information please see:
+ # https://github.com/psf/black/issues/1458
+ # https://github.com/pallets/click/issues/1567
+ config = {
+ k: str(v) if not isinstance(v, (list, dict)) else v
+ for k, v in config.items()
+ }
target_version = config.get("target_version")
if target_version is not None and not isinstance(target_version, list):
is_flag=True,
help="Don't normalize string quotes or prefixes.",
)
+@click.option(
+ "--experimental-string-processing",
+ is_flag=True,
+ hidden=True,
+ help=(
+ "Experimental option that performs more normalization on string literals."
+ " Currently disabled because it leads to some crashes."
+ ),
+)
@click.option(
"--check",
is_flag=True,
),
is_eager=True,
callback=read_pyproject_toml,
- help="Read configuration from PATH.",
+ help="Read configuration from FILE path.",
)
@click.pass_context
def main(
fast: bool,
pyi: bool,
skip_string_normalization: bool,
+ experimental_string_processing: bool,
quiet: bool,
verbose: bool,
include: str,
line_length=line_length,
is_pyi=pyi,
string_normalization=not skip_string_normalization,
+ experimental_string_processing=experimental_string_processing,
)
if config and verbose:
out(f"Using configuration from {config}.", bold=False, fg="blue")
root = find_project_root(src)
sources: Set[Path] = set()
path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
- exclude_regexes = [exclude_regex]
- if force_exclude_regex is not None:
- exclude_regexes.append(force_exclude_regex)
+ gitignore = get_gitignore(root)
for s in src:
p = Path(s)
p.iterdir(),
root,
include_regex,
- exclude_regexes,
+ exclude_regex,
+ force_exclude_regex,
report,
- get_gitignore(root),
+ gitignore,
)
)
elif s == "-":
sources.add(p)
elif p.is_file():
- sources.update(
- gen_python_files(
- [p], root, None, exclude_regexes, report, get_gitignore(root)
- )
- )
+ normalized_path = normalize_path_maybe_ignore(p, root, report)
+ if normalized_path is None:
+ continue
+
+ normalized_path = "/" + normalized_path
+ # Hard-exclude any files that matches the `--force-exclude` regex.
+ if force_exclude_regex:
+ force_exclude_match = force_exclude_regex.search(normalized_path)
+ else:
+ force_exclude_match = None
+ if force_exclude_match and force_exclude_match.group(0):
+ report.path_ignored(p, "matches the --force-exclude regular expression")
+ continue
+
+ sources.add(p)
else:
err(f"invalid path: {s}")
return sources
write_cache(cache, [src], mode)
report.done(src, changed)
except Exception as exc:
+ if report.verbose:
+ traceback.print_exc()
report.failed(src, str(exc))
...
A more complex example:
+
>>> print(
... black.format_str(
... "def f(arg:str='')->None: hey",
before, after = elt.maybe_empty_lines(current_line)
dst_contents.append(str(empty_line) * before)
for line in transform_line(
- current_line,
- line_length=mode.line_length,
- normalize_strings=mode.string_normalization,
- features=split_line_features,
+ current_line, mode=mode, features=split_line_features
):
dst_contents.append(str(line))
return "".join(dst_contents)
)
if self.inside_brackets or not preformatted:
self.bracket_tracker.mark(leaf)
- self.maybe_remove_trailing_comma(leaf)
+ if self.maybe_should_explode(leaf):
+ self.should_explode = True
if not self.append_comment(leaf):
self.leaves.append(leaf)
Leaf(token.DOT, ".") for _ in range(3)
]
- @property
- def is_collection_with_optional_trailing_comma(self) -> bool:
- """Is this line a collection literal with a trailing comma that's optional?
-
- Note that the trailing comma in a 1-tuple is not optional.
- """
- if not self.leaves or len(self.leaves) < 4:
- return False
-
- # Look for and address a trailing colon.
- if self.leaves[-1].type == token.COLON:
- closer = self.leaves[-2]
- close_index = -2
- else:
- closer = self.leaves[-1]
- close_index = -1
- if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
- return False
-
- if closer.type == token.RPAR:
- # Tuples require an extra check, because if there's only
- # one element in the tuple removing the comma unmakes the
- # tuple.
- #
- # We also check for parens before looking for the trailing
- # comma because in some cases (eg assigning a dict
- # literal) the literal gets wrapped in temporary parens
- # during parsing. This case is covered by the
- # collections.py test data.
- opener = closer.opening_bracket
- for _open_index, leaf in enumerate(self.leaves):
- if leaf is opener:
- break
-
- else:
- # Couldn't find the matching opening paren, play it safe.
- return False
-
- commas = 0
- comma_depth = self.leaves[close_index - 1].bracket_depth
- for leaf in self.leaves[_open_index + 1 : close_index]:
- if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
- commas += 1
- if commas > 1:
- # We haven't looked yet for the trailing comma because
- # we might also have caught noop parens.
- return self.leaves[close_index - 1].type == token.COMMA
-
- elif commas == 1:
- return False # it's either a one-tuple or didn't have a trailing comma
-
- if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
- close_index -= 1
- closer = self.leaves[close_index]
- if closer.type == token.RPAR:
- # TODO: this is a gut feeling. Will we ever see this?
- return False
-
- if self.leaves[close_index - 1].type != token.COMMA:
- return False
-
- return True
-
@property
def is_def(self) -> bool:
"""Is this a function definition? (Also returns True for async defs.)"""
def contains_multiline_strings(self) -> bool:
return any(is_multiline_string(leaf) for leaf in self.leaves)
- def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
- """Remove trailing comma if there is one and it's safe."""
- if not (self.leaves and self.leaves[-1].type == token.COMMA):
- return False
-
- # We remove trailing commas only in the case of importing a
- # single name from a module.
+ def maybe_should_explode(self, closing: Leaf) -> bool:
+ """Return True if this line should explode (always be split), that is when:
+ - there's a pre-existing trailing comma here; and
+ - it's not a one-tuple.
+ """
if not (
- self.leaves
- and self.is_import
- and len(self.leaves) > 4
+ closing.type in CLOSING_BRACKETS
+ and self.leaves
and self.leaves[-1].type == token.COMMA
- and closing.type in CLOSING_BRACKETS
- and self.leaves[-4].type == token.NAME
- and (
- # regular `from foo import bar,`
- self.leaves[-4].value == "import"
- # `from foo import (bar as baz,)
- or (
- len(self.leaves) > 6
- and self.leaves[-6].value == "import"
- and self.leaves[-3].value == "as"
- )
- # `from foo import bar as baz,`
- or (
- len(self.leaves) > 5
- and self.leaves[-5].value == "import"
- and self.leaves[-3].value == "as"
- )
- )
- and closing.type == token.RPAR
+ and not self.leaves[-1].was_checked # pre-existing
):
return False
- self.remove_trailing_comma()
- return True
+ if closing.type in {token.RBRACE, token.RSQB}:
+ return True
+
+ if self.is_import:
+ return True
+
+ if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
+ return True
+
+ return False
def append_comment(self, comment: Leaf) -> bool:
"""Add an inline or standalone comment to the line."""
def transform_line(
- line: Line,
- line_length: int,
- normalize_strings: bool,
- features: Collection[Feature] = (),
+ line: Line, mode: Mode, features: Collection[Feature] = ()
) -> Iterator[Line]:
"""Transform a `line`, potentially splitting it into many lines.
def init_st(ST: Type[StringTransformer]) -> StringTransformer:
"""Initialize StringTransformer"""
- return ST(line_length, normalize_strings)
+ return ST(mode.line_length, mode.string_normalization)
string_merge = init_st(StringMerger)
string_paren_strip = init_st(StringParenStripper)
if (
not line.contains_uncollapsable_type_comments()
and not line.should_explode
- and not line.is_collection_with_optional_trailing_comma
and (
- is_line_short_enough(line, line_length=line_length, line_str=line_str)
+ is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
or line.contains_unsplittable_type_ignore()
)
- and not (line.contains_standalone_comments() and line.inside_brackets)
+ and not (line.inside_brackets and line.contains_standalone_comments())
):
# Only apply basic string preprocessing, since lines shouldn't be split here.
- transformers = [string_merge, string_paren_strip]
+ if mode.experimental_string_processing:
+ transformers = [string_merge, string_paren_strip]
+ else:
+ transformers = []
elif line.is_def:
transformers = [left_hand_split]
else:
def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
- for omit in generate_trailers_to_omit(line, line_length):
- lines = list(right_hand_split(line, line_length, features, omit=omit))
- if is_line_short_enough(lines[0], line_length=line_length):
+ for omit in generate_trailers_to_omit(line, mode.line_length):
+ lines = list(
+ right_hand_split(line, mode.line_length, features, omit=omit)
+ )
+ if is_line_short_enough(lines[0], line_length=mode.line_length):
yield from lines
return
# See #762 and #781 for the full story.
yield from right_hand_split(line, line_length=1, features=features)
- if line.inside_brackets:
- transformers = [
- string_merge,
- string_paren_strip,
- delimiter_split,
- standalone_comment_split,
- string_split,
- string_paren_wrap,
- rhs,
- ]
+ if mode.experimental_string_processing:
+ if line.inside_brackets:
+ transformers = [
+ string_merge,
+ string_paren_strip,
+ delimiter_split,
+ standalone_comment_split,
+ string_split,
+ string_paren_wrap,
+ rhs,
+ ]
+ else:
+ transformers = [
+ string_merge,
+ string_paren_strip,
+ string_split,
+ string_paren_wrap,
+ rhs,
+ ]
else:
- transformers = [
- string_merge,
- string_paren_strip,
- string_split,
- string_paren_wrap,
- rhs,
- ]
+ if line.inside_brackets:
+ transformers = [delimiter_split, standalone_comment_split, rhs]
+ else:
+ transformers = [rhs]
for transform in transformers:
# We are accumulating lines in `result` because we might want to abort
)
result.extend(
- transform_line(
- transformed_line,
- line_length=line_length,
- normalize_strings=normalize_strings,
- features=features,
- )
+ transform_line(transformed_line, mode=mode, features=features)
)
except CannotTransform:
continue
Requirements:
The line contains a string which is surrounded by parentheses and:
- The target string is NOT the only argument to a function call).
- - The RPAR is NOT followed by an attribute access (i.e. a dot).
+ - If the target string contains a PERCENT, the brackets are not
+ preceeded or followed by an operator with higher precedence than
+ PERCENT.
Transformations:
The parentheses mentioned in the 'Requirements' section are stripped.
string_parser = StringParser()
next_idx = string_parser.parse(LL, string_idx)
+ # if the leaves in the parsed string include a PERCENT, we need to
+ # make sure the initial LPAR is NOT preceded by an operator with
+ # higher or equal precedence to PERCENT
+ if is_valid_index(idx - 2):
+ # mypy can't quite follow unless we name this
+ before_lpar = LL[idx - 2]
+ if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
+ (
+ before_lpar.type
+ in {
+ token.STAR,
+ token.AT,
+ token.SLASH,
+ token.DOUBLESLASH,
+ token.PERCENT,
+ token.TILDE,
+ token.DOUBLESTAR,
+ token.AWAIT,
+ token.LSQB,
+ token.LPAR,
+ }
+ )
+ or (
+ # only unary PLUS/MINUS
+ before_lpar.parent
+ and before_lpar.parent.type == syms.factor
+ and (before_lpar.type in {token.PLUS, token.MINUS})
+ )
+ ):
+ continue
+
# Should be followed by a non-empty RPAR...
if (
is_valid_index(next_idx)
and LL[next_idx].type == token.RPAR
and not is_empty_rpar(LL[next_idx])
):
- # That RPAR should NOT be followed by a '.' symbol.
- if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
+ # That RPAR should NOT be followed by anything with higher
+ # precedence than PERCENT
+ if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
+ token.DOUBLESTAR,
+ token.LSQB,
+ token.LPAR,
+ token.DOT,
+ }:
continue
return Ok(string_idx)
new_line.append(string_leaf)
append_leaves(
- new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
+ new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
)
LL[rpar_idx].remove()
set(@leaves) is a subset of set(@old_line.leaves).
"""
for old_leaf in leaves:
- assert old_leaf in old_line.leaves
-
new_leaf = Leaf(old_leaf.type, old_leaf.value)
replace_child(old_leaf, new_leaf)
new_line.append(new_leaf)
tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
bracket_split_succeeded_or_raise(head, body, tail)
if (
- # the body shouldn't be exploded
- not body.should_explode
# the opening bracket is an optional paren
- and opening_bracket.type == token.LPAR
+ opening_bracket.type == token.LPAR
and not opening_bracket.value
# the closing bracket is an optional paren
and closing_bracket.type == token.RPAR
continue
if leaves[i].type != token.COMMA:
- leaves.insert(i + 1, Leaf(token.COMMA, ","))
+ new_comma = Leaf(token.COMMA, ",")
+ new_comma.was_checked = True
+ leaves.insert(i + 1, new_comma)
break
# Populate the line
result.append(leaf, preformatted=True)
for comment_after in original.comments_after(leaf):
result.append(comment_after, preformatted=True)
- if is_body:
- result.should_explode = should_explode(result, opening_bracket)
+ if is_body and should_split_body_explode(result, opening_bracket):
+ result.should_explode = True
return result
and current_line.leaves[-1].type != token.COMMA
and current_line.leaves[-1].type != STANDALONE_COMMENT
):
- current_line.append(Leaf(token.COMMA, ","))
+ new_comma = Leaf(token.COMMA, ",")
+ new_comma.was_checked = True
+ current_line.append(new_comma)
yield current_line
leaf.value = ")"
-def should_explode(line: Line, opening_bracket: Leaf) -> bool:
- """Should `line` immediately be split with `delimiter_split()` after RHS?"""
+def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
+ """Should `line` be immediately split with `delimiter_split()` after RHS?"""
- if not (
- opening_bracket.parent
- and opening_bracket.parent.type in {syms.atom, syms.import_from}
- and opening_bracket.value in "[{("
- ):
+ if not (opening_bracket.parent and opening_bracket.value in "[{("):
return False
+ # We're essentially checking if the body is delimited by commas and there's more
+ # than one of them (we're excluding the trailing comma and if the delimiter priority
+ # is still commas, that means there's more).
+ exclude = set()
+ pre_existing_trailing_comma = False
try:
last_leaf = line.leaves[-1]
- exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
+ if last_leaf.type == token.COMMA:
+ pre_existing_trailing_comma = not last_leaf.was_checked
+ exclude.add(id(last_leaf))
max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
except (IndexError, ValueError):
return False
- return max_priority == COMMA_PRIORITY
+ return max_priority == COMMA_PRIORITY and (
+ # always explode imports
+ opening_bracket.parent.type in {syms.atom, syms.import_from}
+ or pre_existing_trailing_comma
+ )
+
+
+def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
+ """Return True if content between `opening` and `closing` looks like a one-tuple."""
+ depth = closing.bracket_depth + 1
+ for _opening_index, leaf in enumerate(leaves):
+ if leaf is opening:
+ break
+
+ else:
+ raise LookupError("Opening paren not found in `leaves`")
+
+ commas = 0
+ _opening_index += 1
+ for leaf in leaves[_opening_index:]:
+ if leaf is closing:
+ break
+
+ bracket_depth = leaf.bracket_depth
+ if bracket_depth == depth and leaf.type == token.COMMA:
+ commas += 1
+ if leaf.parent and leaf.parent.type in {
+ syms.arglist,
+ syms.typedargslist,
+ }:
+ commas += 1
+ break
+
+ return commas < 2
def get_features_used(node: Node) -> Set[Feature]:
return PathSpec.from_lines("gitwildmatch", lines)
+def normalize_path_maybe_ignore(
+ path: Path, root: Path, report: "Report"
+) -> Optional[str]:
+ """Normalize `path`. May return `None` if `path` was ignored.
+
+ `report` is where "path ignored" output goes.
+ """
+ try:
+ normalized_path = path.resolve().relative_to(root).as_posix()
+ except OSError as e:
+ report.path_ignored(path, f"cannot be read because {e}")
+ return None
+
+ except ValueError:
+ if path.is_symlink():
+ report.path_ignored(path, f"is a symbolic link that points outside {root}")
+ return None
+
+ raise
+
+ return normalized_path
+
+
def gen_python_files(
paths: Iterable[Path],
root: Path,
include: Optional[Pattern[str]],
- exclude_regexes: Iterable[Pattern[str]],
+ exclude: Pattern[str],
+ force_exclude: Optional[Pattern[str]],
report: "Report",
gitignore: PathSpec,
) -> Iterator[Path]:
"""Generate all files under `path` whose paths are not excluded by the
- `exclude` regex, but are included by the `include` regex.
+ `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
Symbolic links pointing outside of the `root` directory are ignored.
"""
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
for child in paths:
- # Then ignore with `exclude` option.
- try:
- normalized_path = child.resolve().relative_to(root).as_posix()
- except OSError as e:
- report.path_ignored(child, f"cannot be read because {e}")
+ normalized_path = normalize_path_maybe_ignore(child, root, report)
+ if normalized_path is None:
continue
- except ValueError:
- if child.is_symlink():
- report.path_ignored(
- child, f"is a symbolic link that points outside {root}"
- )
- continue
-
- raise
# First ignore files matching .gitignore
if gitignore.match_file(normalized_path):
report.path_ignored(child, "matches the .gitignore file content")
continue
+ # Then ignore with `--exclude` and `--force-exclude` options.
normalized_path = "/" + normalized_path
if child.is_dir():
normalized_path += "/"
- is_excluded = False
- for exclude in exclude_regexes:
- exclude_match = exclude.search(normalized_path) if exclude else None
- if exclude_match and exclude_match.group(0):
- report.path_ignored(child, "matches the --exclude regular expression")
- is_excluded = True
- break
- if is_excluded:
+ exclude_match = exclude.search(normalized_path) if exclude else None
+ if exclude_match and exclude_match.group(0):
+ report.path_ignored(child, "matches the --exclude regular expression")
+ continue
+
+ force_exclude_match = (
+ force_exclude.search(normalized_path) if force_exclude else None
+ )
+ if force_exclude_match and force_exclude_match.group(0):
+ report.path_ignored(child, "matches the --force-exclude regular expression")
continue
if child.is_dir():
yield from gen_python_files(
- child.iterdir(), root, include, exclude_regexes, report, gitignore
+ child.iterdir(),
+ root,
+ include,
+ exclude,
+ force_exclude,
+ report,
+ gitignore,
)
elif child.is_file():
def find_project_root(srcs: Iterable[str]) -> Path:
"""Return a directory containing .git, .hg, or pyproject.toml.
- That directory can be one of the directories passed in `srcs` or their
- common parent.
+ That directory will be a common parent of all files and directories
+ passed in `srcs`.
If no directory in the tree contains a marker that would specify it's the
project root, the root of the file system is returned.
if not srcs:
return Path("/").resolve()
- common_base = min(Path(src).resolve() for src in srcs)
- if common_base.is_dir():
- # Append a fake file so `parents` below returns `common_base_dir`, too.
- common_base /= "fake-file"
- for directory in common_base.parents:
+ path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
+
+ # A list of lists of parents for each 'src'. 'src' is included as a
+ # "parent" of itself if it is a directory
+ src_parents = [
+ list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs
+ ]
+
+ common_base = max(
+ set.intersection(*(set(parents) for parents in src_parents)),
+ key=lambda path: path.parts,
+ )
+
+ for directory in (common_base, *common_base.parents):
if (directory / ".git").exists():
return directory
and field == "value"
and isinstance(value, str)
):
- normalized = re.sub(r" *\n[ \t]+", "\n ", value).strip()
+ normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
else:
normalized = value
yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"