<!-- Changes that improve Black's performance. -->
+- Reduce Black's startup time when formatting a single file by 15-30% (#3211)
+
## 22.6.0
### Style
.. autofunction:: black.reformat_one
-.. autofunction:: black.schedule_formatting
+.. autofunction:: black.concurrency.schedule_formatting
File operations
---------------
.. autofunction:: black.linegen.should_split_line
-.. autofunction:: black.shutdown
+.. autofunction:: black.concurrency.shutdown
.. autofunction:: black.strings.sub_twice
-import asyncio
import io
import json
import os
import platform
import re
-import signal
import sys
import tokenize
import traceback
from datetime import datetime
from enum import Enum
from json.decoder import JSONDecodeError
-from multiprocessing import Manager, freeze_support
from pathlib import Path
from typing import (
- TYPE_CHECKING,
Any,
Dict,
Generator,
Union,
)
+if sys.version_info >= (3, 8):
+ from typing import Final
+else:
+ from typing_extensions import Final
+
import click
from click.core import ParameterSource
from mypy_extensions import mypyc_attr
from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
from _black_version import version as __version__
-from black.cache import Cache, filter_cached, get_cache_info, read_cache, write_cache
+from black.cache import Cache, get_cache_info, read_cache, write_cache
from black.comments import normalize_fmt_off
-from black.concurrency import cancel, maybe_install_uvloop, shutdown
from black.const import (
DEFAULT_EXCLUDES,
DEFAULT_INCLUDES,
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node
-if TYPE_CHECKING:
- from concurrent.futures import Executor
-
COMPILED = Path(__file__).suffix in (".pyd", ".so")
+DEFAULT_WORKERS: Final = os.cpu_count()
# types
FileContent = str
# Legacy name, left for integrations.
FileMode = Mode
-DEFAULT_WORKERS = os.cpu_count()
-
def read_pyproject_toml(
ctx: click.Context, param: click.Parameter, value: Optional[str]
report=report,
)
else:
+ from black.concurrency import reformat_many
+
reformat_many(
sources=sources,
fast=fast,
report.failed(src, str(exc))
-# diff-shades depends on being to monkeypatch this function to operate. I know it's
-# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
-@mypyc_attr(patchable=True)
-def reformat_many(
- sources: Set[Path],
- fast: bool,
- write_back: WriteBack,
- mode: Mode,
- report: "Report",
- workers: Optional[int],
-) -> None:
- """Reformat multiple files using a ProcessPoolExecutor."""
- from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
-
- executor: Executor
- worker_count = workers if workers is not None else DEFAULT_WORKERS
- if sys.platform == "win32":
- # Work around https://bugs.python.org/issue26903
- assert worker_count is not None
- worker_count = min(worker_count, 60)
- try:
- executor = ProcessPoolExecutor(max_workers=worker_count)
- except (ImportError, NotImplementedError, OSError):
- # we arrive here if the underlying system does not support multi-processing
- # like in AWS Lambda or Termux, in which case we gracefully fallback to
- # a ThreadPoolExecutor with just a single worker (more workers would not do us
- # any good due to the Global Interpreter Lock)
- executor = ThreadPoolExecutor(max_workers=1)
-
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- loop.run_until_complete(
- schedule_formatting(
- sources=sources,
- fast=fast,
- write_back=write_back,
- mode=mode,
- report=report,
- loop=loop,
- executor=executor,
- )
- )
- finally:
- try:
- shutdown(loop)
- finally:
- asyncio.set_event_loop(None)
- if executor is not None:
- executor.shutdown()
-
-
-async def schedule_formatting(
- sources: Set[Path],
- fast: bool,
- write_back: WriteBack,
- mode: Mode,
- report: "Report",
- loop: asyncio.AbstractEventLoop,
- executor: "Executor",
-) -> None:
- """Run formatting of `sources` in parallel using the provided `executor`.
-
- (Use ProcessPoolExecutors for actual parallelism.)
-
- `write_back`, `fast`, and `mode` options are passed to
- :func:`format_file_in_place`.
- """
- cache: Cache = {}
- if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
- cache = read_cache(mode)
- sources, cached = filter_cached(cache, sources)
- for src in sorted(cached):
- report.done(src, Changed.CACHED)
- if not sources:
- return
-
- cancelled = []
- sources_to_cache = []
- lock = None
- if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
- # For diff output, we need locks to ensure we don't interleave output
- # from different processes.
- manager = Manager()
- lock = manager.Lock()
- tasks = {
- asyncio.ensure_future(
- loop.run_in_executor(
- executor, format_file_in_place, src, fast, mode, write_back, lock
- )
- ): src
- for src in sorted(sources)
- }
- pending = tasks.keys()
- try:
- loop.add_signal_handler(signal.SIGINT, cancel, pending)
- loop.add_signal_handler(signal.SIGTERM, cancel, pending)
- except NotImplementedError:
- # There are no good alternatives for these on Windows.
- pass
- while pending:
- done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
- for task in done:
- src = tasks.pop(task)
- if task.cancelled():
- cancelled.append(task)
- elif task.exception():
- report.failed(src, str(task.exception()))
- else:
- changed = Changed.YES if task.result() else Changed.NO
- # If the file was written back or was successfully checked as
- # well-formatted, store this information in the cache.
- if write_back is WriteBack.YES or (
- write_back is WriteBack.CHECK and changed is Changed.NO
- ):
- sources_to_cache.append(src)
- report.done(src, changed)
- if cancelled:
- if sys.version_info >= (3, 7):
- await asyncio.gather(*cancelled, return_exceptions=True)
- else:
- await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
- if sources_to_cache:
- write_cache(cache, sources_to_cache, mode)
-
-
def format_file_in_place(
src: Path,
fast: bool,
def patched_main() -> None:
- maybe_install_uvloop()
- freeze_support()
+ # multiprocessing is imported here, and freeze_support() called, only when
+ # sys.platform is "win32" and sys.frozen is set; every other run skips the import.
+ if sys.platform == "win32" and getattr(sys, "frozen", False):
+ from multiprocessing import freeze_support
+
+ freeze_support()
+
patch_click()
main()
+"""
+Formatting many files at once via multiprocessing. Contains entrypoint and utilities.
+
+NOTE: this module is only imported if we need to format several files at once.
+"""
+
import asyncio
import logging
+import signal
import sys
-from typing import Any, Iterable
+from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
+from multiprocessing import Manager
+from pathlib import Path
+from typing import Any, Iterable, Optional, Set
+
+from mypy_extensions import mypyc_attr
+from black import DEFAULT_WORKERS, WriteBack, format_file_in_place
+from black.cache import Cache, filter_cached, read_cache, write_cache
+from black.mode import Mode
from black.output import err
+from black.report import Changed, Report
def maybe_install_uvloop() -> None:
This is called only from command-line entry points to avoid
interfering with the parent process if Black is used as a library.
-
"""
try:
import uvloop
cf_logger = logging.getLogger("concurrent.futures")
cf_logger.setLevel(logging.CRITICAL)
loop.close()
+
+
+# diff-shades depends on being able to monkeypatch this function to operate. I know
+# it's not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
+@mypyc_attr(patchable=True)
+def reformat_many(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: Report,
+    workers: Optional[int],
+) -> None:
+    """Reformat multiple files using a ProcessPoolExecutor.
+
+    `sources`, `fast`, `write_back`, `mode` and `report` are forwarded unchanged to
+    :func:`schedule_formatting`. `workers` is the requested pool size; when it is
+    None, DEFAULT_WORKERS is used instead.
+
+    If a process pool cannot be created, a single-worker ThreadPoolExecutor is used.
+    The event loop created here is always passed to :func:`shutdown` and the
+    executor is always shut down, even if formatting raises.
+    """
+    maybe_install_uvloop()
+
+    executor: Executor
+    worker_count = workers if workers is not None else DEFAULT_WORKERS
+    # DEFAULT_WORKERS comes from os.cpu_count(), which may be None. Only clamp a
+    # known count; a None count is handed to ProcessPoolExecutor as-is so that it
+    # picks its own default instead of failing in min() (an assert would vanish
+    # under -O).
+    if sys.platform == "win32" and worker_count is not None:
+        # Work around https://bugs.python.org/issue26903
+        worker_count = min(worker_count, 60)
+    try:
+        executor = ProcessPoolExecutor(max_workers=worker_count)
+    except (ImportError, NotImplementedError, OSError):
+        # we arrive here if the underlying system does not support multi-processing
+        # like in AWS Lambda or Termux, in which case we gracefully fallback to
+        # a ThreadPoolExecutor with just a single worker (more workers would not do us
+        # any good due to the Global Interpreter Lock)
+        executor = ThreadPoolExecutor(max_workers=1)
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(
+            schedule_formatting(
+                sources=sources,
+                fast=fast,
+                write_back=write_back,
+                mode=mode,
+                report=report,
+                loop=loop,
+                executor=executor,
+            )
+        )
+    finally:
+        try:
+            shutdown(loop)
+        finally:
+            asyncio.set_event_loop(None)
+            # `executor` is always bound by the try/except above.
+            executor.shutdown()
+
+
+async def schedule_formatting(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: Report,
+    loop: asyncio.AbstractEventLoop,
+    executor: Executor,
+) -> None:
+    """Run formatting of `sources` in parallel using the provided `executor`.
+
+    (Use ProcessPoolExecutors for actual parallelism.)
+
+    `write_back`, `fast`, and `mode` options are passed to
+    :func:`format_file_in_place`.
+
+    Unless diff output was requested, sources that `filter_cached` reports as
+    cached are recorded as `Changed.CACHED` without being scheduled. Every other
+    source is reported to `report` as done or failed when its task completes, and
+    sources that were written back, or checked and found unchanged, are added to
+    the cache afterwards.
+    """
+    cache: Cache = {}
+    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        cache = read_cache(mode)
+        sources, cached = filter_cached(cache, sources)
+        for src in sorted(cached):
+            report.done(src, Changed.CACHED)
+    if not sources:
+        return
+
+    cancelled = []
+    sources_to_cache = []
+    lock = None
+    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        # For diff output, we need locks to ensure we don't interleave output
+        # from different processes.
+        manager = Manager()
+        lock = manager.Lock()
+    tasks = {
+        asyncio.ensure_future(
+            loop.run_in_executor(
+                executor, format_file_in_place, src, fast, mode, write_back, lock
+            )
+        ): src
+        for src in sorted(sources)
+    }
+    # `pending` is a live view of the keys of `tasks`: popping finished tasks below
+    # shrinks it, which is what eventually ends the `while pending` loop.
+    pending = tasks.keys()
+    try:
+        loop.add_signal_handler(signal.SIGINT, cancel, pending)
+        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
+    except NotImplementedError:
+        # There are no good alternatives for these on Windows.
+        pass
+    while pending:
+        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        for task in done:
+            src = tasks.pop(task)
+            # Check for cancellation first: exception() must not be called on a
+            # cancelled task.
+            if task.cancelled():
+                cancelled.append(task)
+                continue
+            # Compare against None rather than relying on truthiness, so that an
+            # exception object that happens to be falsy is still reported as a
+            # failure instead of being re-raised by task.result() below.
+            exc = task.exception()
+            if exc is not None:
+                report.failed(src, str(exc))
+                continue
+            changed = Changed.YES if task.result() else Changed.NO
+            # If the file was written back or was successfully checked as
+            # well-formatted, store this information in the cache.
+            if write_back is WriteBack.YES or (
+                write_back is WriteBack.CHECK and changed is Changed.NO
+            ):
+                sources_to_cache.append(src)
+            report.done(src, changed)
+    if cancelled:
+        # Wait for the cancelled tasks to settle; return_exceptions=True collects
+        # their outcome as results instead of raising here.
+        if sys.version_info >= (3, 7):
+            await asyncio.gather(*cancelled, return_exceptions=True)
+        else:
+            # Python < 3.7: pass the loop explicitly.
+            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
+    if sources_to_cache:
+        write_cache(cache, sources_to_cache, mode)
src = (workspace / f"test{tag}.py").resolve()
with src.open("w") as fobj:
fobj.write("print('hello')")
- with patch("black.Manager", wraps=multiprocessing.Manager) as mgr:
+ with patch(
+ "black.concurrency.Manager", wraps=multiprocessing.Manager
+ ) as mgr:
cmd = ["--diff", str(workspace)]
if color:
cmd.append("--color")
str(cached): black.get_cache_info(cached),
str(cached_but_changed): (0.0, 0),
}
- todo, done = black.filter_cached(
+ todo, done = black.cache.filter_cached(
cache, {uncached, cached, cached_but_changed}
)
assert todo == {uncached, cached_but_changed}