]> git.madduck.net Git - etc/vim.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Lazily import parallelized format modules
authorRichard Si <63936253+ichard26@users.noreply.github.com>
Thu, 4 Aug 2022 00:18:33 +0000 (20:18 -0400)
committerRichard Si <63936253+ichard26@users.noreply.github.com>
Sat, 27 Aug 2022 01:11:00 +0000 (21:11 -0400)
`black.reformat_many` depends on a lot of slow-to-import modules. When
formatting simply a single file, the time paid to import those modules
is totally wasted. So I moved `black.reformat_many` and its helpers
to `black.concurrency` which is now *only* imported if there's more
than one file to reformat. This way, running Black over a single file
is snappier

Here are the numbers before and after this patch running `python -m
black --version`:

- interpreted: 411 ms +- 9 ms -> 342 ms +- 7 ms: 1.20x faster
- compiled: 365 ms +- 15 ms -> 304 ms +- 7 ms: 1.20x faster

Co-authored-by: Fabio Zadrozny <fabiofz@gmail.com>
CHANGES.md
docs/contributing/reference/reference_functions.rst
src/black/__init__.py
src/black/concurrency.py
tests/test_black.py

index 17659522fd188f621700ff8b13ede70f4fbdbba6..34c54710775d133dd0602d489794df73be38f916 100644 (file)
@@ -87,6 +87,8 @@
 
 <!-- Changes that improve Black's performance. -->
 
+- Reduce Black's startup time when formatting a single file by 15-30% (#3211)
+
 ## 22.6.0
 
 ### Style
index 01ffe44ef530c4913843ebd8e813680118045636..50eaeb31e15ba71629dd3f0bc8ce4907cf72c187 100644 (file)
@@ -52,7 +52,7 @@ Formatting
 
 .. autofunction:: black.reformat_one
 
-.. autofunction:: black.schedule_formatting
+.. autofunction:: black.concurrency.schedule_formatting
 
 File operations
 ---------------
@@ -173,7 +173,7 @@ Utilities
 
 .. autofunction:: black.linegen.should_split_line
 
-.. autofunction:: black.shutdown
+.. autofunction:: black.concurrency.shutdown
 
 .. autofunction:: black.strings.sub_twice
 
index a0c1ad4b41691e588e4e44285a0d2430cb2695ca..afc76e1fa0cb5e7dda1a51544718ef778e846344 100644 (file)
@@ -1,10 +1,8 @@
-import asyncio
 import io
 import json
 import os
 import platform
 import re
-import signal
 import sys
 import tokenize
 import traceback
@@ -13,10 +11,8 @@ from dataclasses import replace
 from datetime import datetime
 from enum import Enum
 from json.decoder import JSONDecodeError
-from multiprocessing import Manager, freeze_support
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
     Generator,
@@ -32,15 +28,19 @@ from typing import (
     Union,
 )
 
+if sys.version_info >= (3, 8):
+    from typing import Final
+else:
+    from typing_extensions import Final
+
 import click
 from click.core import ParameterSource
 from mypy_extensions import mypyc_attr
 from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
 
 from _black_version import version as __version__
-from black.cache import Cache, filter_cached, get_cache_info, read_cache, write_cache
+from black.cache import Cache, get_cache_info, read_cache, write_cache
 from black.comments import normalize_fmt_off
-from black.concurrency import cancel, maybe_install_uvloop, shutdown
 from black.const import (
     DEFAULT_EXCLUDES,
     DEFAULT_INCLUDES,
@@ -91,10 +91,8 @@ from black.trans import iter_fexpr_spans
 from blib2to3.pgen2 import token
 from blib2to3.pytree import Leaf, Node
 
-if TYPE_CHECKING:
-    from concurrent.futures import Executor
-
 COMPILED = Path(__file__).suffix in (".pyd", ".so")
+DEFAULT_WORKERS: Final = os.cpu_count()
 
 # types
 FileContent = str
@@ -125,8 +123,6 @@ class WriteBack(Enum):
 # Legacy name, left for integrations.
 FileMode = Mode
 
-DEFAULT_WORKERS = os.cpu_count()
-
 
 def read_pyproject_toml(
     ctx: click.Context, param: click.Parameter, value: Optional[str]
@@ -592,6 +588,8 @@ def main(  # noqa: C901
                 report=report,
             )
         else:
+            from black.concurrency import reformat_many
+
             reformat_many(
                 sources=sources,
                 fast=fast,
@@ -776,132 +774,6 @@ def reformat_one(
         report.failed(src, str(exc))
 
 
-# diff-shades depends on being to monkeypatch this function to operate. I know it's
-# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
-@mypyc_attr(patchable=True)
-def reformat_many(
-    sources: Set[Path],
-    fast: bool,
-    write_back: WriteBack,
-    mode: Mode,
-    report: "Report",
-    workers: Optional[int],
-) -> None:
-    """Reformat multiple files using a ProcessPoolExecutor."""
-    from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
-
-    executor: Executor
-    worker_count = workers if workers is not None else DEFAULT_WORKERS
-    if sys.platform == "win32":
-        # Work around https://bugs.python.org/issue26903
-        assert worker_count is not None
-        worker_count = min(worker_count, 60)
-    try:
-        executor = ProcessPoolExecutor(max_workers=worker_count)
-    except (ImportError, NotImplementedError, OSError):
-        # we arrive here if the underlying system does not support multi-processing
-        # like in AWS Lambda or Termux, in which case we gracefully fallback to
-        # a ThreadPoolExecutor with just a single worker (more workers would not do us
-        # any good due to the Global Interpreter Lock)
-        executor = ThreadPoolExecutor(max_workers=1)
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        loop.run_until_complete(
-            schedule_formatting(
-                sources=sources,
-                fast=fast,
-                write_back=write_back,
-                mode=mode,
-                report=report,
-                loop=loop,
-                executor=executor,
-            )
-        )
-    finally:
-        try:
-            shutdown(loop)
-        finally:
-            asyncio.set_event_loop(None)
-        if executor is not None:
-            executor.shutdown()
-
-
-async def schedule_formatting(
-    sources: Set[Path],
-    fast: bool,
-    write_back: WriteBack,
-    mode: Mode,
-    report: "Report",
-    loop: asyncio.AbstractEventLoop,
-    executor: "Executor",
-) -> None:
-    """Run formatting of `sources` in parallel using the provided `executor`.
-
-    (Use ProcessPoolExecutors for actual parallelism.)
-
-    `write_back`, `fast`, and `mode` options are passed to
-    :func:`format_file_in_place`.
-    """
-    cache: Cache = {}
-    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
-        cache = read_cache(mode)
-        sources, cached = filter_cached(cache, sources)
-        for src in sorted(cached):
-            report.done(src, Changed.CACHED)
-    if not sources:
-        return
-
-    cancelled = []
-    sources_to_cache = []
-    lock = None
-    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
-        # For diff output, we need locks to ensure we don't interleave output
-        # from different processes.
-        manager = Manager()
-        lock = manager.Lock()
-    tasks = {
-        asyncio.ensure_future(
-            loop.run_in_executor(
-                executor, format_file_in_place, src, fast, mode, write_back, lock
-            )
-        ): src
-        for src in sorted(sources)
-    }
-    pending = tasks.keys()
-    try:
-        loop.add_signal_handler(signal.SIGINT, cancel, pending)
-        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
-    except NotImplementedError:
-        # There are no good alternatives for these on Windows.
-        pass
-    while pending:
-        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-        for task in done:
-            src = tasks.pop(task)
-            if task.cancelled():
-                cancelled.append(task)
-            elif task.exception():
-                report.failed(src, str(task.exception()))
-            else:
-                changed = Changed.YES if task.result() else Changed.NO
-                # If the file was written back or was successfully checked as
-                # well-formatted, store this information in the cache.
-                if write_back is WriteBack.YES or (
-                    write_back is WriteBack.CHECK and changed is Changed.NO
-                ):
-                    sources_to_cache.append(src)
-                report.done(src, changed)
-    if cancelled:
-        if sys.version_info >= (3, 7):
-            await asyncio.gather(*cancelled, return_exceptions=True)
-        else:
-            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
-    if sources_to_cache:
-        write_cache(cache, sources_to_cache, mode)
-
-
 def format_file_in_place(
     src: Path,
     fast: bool,
@@ -1506,8 +1378,11 @@ def patch_click() -> None:
 
 
 def patched_main() -> None:
-    maybe_install_uvloop()
-    freeze_support()
+    if sys.platform == "win32" and getattr(sys, "frozen", False):
+        from multiprocessing import freeze_support
+
+        freeze_support()
+
     patch_click()
     main()
 
index 24f67b62f0617f79ab21fa9b036645338a4feb8e..d77ea40bd4675e1f89c4dd58554fec5ee77c6825 100644 (file)
@@ -1,9 +1,25 @@
+"""
+Formatting many files at once via multiprocessing. Contains entrypoint and utilities.
+
+NOTE: this module is only imported if we need to format several files at once.
+"""
+
 import asyncio
 import logging
+import signal
 import sys
-from typing import Any, Iterable
+from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
+from multiprocessing import Manager
+from pathlib import Path
+from typing import Any, Iterable, Optional, Set
+
+from mypy_extensions import mypyc_attr
 
+from black import DEFAULT_WORKERS, WriteBack, format_file_in_place
+from black.cache import Cache, filter_cached, read_cache, write_cache
+from black.mode import Mode
 from black.output import err
+from black.report import Changed, Report
 
 
 def maybe_install_uvloop() -> None:
@@ -11,7 +27,6 @@ def maybe_install_uvloop() -> None:
 
     This is called only from command-line entry points to avoid
     interfering with the parent process if Black is used as a library.
-
     """
     try:
         import uvloop
@@ -55,3 +70,129 @@ def shutdown(loop: asyncio.AbstractEventLoop) -> None:
         cf_logger = logging.getLogger("concurrent.futures")
         cf_logger.setLevel(logging.CRITICAL)
         loop.close()
+
+
+# diff-shades depends on being to monkeypatch this function to operate. I know it's
+# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
+@mypyc_attr(patchable=True)
+def reformat_many(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: Report,
+    workers: Optional[int],
+) -> None:
+    """Reformat multiple files using a ProcessPoolExecutor."""
+    maybe_install_uvloop()
+
+    executor: Executor
+    worker_count = workers if workers is not None else DEFAULT_WORKERS
+    if sys.platform == "win32":
+        # Work around https://bugs.python.org/issue26903
+        assert worker_count is not None
+        worker_count = min(worker_count, 60)
+    try:
+        executor = ProcessPoolExecutor(max_workers=worker_count)
+    except (ImportError, NotImplementedError, OSError):
+        # we arrive here if the underlying system does not support multi-processing
+        # like in AWS Lambda or Termux, in which case we gracefully fallback to
+        # a ThreadPoolExecutor with just a single worker (more workers would not do us
+        # any good due to the Global Interpreter Lock)
+        executor = ThreadPoolExecutor(max_workers=1)
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(
+            schedule_formatting(
+                sources=sources,
+                fast=fast,
+                write_back=write_back,
+                mode=mode,
+                report=report,
+                loop=loop,
+                executor=executor,
+            )
+        )
+    finally:
+        try:
+            shutdown(loop)
+        finally:
+            asyncio.set_event_loop(None)
+        if executor is not None:
+            executor.shutdown()
+
+
+async def schedule_formatting(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: "Report",
+    loop: asyncio.AbstractEventLoop,
+    executor: "Executor",
+) -> None:
+    """Run formatting of `sources` in parallel using the provided `executor`.
+
+    (Use ProcessPoolExecutors for actual parallelism.)
+
+    `write_back`, `fast`, and `mode` options are passed to
+    :func:`format_file_in_place`.
+    """
+    cache: Cache = {}
+    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        cache = read_cache(mode)
+        sources, cached = filter_cached(cache, sources)
+        for src in sorted(cached):
+            report.done(src, Changed.CACHED)
+    if not sources:
+        return
+
+    cancelled = []
+    sources_to_cache = []
+    lock = None
+    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        # For diff output, we need locks to ensure we don't interleave output
+        # from different processes.
+        manager = Manager()
+        lock = manager.Lock()
+    tasks = {
+        asyncio.ensure_future(
+            loop.run_in_executor(
+                executor, format_file_in_place, src, fast, mode, write_back, lock
+            )
+        ): src
+        for src in sorted(sources)
+    }
+    pending = tasks.keys()
+    try:
+        loop.add_signal_handler(signal.SIGINT, cancel, pending)
+        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
+    except NotImplementedError:
+        # There are no good alternatives for these on Windows.
+        pass
+    while pending:
+        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        for task in done:
+            src = tasks.pop(task)
+            if task.cancelled():
+                cancelled.append(task)
+            elif task.exception():
+                report.failed(src, str(task.exception()))
+            else:
+                changed = Changed.YES if task.result() else Changed.NO
+                # If the file was written back or was successfully checked as
+                # well-formatted, store this information in the cache.
+                if write_back is WriteBack.YES or (
+                    write_back is WriteBack.CHECK and changed is Changed.NO
+                ):
+                    sources_to_cache.append(src)
+                report.done(src, changed)
+    if cancelled:
+        if sys.version_info >= (3, 7):
+            await asyncio.gather(*cancelled, return_exceptions=True)
+        else:
+            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
+    if sources_to_cache:
+        write_cache(cache, sources_to_cache, mode)
index c76b3faddf556e1bb114efb59296d132eb9f7f4d..5da247b71b013a416cba7ec0a37d3f6960542bc1 100644 (file)
@@ -1763,7 +1763,9 @@ class TestCaching:
                 src = (workspace / f"test{tag}.py").resolve()
                 with src.open("w") as fobj:
                     fobj.write("print('hello')")
-            with patch("black.Manager", wraps=multiprocessing.Manager) as mgr:
+            with patch(
+                "black.concurrency.Manager", wraps=multiprocessing.Manager
+            ) as mgr:
                 cmd = ["--diff", str(workspace)]
                 if color:
                     cmd.append("--color")
@@ -1810,7 +1812,7 @@ class TestCaching:
                 str(cached): black.get_cache_info(cached),
                 str(cached_but_changed): (0.0, 0),
             }
-            todo, done = black.filter_cached(
+            todo, done = black.cache.filter_cached(
                 cache, {uncached, cached, cached_but_changed}
             )
             assert todo == {uncached, cached_but_changed}