scripts/fuzz.py

   1 """Property-based tests for Black.
   2
   3 By Zac Hatfield-Dodds, based on my Hypothesmith tool for source code
   4 generation.  You can run this file with `python`, `pytest`, or (soon)
   5 a coverage-guided fuzzer I'm working on.
   6 """
   7
   8 import re
   9
  10 import hypothesmith
  11 from hypothesis import HealthCheck, given, settings
  12 from hypothesis import strategies as st
  13
  14 import black
  15 from blib2to3.pgen2.tokenize import TokenError
  16
  17
  18 # This test uses the Hypothesis and Hypothesmith libraries to generate random
  19 # syntatically-valid Python source code and run Black in odd modes.
  20 @settings(
  21     max_examples=1000,  # roughly 1k tests/minute, or half that under coverage
  22     derandomize=True,  # deterministic mode to avoid CI flakiness
  23     deadline=None,  # ignore Hypothesis' health checks; we already know that
  24     suppress_health_check=list(HealthCheck),  # this is slow and filter-heavy.
  25 )
  26 @given(
  27     # Note that while Hypothesmith might generate code unlike that written by
  28     # humans, it's a general test that should pass for any *valid* source code.
  29     # (so e.g. running it against code scraped of the internet might also help)
  30     src_contents=hypothesmith.from_grammar() | hypothesmith.from_node(),
  31     # Using randomly-varied modes helps us to exercise less common code paths.
  32     mode=st.builds(
  33         black.FileMode,
  34         line_length=st.just(88) | st.integers(0, 200),
  35         string_normalization=st.booleans(),
  36         preview=st.booleans(),
  37         is_pyi=st.booleans(),
  38         magic_trailing_comma=st.booleans(),
  39     ),
  40 )
  41 def test_idempotent_any_syntatically_valid_python(
  42     src_contents: str, mode: black.FileMode
  43 ) -> None:
  44     # Before starting, let's confirm that the input string is valid Python:
  45     compile(src_contents, "<string>", "exec")  # else the bug is in hypothesmith
  46
  47     # Then format the code...
  48     try:
  49         dst_contents = black.format_str(src_contents, mode=mode)
  50     except black.InvalidInput:
  51         # This is a bug - if it's valid Python code, as above, Black should be
  52         # able to cope with it.  See issues #970, #1012
  53         # TODO: remove this try-except block when issues are resolved.
  54         return
  55     except TokenError as e:
  56         if (  # Special-case logic for backslashes followed by newlines or end-of-input
  57             e.args[0] == "EOF in multi-line statement"
  58             and re.search(r"\\($|\r?\n)", src_contents) is not None
  59         ):
  60             # This is a bug - if it's valid Python code, as above, Black should be
  61             # able to cope with it.  See issue #1012.
  62             # TODO: remove this block when the issue is resolved.
  63             return
  64         raise
  65
  66     # And check that we got equivalent and stable output.
  67     black.assert_equivalent(src_contents, dst_contents)
  68     black.assert_stable(src_contents, dst_contents, mode=mode)
  69
  70     # Future test: check that pure-python and mypyc versions of black
  71     # give identical output for identical input?
  72
  73
  74 if __name__ == "__main__":
  75     # Run tests, including shrinking and reporting any known failures.
  76     test_idempotent_any_syntatically_valid_python()
  77
  78     # If Atheris is available, run coverage-guided fuzzing.
  79     # (if you want only bounded fuzzing, just use `pytest fuzz.py`)
  80     try:
  81         import sys
  82
  83         import atheris  # type: ignore[import]
  84     except ImportError:
  85         pass
  86     else:
  87         test = test_idempotent_any_syntatically_valid_python
  88         atheris.Setup(
  89             sys.argv,
  90             test.hypothesis.fuzz_one_input,  # type: ignore[attr-defined]
  91         )
  92         atheris.Fuzz()