git.madduck.net Git - etc/vim.git/blob - src/black/handle_ipynb_magics.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Enable `PYTHONWARNDEFAULTENCODING = 1` in CI (#3763)
[etc/vim.git] / src / black / handle_ipynb_magics.py
1 """Functions to process IPython magics with."""
2
3 import ast
4 import collections
5 import dataclasses
6 import secrets
7 import sys
8 from functools import lru_cache
9 from typing import Dict, List, Optional, Tuple
10
11 if sys.version_info >= (3, 10):
12     from typing import TypeGuard
13 else:
14     from typing_extensions import TypeGuard
15
16 from black.output import out
17 from black.report import NothingChanged
18
# Dotted call names that stand in for IPython magics once a cell has been
# run through IPython's TransformerManager().transform_cell.
TRANSFORMED_MAGICS = frozenset(
    {
        "get_ipython().run_cell_magic",
        "get_ipython().system",
        "get_ipython().getoutput",
        "get_ipython().run_line_magic",
    }
)

# Token names that are skipped over when searching a cell, from the end,
# for its last meaningful token (see the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset(
    {
        "ENDMARKER",
        "NL",
        "NEWLINE",
        "COMMENT",
        "DEDENT",
        "UNIMPORTANT_WS",
        "ESCAPED_NL",
    }
)

# Names of cell magics.
# NOTE(review): not referenced in this part of the file; presumably the cell
# magics whose body is itself Python code -- confirm against the callers.
PYTHON_CELL_MAGICS = frozenset(
    {
        "capture",
        "prun",
        "pypy",
        "python",
        "python3",
        "time",
        "timeit",
    }
)

# Module-level alias for the random hex generator used by get_token().
TOKEN_HEX = secrets.token_hex
50
51
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Immutable pairing of a placeholder with the text it stands in for."""

    # Placeholder string that was written into the cell.
    mask: str
    # Original magic text that `mask` replaced.
    src: str
56
57
@lru_cache()
def jupyter_dependencies_are_installed(*, verbose: bool, quiet: bool) -> bool:
    """Tell whether ``tokenize_rt`` and ``IPython`` can both be imported.

    When either import fails with ``ModuleNotFoundError``, a hint on how to
    install the extras is printed through ``out`` unless the caller asked for
    quiet output without verbosity. The answer is memoized by ``lru_cache``
    per ``(verbose, quiet)`` combination.
    """
    try:
        # isort: off
        # tokenize_rt is less commonly installed than IPython
        # and IPython is expensive to import
        import tokenize_rt  # noqa:F401
        import IPython  # noqa:F401

        # isort: on
    except ModuleNotFoundError:
        available = False
    else:
        available = True

    if available:
        return True

    if verbose or not quiet:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            'You can fix this by running ``pip install "black[jupyter]"``'
        )
    return False
78
79
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the semicolon that ends a Jupyter notebook cell, if there is one.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name in TOKENS_TO_IGNORE:
            continue
        # First meaningful token from the end: it alone decides the outcome.
        if tok.name != "OP" or tok.src != ";":
            return src, False
        del tokens[position]
        return tokens_to_src(tokens), True
    # Only ignorable tokens were present.
    return src, False
110
111
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the cell's trailing semicolon when it originally had one.

    The semicolon is attached to the last token of ``src`` whose name is not
    in ``TOKENS_TO_IGNORE``. When ``has_trailing_semicolon`` is false the
    source is returned untouched.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name not in TOKENS_TO_IGNORE:
            tokens[position] = tok._replace(src=tok.src + ";")
            return str(tokens_to_src(tokens))
    # No meaningful token to attach the semicolon to.
    raise AssertionError(  # pragma: nocover
        "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
        "Please report a bug on https://github.com/psf/black/issues.  "
    ) from None
134
135
def mask_cell(src: str) -> Tuple[str, List[Replacement]]:
    """Hide IPython magics behind placeholders so the cell parses as Python.

    For example,

        %matplotlib inline
        'foo'

    becomes

        "25716f358c32750e"
        'foo'

    Returns the transformed code and the list of replacements made. A cell
    that already parses as Python is returned unchanged with an empty list.
    Raises ``NothingChanged`` when masking changes the number of lines.
    """
    already_valid = True
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, which are handled below.
        already_valid = False
    if already_valid:
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # First pass: a cell magic, if any, is replaced by mask + body.
    after_cell_pass, cell_masks = replace_cell_magics(manager.transform_cell(src))
    # Second pass: magics inside the (remaining) body.
    masked, line_masks = replace_magics(manager.transform_cell(after_cell_pass))
    if len(masked.splitlines()) != len(src.splitlines()):
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, cell_masks + line_masks
174
175
def get_token(src: str, magic: str) -> str:
    """Produce a random, double-quoted hex placeholder for an IPython magic.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `"43fdd17f7e5ddc83"`. The hex part is
    sized from the magic's length (at least one byte), and a trailing `.`
    is added when the quoted token would otherwise be shorter than the
    magic. A token already present anywhere in `src` is rejected and
    regenerated; an ``AssertionError`` is raised once more than 100
    regenerations have been needed.
    """
    assert magic
    size = max(len(magic) // 2 - 1, 1)
    candidate = TOKEN_HEX(size)
    attempts = 0
    while candidate in src:
        attempts += 1
        candidate = TOKEN_HEX(size)
        if attempts > 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues.  "
                f"The magic might be helpful: {magic}"
            ) from None
    # Pad by one character when the quoted token falls short of the magic.
    padding = "." if len(candidate) + 2 < len(magic) else ""
    return f'"{candidate}{padding}"'
200
201
def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Swap a cell magic call for a placeholder followed by the cell body.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\\n')

    becomes

        "a794."
        ls =!ls

    Returns the transformed code and the replacement made; when no cell
    magic is found, `src` is returned unchanged with an empty list.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))
    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    mask = get_token(src, header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
231
232
def replace_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        "5e67db56d490fd39"
        'foo'

    Everything on a line before the magic call (e.g. an assignment target)
    is kept; the call itself and the rest of the line are replaced by the
    mask.

    The replacement, along with the transformed code, are returned.

    Raises ``AssertionError`` if more than one magic is found on a line.
    """
    replacements = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for i, line in enumerate(src.splitlines(), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            col_offset, magic = (
                offsets_and_magics[0].col_offset,
                offsets_and_magics[0].magic,
            )
            mask = get_token(src, magic)
            replacements.append(Replacement(mask=mask, src=magic))
            # ast reports col_offset as a UTF-8 *byte* offset, so slice the
            # encoded line; slicing the str directly would cut at the wrong
            # place whenever non-ASCII text precedes the magic.
            prefix = line.encode("utf-8")[:col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
272
273
def unmask_cell(src: str, replacements: List[Replacement]) -> str:
    """Put the original magics back in place of their placeholders.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    The replacements are applied in the order given, each one substituting
    every occurrence of its mask.
    """
    restored = src
    for item in replacements:
        restored = restored.replace(item.mask, item.src)
    return restored
290
291
292 def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
293     """Check if attribute is IPython magic.
294
295     Note that the source of the abstract syntax tree
296     will already have been processed by IPython's
297     TransformerManager().transform_cell.
298     """
299     return (
300         isinstance(node, ast.Attribute)
301         and isinstance(node.value, ast.Call)
302         and isinstance(node.value.func, ast.Name)
303         and node.value.func.id == "get_ipython"
304     )
305
306
307 def _get_str_args(args: List[ast.expr]) -> List[str]:
308     str_args = []
309     for arg in args:
310         assert isinstance(arg, ast.Str)
311         str_args.append(arg.s)
312     return str_args
313
314
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """Immutable record of a cell magic: its name, parameters and body."""

    name: str
    params: Optional[str]
    body: str

    @property
    def header(self) -> str:
        """The `%%name` line, with the parameters appended when non-empty."""
        prefix = f"%%{self.name}"
        return f"{prefix} {self.params}" if self.params else prefix
326
327
# ast.NodeVisitor + dataclass = breakage under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    """Locate a cell magic in already-transformed source.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time\n
        foo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\\n')

    and we look for instances of the latter. The match is stored on
    `cell_magic`; if several match, the one visited last is kept.
    """

    def __init__(self, cell_magic: Optional[CellMagic] = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record the cell magic's name, parameters and body when found."""
        call = node.value
        if (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        ):
            magic_args = _get_str_args(call.args)
            self.cell_magic = CellMagic(
                name=magic_args[0], params=magic_args[1], body=magic_args[2]
            )
        # Keep descending so nested expressions are visited as well.
        self.generic_visit(node)
361
362
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """Immutable pairing of a magic's source text with its column offset."""

    # Column offset of the transformed call, as reported by the ast node.
    col_offset: int
    # Reconstructed magic text, e.g. `%matplotlib inline` or `!ls`.
    magic: str
367
368
# Unsurprisingly, subclassing ast.NodeVisitor means we can't use dataclasses here
# as mypyc will generate broken code.
class MagicFinder(ast.NodeVisitor):
    """Collect `get_ipython()` calls found in a cell, keyed by line number.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics). Each hit is stored in `magics` under the line
    number of the call, as an `OffsetAndMagic` holding the call's column
    offset and the reconstructed magic text.
    """

    def __init__(self) -> None:
        self.magics: Dict[int, List[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Record magics used as the right-hand side of an assignment.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and we look for instances of any of the latter. Any other
        `get_ipython()` method raises an ``AssertionError``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            magic_args = _get_str_args(call.args)
            method = call.func.attr
            if method == "getoutput":
                text = f"!{magic_args[0]}"
            elif method == "run_line_magic":
                text = f"%{magic_args[0]}"
                if magic_args[1]:
                    text = f"{text} {magic_args[1]}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {method!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, text))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record magics that appear as standalone expression statements.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter. Any other
        `get_ipython()` method raises ``NothingChanged``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            magic_args = _get_str_args(call.args)
            method = call.func.attr
            if method == "run_line_magic":
                name, rest = magic_args[0], magic_args[1]
                # The help magics are written with a ?/?? prefix, not %name.
                help_prefix = {"pinfo": "?", "pinfo2": "??"}.get(name)
                if help_prefix is not None:
                    text = f"{help_prefix}{rest}"
                else:
                    text = f"%{name}"
                    if rest:
                        text = f"{text} {rest}"
            elif method == "system":
                text = f"!{magic_args[0]}"
            elif method == "getoutput":
                text = f"!!{magic_args[0]}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, text))
        self.generic_visit(node)