git.madduck.net Git - etc/vim.git/blob - src/black/handle_ipynb_magics.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Surface links to Stability Policy (GH-2848)
[etc/vim.git] / src / black / handle_ipynb_magics.py
1 """Functions to process IPython magics with."""
2
3 from functools import lru_cache
4 import dataclasses
5 import ast
6 from typing import Dict, List, Tuple, Optional
7
8 import secrets
9 import sys
10 import collections
11
12 if sys.version_info >= (3, 10):
13     from typing import TypeGuard
14 else:
15     from typing_extensions import TypeGuard
16
17 from black.report import NothingChanged
18 from black.output import out
19
20
# Dotted call names that IPython's TransformerManager emits in place of magics.
TRANSFORMED_MAGICS = frozenset(
    {
        "get_ipython().run_cell_magic",
        "get_ipython().system",
        "get_ipython().getoutput",
        "get_ipython().run_line_magic",
    }
)

# Token names that are skipped when searching for the last significant token
# of a cell (see the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset(
    {
        "ENDMARKER",
        "NL",
        "NEWLINE",
        "COMMENT",
        "DEDENT",
        "UNIMPORTANT_WS",
        "ESCAPED_NL",
    }
)

# Names of cell magics listed here as Python cell magics.
PYTHON_CELL_MAGICS = frozenset(
    {
        "capture",
        "prun",
        "pypy",
        "python",
        "python3",
        "time",
        "timeit",
    }
)

# Module-level alias for the random hex generator used to build masks.
TOKEN_HEX = secrets.token_hex
52
53
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Immutable pairing of a mask with the source text it stands in for."""

    # Placeholder text substituted into the cell.
    mask: str
    # Original text that the placeholder replaced.
    src: str
58
59
@lru_cache()
def jupyter_dependencies_are_installed(*, verbose: bool, quiet: bool) -> bool:
    """Return True when both ``IPython`` and ``tokenize_rt`` can be imported.

    If either import fails, a hint on how to install the extras is emitted
    through ``out`` (unless ``quiet`` is set and ``verbose`` is not) and False
    is returned.  The result is memoized by ``lru_cache`` per argument pair,
    so the hint is emitted at most once for a given combination.
    """
    try:
        import IPython  # noqa:F401
        import tokenize_rt  # noqa:F401
    except ModuleNotFoundError:
        # Fall through to the reporting path below.
        pass
    else:
        return True

    should_report = verbose or not quiet
    if should_report:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            "You can fix this by running ``pip install black[jupyter]``"
        )
    return False
75
76
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the trailing semicolon of a Jupyter notebook cell, if any.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name in TOKENS_TO_IGNORE:
            continue
        # `tok` is the last significant token; only it may be the semicolon.
        if tok.name == "OP" and tok.src == ";":
            del tokens[position]
            return tokens_to_src(tokens), True
        break
    return src, False
111
112
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-attach the trailing semicolon if the cell originally had one.

    When ``has_trailing_semicolon`` is false, ``src`` is returned untouched.
    Otherwise a ``;`` is appended to the last token whose name is not in
    ``TOKENS_TO_IGNORE``.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    if not has_trailing_semicolon:
        return src

    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name not in TOKENS_TO_IGNORE:
            tokens[position] = tok._replace(src=tok.src + ";")
            return str(tokens_to_src(tokens))

    # Only reached when every token was ignorable.
    raise AssertionError(  # pragma: nocover
        "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
        "Please report a bug on https://github.com/psf/black/issues.  "
    ) from None
135
136
def mask_cell(src: str) -> Tuple[str, List[Replacement]]:
    """Mask IPython magics so content becomes parseable Python code.

    For example,

        %matplotlib inline
        'foo'

    becomes

        "25716f358c32750e"
        'foo'

    Returns the transformed code together with the replacements that were
    made.  Raises ``NothingChanged`` when the masked cell does not have the
    same number of lines as the input (multi-line magics are not supported).
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, will process below.
        pass
    else:
        # Already valid Python: nothing to mask.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()

    # First pass: turn a leading cell magic into a mask followed by its body.
    without_cell_magic, cell_magic_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    # Second pass: the body may itself contain line magics.
    masked, line_magic_replacements = replace_magics(
        manager.transform_cell(without_cell_magic)
    )

    if len(masked.splitlines()) != len(src.splitlines()):
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, cell_magic_replacements + line_magic_replacements
175
176
def get_token(src: str, magic: str) -> str:
    """Return a randomly generated, double-quoted token to mask ``magic`` with.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `"43fdd17f7e5ddc83"`.  The hex payload is
    sized from the length of the magic (padded with a ``.`` when that length
    is odd), and it is regenerated while it already occurs in ``src``.

    Raises ``AssertionError`` if ``magic`` is empty or if the retry limit is
    exceeded while looking for a payload.
    """
    assert magic
    # Two hex digits per byte, minus one byte to leave room for the quotes.
    n_bytes = max(len(magic) // 2 - 1, 1)
    payload = TOKEN_HEX(n_bytes)
    retries = 0
    while payload in src:
        payload = TOKEN_HEX(n_bytes)
        retries += 1
        if retries > 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues.  "
                f"The magic might be helpful: {magic}"
            ) from None
    # Odd-length magics leave the quoted payload one character short.
    padding = "." if len(payload) + 2 < len(magic) else ""
    return f'"{payload}{padding}"'
201
202
def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace a cell magic with a token.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\\n')

    becomes

        "a794."
        ls =!ls

    Returns the transformed code and a list holding the single replacement
    that was made, or ``src`` unchanged and an empty list when no cell magic
    is found.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))

    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    mask = get_token(src, header)
    # The mask takes the place of the header line; the body follows as-is.
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
232
233
def replace_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        "5e67db56d490fd39"
        'foo'

    Returns the transformed code along with the replacements that were made.
    Raises ``AssertionError`` if more than one magic is recorded for a line.
    """
    replacements: List[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for i, line in enumerate(src.splitlines(), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            col_offset, magic = (
                offsets_and_magics[0].col_offset,
                offsets_and_magics[0].magic,
            )
            mask = get_token(src, magic)
            replacements.append(Replacement(mask=mask, src=magic))
            # ``ast`` reports column offsets in UTF-8 bytes, not characters,
            # so slice the encoded line; indexing the ``str`` directly would
            # cut at the wrong place when non-ASCII text precedes the magic.
            prefix = line.encode("utf-8")[:col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
273
274
def unmask_cell(src: str, replacements: List[Replacement]) -> str:
    """Substitute every mask in the cell with the text it replaced.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Replacements are applied in the order given; each one replaces all
    occurrences of its mask.
    """
    restored = src
    for item in replacements:
        restored = restored.replace(item.mask, item.src)
    return restored
291
292
293 def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
294     """Check if attribute is IPython magic.
295
296     Note that the source of the abstract syntax tree
297     will already have been processed by IPython's
298     TransformerManager().transform_cell.
299     """
300     return (
301         isinstance(node, ast.Attribute)
302         and isinstance(node.value, ast.Call)
303         and isinstance(node.value.func, ast.Name)
304         and node.value.func.id == "get_ipython"
305     )
306
307
def _get_str_args(args: List[ast.expr]) -> List[str]:
    """Return the string values of ``args``, which must all be string literals.

    Raises ``AssertionError`` if any argument is not a string literal.
    """
    str_args = []
    for arg in args:
        # Python 3.8+ parses string literals into ``ast.Constant``; ``ast.Str``
        # is deprecated and no longer exists on the newest interpreters.
        if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
            str_args.append(arg.value)
        elif sys.version_info < (3, 8) and isinstance(arg, ast.Str):
            # Older parsers still produce the legacy ``ast.Str`` node.
            str_args.append(arg.s)
        else:
            raise AssertionError(
                f"Expected a string literal argument, got {type(arg).__name__}"
            )
    return str_args
314
315
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """Immutable record of a cell magic: its name, parameters and body."""

    name: str
    params: Optional[str]
    body: str

    @property
    def header(self) -> str:
        """Return the ``%%name [params]`` line; falsy params are omitted."""
        suffix = f" {self.params}" if self.params else ""
        return f"%%{self.name}{suffix}"
327
328
# Deliberately a plain class: combining ast.NodeVisitor with a dataclass
# breaks under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    """Find cell magics.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time\nfoo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\\n')

    and we look for instances of the latter.  The most recently visited
    match is stored on ``self.cell_magic``.
    """

    def __init__(self, cell_magic: Optional[CellMagic] = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record a ``run_cell_magic`` call as a ``CellMagic``, then recurse."""
        call = node.value
        is_cell_magic = (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        )
        if is_cell_magic:
            # Positional string arguments are: name, params, body.
            args = _get_str_args(call.args)
            self.cell_magic = CellMagic(name=args[0], params=args[1], body=args[2])
        self.generic_visit(node)
361
362
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """Immutable pairing of a column offset with reconstructed magic source."""

    # Column offset of the call node, as reported by ``ast``.
    col_offset: int
    # Magic source text as rebuilt from the transformed call.
    magic: str
367
368
# Subclassing ast.NodeVisitor rules out a dataclass here: mypyc would
# generate broken code for it.
class MagicFinder(ast.NodeVisitor):
    """Visit cell to look for get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics).  Matches are collected in ``self.magics``, keyed by
    the line number of the call.
    """

    def __init__(self) -> None:
        self.magics: Dict[int, List[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Look for system assign magics.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and we look for instances of any of the latter.  Any other
        ``get_ipython()`` method on the right-hand side raises
        ``AssertionError``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            method = call.func.attr
            if method == "run_line_magic":
                magic = f"%{args[0]} {args[1]}" if args[1] else f"%{args[0]}"
            elif method == "getoutput":
                magic = f"!{args[0]}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {method!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Look for magics in body of cell.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter.  Any other
        ``get_ipython()`` method raises ``NothingChanged``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            args = _get_str_args(call.args)
            method = call.func.attr
            if method == "system":
                magic = f"!{args[0]}"
            elif method == "getoutput":
                magic = f"!!{args[0]}"
            elif method == "run_line_magic":
                name = args[0]
                if name == "pinfo":
                    magic = f"?{args[1]}"
                elif name == "pinfo2":
                    magic = f"??{args[1]}"
                elif args[1]:
                    magic = f"%{name} {args[1]}"
                else:
                    magic = f"%{name}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)