git.madduck.net Git - etc/vim.git/blob - src/black/handle_ipynb_magics.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

2b6b920921195bbbb7ec18741b14ca8ce621097b
[etc/vim.git] / src / black / handle_ipynb_magics.py
1 """Functions to process IPython magics with."""
2
3 import ast
4 import collections
5 import dataclasses
6 import secrets
7 import sys
8 from functools import lru_cache
9 from typing import Dict, List, Optional, Tuple
10
11 if sys.version_info >= (3, 10):
12     from typing import TypeGuard
13 else:
14     from typing_extensions import TypeGuard
15
16 from black.output import out
17 from black.report import NothingChanged
18
# Dotted call paths that appear in a cell once IPython has rewritten its
# magics into ordinary Python (see the examples in the finder classes below).
TRANSFORMED_MAGICS = frozenset(
    {
        "get_ipython().run_cell_magic",
        "get_ipython().system",
        "get_ipython().getoutput",
        "get_ipython().run_line_magic",
    }
)

# Token names that are skipped when looking for the last meaningful token of
# a cell (used by the trailing-semicolon helpers).
TOKENS_TO_IGNORE = frozenset(
    {
        "ENDMARKER",
        "NL",
        "NEWLINE",
        "COMMENT",
        "DEDENT",
        "UNIMPORTANT_WS",
        "ESCAPED_NL",
    }
)

# Names of cell magics; not consumed inside this module.
# NOTE(review): presumably the cell magics whose body is Python code -- confirm
# against the callers.
PYTHON_CELL_MAGICS = frozenset(
    {
        "capture",
        "prun",
        "pypy",
        "python",
        "python3",
        "time",
        "timeit",
    }
)

# Module-level handle on the random-hex generator used by ``get_token``.
TOKEN_HEX = secrets.token_hex
50
51
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Immutable pairing of a mask token with the magic text it stands in for."""

    # Quoted placeholder string that was written into the cell.
    mask: str
    # Original magic source that ``mask`` replaced.
    src: str
56
57
@lru_cache()
def jupyter_dependencies_are_installed(*, verbose: bool, quiet: bool) -> bool:
    """Tell whether the optional packages needed for ``.ipynb`` files import.

    Attempts to import ``tokenize_rt`` and ``IPython``. If either import
    raises ``ModuleNotFoundError``, ``False`` is returned and an installation
    hint is emitted through ``out`` -- unless ``quiet`` is set without
    ``verbose``. Results are memoised by ``lru_cache`` per argument pair.
    """
    try:
        # isort: off
        # tokenize_rt is less commonly installed than IPython
        # and IPython is expensive to import
        import tokenize_rt  # noqa:F401
        import IPython  # noqa:F401

        # isort: on
    except ModuleNotFoundError:
        wants_hint = verbose or not quiet
        if wants_hint:
            out(
                "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
                'You can fix this by running ``pip install "black[jupyter]"``'
            )
        return False
    return True
78
79
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the semicolon that ends a Jupyter notebook cell, if there is one.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.

    Returns the (possibly modified) source together with a flag saying
    whether a semicolon was removed.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    # Index of the last token that is not whitespace/comment/end-of-input noise.
    last_significant = next(
        (
            position
            for position, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:
        return src, False
    candidate = tokens[last_significant]
    if candidate.name != "OP" or candidate.src != ";":
        return src, False
    del tokens[last_significant]
    return tokens_to_src(tokens), True
110
111
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the trailing semicolon when the cell originally ended in one.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.

    ``src`` is returned untouched when ``has_trailing_semicolon`` is false.
    Otherwise the semicolon is attached to the last token that is not listed
    in ``TOKENS_TO_IGNORE``; an ``AssertionError`` is raised if no such token
    exists.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    last_significant = next(
        (
            position
            for position, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues.  "
        ) from None
    target = tokens[last_significant]
    tokens[last_significant] = target._replace(src=target.src + ";")
    return str(tokens_to_src(tokens))
134
135
def mask_cell(src: str) -> Tuple[str, List[Replacement]]:
    """Hide IPython magics behind string tokens so the cell parses as Python.

    For example,

        %matplotlib inline
        'foo'

    becomes

        "25716f358c32750e"
        'foo'

    Returns the transformed code along with the replacements that were made.
    Source that already parses is returned unchanged with no replacements.
    Raises ``NothingChanged`` when masking alters the number of lines
    (multi-line magics are not supported).
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, will process below.
        pass
    else:
        # Syntax is fine, nothing to mask, early return.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # First pass: turn a leading cell magic into a token plus its body.
    without_cell_magic, cell_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    # Second pass: the body may itself contain line/system magics.
    masked, line_replacements = replace_magics(
        manager.transform_cell(without_cell_magic)
    )
    if len(masked.splitlines()) != len(src.splitlines()):
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, [*cell_replacements, *line_replacements]
174
175
def get_token(src: str, magic: str) -> str:
    """Produce a random quoted token to stand in for an IPython magic.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `"43fdd17f7e5ddc83"`. The hex payload is
    sized so that the quoted token is as long as the magic (a trailing ``.``
    pads odd lengths; magics shorter than four characters still get a
    four-character token), and the payload is guaranteed not to occur
    anywhere in ``src``.

    Raises ``AssertionError`` if no unused token turns up after repeated
    attempts.
    """
    assert magic
    nbytes = max(len(magic) // 2 - 1, 1)
    token = TOKEN_HEX(nbytes)
    # Up to 101 candidates are checked; each rejected one is replaced at once.
    for _ in range(101):
        if token not in src:
            break
        token = TOKEN_HEX(nbytes)
    else:
        raise AssertionError(
            "INTERNAL ERROR: Black was not able to replace IPython magic. "
            "Please report a bug on https://github.com/psf/black/issues.  "
            f"The magic might be helpful: {magic}"
        ) from None
    # The two quotes count towards the length; pad by one when still short.
    padding = "." if len(token) + 2 < len(magic) else ""
    return f'"{token}{padding}"'
200
201
def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Swap a cell magic call for a token followed by the magic's body.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\\n')

    becomes

        "a794."
        ls =!ls

    The replacement, along with the transformed code, is returned. When no
    cell magic is present, ``src`` comes back unchanged with an empty list.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))
    found = finder.cell_magic
    if found is None:
        return src, []
    header = found.header
    mask = get_token(src, header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
231
232
def replace_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        "5e67db56d490fd39"
        'foo'

    The replacement, along with the transformed code, are returned.

    Every mask handed out is distinct from the cell's text *and* from the
    masks already issued for earlier lines. Short magics such as ``!ls`` get
    a one-byte token, so without that second check two lines could receive
    the same mask and ``unmask_cell`` would restore both with the first
    magic's text.

    Raises ``AssertionError`` if a line carries more than one magic, or (via
    ``get_token``) if no unused token can be generated.
    """
    replacements: List[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    # Text that new tokens must not occur in: the cell plus all earlier masks.
    # Masks are quote-delimited hex, so joining them on cannot create a match
    # that spans a boundary.
    taken = src
    new_srcs = []
    for i, line in enumerate(src.splitlines(), start=1):
        if i in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[i]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            col_offset, magic = (
                offsets_and_magics[0].col_offset,
                offsets_and_magics[0].magic,
            )
            mask = get_token(taken, magic)
            taken = f"{taken}\n{mask}"
            replacements.append(Replacement(mask=mask, src=magic))
            # Keep whatever precedes the magic (e.g. ``name = ``) and drop the
            # transformed call in favour of the mask.
            line = line[:col_offset] + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
272
273
def unmask_cell(src: str, replacements: List[Replacement]) -> str:
    """Restore the original magics in place of their mask tokens.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Replacements are applied in the order given.
    """
    restored = src
    for entry in replacements:
        token, original = entry.mask, entry.src
        restored = restored.replace(token, original)
    return restored
290
291
292 def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
293     """Check if attribute is IPython magic.
294
295     Note that the source of the abstract syntax tree
296     will already have been processed by IPython's
297     TransformerManager().transform_cell.
298     """
299     return (
300         isinstance(node, ast.Attribute)
301         and isinstance(node.value, ast.Call)
302         and isinstance(node.value.func, ast.Name)
303         and node.value.func.id == "get_ipython"
304     )
305
306
def _get_str_args(args: List[ast.expr]) -> List[str]:
    """Return the string values of call arguments that are all string literals.

    Raises ``AssertionError`` if any argument is not a plain string literal.

    ``ast.Str`` and its ``.s`` attribute are deprecated since Python 3.8 and
    removed in 3.12, so literals are read through ``ast.Constant``. Nodes that
    are not ``ast.Constant`` (the ``ast.Str`` nodes older parsers produce)
    fall back to their ``.s`` attribute.
    """
    str_args = []
    for arg in args:
        if isinstance(arg, ast.Constant):
            value = arg.value
        else:
            value = getattr(arg, "s", None)
        # Explicit raise rather than ``assert`` so the check survives ``-O``.
        if not isinstance(value, str):
            raise AssertionError(
                f"Expected a string literal argument, got {type(arg).__name__}"
            )
        str_args.append(value)
    return str_args
313
314
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """A cell magic split into its name, optional parameters and body."""

    name: str
    params: Optional[str]
    body: str

    @property
    def header(self) -> str:
        """The ``%%name [params]`` line as it would appear atop the cell."""
        prefix = f"%%{self.name}"
        return f"{prefix} {self.params}" if self.params else prefix
326
327
328 # ast.NodeVisitor + dataclass = breakage under mypyc.
class CellMagicFinder(ast.NodeVisitor):
    """Locate a cell magic in an already-transformed cell.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time
        foo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\\n')

    and we look for instances of the latter. The most recently visited match
    is kept in ``cell_magic``; it stays ``None`` when nothing matches.
    """

    def __init__(self, cell_magic: Optional[CellMagic] = None) -> None:
        self.cell_magic = cell_magic

    def visit_Expr(self, node: ast.Expr) -> None:
        """Record name, parameters and body of a ``run_cell_magic`` call."""
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            if call.func.attr == "run_cell_magic":
                str_args = _get_str_args(call.args)
                self.cell_magic = CellMagic(
                    name=str_args[0], params=str_args[1], body=str_args[2]
                )
        self.generic_visit(node)
360
361
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """Immutable record of one magic found on a line of a transformed cell."""

    # Column at which the ``get_ipython()...`` call starts on its line.
    col_offset: int
    # Magic reconstructed in its original IPython syntax (e.g. ``!ls``).
    magic: str
366
367
368 # Unsurprisingly, subclassing ast.NodeVisitor means we can't use dataclasses here
369 # as mypyc will generate broken code.
class MagicFinder(ast.NodeVisitor):
    """Walk a cell and collect the magics hidden behind get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics). Findings are stored in ``magics``, keyed by the line
    number of the call.
    """

    def __init__(self) -> None:
        self.magics: Dict[int, List[OffsetAndMagic]] = collections.defaultdict(list)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Pick up magics whose output is assigned to a name.

        For example,

            black_version = !black --version
            env = %env var

        would have been (respectively) transformed to

            black_version = get_ipython().getoutput('black --version')
            env = get_ipython().run_line_magic('env', 'var')

        and we look for instances of any of the latter. Any other
        ``get_ipython()`` method on the right-hand side raises
        ``AssertionError``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            str_args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "run_line_magic":
                name, params = str_args[0], str_args[1]
                magic = f"%{name} {params}" if params else f"%{name}"
            elif kind == "getoutput":
                magic = f"!{str_args[0]}"
            else:
                raise AssertionError(
                    f"Unexpected IPython magic {kind!r} found. "
                    "Please report a bug on https://github.com/psf/black/issues."
                ) from None
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Pick up magics that stand alone as a statement.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter. Any other
        ``get_ipython()`` method raises ``NothingChanged``.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            str_args = _get_str_args(call.args)
            kind = call.func.attr
            if kind == "system":
                magic = f"!{str_args[0]}"
            elif kind == "getoutput":
                magic = f"!!{str_args[0]}"
            elif kind == "run_line_magic":
                name, rest = str_args[0], str_args[1]
                # ``?x`` / ``??x`` are spelled as the pinfo / pinfo2 magics.
                help_prefixes = {"pinfo": "?", "pinfo2": "??"}
                if name in help_prefixes:
                    magic = f"{help_prefixes[name]}{rest}"
                elif rest:
                    magic = f"%{name} {rest}"
                else:
                    magic = f"%{name}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, magic))
        self.generic_visit(node)
464         self.generic_visit(node)