git.madduck.net Git - etc/vim.git/blob - src/black/handle_ipynb_magics.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add Kedro to project list and QuantumBlack to orgs (#2502)
[etc/vim.git] / src / black / handle_ipynb_magics.py
1 """Functions to process IPython magics with."""
2
3 from functools import lru_cache
4 import dataclasses
5 import ast
6 from typing import Dict, List, Tuple, Optional
7
8 import secrets
9 import sys
10 import collections
11
12 if sys.version_info >= (3, 10):
13     from typing import TypeGuard
14 else:
15     from typing_extensions import TypeGuard
16
17 from black.report import NothingChanged
18 from black.output import out
19
20
# ``get_ipython()`` calls that IPython's TransformerManager emits in place of
# magics (see the examples in the finder classes in this module).
TRANSFORMED_MAGICS = frozenset(
    {
        "get_ipython().run_cell_magic",
        "get_ipython().system",
        "get_ipython().getoutput",
        "get_ipython().run_line_magic",
    }
)

# Token names that are skipped when looking for the last meaningful token of
# a cell while removing / reinstating a trailing semicolon.
TOKENS_TO_IGNORE = frozenset(
    {
        "ENDMARKER",
        "NL",
        "NEWLINE",
        "COMMENT",
        "DEDENT",
        "UNIMPORTANT_WS",
        "ESCAPED_NL",
    }
)

# Cell magics for which ``replace_cell_magics`` raises ``NothingChanged``
# instead of masking the cell.
NON_PYTHON_CELL_MAGICS = frozenset(
    {
        "%%bash",
        "%%html",
        "%%javascript",
        "%%js",
        "%%latex",
        "%%markdown",
        "%%perl",
        "%%ruby",
        "%%script",
        "%%sh",
        "%%svg",
        "%%writefile",
    }
)
56
57
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Pairing of a masking token with the magic source it stands in for."""

    mask: str  # token substituted into the cell in place of the magic
    src: str  # original magic text, put back by ``unmask_cell``
62
63
@lru_cache()
def jupyter_dependencies_are_installed(*, verbose: bool, quiet: bool) -> bool:
    """Tell whether ``IPython`` and ``tokenize_rt`` can both be imported.

    When either import fails, a hint on how to install the extras is
    printed via `out`, unless 'quiet' is set and 'verbose' is not.
    The result is cached by ``lru_cache`` per argument combination.
    """
    try:
        import IPython  # noqa:F401
        import tokenize_rt  # noqa:F401
    except ModuleNotFoundError:
        installed = False
        if not quiet or verbose:
            out(
                "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
                "You can fix this by running ``pip install black[jupyter]``"
            )
    else:
        installed = True
    return installed
79
80
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the semicolon that ends a Jupyter notebook cell, if any.

    A cell such as

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    is turned into

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed. When nothing is removed, 'src' is
    returned untouched.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import (
        src_to_tokens,
        tokens_to_src,
        reversed_enumerate,
    )

    tokens = src_to_tokens(src)
    for position, tok in reversed_enumerate(tokens):
        if tok.name in TOKENS_TO_IGNORE:
            continue
        # 'tok' is the last meaningful token; only it decides the outcome.
        if tok.name == "OP" and tok.src == ";":
            del tokens[position]
            return tokens_to_src(tokens), True
        break
    return src, False
115
116
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Reinstate the trailing semicolon of a cell that originally had one.

    When 'has_trailing_semicolon' is false, 'src' is returned unchanged.
    Otherwise a ``;`` is appended to the last token whose name is not in
    TOKENS_TO_IGNORE. An AssertionError is raised if no such token exists.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    if not has_trailing_semicolon:
        return src
    from tokenize_rt import src_to_tokens, tokens_to_src, reversed_enumerate

    tokens = src_to_tokens(src)
    last_meaningful = next(
        (
            position
            for position, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_meaningful is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues.  "
        ) from None
    target = tokens[last_meaningful]
    tokens[last_meaningful] = target._replace(src=target.src + ";")
    return str(tokens_to_src(tokens))
139
140
def mask_cell(src: str) -> Tuple[str, List[Replacement]]:
    """Hide IPython magics behind string tokens so the cell parses as Python.

    For example,

        %matplotlib inline
        'foo'

    becomes

        "25716f358c32750e"
        'foo'

    Returns the masked code together with the replacements that were made.
    Source that already parses as Python is returned as-is with an empty
    replacement list. NothingChanged is raised when the masked code does
    not have the same number of lines as 'src'.
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, which are dealt with below.
        already_valid = False
    else:
        already_valid = True
    if already_valid:
        # Syntax is fine, so there is nothing to mask.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    masked, cell_magic_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    masked, line_magic_replacements = replace_magics(manager.transform_cell(masked))
    if len(masked.splitlines()) != len(src.splitlines()):
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, cell_magic_replacements + line_magic_replacements
179
180
def get_token(src: str, magic: str) -> str:
    """Generate a random, double-quoted token to mask an IPython magic with.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `"43fdd17f7e5ddc83"`. For magics of
    four or more characters the quoted token is exactly as long as the
    magic (a ``.`` is appended to the hex digits when needed); shorter
    magics get a four-character token. The hex part is regenerated while
    it occurs anywhere in 'src', and an AssertionError is raised once
    that has been retried more than 100 times.
    """
    assert magic
    size = max(len(magic) // 2 - 1, 1)
    attempts = 0
    candidate = secrets.token_hex(size)
    while candidate in src:  # pragma: nocover
        attempts += 1
        candidate = secrets.token_hex(size)
        if attempts > 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues.  "
                f"The magic might be helpful: {magic}"
            ) from None
    # The two quotes count towards the length; pad odd-length magics.
    padding = "." if len(candidate) + 2 < len(magic) else ""
    return f'"{candidate}{padding}"'
205
206
def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Swap a cell magic for a masking token.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example,

        get_ipython().run_cell_magic('t', '-n1', 'ls =!ls\\n')

    becomes

        "a794."
        ls =!ls

    Returns the transformed code along with the replacement made. If no
    cell magic is found, 'src' comes back unchanged with no replacements.
    NothingChanged is raised for magics listed in NON_PYTHON_CELL_MAGICS.
    """
    tree = ast.parse(src)

    finder = CellMagicFinder()
    finder.visit(tree)
    found = finder.cell_magic
    if found is None:
        return src, []

    magic_name = found.header.split()[0]
    if magic_name in NON_PYTHON_CELL_MAGICS:
        raise NothingChanged

    mask = get_token(src, found.header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=found.header)]
237
238
def replace_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        "5e67db56d490fd39"
        'foo'

    The replacements, along with the transformed code, are returned.
    Everything on a line from the start of the magic call onwards is
    replaced by the mask. An AssertionError is raised if more than one
    magic was recorded for the same line.
    """
    replacements: List[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for lineno, line in enumerate(src.splitlines(), start=1):
        if lineno in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[lineno]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            found = offsets_and_magics[0]
            mask = get_token(src, found.magic)
            replacements.append(Replacement(mask=mask, src=found.magic))
            # ``ast`` reports col_offset as a UTF-8 *byte* offset, so the
            # prefix has to be cut from the encoded line; slicing the str
            # would cut too far when non-ASCII characters precede the magic.
            prefix = line.encode("utf-8")[: found.col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
278
279
def unmask_cell(src: str, replacements: List[Replacement]) -> str:
    """Put the original magics back in place of their masking tokens.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Replacements are applied in the order given; each one substitutes
    every occurrence of its mask.
    """
    restored = src
    for entry in replacements:
        mask, original = entry.mask, entry.src
        restored = restored.replace(mask, original)
    return restored
296
297
298 def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
299     """Check if attribute is IPython magic.
300
301     Note that the source of the abstract syntax tree
302     will already have been processed by IPython's
303     TransformerManager().transform_cell.
304     """
305     return (
306         isinstance(node, ast.Attribute)
307         and isinstance(node.value, ast.Call)
308         and isinstance(node.value.func, ast.Name)
309         and node.value.func.id == "get_ipython"
310     )
311
312
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """Header line and body of a cell magic, as rebuilt by CellMagicFinder."""

    header: str  # ``%%name``, followed by the magic's arguments when non-empty
    body: str  # third argument of the ``run_cell_magic`` call
317
318
@dataclasses.dataclass
class CellMagicFinder(ast.NodeVisitor):
    """Find cell magics.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %%time\nfoo()

    would have been transformed to

        get_ipython().run_cell_magic('time', '', 'foo()\\n')

    and we look for instances of the latter. The most recently visited
    match is stored in 'cell_magic', which stays None when there is none.
    """

    cell_magic: Optional[CellMagic] = None

    @staticmethod
    def _string_args(call: ast.Call) -> List[str]:
        """Return the positional arguments of 'call' as plain strings.

        String literals are ``ast.Constant`` nodes on Python 3.8+; nodes
        from older parsers expose the text as ``.s`` instead. The
        deprecated ``ast.Str`` class is deliberately not referenced.
        Raises AssertionError if any argument is not a string literal.
        """
        values: List[str] = []
        for arg in call.args:
            if isinstance(arg, ast.Constant):
                value = arg.value
            else:
                value = getattr(arg, "s", None)
            if not isinstance(value, str):
                raise AssertionError(
                    "INTERNAL ERROR: expected only string literals as arguments "
                    f"of a get_ipython() call, got: {ast.dump(arg)}"
                )
            values.append(value)
        return values

    def visit_Expr(self, node: ast.Expr) -> None:
        """Find cell magic, extract header and body."""
        call = node.value
        if (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "run_cell_magic"
        ):
            args = self._string_args(call)
            header = f"%%{args[0]}"
            if args[1]:
                # Only append the argument string when the magic has one.
                header += f" {args[1]}"
            self.cell_magic = CellMagic(header=header, body=args[2])
        self.generic_visit(node)
356
357
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """Position and reconstructed source of one magic found by MagicFinder."""

    col_offset: int  # ``col_offset`` of the ``get_ipython()`` call node
    magic: str  # magic as rebuilt from the call, e.g. ``!ls`` or ``%time x``
362
363
@dataclasses.dataclass
class MagicFinder(ast.NodeVisitor):
    """Visit cell to look for get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics). Matches are collected in 'magics', keyed by the
    line number of the call.
    """

    magics: Dict[int, List[OffsetAndMagic]] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(list)
    )

    @staticmethod
    def _string_args(call: ast.Call) -> List[str]:
        """Return the positional arguments of 'call' as plain strings.

        String literals are ``ast.Constant`` nodes on Python 3.8+; nodes
        from older parsers expose the text as ``.s`` instead. The
        deprecated ``ast.Str`` class is deliberately not referenced.
        Raises AssertionError if an argument is not a string literal or
        if there are no arguments at all.
        """
        values: List[str] = []
        for arg in call.args:
            if isinstance(arg, ast.Constant):
                value = arg.value
            else:
                value = getattr(arg, "s", None)
            if not isinstance(value, str):
                raise AssertionError(
                    "INTERNAL ERROR: expected only string literals as arguments "
                    f"of a get_ipython() call, got: {ast.dump(arg)}"
                )
            values.append(value)
        if not values:
            raise AssertionError(
                "INTERNAL ERROR: get_ipython() call without any arguments."
            )
        return values

    def visit_Assign(self, node: ast.Assign) -> None:
        """Look for system assign magics.

        For example,

            black_version = !black --version

        would have been transformed to

            black_version = get_ipython().getoutput('black --version')

        and we look for instances of the latter.
        """
        call = node.value
        if (
            isinstance(call, ast.Call)
            and _is_ipython_magic(call.func)
            and call.func.attr == "getoutput"
        ):
            args = self._string_args(call)
            self.magics[call.lineno].append(
                OffsetAndMagic(call.col_offset, f"!{args[0]}")
            )
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Look for magics in body of cell.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter. NothingChanged is
        raised for any other ``get_ipython()`` method.
        """
        call = node.value
        if isinstance(call, ast.Call) and _is_ipython_magic(call.func):
            # Arguments are validated before the method name is inspected.
            args = self._string_args(call)
            method = call.func.attr
            if method == "run_line_magic":
                if args[0] == "pinfo":
                    src = f"?{args[1]}"
                elif args[0] == "pinfo2":
                    src = f"??{args[1]}"
                else:
                    src = f"%{args[0]}"
                    if args[1]:
                        src += f" {args[1]}"
            elif method == "system":
                src = f"!{args[0]}"
            elif method == "getoutput":
                src = f"!!{args[0]}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[call.lineno].append(OffsetAndMagic(call.col_offset, src))
        self.generic_visit(node)
461         self.generic_visit(node)