git.madduck.net Git - etc/vim.git/blob - src/black/handle_ipynb_magics.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

blib2to3: support unparenthesized walruses in more places (#2447)
[etc/vim.git] / src / black / handle_ipynb_magics.py
1 """Functions to process IPython magics with."""
2 from functools import lru_cache
3 import dataclasses
4 import ast
5 from typing import Dict
6
7 import secrets
8 from typing import List, Tuple
9 import collections
10
11 from typing import Optional
12 from typing_extensions import TypeGuard
13 from black.report import NothingChanged
14 from black.output import out
15
16
# Dotted call paths of the ``get_ipython()`` methods this module deals with.
# NOTE(review): not referenced in the visible part of this file — presumably
# consumed elsewhere; confirm before changing.
TRANSFORMED_MAGICS = frozenset(
    {
        "get_ipython().getoutput",
        "get_ipython().run_cell_magic",
        "get_ipython().run_line_magic",
        "get_ipython().system",
    }
)

# Token names skipped when scanning a cell backwards for its last
# significant token (see the trailing-semicolon helpers below).
TOKENS_TO_IGNORE = frozenset(
    {
        "COMMENT",
        "DEDENT",
        "ENDMARKER",
        "ESCAPED_NL",
        "NEWLINE",
        "NL",
        "UNIMPORTANT_WS",
    }
)

# Cell magics for which ``replace_cell_magics`` raises ``NothingChanged``
# instead of masking the header.
NON_PYTHON_CELL_MAGICS = frozenset(
    {
        "%%bash",
        "%%html",
        "%%javascript",
        "%%js",
        "%%latex",
        "%%markdown",
        "%%perl",
        "%%ruby",
        "%%script",
        "%%sh",
        "%%svg",
        "%%writefile",
    }
)
52
53
@dataclasses.dataclass(frozen=True)
class Replacement:
    """Pair a mask token with the original magic text it replaced."""

    # Placeholder string that was written into the cell.
    mask: str
    # Magic source text the placeholder stands in for.
    src: str
58
59
@lru_cache()
def jupyter_dependencies_are_installed(*, verbose: bool, quiet: bool) -> bool:
    """Return True if both ``IPython`` and ``tokenize_rt`` can be imported.

    When either import fails with ``ModuleNotFoundError`` the function
    returns False, and prints an installation hint via ``out`` unless the
    caller asked for quiet, non-verbose output. Results are memoized per
    ``(verbose, quiet)`` combination by ``lru_cache``.
    """
    try:
        import IPython  # noqa:F401
        import tokenize_rt  # noqa:F401
    except ModuleNotFoundError:
        installed = False
    else:
        installed = True

    if installed:
        return True

    if verbose or not quiet:
        out(
            "Skipping .ipynb files as Jupyter dependencies are not installed.\n"
            "You can fix this by running ``pip install black[jupyter]``"
        )
    return False
75
76
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip a trailing semicolon from a Jupyter notebook cell, if present.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Returns the (possibly modified) source together with a flag telling
    whether a semicolon was removed.

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import (
        reversed_enumerate,
        src_to_tokens,
        tokens_to_src,
    )

    tokens = src_to_tokens(src)
    # Only the last token that is not whitespace/comment/structural matters.
    last_significant = next(
        (
            (position, tok)
            for position, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if last_significant is None:
        return src, False

    position, tok = last_significant
    if tok.name != "OP" or tok.src != ";":
        return src, False

    del tokens[position]
    return tokens_to_src(tokens), True
111
112
def put_trailing_semicolon_back(src: str, has_trailing_semicolon: bool) -> str:
    """Re-append the trailing semicolon to a cell that originally had one.

    If ``has_trailing_semicolon`` is false, ``src`` is returned untouched.
    Otherwise a ``;`` is appended to the last token whose name is not in
    ``TOKENS_TO_IGNORE``.

    Raises ``AssertionError`` if no such token can be found.

    Uses ``tokenize_rt`` so that round-tripping works fine.
    """
    if not has_trailing_semicolon:
        return src

    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    position = next(
        (
            idx
            for idx, tok in reversed_enumerate(tokens)
            if tok.name not in TOKENS_TO_IGNORE
        ),
        None,
    )
    if position is None:  # pragma: nocover
        raise AssertionError(
            "INTERNAL ERROR: Was not able to reinstate trailing semicolon. "
            "Please report a bug on https://github.com/psf/black/issues.  "
        ) from None

    target = tokens[position]
    tokens[position] = target._replace(src=target.src + ";")
    return str(tokens_to_src(tokens))
135
136
def mask_cell(src: str) -> Tuple[str, List[Replacement]]:
    """Hide IPython magics behind string tokens so the cell parses as Python.

    For example,

        %matplotlib inline
        'foo'

    becomes

        "25716f358c32750e"
        'foo'

    Returns the transformed code together with the list of replacements
    that were made. A cell that already parses as Python is returned
    unchanged with an empty list.

    Raises ``NothingChanged`` when masking changes the number of lines
    (multi-line magics are not supported).
    """
    try:
        ast.parse(src)
    except SyntaxError:
        # Might have IPython magics, will process below.
        already_valid = False
    else:
        already_valid = True
    if already_valid:
        # Syntax is fine, nothing to mask, early return.
        return src, []

    from IPython.core.inputtransformer2 import TransformerManager

    manager = TransformerManager()
    # First pass: a cell magic, if any, is replaced by a token and its body.
    after_cell_pass, cell_magic_replacements = replace_cell_magics(
        manager.transform_cell(src)
    )
    # Second pass: transform again and mask the magics inside the body.
    masked, magic_replacements = replace_magics(
        manager.transform_cell(after_cell_pass)
    )
    if len(src.splitlines()) != len(masked.splitlines()):
        # Multi-line magic, not supported.
        raise NothingChanged
    return masked, cell_magic_replacements + magic_replacements
175
176
def get_token(src: str, magic: str) -> str:
    """Build a random, double-quoted hex token to stand in for ``magic``.

    For example, if 'magic' was `%matplotlib inline`, then a possible
    token to mask it with would be `"43fdd17f7e5ddc83"`.

    For magics of four or more characters the quoted token is exactly as
    long as the magic (a ``.`` is appended inside the quotes when needed to
    make up the length); shorter magics get a four-character token. A fresh
    token is drawn for as long as the candidate already occurs in ``src``.

    Raises ``AssertionError`` if ``magic`` is empty, or if more than 100
    redraws were needed.
    """
    assert magic
    magic_len = len(magic)
    nbytes = max(magic_len // 2 - 1, 1)
    redraws = 0
    candidate = secrets.token_hex(nbytes)
    while candidate in src:  # pragma: nocover
        candidate = secrets.token_hex(nbytes)
        redraws += 1
        if redraws > 100:
            raise AssertionError(
                "INTERNAL ERROR: Black was not able to replace IPython magic. "
                "Please report a bug on https://github.com/psf/black/issues.  "
                f"The magic might be helpful: {magic}"
            ) from None
    # Hex tokens always have even length; pad by one for odd-length magics.
    padding = "." if len(candidate) + 2 < magic_len else ""
    return '"' + candidate + padding + '"'
201
202
def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Swap a cell magic call for a mask token followed by the cell body.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example, a ``get_ipython().run_cell_magic(...)`` call whose three
    arguments are ``'t'``, ``'-n1'`` and the body ``ls =!ls`` becomes

        "a794."
        ls =!ls

    Returns the transformed code and the replacement that was made (an
    empty list, with ``src`` unchanged, when no cell magic is found).

    Raises ``NothingChanged`` if the magic is in ``NON_PYTHON_CELL_MAGICS``.
    """
    finder = CellMagicFinder()
    finder.visit(ast.parse(src))
    found = finder.cell_magic
    if found is None:
        return src, []

    header = found.header
    if header.split()[0] in NON_PYTHON_CELL_MAGICS:
        raise NothingChanged

    mask = get_token(src, header)
    return f"{mask}\n{found.body}", [Replacement(mask=mask, src=header)]
233
234
def replace_magics(src: str) -> Tuple[str, List[Replacement]]:
    """Replace magics within body of cell.

    Note that 'src' will already have been processed by IPython's
    TransformerManager().transform_cell.

    Example, this

        get_ipython().run_line_magic('matplotlib', 'inline')
        'foo'

    becomes

        "5e67db56d490fd39"
        'foo'

    Everything on a line from the start of the magic call onwards is
    replaced by the mask; text before the call is kept.

    Returns the transformed code along with the replacements that were made.

    Raises ``AssertionError`` if more than one magic is found on a line.
    """
    replacements: List[Replacement] = []
    magic_finder = MagicFinder()
    magic_finder.visit(ast.parse(src))
    new_srcs = []
    for lineno, line in enumerate(src.splitlines(), start=1):
        if lineno in magic_finder.magics:
            offsets_and_magics = magic_finder.magics[lineno]
            if len(offsets_and_magics) != 1:  # pragma: nocover
                raise AssertionError(
                    f"Expecting one magic per line, got: {offsets_and_magics}\n"
                    "Please report a bug on https://github.com/psf/black/issues."
                )
            found = offsets_and_magics[0]
            mask = get_token(src, found.magic)
            replacements.append(Replacement(mask=mask, src=found.magic))
            # ``ast`` reports col_offset as a UTF-8 *byte* offset, so slice the
            # encoded line; slicing the str directly would cut at the wrong
            # place when non-ASCII characters precede the magic.
            prefix = line.encode("utf-8")[: found.col_offset].decode("utf-8")
            line = prefix + mask
        new_srcs.append(line)
    return "\n".join(new_srcs), replacements
274
275
def unmask_cell(src: str, replacements: List[Replacement]) -> str:
    """Substitute every mask in the cell with the magic it was hiding.

    For example

        "9b20"
        foo = bar

    becomes

        %%time
        foo = bar

    Replacements are applied in the order given; each one replaces all
    occurrences of its mask.
    """
    restored = src
    for item in replacements:
        restored = restored.replace(item.mask, item.src)
    return restored
292
293
294 def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
295     """Check if attribute is IPython magic.
296
297     Note that the source of the abstract syntax tree
298     will already have been processed by IPython's
299     TransformerManager().transform_cell.
300     """
301     return (
302         isinstance(node, ast.Attribute)
303         and isinstance(node.value, ast.Call)
304         and isinstance(node.value.func, ast.Name)
305         and node.value.func.id == "get_ipython"
306     )
307
308
@dataclasses.dataclass(frozen=True)
class CellMagic:
    """A cell magic split into its header line and the body that follows."""

    # The magic line, starting with ``%%``.
    header: str
    # The remaining cell content passed to the magic.
    body: str
313
314
@dataclasses.dataclass
class CellMagicFinder(ast.NodeVisitor):
    """Find cell magics.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example, a cell consisting of the line ``%%time`` followed by the
    line ``foo()`` would have been transformed to a single
    ``get_ipython().run_cell_magic(...)`` call with the three string
    arguments ``'time'``, ``''`` and the cell body, and we look for
    instances of the latter.

    After visiting a tree, ``cell_magic`` holds the magic that was found,
    or None if there was none.
    """

    cell_magic: Optional[CellMagic] = None

    @staticmethod
    def _str_args(args: List[ast.expr]) -> List[str]:
        """Return the values of ``args``, which must all be string literals.

        ``ast.Str`` and the ``.s`` attribute are deprecated since Python 3.8
        and removed in 3.14, so string literals are recognised as
        ``ast.Constant`` nodes; ``.s`` is only consulted for the legacy node
        classes produced by parsers older than 3.8.

        Raises ``AssertionError`` if an argument is not a string literal.
        """
        values = []
        for arg in args:
            if isinstance(arg, ast.Constant):
                value = arg.value
            else:
                value = getattr(arg, "s", None)
            assert isinstance(value, str)
            values.append(value)
        return values

    def visit_Expr(self, node: ast.Expr) -> None:
        """Find cell magic, extract header and body."""
        if (
            isinstance(node.value, ast.Call)
            and _is_ipython_magic(node.value.func)
            and node.value.func.attr == "run_cell_magic"
        ):
            args = self._str_args(node.value.args)
            # args are (magic name, magic parameters, cell body).
            header = f"%%{args[0]}"
            if args[1]:
                header += f" {args[1]}"
            self.cell_magic = CellMagic(header=header, body=args[2])
        self.generic_visit(node)
352
353
@dataclasses.dataclass(frozen=True)
class OffsetAndMagic:
    """A magic's source text together with the column where its call starts."""

    # ``col_offset`` of the call node that the magic was transformed into.
    col_offset: int
    # The magic as it would have been written in the notebook cell.
    magic: str
358
359
@dataclasses.dataclass
class MagicFinder(ast.NodeVisitor):
    """Visit cell to look for get_ipython calls.

    Note that the source of the abstract syntax tree
    will already have been processed by IPython's
    TransformerManager().transform_cell.

    For example,

        %matplotlib inline

    would have been transformed to

        get_ipython().run_line_magic('matplotlib', 'inline')

    and we look for instances of the latter (and likewise for other
    types of magics).

    After visiting a tree, ``magics`` maps each line number to the magics
    found on that line.
    """

    magics: Dict[int, List[OffsetAndMagic]] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(list)
    )

    @staticmethod
    def _str_args(args: List[ast.expr]) -> List[str]:
        """Return the values of ``args``, which must all be string literals.

        ``ast.Str`` and the ``.s`` attribute are deprecated since Python 3.8
        and removed in 3.14, so string literals are recognised as
        ``ast.Constant`` nodes; ``.s`` is only consulted for the legacy node
        classes produced by parsers older than 3.8.

        Raises ``AssertionError`` if an argument is not a string literal.
        """
        values = []
        for arg in args:
            if isinstance(arg, ast.Constant):
                value = arg.value
            else:
                value = getattr(arg, "s", None)
            assert isinstance(value, str)
            values.append(value)
        return values

    def visit_Assign(self, node: ast.Assign) -> None:
        """Look for system assign magics.

        For example,

            black_version = !black --version

        would have been transformed to

            black_version = get_ipython().getoutput('black --version')

        and we look for instances of the latter.
        """
        if (
            isinstance(node.value, ast.Call)
            and _is_ipython_magic(node.value.func)
            and node.value.func.attr == "getoutput"
        ):
            args = self._str_args(node.value.args)
            assert args
            src = f"!{args[0]}"
            self.magics[node.value.lineno].append(
                OffsetAndMagic(node.value.col_offset, src)
            )
        self.generic_visit(node)

    def visit_Expr(self, node: ast.Expr) -> None:
        """Look for magics in body of cell.

        For examples,

            !ls
            !!ls
            ?ls
            ??ls

        would (respectively) get transformed to

            get_ipython().system('ls')
            get_ipython().getoutput('ls')
            get_ipython().run_line_magic('pinfo', 'ls')
            get_ipython().run_line_magic('pinfo2', 'ls')

        and we look for instances of any of the latter.

        Raises ``NothingChanged`` for any other ``get_ipython()`` method.
        """
        if isinstance(node.value, ast.Call) and _is_ipython_magic(node.value.func):
            args = self._str_args(node.value.args)
            assert args
            method = node.value.func.attr
            if method == "run_line_magic":
                if args[0] == "pinfo":
                    src = f"?{args[1]}"
                elif args[0] == "pinfo2":
                    src = f"??{args[1]}"
                else:
                    src = f"%{args[0]}"
                    if args[1]:
                        src += f" {args[1]}"
            elif method == "system":
                src = f"!{args[0]}"
            elif method == "getoutput":
                src = f"!!{args[0]}"
            else:
                raise NothingChanged  # unsupported magic.
            self.magics[node.value.lineno].append(
                OffsetAndMagic(node.value.col_offset, src)
            )
        self.generic_visit(node)