]> git.madduck.net Git - etc/neomutt.git/blob - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

32fc17c005b613055a3dc42fdb8f744f148652e4
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
1 #!/usr/bin/python3
2 #
3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
4 # alternative conversion, and handling of inline images, using NeoMutt's
5 # ability to manually craft MIME trees, but automating this process.
6 #
7 # Configuration:
8 #   neomuttrc (needs to be a single line):
9 #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
10 #     macro compose B "\
11 #       <enter-command> source '$my_confdir/buildmimetree.py \
12 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
13 #       --css-file $my_confdir/htmlmail.css |'<enter>\
14 #       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
15 #     " "Convert message into a modern MIME tree with inline images"
16 #
17 #     (Yes, we need to call source twice, as mutt only starts to process output
18 #     from a source command when the command exits, and since we need to react
19 #     to the output, we need to be invoked again, using a $my_ variable to pass
20 #     information)
21 #
22 # Requirements:
23 #   - python3
24 #   - python3-markdown
25 #   - python3-beautifulsoup4
26 # Optional:
27 #   - pytest
28 #   - Pynliner, provides --css-file and thus inline styling of HTML output
29 #   - Pygments, then syntax highlighting for fenced code is enabled
30 #
31 # Latest version:
32 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
33 #
34 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
35 # Released under the GPL-2+ licence, just like Mutt itself.
36 #
37
38 import sys
39 import os.path
40 import pathlib
41 import markdown
42 import tempfile
43 import argparse
44 import re
45 import mimetypes
46 import bs4
47 import xml.etree.ElementTree as etree
48 import io
49 from collections import namedtuple, OrderedDict
50 from markdown.extensions import Extension
51 from markdown.blockprocessors import BlockProcessor
52 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
53 from email.utils import make_msgid
54 from urllib import request
55
56
def parse_cli_args(*args, **kwargs):
    """
    Build the command-line parser and parse the arguments.

    All positional and keyword arguments are forwarded unchanged to
    ``ArgumentParser.parse_args()``, so callers may pass an explicit argument
    list instead of relying on ``sys.argv``.

    The ``--css-file`` option is only offered when the module-level
    ``_PYNLINER`` flag is true; otherwise ``css_file`` defaults to ``None``.

    Returns the populated ``argparse.Namespace``. The ``mode`` attribute is
    ``"massage"`` when the internal sub-command was used, else ``None``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "NeoMutt helper to turn text/markdown email parts "
            "into full-fledged MIME trees"
        )
    )
    # NOTE(review): the file header says GPL-2+ while this epilog says MIT;
    # left untouched, to be confirmed with the author.
    parser.epilog = (
        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
        "Released under the MIT licence"
    )

    parser.add_argument(
        "--extensions",
        metavar="EXT[,EXT[,EXT]]",
        type=str,
        default="",
        help="Markdown extension to use (comma-separated list)",
    )

    if _PYNLINER:
        parser.add_argument(
            "--css-file",
            metavar="FILE",
            type=pathlib.Path,
            default=os.devnull,
            help="CSS file to merge with the final HTML",
        )
    else:
        parser.set_defaults(css_file=None)

    parser.add_argument(
        "--related-to-html-only",
        action="store_true",
        help="Make related content be sibling to HTML parts only",
    )

    def positive_integer(value):
        """Convert `value` to an int and insist that it is greater than 0."""
        try:
            number = int(value)
        except ValueError:
            number = 0

        if number > 0:
            return number

        # argparse only relays the message of an ArgumentTypeError to the
        # user; a plain ValueError is replaced by a generic
        # "invalid positive_integer value" text.
        raise argparse.ArgumentTypeError("Must be a positive integer")

    parser.add_argument(
        "--max-number-other-attachments",
        metavar="INTEGER",
        type=positive_integer,
        default=20,
        help="Maximum number of other attachments to expect",
    )

    parser.add_argument(
        "--only-build",
        "--just-build",
        action="store_true",
        help="Only build, don't send the message",
    )

    parser.add_argument(
        "--tempdir",
        metavar="DIR",
        type=pathlib.Path,
        help="Specify temporary directory to use for attachments",
    )

    parser.add_argument(
        "--debug-commands",
        action="store_true",
        help="Turn on debug logging of commands generated to stderr",
    )

    parser.add_argument(
        "--debug-walk",
        action="store_true",
        help="Turn on debugging to stderr of the MIME tree walk",
    )

    parser.add_argument(
        "--dump-html",
        metavar="FILE",
        type=pathlib.Path,
        help="Write the generated HTML to the file",
    )

    subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
    massage_p = subp.add_parser(
        "massage", help="Massaging phase (internal use)"
    )

    massage_p.add_argument(
        "--write-commands-to",
        "-o",
        metavar="FILE",
        dest="cmdpath",
        type=pathlib.Path,
        required=True,
        help="Temporary file path to write commands to",
    )

    massage_p.add_argument(
        "MAILDRAFT",
        nargs="?",
        type=pathlib.Path,
        help="If provided, the script is invoked as editor on the mail draft",
    )

    return parser.parse_args(*args, **kwargs)
168
169
170 # [ FILE I/O HANDLING ] #######################################################
171
172
class File:
    """
    Wrapper around either a file on disk or an in-memory buffer, with
    separate read and write caches.

    With a `path`, open() opens that path; without one, an io.BytesIO or
    io.StringIO (depending on whether the mode contains "b") is used.
    """

    def __init__(self, path=None, mode="r", content=None, **kwargs):
        """
        `path` and `content` are mutually exclusive. `content` pre-seeds the
        read cache and requires a mode containing "r" or "+". Extra keyword
        arguments are stored and handed to the built-in open().

        Raises RuntimeError on either of the two invalid combinations.
        """
        if path:
            if content:
                raise RuntimeError("Cannot specify path and content for File")

            self._path = (
                path if isinstance(path, pathlib.Path) else pathlib.Path(path)
            )
        else:
            self._path = None

        if content and not re.search(r"[r+]", mode):
            raise RuntimeError("Cannot specify content without read mode")

        # Both caches are lists of chunks that get joined on demand.
        self._rcache = [content] if content else []
        self._wcache = []
        self._mode = mode
        self._kwargs = kwargs
        # The underlying file object; None while the File is closed.
        self._file = None

    def open(self):
        """Open the path, or create an empty in-memory buffer if pathless."""
        if self._path:
            self._file = open(self._path, self._mode, **self._kwargs)
        elif "b" in self._mode:
            self._file = io.BytesIO()
        else:
            self._file = io.StringIO()

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """
        Close the underlying file object. Expects the File to be open.

        Afterwards the read cache refers to the very same list as the write
        cache, so what was written is what a later cached read returns.
        """
        self._file.close()
        self._file = None
        self._rcache = self._wcache

    def _get_rcache(self):
        # Join with an empty bytes or str, matching the mode.
        return (b"" if "b" in self._mode else "").join(self._rcache)

    def _get_wcache(self):
        return (b"" if "b" in self._mode else "").join(self._wcache)

    def _add_to_rcache(self, s):
        self._rcache.append(s)

    def _add_to_wcache(self, s):
        self._wcache.append(s)

    def read(self, *, cache=True):
        """
        Return the content.

        With `cache` true and a non-empty read cache, the cached content is
        returned without touching the file. Otherwise the file is read from
        offset 0, being opened and closed again for this one call if it was
        not open. The result is appended to the read cache when `cache` is
        true.
        """
        if cache and self._rcache:
            return self._get_rcache()

        if not self._file:
            with self as f:
                return f.read(cache=cache)

        self._file.seek(0)
        if cache:
            self._add_to_rcache(self._file.read())
            return self._get_rcache()
        else:
            return self._file.read()

    def write(self, s, *, cache=True):
        """
        Write `s` to the file and return what the underlying write() returns.

        If the File is not open, it is opened and closed again for this one
        call. Every write seeks to offset 0 first; nothing here truncates.
        The read cache is re-pointed at the write cache list, and `s` is
        appended to the write cache when `cache` is true.
        """
        if not self._file:
            with self as f:
                return f.write(s, cache=cache)

        self._file.seek(0)
        # From here on both names refer to the same list object.
        self._rcache = self._wcache

        if cache:
            self._add_to_wcache(s)

        written = self._file.write(s)
        self._file.flush()
        return written

    # Read-only access to the pathlib.Path, or None for buffered files.
    path = property(lambda s: s._path)

    def __repr__(self):
        return (
            f'<File path={self._path or "(buffered)"} open={bool(self._file)} '
            f"rcache={sum(len(c) for c in self._rcache) if self._rcache is not None else False} "
            f"wcache={sum(len(c) for c in self._wcache) if self._wcache is not None else False}>"
        )
264
265
class FileFactory:
    """
    Callable that creates File objects and keeps a reference to each one, in
    creation order, so they can be inspected afterwards.
    """

    def __init__(self):
        self._files = []

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """Create a File with the given arguments, record it, return it."""
        created = File(path, mode, content, **kwargs)
        self._files.append(created)
        return created

    def __contains__(self, f):
        return f in self._files

    def __getitem__(self, idx):
        return self._files[idx]

    def __len__(self):
        return len(self._files)

    def pop(self, idx=-1):
        """Remove and return the recorded file at `idx` (default: last)."""
        return self._files.pop(idx)
286
287
class FakeFileFactory(FileFactory):
    """
    FileFactory variant for testing: files are always created without a real
    path (i.e. buffered), but are remembered under, and report, the path that
    was asked for. Asking for a known path again returns the same object.
    """

    def __init__(self):
        super().__init__()
        self._paths2files = OrderedDict()

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        try:
            return self._paths2files[path]
        except KeyError:
            pass

        fake = super().__call__(None, mode, content, **kwargs)
        self._paths2files[path] = fake

        # A class body cannot see a function parameter that it also assigns
        # to, hence the differently named alias.
        pretended_path = path

        class FakeFile(File):
            path = pretended_path

        # Swap the instance's class so that the pathless file claims to
        # have the requested path.
        fake.__class__ = FakeFile
        return fake

    def __getitem__(self, path):
        return self._paths2files[path]

    def get(self, path, default):
        return self._paths2files.get(path, default)

    def pop(self, last=True):
        """Remove and return a (path, file) pair; the newest by default."""
        return self._paths2files.popitem(last)

    def __repr__(self):
        nfiles = len(self._files)
        npaths = len(self._paths2files)
        return f"<FakeFileFactory nfiles={nfiles} paths={npaths}>"
325
326
327 # [ IMAGE HANDLING ] ##########################################################
328
329
# Content-ID and optional description of one registered inline image.
InlineImageInfo = namedtuple(
    "InlineImageInfo", "cid desc", defaults=(None,)
)


class ImageRegistry:
    """
    Ordered mapping from image location to InlineImageInfo.

    Iteration, indexing, len() and items() are delegated to the underlying
    ordered dictionary, which is keyed by location.
    """

    def __init__(self):
        self._images = OrderedDict()

    def register(self, path, description=None):
        """
        Record the image at `path` and return a freshly generated Content-ID
        (without the enclosing angle brackets).

        A leading "~" is expanded, and locations that then start with "/"
        are stored as file:// URLs; anything else is stored as given.
        """
        expanded = os.path.expanduser(path)
        key = f"file://{expanded}" if expanded.startswith("/") else expanded
        cid = make_msgid()[1:-1]
        self._images[key] = InlineImageInfo(cid, description)
        return cid

    def __iter__(self):
        return iter(self._images)

    def __getitem__(self, idx):
        return self._images[idx]

    def __len__(self):
        return len(self._images)

    def items(self):
        return self._images.items()

    def __repr__(self):
        return f"<ImageRegistry(items={len(self._images)})>"

    def __str__(self):
        return str(self._images)
365
366
class InlineImageExtension(Extension):
    """
    Markdown extension that replaces the stock image-link inline processor
    with one that registers local images in an image registry and rewrites
    their `src` to a `cid:` reference.
    """

    class RelatedImageInlineProcessor(ImageInlineProcessor):
        """Image-link processor that records local images in a registry."""

        def __init__(self, re, md, registry):
            super().__init__(re, md)
            self._registry = registry

        def handleMatch(self, m, data):
            """
            Let the parent build the <img> element, then, if its source is
            local (no URL scheme, or file://), register it and point `src`
            at the returned Content-ID.
            """
            el, start, end = super().handleMatch(m, data)
            if el is None:
                # The parent could not parse the link and declined the
                # match; there is no element to inspect, so pass that on.
                return el, start, end

            src = el.attrib.get("src")
            if src is not None and (
                "://" not in src or src.startswith("file://")
            ):
                # We only inline local content
                cid = self._registry.register(
                    src,
                    el.attrib.get("title", el.attrib.get("alt")),
                )
                el.attrib["src"] = f"cid:{cid}"
            return el, start, end

    def __init__(self, registry):
        super().__init__()
        self._image_registry = registry

    INLINE_PATTERN_NAME = "image_link"

    def extendMarkdown(self, md):
        """Register the processor under INLINE_PATTERN_NAME, priority 150."""
        md.registerExtension(self)
        inline_image_proc = self.RelatedImageInlineProcessor(
            IMAGE_LINK_RE, md, self._image_registry
        )
        md.inlinePatterns.register(
            inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
        )
400
401
def markdown_with_inline_image_support(
    text,
    *,
    mdwn=None,
    image_registry=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert Markdown `text` to HTML, turning local images into `cid:`
    references.

    Parameters:
      mdwn: accepted for interface compatibility; a new Markdown instance is
        always created and the passed value is not used.
      image_registry: registry to record images in; a new ImageRegistry is
        created when None.
      extensions: Markdown extensions to load; the list is not modified.
      extension_configs: per-extension configuration passed to Markdown.

    Returns a 4-tuple `(text, htmltext, registry, mdwn)`, where `text` is
    the input with the locations of registered images replaced by their
    `cid:` URLs.
    """
    registry = (
        image_registry if image_registry is not None else ImageRegistry()
    )
    inline_image_handler = InlineImageExtension(registry=registry)

    # Build a new list rather than appending to the caller's, which would
    # otherwise grow by one handler with every call it is reused for.
    extensions = [*(extensions or []), inline_image_handler]
    mdwn = markdown.Markdown(
        extensions=extensions,
        # Markdown looks up string-named extensions in this mapping, so it
        # must be a mapping and not None.
        extension_configs=extension_configs or {},
    )

    htmltext = mdwn.convert(text)

    def replace_image_with_cid(matchobj):
        # The registry keys absolute paths as file:// URLs, so try both.
        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
            if m in registry:
                return f"(cid:{registry[m].cid}"
        return matchobj.group(0)

    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
    return text, htmltext, registry, mdwn
430
431
432 # [ CSS STYLING ] #############################################################
433
434
# Pynliner is optional; _PYNLINER records whether it could be imported.
try:
    import pynliner

    _PYNLINER = True

except ImportError:
    _PYNLINER = False

# Defined unconditionally, because convert_markdown_to_html() reads it
# whether or not Pygments is installed.
_CODEHILITE_CLASS = "codehilite"

# Pygments is optional too; without it there is no highlighting CSS.
try:
    from pygments.formatters import get_formatter_by_name

    _PYGMENTS_CSS = get_formatter_by_name(
        "html", style="default"
    ).get_style_defs(f".{_CODEHILITE_CLASS}")

except ImportError:
    _PYGMENTS_CSS = None


def apply_styling(html, css):
    """
    Inline the Pygments CSS (if any) and `css` (if any) into `html` using
    Pynliner, and return the result.

    If Pynliner is not installed, `html` is returned unchanged.
    """
    if not _PYNLINER:
        return html

    return (
        pynliner.Pynliner()
        .from_string(html)
        .with_cssString("\n".join(s for s in [_PYGMENTS_CSS, css] if s))
        .run()
    )
463
464
465 # [ QUOTE HANDLING ] ##########################################################
466
467
class QuoteToAdmonitionExtension(Extension):
    """
    Markdown extension that renders e-mail style quotes (a title line
    followed by lines starting with ">") as <div class="admonition quote">.
    """

    class EmailQuoteBlockProcessor(BlockProcessor):
        # A line (at the start of the string or after a newline) beginning
        # with ">"; group 1 is the text after the marker and any whitespace.
        RE = re.compile(r"(?:^|\n)>\s*(.*)")

        def __init__(self, parser):
            super().__init__(parser)
            # The most recent title line; kept across blocks so that later
            # quote blocks can be rendered as continuations.
            self._title = None

        def test(self, parent, blocks):
            """
            Decide whether run() should handle this block.

            Despite its name, `blocks` is treated as one string and split
            into lines here.
            """
            if markdown.util.nearing_recursion_limit():
                return False

            lines = blocks.splitlines()
            if len(lines) < 2:
                # A short block only qualifies once a title has been seen
                # and if its first line is itself a quote line.
                if not self._title:
                    return False

                elif not self.RE.search(lines[0]):
                    return False

                return len(lines) > 0

            elif not self.RE.search(lines[0]) and self.RE.search(lines[1]):
                # A non-quote line followed by a quote line: title + quote.
                return True

            elif self._title and self.RE.search(lines[1]):
                # A further quote block after a title was already seen.
                return True

            return False

        def run(self, parent, blocks):
            """
            Consume the first block and append an admonition <div> to
            `parent`, holding the title and the de-quoted lines.
            """
            quotelines = blocks.pop(0).splitlines()

            # If a title is already stored, this block is a continuation.
            cont = bool(self._title)
            if not self.RE.search(quotelines[0]):
                self._title = quotelines.pop(0)

            admonition = etree.SubElement(parent, "div")
            admonition.set(
                "class", f"admonition quote{' continued' if cont else ''}"
            )
            self.parser.parseChunk(admonition, self._title)

            # The first child is what parsing the title just produced.
            admonition[0].set("class", "admonition-title")
            self.parser.parseChunk(
                admonition, "\n".join(self.clean(line) for line in quotelines)
            )

        @classmethod
        def clean(klass, line):
            """Strip the leading quote marker from `line`, if it has one."""
            m = klass.RE.match(line)
            return m.group(1) if m else line

    def extendMarkdown(self, md):
        """Register the block processor as "emailquote" with priority 25."""
        md.registerExtension(self)
        email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
        md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
525
526
527 # [ PARTS GENERATION ] ########################################################
528
529
class Part(
    namedtuple(
        "Part",
        ["type", "subtype", "path", "desc", "cid", "orig"],
        defaults=[None, None, False],
    )
):
    """
    A leaf of the MIME tree: MIME type and subtype, path of the file, and
    optionally a description, a Content-ID, and a flag marking the part as
    the original one.
    """

    def __str__(self):
        pieces = [f"<{self.type}/{self.subtype}>"]
        if self.cid:
            pieces.append(f"cid:{self.cid}")
        if self.orig:
            pieces.append("ORIGINAL")
        return " ".join(pieces)
544
545
class Multipart(
    namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
):
    """
    A multipart/* container of the MIME tree: its subtype, the child nodes,
    and an optional description.
    """

    def __str__(self):
        count = len(self.children)
        return f"<multipart/{self.subtype}> children={count}"

    def __hash__(self):
        # Hash the textual form of subtype and children, which works even
        # though `children` is a list.
        return hash(str(self.subtype) + str(self.children))
554
555
def collect_inline_images(image_registry, *, tempdir=None, filefactory=None):
    """
    Fetch every image in `image_registry` into a temporary file and return
    a list of Part objects describing them.

    Parameters:
      image_registry: object whose items() yields (location, info) pairs,
        with `info` carrying `cid` and `desc`; locations starting with
        "cid:" are skipped.
      tempdir: directory for the temporary files (default: system default).
      filefactory: callable used to open the files for writing; a new
        FileFactory is created when None.

    Whatever urllib raises for a location that cannot be opened propagates.
    """
    if filefactory is None:
        # A fresh factory per call, instead of one shared default instance
        # that would keep a reference to every file ever created.
        filefactory = FileFactory()

    relparts = []
    for path, info in image_registry.items():
        if path.startswith("cid:"):
            continue

        # Close the response once headers and payload have been read.
        with request.urlopen(path) as data:
            mimetype = data.headers["Content-Type"]
            payload = data.read()

        ext = mimetypes.guess_extension(mimetype)
        fd, tempfilename = tempfile.mkstemp(
            prefix="img", suffix=ext, dir=tempdir
        )
        # mkstemp() hands back an open descriptor that must be closed by
        # the caller; only the name is needed, the file is reopened below.
        os.close(fd)
        path = pathlib.Path(tempfilename)

        with filefactory(path, "w+b") as out_f:
            out_f.write(payload)

        desc = (
            f'Inline image: "{info.desc}"'
            if info.desc
            else f"Inline image {str(len(relparts)+1)}"
        )
        relparts.append(
            Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
        )

    return relparts
586
587
# Separator between message body and signature, and the prefix of a first
# signature line that points at an HTML signature file.
EMAIL_SIG_SEP = "\n-- \n"
HTML_SIG_MARKER = "=htmlsig "


def make_html_doc(body, sig=None):
    """
    Wrap the HTML fragment `body` into a complete HTML document.

    If `sig` is given, it is appended inside a <div id="signature"> that
    starts with the signature separator in a <span>.
    """
    pieces = [
        "<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n",
        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n',  # noqa: E501
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n',  # noqa: E501
        "</head>\n",
        "<body>\n",
        f"{body}\n",
    ]

    if sig:
        separator = EMAIL_SIG_SEP.strip("\n")
        pieces.append(
            f'<div id="signature"><span class="sig_separator">{separator}</span>\n'  # noqa: E501
        )
        pieces.append(f"{sig}\n")
        pieces.append("</div>")

    pieces.append("\n  </body>\n</html>")
    return "".join(pieces)


def make_text_mail(text, sig=None):
    """Return `text`, followed by separator and `sig` if a `sig` is given."""
    if not sig:
        return text
    return text + EMAIL_SIG_SEP + sig
617
618
def extract_signature(text, *, filefactory=None):
    """
    Split `text` at the first signature separator.

    Returns a 3-tuple `(body, textsig, htmlsig)`:
      - no separator: `(text, None, None)`;
      - plain signature: `(body, signature, None)`; the signature may be
        the empty string if nothing follows the separator;
      - first signature line starts with HTML_SIG_MARKER: the rest of that
        line is the path of an HTML file, which is read via `filefactory`.
        `textsig` is then the remaining lines and `htmlsig` the file's
        <style> element (if any) followed by the signature markup.

    `filefactory` defaults to a new FileFactory and is only used for the
    HTML case.
    """
    body, separator, sigblock = text.partition(EMAIL_SIG_SEP)
    if not separator:
        return text, None, None

    lines = sigblock.splitlines()
    # `lines` is empty when the draft ends right after the separator, so
    # check before looking at the first line.
    if not lines or not lines[0].startswith(HTML_SIG_MARKER):
        return body, sigblock, None

    if filefactory is None:
        filefactory = FileFactory()

    path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
    textsig = "\n".join(lines)

    sig_input = filefactory(path.expanduser()).read()
    soup = bs4.BeautifulSoup(sig_input, "html.parser")

    # Detach the stylesheet first, so it can be prepended to whichever
    # element ends up being used as the signature.
    style = str(soup.style.extract()) if soup.style else ""

    sig = None
    for sig_selector in (
        "#signature",
        "#signatur",
        "#emailsig",
        ".signature",
        ".signatur",
        ".emailsig",
        "body",
        "div",
    ):
        sig = soup.select_one(sig_selector)
        if sig:
            break

    if not sig:
        # No selector matched: fall back to the file content as read.
        return body, textsig, style + sig_input

    if sig.attrs.get("id") == "signature":
        # Use only the children of a #signature element, not the element.
        sig = "".join(str(c) for c in sig.children)

    return body, textsig, style + str(sig)
656
657
def convert_markdown_to_html(
    draft_f,
    *,
    related_to_html_only=False,
    css_f=None,
    htmldump_f=None,
    filefactory=None,
    tempdir=None,
    extensions=None,
    extension_configs=None,
):
    """
    Turn the Markdown draft in `draft_f` into a tree of MIME parts.

    The draft is read, its signature split off, and both are converted to
    HTML. The draft file is rewritten with image locations replaced by
    `cid:` URLs, the styled HTML is written next to it (or to a temporary
    file if the draft has no path), and inline images are collected.

    Parameters:
      draft_f: File-like object with read(), write() and `path`.
      related_to_html_only: if true, inline images are grouped with the
        HTML part only; otherwise with the whole alternative group.
      css_f: optional file-like object whose read() yields extra CSS.
      htmldump_f: optional file-like object that receives the final HTML.
      filefactory: callable creating File objects; a new FileFactory is
        created when None.
      tempdir: directory for temporary files.
      extensions, extension_configs: Markdown extensions and their
        configuration; neither object is modified.

    Returns the root Multipart of the resulting tree.

    Raises NotImplementedError for an HTML signature that has no plain-text
    alternative.
    """
    if filefactory is None:
        filefactory = FileFactory()

    # TODO extension_configs need to be handled differently
    # Work on copies, so that the caller's mapping stays untouched.
    extension_configs = dict(extension_configs or {})
    highlight_config = dict(extension_configs.get("pymdownx.highlight") or {})
    highlight_config["css_class"] = _CODEHILITE_CLASS
    extension_configs["pymdownx.highlight"] = highlight_config

    # Likewise a new list rather than appending to the caller's.
    extensions = [*(extensions or []), QuoteToAdmonitionExtension()]

    draft = draft_f.read()
    origtext, textsig, htmlsig = extract_signature(
        draft, filefactory=filefactory
    )

    (
        origtext,
        htmltext,
        image_registry,
        mdwn,
    ) = markdown_with_inline_image_support(
        origtext, extensions=extensions, extension_configs=extension_configs
    )

    if htmlsig:
        if not textsig:
            # TODO: decide what to do if there is no plain-text version
            raise NotImplementedError("HTML signature but no text alternative")

        # Images of an HTML signature are registered like those of the
        # body and referenced by Content-ID.
        soup = bs4.BeautifulSoup(htmlsig, "html.parser")
        for img in soup.find_all("img"):
            uri = img.attrs["src"]
            desc = img.attrs.get("title", img.attrs.get("alt"))
            cid = image_registry.register(uri, desc)
            img.attrs["src"] = f"cid:{cid}"

        htmlsig = str(soup)

    elif textsig:
        # A plain signature goes through the Markdown conversion too,
        # sharing the image registry with the body.
        (
            textsig,
            htmlsig,
            image_registry,
            mdwn,
        ) = markdown_with_inline_image_support(
            textsig,
            extensions=extensions,
            extension_configs=extension_configs,
            image_registry=image_registry,
            mdwn=mdwn,
        )

    origtext = make_text_mail(origtext, textsig)
    draft_f.write(origtext)
    textpart = Part(
        "text", "plain", draft_f.path, "Plain-text version", orig=True
    )

    htmltext = make_html_doc(htmltext, htmlsig)
    htmltext = apply_styling(htmltext, css_f.read() if css_f else None)

    if draft_f.path:
        htmlpath = draft_f.path.with_suffix(".html")
    else:
        fd, htmlname = tempfile.mkstemp(suffix=".html", dir=tempdir)
        # Only the name is needed; close the descriptor mkstemp() opened.
        os.close(fd)
        htmlpath = pathlib.Path(htmlname)

    with filefactory(
        htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
    ) as out_f:
        out_f.write(htmltext)
    htmlpart = Part("text", "html", htmlpath, "HTML version")

    if htmldump_f:
        htmldump_f.write(htmltext)

    imgparts = collect_inline_images(
        image_registry, tempdir=tempdir, filefactory=filefactory
    )

    # NOTE(review): the containers below use the subtype "relative" although
    # the MIME type is multipart/related; presumably the code consuming the
    # tree expects this spelling, so it is kept as is. Confirm.
    if related_to_html_only:
        # If there are inline image part, they will be contained within a
        # multipart/related part along with the HTML part only
        if imgparts:
            # replace htmlpart with a multipart/related container of the HTML
            # parts and the images
            htmlpart = Multipart(
                "relative", [htmlpart] + imgparts, "Group of related content"
            )

        return Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )

    else:
        # If there are inline image part, they will be siblings to the
        # multipart/alternative tree within a multipart/related part
        altpart = Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )
        if imgparts:
            return Multipart(
                "relative", [altpart] + imgparts, "Group of related content"
            )
        else:
            return altpart
774
775
class MIMETreeDFWalker:
    """
    Depth-first walker over a tree of Multipart containers and leaf nodes.

    For every node, a visitor is called as
    `visitor_fn(node, ancestry, descendents, debugprint=...)`. Children are
    visited before the container holding them.
    """

    def __init__(self, *, visitor_fn=None, debug=False):
        """
        `visitor_fn` is the default visitor for walk(); without one, a
        visitor that just debug-prints each node is used. `debug` enables
        debugprint() output on stderr.
        """
        self._visitor_fn = visitor_fn or self._echovisit
        self._debug = debug

    def _echovisit(self, node, ancestry, descendents, *, debugprint=None):
        # Takes the same arguments that _walk() passes to every visitor.
        (debugprint or self.debugprint)(f"node={node} ancestry={ancestry}")

    def walk(self, root, *, visitor_fn=None):
        """
        Recursive function to implement a depth-first walk of the MIME-tree
        rooted at `root`.

        A list with several items is wrapped into a multipart/mixed
        container first; a list with a single item is replaced by that item.
        """
        if isinstance(root, list):
            if len(root) > 1:
                root = Multipart("mixed", children=root)
            else:
                root = root[0]

        self._walk(
            root,
            ancestry=[],
            descendents=[],
            visitor_fn=visitor_fn or self._visitor_fn,
        )

    def _walk(self, node, *, ancestry, descendents, visitor_fn):
        # Let's start by enumerating the parts at the current level. At the
        # root level, ancestry will be the empty list, and we expect a
        # multipart/* container at this level. Later, e.g. within a
        # multipart/alternative container, the subtree will just be the
        # alternative parts, while the top of the ancestry will be the
        # multipart/alternative container, which we will process after the
        # following loop.

        lead = "│ " * len(ancestry)
        if isinstance(node, Multipart):
            self.debugprint(
                f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
            )

            # Depth-first, so push the current container onto the ancestry
            # stack, then descend …
            ancestry.append(node)
            self.debugprint(lead + "│ " * 2)
            for child in node.children:
                self._walk(
                    child,
                    ancestry=ancestry,
                    descendents=descendents,
                    visitor_fn=visitor_fn,
                )

            # Pop outside of the assert statement, so that the stack is
            # also unwound when assertions are disabled (python -O).
            popped = ancestry.pop()
            assert popped == node
            descendents.extend(node.children)

        else:
            self.debugprint(f"{lead}├{node}")

        if visitor_fn:
            # `descendents` is one list shared by the whole walk; it holds
            # the children of every container completed so far.
            visitor_fn(node, ancestry, descendents, debugprint=self.debugprint)

    def debugprint(self, s, **kwargs):
        """Print `s` to stderr, but only if debugging was enabled."""
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
847
848
849 # [ RUN MODES ] ###############################################################
850
851
class MuttCommands:
    """
    Collects Mutt commands and writes them out in an order that works.

    Mutt runs "push" commands in reverse order, which complicates mixing
    them with ordinary commands. Three lists are therefore kept: commands
    issued before the first push, the pushes (each new one placed in front
    of the earlier ones), and commands issued after a push was added.

    flush() prints the first list, then the pushes, then the second list,
    and empties all three.
    """

    def __init__(self, out_f=sys.stdout, *, debug=False):
        self._out_f = out_f
        self._debug = debug
        self._cmd1 = []
        self._push = []
        self._cmd2 = []

    def cmd(self, s):
        """Queue the plain command `s`."""
        self.debugprint(s)
        bucket = self._cmd2 if self._push else self._cmd1
        bucket.append(s)

    def push(self, s):
        """Queue `s` as a push command, escaping its double quotes."""
        escaped = s.replace('"', r"\"")
        line = f'push "{escaped}"'
        self.debugprint(line)
        self._push[:0] = [line]

    def flush(self):
        """Write all queued commands, one per line, and reset the queues."""
        ordered = [*self._cmd1, *self._push, *self._cmd2]
        print("\n".join(ordered), file=self._out_f)
        self._cmd1, self._push, self._cmd2 = [], [], []

    def debugprint(self, s, **kwargs):
        if not self._debug:
            return
        print(s, file=sys.stderr, **kwargs)
894
895
def do_setup(
    *,
    out_f=sys.stdout,
    temppath=None,
    tempdir=None,
    debug_commands=False,
):
    """
    Write the Mutt commands for the setup phase to `out_f`.

    The commands save $editor and $edit_headers into $my_ variables, make
    this script (invoked as it was, plus the "massage" sub-command) the
    editor, store the path of the command file in
    $my_mdwn_postprocess_cmd_file, and push the keys to edit the first
    entry.

    Parameters:
      temppath: path of the command file; a temporary file is created in
        `tempdir` when not given.
      debug_commands: echo every generated command to stderr.
    """
    if not temppath:
        fd, tempname = tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)
        # Only the path is passed on; close the descriptor mkstemp() opened.
        os.close(fd)
        temppath = pathlib.Path(tempname)

    cmds = MuttCommands(out_f, debug=debug_commands)

    editor = f"{' '.join(sys.argv)} massage --write-commands-to {temppath}"

    cmds.cmd('set my_editor="$editor"')
    cmds.cmd('set my_edit_headers="$edit_headers"')
    cmds.cmd(f'set editor="{editor}"')
    cmds.cmd("unset edit_headers")
    cmds.cmd(f"set my_mdwn_postprocess_cmd_file={temppath}")
    cmds.push("<first-entry><edit-file>")
    cmds.flush()
917
918
def do_massage(
    draft_f,
    cmd_f,
    *,
    extensions=None,
    css_f=None,
    htmldump_f=None,
    converter=convert_markdown_to_html,
    related_to_html_only=True,
    only_build=False,
    max_other_attachments=20,
    tempdir=None,
    debug_commands=False,
    debug_walk=False,
):
    """
    Convert the draft into a MIME tree and write to cmd_f the NeoMutt
    commands that assemble that tree in the compose window.

    Arguments:
      draft_f: the email draft; handed to the converter unchanged
      cmd_f: file object receiving the generated NeoMutt commands
      extensions: comma-separated string of extension names; split into a
        list (empty if not given) and passed to the converter
      css_f, htmldump_f, related_to_html_only, tempdir: passed through to
        the converter
      converter: callable returning the tree of Part/Multipart nodes
      only_build: if true, no <send-message> is pushed at the end
      max_other_attachments: number of <move-up> pushed for every added
        part (see the comment in the visitor function below)
      debug_commands: passed to MuttCommands
      debug_walk: passed to MIMETreeDFWalker

    Raises:
      NotImplementedError: for a Multipart with an unsupported subtype
      RuntimeError: for a tree node that is neither Part nor Multipart
    """
    # Here's the big picture: we're being invoked as the editor on the email
    # draft, and whatever commands we write to the file given as cmdpath will
    # be run by the second source command in the macro definition.

    # Let's start by cleaning up what the setup did (see above), i.e. we
    # restore the $editor and $edit_headers variables, and also unset the
    # variable used to identify the command file we're currently writing
    # to.
    cmds = MuttCommands(cmd_f, debug=debug_commands)
    cmds.cmd('set editor="$my_editor"')
    cmds.cmd('set edit_headers="$my_edit_headers"')
    cmds.cmd("unset my_editor")
    cmds.cmd("unset my_edit_headers")

    # let's flush those commands, as there'll be a lot of pushes from now
    # on, which need to be run in reverse order
    cmds.flush()

    extensions = extensions.split(",") if extensions else []
    tree = converter(
        draft_f,
        css_f=css_f,
        htmldump_f=htmldump_f,
        related_to_html_only=related_to_html_only,
        tempdir=tempdir,
        extensions=extensions,
    )

    mimetree = MIMETreeDFWalker(debug=debug_walk)

    state = dict(pos=1, tags={}, parts=1)

    def visitor_fn(item, ancestry, descendents, *, debugprint=None):
        """
        Visitor function called for every node (part) of the MIME tree,
        depth-first, and responsible for telling NeoMutt how to assemble
        the tree.
        """
        KILL_LINE = r"\Ca\Ck"

        if isinstance(item, Part):
            # We've hit a leaf-node, i.e. an alternative or a related part
            # with actual content.

            # Let's add the part
            if item.orig:
                # The original source already exists in the NeoMutt tree, but
                # the underlying file may have been modified, so we need to
                # update the encoding, but that's it:
                cmds.push("<first-entry>")
                cmds.push("<update-encoding>")

                # We really just need to be able to assume that at this point,
                # NeoMutt is at position 1, and that we've processed only this
                # part so far. Never mind about actual attachments, we can
                # safely ignore those as they stay at the end.
                assert state["pos"] == 1
                assert state["parts"] == 1
            else:
                # … whereas all other parts need to be added, and they're all
                # considered to be temporary and inline:
                cmds.push(f"<attach-file>{item.path}<enter>")
                cmds.push("<toggle-unlink><toggle-disposition>")

                # This added a part at the end of the list of parts, and that's
                # just how many parts we've seen so far, so its position in
                # the NeoMutt compose list is the count of parts
                state["parts"] += 1
                state["pos"] = state["parts"]

            # If the item (including the original) comes with additional
            # information, then we might just as well update the NeoMutt
            # tree now:
            if item.cid:
                cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")

            # Now for the biggest hack in this script, which is to handle
            # attachments, such as PDFs, that aren't related or alternatives.
            # The problem is that when we add an inline image, it always gets
            # appended to the list, i.e. inserted *after* other attachments.
            # Since we don't know the number of attachments, we also cannot
            # infer the position of the new attachment. Therefore, we bubble
            # it all the way to the top, only to then move it down again:
            if state["pos"] > 1:  # skip for the first part
                for _ in range(max_other_attachments):
                    # could use any number here, but has to be larger than the
                    # number of possible attachments. The performance
                    # difference of using a high number is negligible.
                    # Bubble up the new part
                    cmds.push("<move-up>")

                # As we push the part to the right position in the list (i.e.
                # the last of the subset of attachments this script added), we
                # must handle the situation that subtrees are skipped by
                # NeoMutt. Hence, the actual number of positions to move down
                # is decremented by the number of descendents so far
                # encountered.
                for _ in range(1, state["pos"] - len(descendents)):
                    cmds.push("<move-down>")

        elif isinstance(item, Multipart):
            # This node has children, but we already visited them (see
            # above). The tags dictionary of State should contain a list of
            # their positions in the NeoMutt compose window, so iterate those
            # and tag the parts there:
            child_positions = state["tags"][item]
            n_tags = len(child_positions)
            for tag in child_positions:
                cmds.push(f"<jump>{tag}<enter><tag-entry>")

            if item.subtype == "alternative":
                cmds.push("<group-alternatives>")
            elif item.subtype in ("relative", "related"):
                cmds.push("<group-related>")
            elif item.subtype == "multilingual":
                cmds.push("<group-multilingual>")
            else:
                raise NotImplementedError(
                    f"Handling of multipart/{item.subtype} is not implemented"
                )

            # The tagged children collapse into a single group entry
            state["pos"] -= n_tags - 1
            state["parts"] += 1

        else:
            # We should never get here
            raise RuntimeError(f"Type {type(item)} is unexpected: {item}")

        # If the item has a description, we might just as well add it
        if item.desc:
            cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")

        if ancestry:
            # If there's an ancestry, record the current (assumed) position in
            # the NeoMutt compose window as needed-to-tag by our direct parent
            # (i.e. the last item of the ancestry)
            state["tags"].setdefault(ancestry[-1], []).append(state["pos"])

            # debugprint defaults to None, so only emit the trace if a
            # callable was actually provided
            if debugprint is not None:
                lead = "│ " * (len(ancestry) + 1) + "* "
                debugprint(
                    f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
                    f"{lead}descendents={[d.subtype for d in descendents]}\n"
                    f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
                    f"{lead}pos={state['pos']}, parts={state['parts']}"
                )

    # -----------------
    # End of visitor_fn

    # Let's walk the tree and visit every node with our fancy visitor
    # function
    mimetree.walk(tree, visitor_fn=visitor_fn)

    if not only_build:
        cmds.push("<send-message>")

    # Finally, cleanup. Since we're responsible for removing the temporary
    # file, how's this for a little hack?
    try:
        filename = cmd_f.name
    except AttributeError:
        filename = "pytest_internal_file"
    cmds.cmd(f"source 'rm -f {filename}|'")
    cmds.cmd("unset my_mdwn_postprocess_cmd_file")
    cmds.flush()
1098
1099
1100 # [ CLI ENTRY ] ###############################################################
1101
if __name__ == "__main__":
    # Without a mode we prepare NeoMutt for the second invocation; in
    # "massage" mode we are that second invocation.
    args = parse_cli_args()

    if args.mode == "massage":
        with (
            File(args.MAILDRAFT, "r+") as draft_f,
            File(args.cmdpath, "w") as cmd_f,
            File(args.css_file, "r") as css_f,
            File(args.dump_html, "w") as htmldump_f,
        ):
            massage_options = dict(
                extensions=args.extensions,
                css_f=css_f,
                htmldump_f=htmldump_f,
                related_to_html_only=args.related_to_html_only,
                max_other_attachments=args.max_number_other_attachments,
                only_build=args.only_build,
                tempdir=args.tempdir,
                debug_commands=args.debug_commands,
                debug_walk=args.debug_walk,
            )
            do_massage(draft_f, cmd_f, **massage_options)

    elif args.mode is None:
        do_setup(
            tempdir=args.tempdir,
            debug_commands=args.debug_commands,
        )
1131
1132
1133 # [ TESTS ] ###################################################################
1134
1135 try:
1136     import pytest
1137
1138     class Tests:
1139         @pytest.fixture
1140         def const1(self):
1141             return "Curvature Vest Usher Dividing+T#iceps Senior"
1142
1143         @pytest.fixture
1144         def const2(self):
1145             return "Habitant Celestial 2litzy Resurf/ce Headpiece Harmonics"
1146
1147         @pytest.fixture
1148         def fakepath(self):
1149             return pathlib.Path("/does/not/exist")
1150
1151         @pytest.fixture
1152         def fakepath2(self):
1153             return pathlib.Path("/does/not/exist/either")
1154
1155         # NOTE: tests using the capsys fixture must specify sys.stdout to the
1156         # functions they call, else old stdout is used and not captured
1157
1158         @pytest.mark.muttctrl
1159         def test_MuttCommands_cmd(self, const1, const2, capsys):
1160             "Assert order of commands"
1161             cmds = MuttCommands(out_f=sys.stdout)
1162             cmds.cmd(const1)
1163             cmds.cmd(const2)
1164             cmds.flush()
1165             captured = capsys.readouterr()
1166             assert captured.out == "\n".join((const1, const2, ""))
1167
1168         @pytest.mark.muttctrl
1169         def test_MuttCommands_push(self, const1, const2, capsys):
1170             "Assert reverse order of pushes"
1171             cmds = MuttCommands(out_f=sys.stdout)
1172             cmds.push(const1)
1173             cmds.push(const2)
1174             cmds.flush()
1175             captured = capsys.readouterr()
1176             assert (
1177                 captured.out
1178                 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
1179             )
1180
1181         @pytest.mark.muttctrl
1182         def test_MuttCommands_push_escape(self, const1, const2, capsys):
1183             cmds = MuttCommands(out_f=sys.stdout)
1184             cmds.push(f'"{const1}"')
1185             cmds.flush()
1186             captured = capsys.readouterr()
1187             assert f'"\\"{const1}\\""' in captured.out
1188
1189         @pytest.mark.muttctrl
1190         def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
1191             "Assert reverse order of pushes"
1192             cmds = MuttCommands(out_f=sys.stdout)
1193             lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
1194             for i in range(2):
1195                 cmds.cmd(lines[4 * i + 0])
1196                 cmds.cmd(lines[4 * i + 1])
1197                 cmds.push(lines[4 * i + 2])
1198                 cmds.push(lines[4 * i + 3])
1199             cmds.flush()
1200
1201             captured = capsys.readouterr()
1202             lines_out = captured.out.splitlines()
1203             assert lines[0] in lines_out[0]
1204             assert lines[1] in lines_out[1]
1205             assert lines[7] in lines_out[2]
1206             assert lines[6] in lines_out[3]
1207             assert lines[3] in lines_out[4]
1208             assert lines[2] in lines_out[5]
1209             assert lines[4] in lines_out[6]
1210             assert lines[5] in lines_out[7]
1211
1212         @pytest.fixture
1213         def mime_tree_related_to_alternative(self):
1214             return Multipart(
1215                 "relative",
1216                 children=[
1217                     Multipart(
1218                         "alternative",
1219                         children=[
1220                             Part(
1221                                 "text",
1222                                 "plain",
1223                                 "part.txt",
1224                                 desc="Plain",
1225                                 orig=True,
1226                             ),
1227                             Part("text", "html", "part.html", desc="HTML"),
1228                         ],
1229                         desc="Alternative",
1230                     ),
1231                     Part(
1232                         "text", "png", "logo.png", cid="logo.png", desc="Logo"
1233                     ),
1234                 ],
1235                 desc="Related",
1236             )
1237
1238         @pytest.fixture
1239         def mime_tree_related_to_html(self):
1240             return Multipart(
1241                 "alternative",
1242                 children=[
1243                     Part(
1244                         "text",
1245                         "plain",
1246                         "part.txt",
1247                         desc="Plain",
1248                         orig=True,
1249                     ),
1250                     Multipart(
1251                         "relative",
1252                         children=[
1253                             Part("text", "html", "part.html", desc="HTML"),
1254                             Part(
1255                                 "text",
1256                                 "png",
1257                                 "logo.png",
1258                                 cid="logo.png",
1259                                 desc="Logo",
1260                             ),
1261                         ],
1262                         desc="Related",
1263                     ),
1264                 ],
1265                 desc="Alternative",
1266             )
1267
1268         @pytest.fixture
1269         def mime_tree_nested(self):
1270             return Multipart(
1271                 "relative",
1272                 children=[
1273                     Multipart(
1274                         "alternative",
1275                         children=[
1276                             Part(
1277                                 "text",
1278                                 "plain",
1279                                 "part.txt",
1280                                 desc="Plain",
1281                                 orig=True,
1282                             ),
1283                             Multipart(
1284                                 "alternative",
1285                                 children=[
1286                                     Part(
1287                                         "text",
1288                                         "plain",
1289                                         "part.txt",
1290                                         desc="Nested plain",
1291                                     ),
1292                                     Part(
1293                                         "text",
1294                                         "html",
1295                                         "part.html",
1296                                         desc="Nested HTML",
1297                                     ),
1298                                 ],
1299                                 desc="Nested alternative",
1300                             ),
1301                         ],
1302                         desc="Alternative",
1303                     ),
1304                     Part(
1305                         "text",
1306                         "png",
1307                         "logo.png",
1308                         cid="logo.png",
1309                         desc="Logo",
1310                     ),
1311                 ],
1312                 desc="Related",
1313             )
1314
1315         @pytest.mark.treewalk
1316         def test_MIMETreeDFWalker_depth_first_walk(
1317             self, mime_tree_related_to_alternative
1318         ):
1319             mimetree = MIMETreeDFWalker()
1320
1321             items = []
1322
1323             def visitor_fn(item, ancestry, descendents, debugprint):
1324                 items.append((item, len(ancestry), len(descendents)))
1325
1326             mimetree.walk(
1327                 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1328             )
1329             assert len(items) == 5
1330             assert items[0][0].subtype == "plain"
1331             assert items[0][1] == 2
1332             assert items[0][2] == 0
1333             assert items[1][0].subtype == "html"
1334             assert items[1][1] == 2
1335             assert items[1][2] == 0
1336             assert items[2][0].subtype == "alternative"
1337             assert items[2][1] == 1
1338             assert items[2][2] == 2
1339             assert items[3][0].subtype == "png"
1340             assert items[3][1] == 1
1341             assert items[3][2] == 2
1342             assert items[4][0].subtype == "relative"
1343             assert items[4][1] == 0
1344             assert items[4][2] == 4
1345
1346         @pytest.mark.treewalk
1347         def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1348             mimetree = MIMETreeDFWalker()
1349             items = []
1350
1351             def visitor_fn(item, ancestry, descendents, debugprint):
1352                 items.append(item)
1353
1354             p = Part("text", "plain", const1)
1355             mimetree.walk([p], visitor_fn=visitor_fn)
1356             assert items[-1].subtype == "plain"
1357             mimetree.walk([p, p], visitor_fn=visitor_fn)
1358             assert items[-1].subtype == "mixed"
1359
1360         @pytest.mark.treewalk
1361         def test_MIMETreeDFWalker_visitor_in_constructor(
1362             self, mime_tree_related_to_alternative
1363         ):
1364             items = []
1365
1366             def visitor_fn(item, ancestry, descendents, debugprint):
1367                 items.append(item)
1368
1369             mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1370             mimetree.walk(mime_tree_related_to_alternative)
1371             assert len(items) == 5
1372
1373         @pytest.fixture
1374         def string_io(self, const1, text=None):
1375             return StringIO(text or const1)
1376
1377         @pytest.mark.massage
1378         def test_do_massage_basic(self):
1379             def converter(draft_f, **kwargs):
1380                 return Part("text", "plain", draft_f.path, orig=True)
1381
1382             with File() as draft_f, File() as cmd_f:
1383                 do_massage(
1384                     draft_f=draft_f,
1385                     cmd_f=cmd_f,
1386                     converter=converter,
1387                 )
1388                 lines = cmd_f.read().splitlines()
1389
1390             assert '="$my_editor"' in lines.pop(0)
1391             assert '="$my_edit_headers"' in lines.pop(0)
1392             assert "unset my_editor" == lines.pop(0)
1393             assert "unset my_edit_headers" == lines.pop(0)
1394             assert "send-message" in lines.pop(0)
1395             assert "update-encoding" in lines.pop(0)
1396             assert "first-entry" in lines.pop(0)
1397             assert "source 'rm -f " in lines.pop(0)
1398             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1399
1400         @pytest.mark.massage
        def test_do_massage_fulltree(self, mime_tree_related_to_alternative):
            """
            Check the complete sequence of lines do_massage writes for the
            tree provided by the mime_tree_related_to_alternative fixture.
            """
            def converter(draft_f, **kwargs):
                return mime_tree_related_to_alternative

            # Number of <move-up> lines expected per added part
            max_attachments = 5

            with File() as draft_f, File() as cmd_f:
                do_massage(
                    draft_f=draft_f,
                    cmd_f=cmd_f,
                    max_other_attachments=max_attachments,
                    converter=converter,
                )
                # Ignore the first four and the last two lines of the file
                lines = cmd_f.read().splitlines()[4:-2]

            # From here on, lines are consumed last-to-first with pop()

            # The original plain part
            assert "first-entry" in lines.pop()
            assert "update-encoding" in lines.pop()
            assert "Plain" in lines.pop()
            # The HTML part gets attached and repositioned
            assert "part.html" in lines.pop()
            assert "toggle-unlink" in lines.pop()
            for i in range(max_attachments):
                assert "move-up" in lines.pop()
            assert "move-down" in lines.pop()
            assert "HTML" in lines.pop()
            # Both get tagged and grouped as alternatives
            assert "jump>1" in lines.pop()
            assert "jump>2" in lines.pop()
            assert "group-alternatives" in lines.pop()
            assert "Alternative" in lines.pop()
            # The logo gets attached, given a content ID, and repositioned
            assert "logo.png" in lines.pop()
            assert "toggle-unlink" in lines.pop()
            assert "content-id" in lines.pop()
            for i in range(max_attachments):
                assert "move-up" in lines.pop()
            assert "move-down" in lines.pop()
            assert "Logo" in lines.pop()
            # The alternative group and the logo get grouped as related
            assert "jump>1" in lines.pop()
            assert "jump>4" in lines.pop()
            assert "group-related" in lines.pop()
            assert "Related" in lines.pop()
            assert "send-message" in lines.pop()
            # Nothing else must have been written
            assert len(lines) == 0
1442
1443         @pytest.mark.massage
        def test_mime_tree_relative_within_alternative(
            self, mime_tree_related_to_html
        ):
            """
            Check the complete sequence of lines do_massage writes for the
            tree provided by the mime_tree_related_to_html fixture.
            """
            def converter(draft_f, **kwargs):
                return mime_tree_related_to_html

            with File() as draft_f, File() as cmd_f:
                do_massage(
                    draft_f=draft_f,
                    cmd_f=cmd_f,
                    converter=converter,
                )
                # Ignore the first four and the last two lines of the file
                lines = cmd_f.read().splitlines()[4:-2]

            # From here on, lines are consumed last-to-first with pop()

            # The original plain part
            assert "first-entry" in lines.pop()
            assert "update-encoding" in lines.pop()
            assert "Plain" in lines.pop()
            # The HTML part gets attached and repositioned
            assert "part.html" in lines.pop()
            assert "toggle-unlink" in lines.pop()
            assert "move-up" in lines.pop()
            # Skip the remaining <move-up> lines, however many there are;
            # top ends up holding the first line that is not a <move-up>
            while True:
                top = lines.pop()
                if "move-up" not in top:
                    break
            assert "move-down" in top
            assert "HTML" in lines.pop()
            # The logo gets attached, given a content ID, and repositioned
            assert "logo.png" in lines.pop()
            assert "toggle-unlink" in lines.pop()
            assert "content-id" in lines.pop()
            assert "move-up" in lines.pop()
            while True:
                top = lines.pop()
                if "move-up" not in top:
                    break
            # Two <move-down> lines are expected for the logo
            assert "move-down" in top
            assert "move-down" in lines.pop()
            assert "Logo" in lines.pop()
            # HTML and logo get grouped as related
            assert "jump>2" in lines.pop()
            assert "jump>3" in lines.pop()
            assert "group-related" in lines.pop()
            assert "Related" in lines.pop()
            # Plain and the related group get grouped as alternatives
            assert "jump>1" in lines.pop()
            assert "jump>2" in lines.pop()
            assert "group-alternative" in lines.pop()
            assert "Alternative" in lines.pop()
            assert "send-message" in lines.pop()
            # Nothing else must have been written
            assert len(lines) == 0
1491
1492         @pytest.mark.massage
        def test_mime_tree_nested_trees_does_not_break_positioning(
            self, mime_tree_nested
        ):
            """
            For the tree provided by the mime_tree_nested fixture, check
            that exactly one <move-down> is written for the logo part.
            """
            def converter(draft_f, **kwargs):
                return mime_tree_nested

            with File() as draft_f, File() as cmd_f:
                do_massage(
                    draft_f=draft_f,
                    cmd_f=cmd_f,
                    converter=converter,
                )
                lines = cmd_f.read().splitlines()

            # Consume lines last-to-first up to and including the one
            # mentioning logo.png, then skip one more line
            while "logo.png" not in lines.pop():
                pass
            lines.pop()
            assert "content-id" in lines.pop()
            assert "move-up" in lines.pop()
            # Skip the remaining <move-up> lines; top ends up holding the
            # first line that is not a <move-up>
            while True:
                top = lines.pop()
                if "move-up" not in top:
                    break
            assert "move-down" in top
            # Due to the nested trees, the number of descendents of the sibling
            # actually needs to be considered, not just the nieces. So to move
            # from position 1 to position 6, it only needs one <move-down>
            # because that jumps over the entire sibling tree. Thus what
            # follows next must not be another <move-down>
            assert "Logo" in lines.pop()
1523
1524         @pytest.mark.converter
1525         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
1526             draft_f = fakefilefactory(fakepath, content=const1)
1527             tree = convert_markdown_to_html(
1528                 draft_f, filefactory=fakefilefactory
1529             )
1530
1531             assert tree.subtype == "alternative"
1532             assert len(tree.children) == 2
1533             assert tree.children[0].subtype == "plain"
1534             assert tree.children[0].path == draft_f.path
1535             assert tree.children[0].orig
1536             assert tree.children[1].subtype == "html"
1537             assert tree.children[1].path == fakepath.with_suffix(".html")
1538
1539         @pytest.mark.converter
1540         def test_converter_writes(
1541             self, fakepath, fakefilefactory, const1, monkeypatch
1542         ):
1543             draft_f = fakefilefactory(fakepath, content=const1)
1544             convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1545
1546             html = fakefilefactory.pop()
1547             assert fakepath.with_suffix(".html") == html[0]
1548             assert const1 in html[1].read()
1549             text = fakefilefactory.pop()
1550             assert fakepath == text[0]
1551             assert const1 == text[1].read()
1552
1553         @pytest.mark.imgproc
1554         def test_markdown_inline_image_processor(self):
1555             imgpath1 = "file:/path/to/image.png"
1556             imgpath2 = "file:///path/to/image.png?url=params"
1557             imgpath3 = "/path/to/image.png"
1558             text = f"""![inline local image]({imgpath1})
1559                        ![image inlined
1560                          with newline]({imgpath2})
1561                        ![image local path]({imgpath3})"""
1562             text, html, images, mdwn = markdown_with_inline_image_support(text)
1563
1564             # local paths have been normalised to URLs:
1565             imgpath3 = f"file://{imgpath3}"
1566
1567             assert 'src="cid:' in html
1568             assert "](cid:" in text
1569             assert len(images) == 3
1570             assert imgpath1 in images
1571             assert imgpath2 in images
1572             assert imgpath3 in images
1573             assert images[imgpath1].cid != images[imgpath2].cid
1574             assert images[imgpath1].cid != images[imgpath3].cid
1575             assert images[imgpath2].cid != images[imgpath3].cid
1576
1577         @pytest.mark.imgproc
1578         def test_markdown_inline_image_processor_title_to_desc(self, const1):
1579             imgpath = "file:///path/to/image.png"
1580             text = f'![inline local image]({imgpath} "{const1}")'
1581             text, html, images, mdwn = markdown_with_inline_image_support(text)
1582             assert images[imgpath].desc == const1
1583
1584         @pytest.mark.imgproc
1585         def test_markdown_inline_image_processor_alt_to_desc(self, const1):
1586             imgpath = "file:///path/to/image.png"
1587             text = f"![{const1}]({imgpath})"
1588             text, html, images, mdwn = markdown_with_inline_image_support(text)
1589             assert images[imgpath].desc == const1
1590
1591         @pytest.mark.imgproc
1592         def test_markdown_inline_image_processor_title_over_alt_desc(
1593             self, const1, const2
1594         ):
1595             imgpath = "file:///path/to/image.png"
1596             text = f'![{const1}]({imgpath} "{const2}")'
1597             text, html, images, mdwn = markdown_with_inline_image_support(text)
1598             assert images[imgpath].desc == const2
1599
1600         @pytest.mark.imgproc
1601         def test_markdown_inline_image_not_external(self):
1602             imgpath = "https://path/to/image.png"
1603             text = f"![inline image]({imgpath})"
1604             text, html, images, mdwn = markdown_with_inline_image_support(text)
1605
1606             assert 'src="cid:' not in html
1607             assert "](cid:" not in text
1608             assert len(images) == 0
1609
1610         @pytest.mark.imgproc
1611         def test_markdown_inline_image_local_file(self):
1612             imgpath = "/path/to/image.png"
1613             text = f"![inline image]({imgpath})"
1614             text, html, images, mdwn = markdown_with_inline_image_support(text)
1615
1616             for k, v in images.items():
1617                 assert k == f"file://{imgpath}"
1618                 break
1619
1620         @pytest.mark.imgproc
1621         def test_markdown_inline_image_expanduser(self):
1622             imgpath = pathlib.Path("~/image.png")
1623             text = f"![inline image]({imgpath})"
1624             text, html, images, mdwn = markdown_with_inline_image_support(text)
1625
1626             for k, v in images.items():
1627                 assert k == f"file://{imgpath.expanduser()}"
1628                 break
1629
1630         @pytest.fixture
1631         def test_png(self):
1632             return (
1633                 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAE"
1634                 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1635             )
1636
1637         @pytest.mark.imgproc
1638         def test_markdown_inline_image_processor_base64(self, test_png):
1639             text = f"![1px white inlined]({test_png})"
1640             text, html, images, mdwn = markdown_with_inline_image_support(text)
1641
1642             assert 'src="cid:' in html
1643             assert "](cid:" in text
1644             assert len(images) == 1
1645             assert test_png in images
1646
1647         @pytest.mark.converter
1648         def test_converter_tree_inline_image_base64(
1649             self, test_png, fakefilefactory
1650         ):
1651             text = f"![inline base64 image]({test_png})"
1652             with fakefilefactory(content=text) as draft_f:
1653                 tree = convert_markdown_to_html(
1654                     draft_f,
1655                     filefactory=fakefilefactory,
1656                     related_to_html_only=False,
1657                 )
1658             assert tree.subtype == "relative"
1659             assert tree.children[0].subtype == "alternative"
1660             assert tree.children[1].subtype == "png"
1661             written = fakefilefactory.pop()
1662             assert tree.children[1].path == written[0]
1663             assert b"PNG" in written[1].read()
1664
1665         @pytest.mark.converter
1666         def test_converter_tree_inline_image_base64_related_to_html(
1667             self, test_png, fakefilefactory
1668         ):
1669             text = f"![inline base64 image]({test_png})"
1670             with fakefilefactory(content=text) as draft_f:
1671                 tree = convert_markdown_to_html(
1672                     draft_f,
1673                     filefactory=fakefilefactory,
1674                     related_to_html_only=True,
1675                 )
1676             assert tree.subtype == "alternative"
1677             assert tree.children[1].subtype == "relative"
1678             assert tree.children[1].children[1].subtype == "png"
1679             written = fakefilefactory.pop()
1680             assert tree.children[1].children[1].path == written[0]
1681             assert b"PNG" in written[1].read()
1682
1683         @pytest.mark.converter
1684         def test_converter_tree_inline_image_cid(
1685             self, const1, fakefilefactory
1686         ):
1687             text = f"![inline base64 image](cid:{const1})"
1688             with fakefilefactory(content=text) as draft_f:
1689                 tree = convert_markdown_to_html(
1690                     draft_f,
1691                     filefactory=fakefilefactory,
1692                     related_to_html_only=False,
1693                 )
1694             assert len(tree.children) == 2
1695             assert tree.children[0].cid != const1
1696             assert tree.children[0].type != "image"
1697             assert tree.children[1].cid != const1
1698             assert tree.children[1].type != "image"
1699
1700         @pytest.fixture
1701         def fakefilefactory(self):
1702             return FakeFileFactory()
1703
1704         @pytest.mark.imgcoll
1705         def test_inline_image_collection(
1706             self, test_png, const1, const2, fakefilefactory
1707         ):
1708             test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
1709             relparts = collect_inline_images(
1710                 test_images, filefactory=fakefilefactory
1711             )
1712
1713             written = fakefilefactory.pop()
1714             assert b"PNG" in written[1].read()
1715
1716             assert relparts[0].subtype == "png"
1717             assert relparts[0].path == written[0]
1718             assert relparts[0].cid == const1
1719             assert const2 in relparts[0].desc
1720
1721         if _PYNLINER:
1722
1723             @pytest.mark.styling
1724             def test_apply_stylesheet(self):
1725                 html = "<p>Hello, world!</p>"
1726                 css = "p { color:red }"
1727                 out = apply_styling(html, css)
1728                 assert 'p style="color' in out
1729
1730             @pytest.mark.styling
1731             def test_apply_no_stylesheet(self, const1):
1732                 out = apply_styling(const1, None)
1733
1734             @pytest.mark.massage
1735             @pytest.mark.styling
1736             def test_massage_styling_to_converter(self):
1737                 css = "p { color:red }"
1738                 css_f = File(content=css)
1739                 css_applied = []
1740
1741                 def converter(draft_f, css_f, **kwargs):
1742                     css = css_f.read()
1743                     css_applied.append(css)
1744                     return Part("text", "plain", draft_f.path, orig=True)
1745
1746                 do_massage(
1747                     draft_f=File(),
1748                     cmd_f=File(),
1749                     css_f=css_f,
1750                     converter=converter,
1751                 )
1752                 assert css_applied[0] == css
1753
1754             @pytest.mark.converter
1755             @pytest.mark.styling
1756             def test_converter_apply_styles(
1757                 self, const1, monkeypatch, fakepath, fakefilefactory
1758             ):
1759                 css = "p { color:red }"
1760                 with (
1761                     monkeypatch.context() as m,
1762                     fakefilefactory(fakepath, content=const1) as draft_f,
1763                     fakefilefactory(content=css) as css_f,
1764                 ):
1765                     m.setattr(
1766                         markdown.Markdown,
1767                         "convert",
1768                         lambda s, t: f"<p>{t}</p>",
1769                     )
1770                     convert_markdown_to_html(
1771                         draft_f, css_f=css_f, filefactory=fakefilefactory
1772                     )
1773                 assert re.search(
1774                     r"color:.*red",
1775                     fakefilefactory[fakepath.with_suffix(".html")].read(),
1776                 )
1777
1778         if _PYGMENTS_CSS:
1779
1780             @pytest.mark.styling
1781             def test_apply_stylesheet_pygments(self):
1782                 html = (
1783                     f'<div class="{_CODEHILITE_CLASS}">'
1784                     "<pre>def foo():\n    return</pre></div>"
1785                 )
1786                 out = apply_styling(html, _PYGMENTS_CSS)
1787                 assert f'{_CODEHILITE_CLASS}" style="' in out
1788
1789         @pytest.mark.sig
1790         def test_signature_extraction_no_signature(self, const1):
1791             assert (const1, None, None) == extract_signature(const1)
1792
1793         @pytest.mark.sig
1794         def test_signature_extraction_just_text(self, const1, const2):
1795             origtext, textsig, htmlsig = extract_signature(
1796                 f"{const1}{EMAIL_SIG_SEP}{const2}"
1797             )
1798             assert origtext == const1
1799             assert textsig == const2
1800             assert htmlsig is None
1801
1802         @pytest.mark.sig
1803         def test_signature_extraction_html(
1804             self, fakepath, fakefilefactory, const1, const2
1805         ):
1806             sigconst = "HTML signature from {path} but as a string"
1807             sig = f'<div id="signature">{sigconst.format(path=fakepath)}</div>'
1808
1809             sig_f = fakefilefactory(fakepath, content=sig)
1810
1811             origtext, textsig, htmlsig = extract_signature(
1812                 f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {fakepath}\n{const2}",
1813                 filefactory=fakefilefactory,
1814             )
1815             assert origtext == const1
1816             assert textsig == const2
1817             assert htmlsig == sigconst.format(path=fakepath)
1818
1819         @pytest.mark.sig
1820         def test_signature_extraction_file_not_found(self, const1):
1821             path = pathlib.Path("/does/not/exist")
1822             with pytest.raises(FileNotFoundError):
1823                 origtext, textsig, htmlsig = extract_signature(
1824                     f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
1825                 )
1826
1827         @pytest.mark.imgproc
1828         def test_image_registry(self, const1):
1829             reg = ImageRegistry()
1830             cid = reg.register(const1)
1831             assert "@" in cid
1832             assert not cid.startswith("<")
1833             assert not cid.endswith(">")
1834             assert const1 in reg
1835
1836         @pytest.mark.imgproc
1837         def test_image_registry_file_uri(self, const1):
1838             reg = ImageRegistry()
1839             reg.register("/some/path")
1840             for path in reg:
1841                 assert path.startswith("file://")
1842                 break
1843
1844         @pytest.mark.converter
1845         @pytest.mark.sig
1846         def test_converter_signature_handling(
1847             self, fakepath, fakefilefactory, monkeypatch
1848         ):
1849             mailparts = (
1850                 "This is the mail body\n",
1851                 f"{EMAIL_SIG_SEP}",
1852                 "This is a plain-text signature only",
1853             )
1854
1855             with (
1856                 fakefilefactory(
1857                     fakepath, content="".join(mailparts)
1858                 ) as draft_f,
1859                 monkeypatch.context() as m,
1860             ):
1861                 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1862                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1863
1864             soup = bs4.BeautifulSoup(
1865                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1866                 "html.parser",
1867             )
1868             body = soup.body.contents
1869
1870             assert mailparts[0] in body.pop(0)
1871
1872             sig = soup.select_one("#signature")
1873             assert sig == body.pop(0)
1874
1875             sep = sig.select_one("span.sig_separator")
1876             assert sep == sig.contents[0]
1877             assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1878
1879             assert mailparts[2] in sig.contents[1]
1880
1881         @pytest.mark.converter
1882         @pytest.mark.sig
1883         def test_converter_signature_handling_htmlsig(
1884             self, fakepath, fakepath2, fakefilefactory, monkeypatch
1885         ):
1886             mailparts = (
1887                 "This is the mail body",
1888                 f"{EMAIL_SIG_SEP}",
1889                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1890                 "This is the plain-text version",
1891             )
1892             htmlsig = "HTML Signature from {path} but as a string"
1893             html = (
1894                 f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
1895             )
1896
1897             sig_f = fakefilefactory(fakepath2, content=html)
1898
1899             def mdwn_fn(t):
1900                 return t.upper()
1901
1902             with (
1903                 fakefilefactory(
1904                     fakepath, content="".join(mailparts)
1905                 ) as draft_f,
1906                 monkeypatch.context() as m,
1907             ):
1908                 m.setattr(
1909                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1910                 )
1911                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1912
1913             soup = bs4.BeautifulSoup(
1914                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1915                 "html.parser",
1916             )
1917             sig = soup.select_one("#signature")
1918             sig.span.extract()
1919
1920             assert HTML_SIG_MARKER not in sig.text
1921             assert htmlsig.format(path=fakepath2) == sig.text.strip()
1922
1923             plaintext = fakefilefactory[fakepath].read()
1924             assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
1925
1926         @pytest.mark.converter
1927         @pytest.mark.sig
1928         def test_converter_signature_handling_htmlsig_with_image(
1929             self, fakepath, fakepath2, fakefilefactory, monkeypatch, test_png
1930         ):
1931             mailparts = (
1932                 "This is the mail body",
1933                 f"{EMAIL_SIG_SEP}",
1934                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1935                 "This is the plain-text version",
1936             )
1937             htmlsig = (
1938                 "HTML Signature from {path} with image\n"
1939                 f'<img src="{test_png}">\n'
1940             )
1941             html = (
1942                 f'<div id="signature">{htmlsig.format(path=fakepath2)}</div>'
1943             )
1944
1945             sig_f = fakefilefactory(fakepath2, content=html)
1946
1947             def mdwn_fn(t):
1948                 return t.upper()
1949
1950             with (
1951                 fakefilefactory(
1952                     fakepath, content="".join(mailparts)
1953                 ) as draft_f,
1954                 monkeypatch.context() as m,
1955             ):
1956                 m.setattr(
1957                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1958                 )
1959                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1960
1961             assert fakefilefactory.pop()[0].suffix == ".png"
1962
1963             soup = bs4.BeautifulSoup(
1964                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1965                 "html.parser",
1966             )
1967             assert soup.img.attrs["src"].startswith("cid:")
1968
1969         @pytest.mark.converter
1970         @pytest.mark.sig
1971         def test_converter_signature_handling_textsig_with_image(
1972             self, fakepath, fakefilefactory, test_png
1973         ):
1974             mailparts = (
1975                 "This is the mail body",
1976                 f"{EMAIL_SIG_SEP}",
1977                 "This is the plain-text version with image\n",
1978                 f"![Inline]({test_png})",
1979             )
1980             with (
1981                 fakefilefactory(
1982                     fakepath, content="".join(mailparts)
1983                 ) as draft_f,
1984             ):
1985                 tree = convert_markdown_to_html(
1986                     draft_f, filefactory=fakefilefactory
1987                 )
1988
1989             assert tree.subtype == "relative"
1990             assert tree.children[0].subtype == "alternative"
1991             assert tree.children[1].subtype == "png"
1992             written = fakefilefactory.pop()
1993             assert tree.children[1].path == written[0]
1994             assert written[1].read() == request.urlopen(test_png).read()
1995
1996         @pytest.mark.converter
1997         def test_converter_attribution_to_admonition(
1998             self, fakepath, fakefilefactory
1999         ):
2000             mailparts = (
2001                 "Regarding whatever",
2002                 "> blockquote line1",
2003                 "> blockquote line2",
2004                 "> ",
2005                 "> new para with **bold** text",
2006             )
2007             with fakefilefactory(
2008                 fakepath, content="\n".join(mailparts)
2009             ) as draft_f:
2010                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2011
2012             soup = bs4.BeautifulSoup(
2013                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2014                 "html.parser",
2015             )
2016             quote = soup.select_one("div.admonition.quote")
2017             assert quote
2018             assert (
2019                 soup.select_one("p.admonition-title").extract().text.strip()
2020                 == mailparts[0]
2021             )
2022
2023             p = quote.p.extract()
2024             assert p.text.strip() == "\n".join(p[2:] for p in mailparts[1:3])
2025
2026             p = quote.p.extract()
2027             assert p.contents[1].name == "strong"
2028
2029         @pytest.mark.converter
2030         def test_converter_attribution_to_admonition_multiple(
2031             self, fakepath, fakefilefactory
2032         ):
2033             mailparts = (
2034                 "Regarding whatever",
2035                 "> blockquote line1",
2036                 "> blockquote line2",
2037                 "",
2038                 "Normal text",
2039                 "",
2040                 "> continued emailquote",
2041                 "",
2042                 "Another email-quote",
2043                 "> something",
2044             )
2045             with fakefilefactory(
2046                 fakepath, content="\n".join(mailparts)
2047             ) as draft_f:
2048                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2049
2050             soup = bs4.BeautifulSoup(
2051                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2052                 "html.parser",
2053             )
2054             quote = soup.select_one("div.admonition.quote.continued").extract()
2055             assert quote
2056             assert (
2057                 quote.select_one("p.admonition-title").extract().text.strip()
2058                 == mailparts[0]
2059             )
2060
2061             p = quote.p.extract()
2062             assert p
2063
2064             quote = soup.select_one("div.admonition.quote.continued").extract()
2065             assert quote
2066             assert (
2067                 quote.select_one("p.admonition-title").extract().text.strip()
2068                 == mailparts[-2]
2069             )
2070
2071         @pytest.mark.fileio
2072         def test_file_class_contextmanager(self, const1, monkeypatch):
2073             state = dict(o=False, c=False)
2074
2075             def fn(t):
2076                 state[t] = True
2077
2078             with monkeypatch.context() as m:
2079                 m.setattr(File, "open", lambda s: fn("o"))
2080                 m.setattr(File, "close", lambda s: fn("c"))
2081                 with File() as f:
2082                     assert state["o"]
2083                     assert not state["c"]
2084             assert state["c"]
2085
2086         @pytest.mark.fileio
2087         def test_file_class_no_path(self, const1):
2088             with File(mode="w+") as f:
2089                 f.write(const1, cache=False)
2090                 assert f.read(cache=False) == const1
2091
2092         @pytest.mark.fileio
2093         def test_file_class_path(self, const1, tmp_path):
2094             with File(tmp_path / "file", mode="w+") as f:
2095                 f.write(const1, cache=False)
2096                 assert f.read(cache=False) == const1
2097
2098         @pytest.mark.fileio
2099         def test_file_class_cache(self, tmp_path, const1, const2):
2100             path = tmp_path / "file"
2101             file = File(path, mode="w+")
2102             with file as f:
2103                 f.write(const1, cache=True)
2104             with open(path, mode="w") as f:
2105                 f.write(const2)
2106             with file as f:
2107                 assert f.read(cache=True) == const1
2108
2109         @pytest.mark.fileio
2110         def test_file_class_cache_init(self, const1):
2111             file = File(path=None, mode="r", content=const1)
2112             with file as f:
2113                 assert f.read() == const1
2114
2115         @pytest.mark.fileio
2116         def test_file_class_content_or_path(self, fakepath, const1):
2117             with pytest.raises(RuntimeError):
2118                 file = File(path=fakepath, content=const1)
2119
2120         @pytest.mark.fileio
2121         def test_file_class_content_needs_read(self, const1):
2122             with pytest.raises(RuntimeError):
2123                 file = File(mode="w", content=const1)
2124
2125         @pytest.mark.fileio
2126         def test_file_class_write_persists_close(self, const1):
2127             f = File(mode="w+")
2128             with f:
2129                 f.write(const1)
2130             with f:
2131                 assert f.read() == const1
2132
2133         @pytest.mark.fileio
2134         def test_file_class_write_resets_read_cache(self, const1, const2):
2135             with File(mode="w+", content=const1) as f:
2136                 assert f.read() == const1
2137                 f.write(const2)
2138                 assert f.read() == const2
2139
2140         @pytest.mark.fileio
2141         def test_file_factory(self):
2142             fact = FileFactory()
2143             f = fact()
2144             assert isinstance(f, File)
2145             assert len(fact) == 1
2146             assert f in fact
2147             assert f == fact[0]
2148
2149         @pytest.mark.fileio
2150         def test_fake_file_factory(self, fakepath, fakefilefactory):
2151             fact = FakeFileFactory()
2152             f = fakefilefactory(fakepath)
2153             assert f.path == fakepath
2154             assert f == fakefilefactory[fakepath]
2155
2156         @pytest.mark.fileio
2157         def test_fake_file_factory_path_persistence(
2158             self, fakepath, fakefilefactory
2159         ):
2160             f1 = fakefilefactory(fakepath)
2161             assert f1 == fakefilefactory(fakepath)
2162
2163 except ImportError:
2164     pass