]> git.madduck.net Git - etc/neomutt.git/blob - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

buildmimetree.py: housekeeping at the end of commands
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
1 #!/usr/bin/python3
2 #
3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
4 # alternative conversion, and handling of inline images, using NeoMutt's
5 # ability to manually craft MIME trees, but automating this process.
6 #
7 # Configuration:
8 #   neomuttrc (needs to be a single line):
9 #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
10 #     macro compose B "\
11 #       <enter-command> source '$my_confdir/buildmimetree.py \
12 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
13 #       --css-file $my_confdir/htmlmail.css |'<enter>\
14 #       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
15 #     " "Convert message into a modern MIME tree with inline images"
16 #
17 #     (Yes, we need to call source twice, as mutt only starts to process output
18 #     from a source command when the command exits, and since we need to react
19 #     to the output, we need to be invoked again, using a $my_ variable to pass
20 #     information)
21 #
22 # Requirements:
23 #   - python3
24 #   - python3-markdown
25 #   - python3-beautifulsoup4
26 # Optional:
27 #   - pytest
28 #   - Pynliner, provides --css-file and thus inline styling of HTML output
29 #   - Pygments, then syntax highlighting for fenced code is enabled
30 #
31 # Latest version:
32 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
33 #
34 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
35 # Released under the GPL-2+ licence, just like Mutt itself.
36 #
37
38 import sys
39 import os.path
40 import pathlib
41 import markdown
42 import tempfile
43 import argparse
44 import re
45 import mimetypes
46 import bs4
47 import xml.etree.ElementTree as etree
48 import io
49 import enum
50 from collections import namedtuple, OrderedDict
51 from markdown.extensions import Extension
52 from markdown.blockprocessors import BlockProcessor
53 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
54 from email.utils import make_msgid
55 from urllib import request
56
57
def parse_cli_args(*args, **kwargs):
    """
    Build the command-line parser for this script and parse arguments.

    All positional and keyword arguments are handed through unchanged to
    ``ArgumentParser.parse_args()``; the resulting namespace is returned.

    The ``--css-file`` option is only offered when Pynliner could be
    imported; otherwise ``css_file`` is fixed to ``None`` in the namespace.
    The optional ``massage`` sub-command is selected via the ``mode``
    attribute of the namespace.
    """
    parser = argparse.ArgumentParser(
        description=(
            "NeoMutt helper to turn text/markdown email parts "
            "into full-fledged MIME trees"
        )
    )
    parser.epilog = (
        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
        "Released under the MIT licence"
    )

    parser.add_argument(
        "--extensions",
        metavar="EXT[,EXT[,EXT]]",
        type=str,
        default="",
        help="Markdown extension to use (comma-separated list)",
    )

    if _PYNLINER:
        parser.add_argument(
            "--css-file",
            metavar="FILE",
            type=pathlib.Path,
            default=os.devnull,
            help="CSS file to merge with the final HTML",
        )
    else:
        parser.set_defaults(css_file=None)

    parser.add_argument(
        "--related-to-html-only",
        action="store_true",
        help="Make related content be sibling to HTML parts only",
    )

    def positive_integer(value):
        """
        argparse ``type=`` converter accepting only integers greater than 0.
        """
        try:
            number = int(value)
        except ValueError:
            number = None

        if number is not None and number > 0:
            return number

        # ArgumentTypeError (rather than ValueError) makes argparse show this
        # message to the user instead of a generic "invalid value" one.
        raise argparse.ArgumentTypeError("Must be a positive integer")

    parser.add_argument(
        "--max-number-other-attachments",
        metavar="INTEGER",
        type=positive_integer,
        default=20,
        help="Maximum number of other attachments to expect",
    )

    parser.add_argument(
        "--only-build",
        "--just-build",
        action="store_true",
        help="Only build, don't send the message",
    )

    parser.add_argument(
        "--tempdir",
        metavar="DIR",
        type=pathlib.Path,
        help="Specify temporary directory to use for attachments",
    )

    parser.add_argument(
        "--debug-commands",
        action="store_true",
        help="Turn on debug logging of commands generated to stderr",
    )

    parser.add_argument(
        "--debug-walk",
        action="store_true",
        help="Turn on debugging to stderr of the MIME tree walk",
    )

    parser.add_argument(
        "--dump-html",
        metavar="FILE",
        type=pathlib.Path,
        help="Write the generated HTML to the file",
    )

    subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
    massage_p = subp.add_parser(
        "massage", help="Massaging phase (internal use)"
    )

    massage_p.add_argument(
        "--write-commands-to",
        "-o",
        metavar="FILE",
        dest="cmdpath",
        type=pathlib.Path,
        required=True,
        help="Temporary file path to write commands to",
    )

    massage_p.add_argument(
        "MAILDRAFT",
        nargs="?",
        type=pathlib.Path,
        help="If provided, the script is invoked as editor on the mail draft",
    )

    return parser.parse_args(*args, **kwargs)
169
170
171 # [ FILE I/O HANDLING ] #######################################################
172
173
class File:
    """
    Thin wrapper around a file (or an in-memory buffer) with content caching.

    If ``path`` is given, ``open()`` opens that path with ``mode`` and the
    extra keyword arguments; without a path an ``io.StringIO`` (or
    ``io.BytesIO`` for binary modes) is used instead. Everything read or
    written with ``cache=True`` is remembered, so that ``read()`` can be
    answered from the cache, also after the file has been closed.

    ``read()`` and ``write()`` open (and close again) the file on demand
    when they are called on an unopened instance.
    """

    class Op(enum.Enum):
        R = enum.auto()
        W = enum.auto()

    def __init__(self, path=None, mode="r", content=None, **kwargs):
        """
        ``content`` pre-seeds the read cache; it cannot be combined with
        ``path`` and requires a mode containing ``r`` or ``+``.
        Raises RuntimeError when these constraints are violated.
        """
        if path:
            if content:
                raise RuntimeError("Cannot specify path and content for File")

            self._path = (
                path if isinstance(path, pathlib.Path) else pathlib.Path(path)
            )
        else:
            self._path = None

        if content and not re.search(r"[r+]", mode):
            raise RuntimeError("Cannot specify content without read mode")

        self._cache = {
            File.Op.R: [content] if content else [],
            File.Op.W: [],
        }
        self._lastop = None
        self._mode = mode
        self._kwargs = kwargs
        self._file = None

    def open(self):
        """Open the underlying file, or create the in-memory buffer."""
        if self._path:
            self._file = open(self._path, self._mode, **self._kwargs)
        elif "b" in self._mode:
            self._file = io.BytesIO()
        else:
            self._file = io.StringIO()

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """
        Close the underlying file; what was written becomes the read cache.

        Calling this on a file that is not open is a no-op.
        """
        if self._file is None:
            return

        self._file.close()
        self._file = None
        self._cache[File.Op.R] = self._cache[File.Op.W]
        self._lastop = None

    def _get_cache(self, op):
        # The cache is a list of chunks; join with the right empty value for
        # the mode (bytes vs. str).
        return (b"" if "b" in self._mode else "").join(self._cache[op])

    def _add_to_cache(self, op, s):
        self._cache[op].append(s)

    def read(self, *, cache=True):
        """
        Return the content, from the read cache if allowed and non-empty.

        With ``cache=False`` the cache is neither consulted nor updated.
        """
        if cache and self._cache[File.Op.R]:
            return self._get_cache(File.Op.R)

        if not self._file:
            with self as f:
                return f.read(cache=cache)

        if self._lastop == File.Op.W:
            # Switching from writing to reading: start from the beginning,
            # if the underlying object supports seeking.
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        self._lastop = File.Op.R

        if cache:
            self._add_to_cache(File.Op.R, self._file.read())
            return self._get_cache(File.Op.R)
        else:
            return self._file.read()

    def write(self, s, *, cache=True):
        """
        Write ``s`` and flush; return what the underlying ``write()`` returns.

        With ``cache=True`` the data is also appended to the write cache. In
        either case the read cache is replaced by the write cache.
        """
        if not self._file:
            with self as f:
                return f.write(s, cache=cache)

        if self._lastop == File.Op.R:
            # Switching from reading to writing: rewind if possible.
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        if cache:
            self._add_to_cache(File.Op.W, s)

        self._cache[File.Op.R] = self._cache[File.Op.W]

        written = self._file.write(s)
        self._file.flush()
        self._lastop = File.Op.W
        return written

    path = property(lambda s: s._path)

    def __repr__(self):
        # Report the total size of the cached chunks per operation.
        rsize = sum(len(c) for c in self._cache[File.Op.R])
        wsize = sum(len(c) for c in self._cache[File.Op.W])
        return (
            f'<File path={self._path or "(buffered)"} open={bool(self._file)} '
            f"rcache={rsize} wcache={wsize}>"
        )
282
283
class FileFactory:
    """
    Callable that creates File objects and keeps track of all of them.

    The factory behaves like a small sequence of the files it has handed
    out: it supports ``len()``, indexing, ``in`` and ``pop()``.
    """

    def __init__(self):
        self._files = []

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """Create a File with the given arguments, remember and return it."""
        created = File(path, mode, content, **kwargs)
        self._files.append(created)
        return created

    def __len__(self):
        return len(self._files)

    def pop(self, idx=-1):
        """Remove and return the file at ``idx`` (the last one by default)."""
        return self._files.pop(idx)

    def __getitem__(self, idx):
        return self._files[idx]

    def __contains__(self, f):
        return f in self._files
304
305
class FakeFileFactory(FileFactory):
    """
    FileFactory variant that never touches the file system.

    Files are created as in-memory buffers, and are remembered by the path
    they were requested for, so that asking for the same path again yields
    the same object.
    """

    def __init__(self):
        super().__init__()
        self._paths2files = OrderedDict()

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """Return the fake file for ``path``, creating it on first use."""
        try:
            return self._paths2files[path]
        except KeyError:
            pass

        fake_path = path

        class FakeFile(File):
            path = fake_path

        # The parent creates a path-less (buffered) File …
        buffered = super().__call__(None, mode, content, **kwargs)
        self._paths2files[path] = buffered

        # … and swapping its class makes the buffered file pretend to have
        # the requested path, for testing purposes.
        buffered.__class__ = FakeFile
        return buffered

    def __getitem__(self, path):
        return self._paths2files[path]

    def get(self, path, default):
        """Return the fake file for ``path``, or ``default`` if unknown."""
        return self._paths2files.get(path, default)

    def pop(self, last=True):
        """Remove and return a ``(path, file)`` pair, the newest by default."""
        return self._paths2files.popitem(last)

    def __repr__(self):
        nfiles = len(self._files)
        npaths = len(self._paths2files)
        return f"<FakeFileFactory nfiles={nfiles} paths={npaths}>"
343
344
345 # [ IMAGE HANDLING ] ##########################################################
346
347
InlineImageInfo = namedtuple(
    "InlineImageInfo", ["cid", "desc"], defaults=[None]
)


class ImageRegistry:
    """
    Ordered mapping of image locations to their InlineImageInfo.

    Keys are the (user-expanded) paths or URIs as registered; absolute local
    paths are stored with a ``file://`` prefix.
    """

    def __init__(self):
        self._images = OrderedDict()

    def register(self, path, description=None):
        """
        Register the image at ``path`` and return its Content-ID.

        Registering a location that is already known returns the Content-ID
        handed out the first time, so that references created earlier keep
        pointing at a registered image.
        """
        path = os.path.expanduser(path)
        if path.startswith("/"):
            path = f"file://{path}"

        known = self._images.get(path)
        if known is not None:
            return known.cid

        # make_msgid() returns "<id@host>"; strip the angle brackets
        cid = make_msgid()[1:-1]
        self._images[path] = InlineImageInfo(cid, description)
        return cid

    def __iter__(self):
        return iter(self._images)

    def __contains__(self, path):
        return path in self._images

    def __getitem__(self, idx):
        return self._images[idx]

    def __len__(self):
        return len(self._images)

    def items(self):
        """Return the ``(location, InlineImageInfo)`` pairs in order."""
        return self._images.items()

    def __repr__(self):
        return f"<ImageRegistry(items={len(self._images)})>"

    def __str__(self):
        return str(self._images)
383
384
class InlineImageExtension(Extension):
    """
    Markdown extension that turns local image references into ``cid:`` URIs.

    Every local image encountered is recorded in the registry passed to the
    constructor.
    """

    class RelatedImageInlineProcessor(ImageInlineProcessor):
        def __init__(self, re, md, registry):
            super().__init__(re, md)
            self._registry = registry

        def handleMatch(self, m, data):
            el, start, end = super().handleMatch(m, data)
            if el is None:
                # The parent could not make an image out of this match, so
                # there is nothing to rewrite.
                return el, start, end

            src = el.attrib.get("src")
            if src is not None and (
                "://" not in src or src.startswith("file://")
            ):
                # We only inline local content
                cid = self._registry.register(
                    src,
                    el.attrib.get("title", el.attrib.get("alt")),
                )
                el.attrib["src"] = f"cid:{cid}"
            return el, start, end

    def __init__(self, registry):
        super().__init__()
        self._image_registry = registry

    INLINE_PATTERN_NAME = "image_link"

    def extendMarkdown(self, md):
        """Register the image processor under INLINE_PATTERN_NAME."""
        md.registerExtension(self)
        inline_image_proc = self.RelatedImageInlineProcessor(
            IMAGE_LINK_RE, md, self._image_registry
        )
        md.inlinePatterns.register(
            inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
        )
418
419
def markdown_with_inline_image_support(
    text,
    *,
    mdwn=None,
    image_registry=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert Markdown ``text`` to HTML, inlining local images via ``cid:``.

    Returns a tuple ``(text, htmltext, registry, mdwn)``: the source text
    with parenthesised references to registered images replaced by their
    ``cid:`` URI, the generated HTML, the image registry used (the one
    passed in, or a new one), and the Markdown instance that did the
    conversion.

    NOTE: the ``mdwn`` argument is accepted but not used; a new Markdown
    instance is created on every call.

    The ``extensions`` list passed in is not modified.
    """
    registry = (
        image_registry if image_registry is not None else ImageRegistry()
    )
    inline_image_handler = InlineImageExtension(registry=registry)
    # Work on a copy, so that repeated calls with the same list do not keep
    # piling up image handlers in the caller's list.
    extensions = [*(extensions or []), inline_image_handler]
    mdwn = markdown.Markdown(
        extensions=extensions, extension_configs=extension_configs
    )

    htmltext = mdwn.convert(text)

    def replace_image_with_cid(matchobj):
        # The registry may know the location as-is, or as a file:// URI
        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
            if m in registry:
                return f"(cid:{registry[m].cid}"
        return matchobj.group(0)

    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
    return text, htmltext, registry, mdwn
448
449
450 # [ CSS STYLING ] #############################################################
451
452
# Pynliner is optional; it is needed to inline CSS into the HTML output.
try:
    import pynliner

    _PYNLINER = True

except ImportError:
    _PYNLINER = False

# The CSS class for highlighted code is needed by convert_markdown_to_html()
# regardless of whether Pygments is available, so define it unconditionally.
_CODEHILITE_CLASS = "codehilite"

# Pygments is optional; if present, its default style sheet is made
# available for the code highlighting class.
try:
    from pygments.formatters import get_formatter_by_name

    _PYGMENTS_CSS = get_formatter_by_name(
        "html", style="default"
    ).get_style_defs(f".{_CODEHILITE_CLASS}")

except ImportError:
    _PYGMENTS_CSS = None
472
473
def apply_styling(html, css):
    """
    Return ``html`` with the Pygments style sheet and ``css`` inlined.

    Inlining is done by Pynliner. As Pynliner is optional, the HTML is
    returned unchanged if it could not be imported.
    """
    if not _PYNLINER:
        return html

    # Either style sheet may be missing/empty; only merge what is there.
    stylesheet = "\n".join(s for s in [_PYGMENTS_CSS, css] if s)
    return (
        pynliner.Pynliner()
        .from_string(html)
        .with_cssString(stylesheet)
        .run()
    )
481
482
483 # [ QUOTE HANDLING ] ##########################################################
484
485
class QuoteToAdmonitionExtension(Extension):
    """
    Markdown extension rendering email-style quotes as admonition blocks.

    A block consisting of a non-quoted line followed by ``>``-quoted lines
    is turned into a ``<div class="admonition quote">``, with the first line
    as its title. Later quote-only blocks reuse the last title seen, and are
    additionally marked with the class ``continued``.
    """

    class EmailQuoteBlockProcessor(BlockProcessor):
        RE = re.compile(r"(?:^|\n)>\s*(.*)")

        def __init__(self, parser):
            super().__init__(parser)
            # Title of the most recent quote, reused for continuations
            self._title = None

        def test(self, parent, blocks):
            """Tell whether the block is a (continued) email quote."""
            if markdown.util.nearing_recursion_limit():
                return False

            lines = blocks.splitlines()
            if not lines:
                # Nothing to inspect in an empty block
                return False

            if len(lines) == 1:
                # A single line can only continue an earlier quote
                return bool(self._title) and bool(self.RE.search(lines[0]))

            if not self.RE.search(lines[1]):
                return False

            # The second line is quoted: either the first line is the title,
            # or this block continues an earlier quote.
            return bool(not self.RE.search(lines[0]) or self._title)

        def run(self, parent, blocks):
            """Consume the first block and emit the admonition for it."""
            quotelines = blocks.pop(0).splitlines()

            # If a title is known already, this is treated as a continuation
            cont = bool(self._title)
            if not self.RE.search(quotelines[0]):
                self._title = quotelines.pop(0)

            admonition = etree.SubElement(parent, "div")
            admonition.set(
                "class", f"admonition quote{' continued' if cont else ''}"
            )
            self.parser.parseChunk(admonition, self._title)

            # The first child is what the title was parsed into
            admonition[0].set("class", "admonition-title")
            self.parser.parseChunk(
                admonition, "\n".join(self.clean(line) for line in quotelines)
            )

        @classmethod
        def clean(klass, line):
            """Strip the quote marker from ``line``, if it has one."""
            m = klass.RE.match(line)
            return m.group(1) if m else line

    def extendMarkdown(self, md):
        """Register the quote processor as "emailquote"."""
        md.registerExtension(self)
        email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
        md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
543
544
545 # [ PARTS GENERATION ] ########################################################
546
547
class Part(
    namedtuple(
        "Part",
        ["type", "subtype", "path", "desc", "cid", "orig"],
        defaults=[None, None, False],
    )
):
    """
    Leaf node of the MIME tree: a single part with actual content.

    ``desc`` and ``cid`` default to None, ``orig`` defaults to False.
    """

    def __str__(self):
        # "<type/subtype>", followed by optional markers
        pieces = [f"<{self.type}/{self.subtype}>"]
        if self.cid:
            pieces.append(f"cid:{self.cid}")
        if self.orig:
            pieces.append("ORIGINAL")
        return " ".join(pieces)
562
563
class Multipart(
    namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
):
    """
    Container node of the MIME tree: a multipart/* part with children.

    ``desc`` defaults to None.
    """

    def __str__(self):
        count = len(self.children)
        return f"<multipart/{self.subtype}> children={count}"

    def __hash__(self):
        # Hash the textual form of subtype and children, which also works
        # when the children are kept in an (unhashable) list.
        return hash(str(self.subtype) + str(self.children))
572
573
def collect_inline_images(image_registry, *, tempdir=None, filefactory=None):
    """
    Fetch all registered images into temporary files.

    Every entry of ``image_registry`` (except those whose location starts
    with ``cid:``) is retrieved with ``urllib.request.urlopen()`` and written
    to a new temporary file in ``tempdir``, using ``filefactory`` (a new
    FileFactory if not given).

    Returns the list of Part objects describing the images, typed according
    to the Content-Type reported for each of them.
    """
    if filefactory is None:
        filefactory = FileFactory()

    relparts = []
    for path, info in image_registry.items():
        if path.startswith("cid:"):
            continue

        with request.urlopen(path) as data:
            # Only keep "type/subtype", dropping parameters such as charset,
            # and fall back to a generic type if none was reported.
            mimetype = (
                (data.headers["Content-Type"] or "application/octet-stream")
                .split(";", 1)[0]
                .strip()
            )
            content = data.read()

        ext = mimetypes.guess_extension(mimetype)
        fd, tempfilename = tempfile.mkstemp(
            prefix="img", suffix=ext, dir=tempdir
        )
        # mkstemp() hands back an open descriptor, which is not needed since
        # the file is written by name below.
        os.close(fd)
        path = pathlib.Path(tempfilename)

        with filefactory(path, "w+b") as out_f:
            out_f.write(content)

        desc = (
            f'Inline image: "{info.desc}"'
            if info.desc
            else f"Inline image {str(len(relparts)+1)}"
        )
        relparts.append(
            Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
        )

    return relparts
604
605
EMAIL_SIG_SEP = "\n-- \n"
HTML_SIG_MARKER = "=htmlsig "


def make_html_doc(body, sig=None):
    """
    Wrap ``body`` into a complete HTML document and return it.

    If ``sig`` is given, it is appended to the body within a
    ``<div id="signature">``, preceded by the signature separator.
    """
    pieces = [
        "<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n",
        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n',  # noqa: E501
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n',  # noqa: E501
        "</head>\n",
        "<body>\n",
        f"{body}\n",
    ]

    if sig:
        # The separator is shown without its surrounding newlines
        separator = EMAIL_SIG_SEP.strip("\n")
        pieces.append(
            f'<div id="signature"><span class="sig_separator">{separator}</span>\n'  # noqa: E501
        )
        pieces.append(f"{sig}\n")
        pieces.append("</div>")

    pieces.append("\n  </body>\n</html>")
    return "".join(pieces)
631
632
def make_text_mail(text, sig=None):
    """Return ``text``, with ``sig`` appended after the separator if given."""
    if not sig:
        return text

    return text + EMAIL_SIG_SEP + sig
635
636
def extract_signature(text, *, filefactory=None):
    """
    Split ``text`` at the first signature separator.

    Returns a tuple ``(body, textsig, htmlsig)``. Without a separator, the
    text is returned as body and both signatures are None.

    If the first line of the signature starts with HTML_SIG_MARKER, the
    rest of that line names a file with the HTML signature, which is read
    using ``filefactory`` (a new FileFactory if not given). The remaining
    lines are the plain-text signature. Otherwise everything after the
    separator is the plain-text signature, and the HTML signature is None.
    """
    if filefactory is None:
        filefactory = FileFactory()

    body, separator, trailer = text.partition(EMAIL_SIG_SEP)
    if not separator:
        return text, None, None

    lines = trailer.splitlines()
    # ``lines`` is empty if nothing follows the separator
    if not lines or not lines[0].startswith(HTML_SIG_MARKER):
        return body, trailer, None

    path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
    textsig = "\n".join(lines)

    sig_input = filefactory(path.expanduser()).read()
    soup = bs4.BeautifulSoup(sig_input, "html.parser")

    # Styles are taken out of the document, and prepended to the result
    style = str(soup.style.extract()) if soup.style else ""

    # Look for the signature element, from most to least specific
    for sig_selector in (
        "#signature",
        "#signatur",
        "#emailsig",
        ".signature",
        ".signatur",
        ".emailsig",
        "body",
        "div",
    ):
        sig = soup.select_one(sig_selector)
        if sig:
            break

    if not sig:
        # No container found, so use the file contents as they are
        return body, textsig, style + sig_input

    if sig.attrs.get("id") == "signature":
        # Only use the contents, as make_html_doc() provides the
        # <div id="signature"> container itself
        sig = "".join(str(c) for c in sig.children)

    return body, textsig, style + str(sig)
674
675
def convert_markdown_to_html(
    draft_f,
    *,
    related_to_html_only=False,
    css_f=None,
    htmldump_f=None,
    filefactory=None,
    tempdir=None,
    extensions=None,
    extension_configs=None,
):
    """
    Turn the Markdown draft in ``draft_f`` into a tree of MIME parts.

    The draft is read from ``draft_f``, and rewritten with local image
    references replaced by ``cid:`` URIs. The HTML version is written next
    to the draft (same path with suffix ``.html``), or to a new temporary
    file in ``tempdir`` if the draft has no path. If ``htmldump_f`` is
    given, the HTML is also written there. ``css_f``, if given, is read and
    its contents are passed on to apply_styling().

    Returns a Multipart: the "alternative" group of the text and HTML
    parts, combined with the inline images (if any) in a "relative" group,
    either around the HTML part only (``related_to_html_only``) or around
    the whole alternative group.

    Neither ``extensions`` nor ``extension_configs`` are modified. Files are
    created using ``filefactory`` (a new FileFactory if not given).

    Raises NotImplementedError if there is an HTML signature without a
    plain-text alternative.
    """
    if filefactory is None:
        filefactory = FileFactory()

    # TODO extension_configs need to be handled differently
    extension_configs = dict(extension_configs or {})
    extension_configs["pymdownx.highlight"] = {
        **extension_configs.get("pymdownx.highlight", {}),
        "css_class": _CODEHILITE_CLASS,
    }

    # Work on a copy rather than appending to the caller's list
    extensions = [*(extensions or []), QuoteToAdmonitionExtension()]

    draft = draft_f.read()
    origtext, textsig, htmlsig = extract_signature(
        draft, filefactory=filefactory
    )

    (
        origtext,
        htmltext,
        image_registry,
        mdwn,
    ) = markdown_with_inline_image_support(
        origtext, extensions=extensions, extension_configs=extension_configs
    )

    if htmlsig:
        if not textsig:
            # TODO: decide what to do if there is no plain-text version
            raise NotImplementedError("HTML signature but no text alternative")

        # Images used by the HTML signature get inlined like all others
        soup = bs4.BeautifulSoup(htmlsig, "html.parser")
        for img in soup.find_all("img"):
            uri = img.attrs["src"]
            desc = img.attrs.get("title", img.attrs.get("alt"))
            cid = image_registry.register(uri, desc)
            img.attrs["src"] = f"cid:{cid}"

        htmlsig = str(soup)

    elif textsig:
        # No HTML signature, so convert the plain-text one like the body
        (
            textsig,
            htmlsig,
            image_registry,
            mdwn,
        ) = markdown_with_inline_image_support(
            textsig,
            extensions=extensions,
            extension_configs=extension_configs,
            image_registry=image_registry,
            mdwn=mdwn,
        )

    origtext = make_text_mail(origtext, textsig)
    draft_f.write(origtext)
    textpart = Part(
        "text", "plain", draft_f.path, "Plain-text version", orig=True
    )

    htmltext = make_html_doc(htmltext, htmlsig)
    htmltext = apply_styling(htmltext, css_f.read() if css_f else None)

    if draft_f.path:
        htmlpath = draft_f.path.with_suffix(".html")
    else:
        fd, htmlname = tempfile.mkstemp(suffix=".html", dir=tempdir)
        # The descriptor returned by mkstemp() is not needed, as the file is
        # written by name below.
        os.close(fd)
        htmlpath = pathlib.Path(htmlname)

    with filefactory(
        htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
    ) as out_f:
        out_f.write(htmltext)
    htmlpart = Part("text", "html", htmlpath, "HTML version")

    if htmldump_f:
        htmldump_f.write(htmltext)

    imgparts = collect_inline_images(
        image_registry, tempdir=tempdir, filefactory=filefactory
    )

    if related_to_html_only:
        # If there are inline image part, they will be contained within a
        # multipart/related part along with the HTML part only
        if imgparts:
            # replace htmlpart with a multipart/related container of the HTML
            # parts and the images
            htmlpart = Multipart(
                "relative", [htmlpart] + imgparts, "Group of related content"
            )

        return Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )

    else:
        # If there are inline image part, they will be siblings to the
        # multipart/alternative tree within a multipart/related part
        altpart = Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )
        if imgparts:
            return Multipart(
                "relative", [altpart] + imgparts, "Group of related content"
            )
        else:
            return altpart
792
793
class MIMETreeDFWalker:
    """
    Depth-first walker of a tree made of Multipart and Part nodes.

    For every node, the visitor function is called as
    ``visitor_fn(node, ancestry, descendents, debugprint=...)`` after all
    children of the node have been visited. Without a visitor function, the
    nodes are just printed via ``debugprint``.
    """

    def __init__(self, *, visitor_fn=None, debug=False):
        self._visitor_fn = visitor_fn or self._echovisit
        self._debug = debug

    def _echovisit(self, node, ancestry, descendents, *, debugprint):
        # Default visitor; takes the same arguments as any other visitor
        debugprint(f"node={node} ancestry={ancestry}")

    def walk(self, root, *, visitor_fn=None):
        """
        Recursive function to implement a depth-first walk of the MIME-tree
        rooted at `root`.

        A list with more than one node is wrapped into a multipart/mixed
        container first; a single-element list is replaced by its element.
        """
        if isinstance(root, list):
            if len(root) > 1:
                root = Multipart("mixed", children=root)
            else:
                root = root[0]

        self._walk(
            root,
            ancestry=[],
            descendents=[],
            visitor_fn=visitor_fn or self._visitor_fn,
        )

    def _walk(self, node, *, ancestry, descendents, visitor_fn):
        # Let's start by enumerating the parts at the current level. At the
        # root level, ancestry will be the empty list, and we expect a
        # multipart/* container at this level. Later, e.g. within a
        # multipart/alternative container, the subtree will just be the
        # alternative parts, while the top of the ancestry will be the
        # multipart/alternative container, which we will process after the
        # following loop.

        lead = "│ " * len(ancestry)
        if isinstance(node, Multipart):
            self.debugprint(
                f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
            )

            # Depth-first, so push the current container onto the ancestry
            # stack, then descend …
            ancestry.append(node)
            self.debugprint(lead + "│ " * 2)
            for child in node.children:
                self._walk(
                    child,
                    ancestry=ancestry,
                    descendents=descendents,
                    visitor_fn=visitor_fn,
                )

            # … and come back up. The pop must happen outside of the assert,
            # or it would be skipped when running with -O.
            popped = ancestry.pop()
            assert popped == node
            descendents.extend(node.children)

        else:
            self.debugprint(f"{lead}├{node}")

        if visitor_fn:
            visitor_fn(node, ancestry, descendents, debugprint=self.debugprint)

    def debugprint(self, s, **kwargs):
        """Print ``s`` to stderr, if debugging is enabled."""
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
865
866
867 # [ RUN MODES ] ###############################################################
868
869
class MuttCommands:
    """
    Collector for the Mutt commands to write out.

    Mutt runs "push" commands in reverse order, which complicates mixing
    them with "real" commands. Therefore, three lists are kept: commands
    issued before the first push, the pushes (stored in reverse order of
    being added), and commands issued after the first push.

    flush() prints the first list, then the pushes, then the second list,
    and empties all three.
    """

    def __init__(self, out_f=sys.stdout, *, debug=False):
        self._out_f = out_f
        self._debug = debug
        self._cmd1 = []
        self._push = []
        self._cmd2 = []

    def cmd(self, s):
        """Add the command ``s``, before or after the pushes as described."""
        self.debugprint(s)
        target = self._cmd2 if self._push else self._cmd1
        target.append(s)

    def push(self, s):
        """Add a push of ``s``, with double quotes escaped."""
        escaped = s.replace('"', r"\"")
        line = f'push "{escaped}"'
        self.debugprint(line)
        # Newest first, as pushes are written out in reverse
        self._push.insert(0, line)

    def flush(self):
        """Print everything collected to the output file, and start over."""
        lines = [*self._cmd1, *self._push, *self._cmd2]
        print("\n".join(lines), file=self._out_f)
        self._cmd1, self._push, self._cmd2 = [], [], []

    def debugprint(self, s, **kwargs):
        """Print ``s`` to stderr, if debugging is enabled."""
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
912
913
def do_setup(
    *,
    out_f=sys.stdout,
    temppath=None,
    tempdir=None,
    debug_commands=False,
):
    """
    Write the Mutt commands of the setup phase to ``out_f``.

    The commands save $editor and $edit_headers to $my_ variables, set this
    script's "massage" mode up as the editor, record the path of the command
    file (``temppath``, or a new temporary file in ``tempdir``) in
    $my_mdwn_postprocess_cmd_file, and finally push the editing of the
    first entry.
    """
    if not temppath:
        temppath = pathlib.Path(
            tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)[1]
        )

    # The editor is this very invocation, switched to the massage mode
    invocation = " ".join(sys.argv)
    editor = f"{invocation} massage --write-commands-to {temppath}"

    cmds = MuttCommands(out_f, debug=debug_commands)
    for line in (
        'set my_editor="$editor"',
        'set my_edit_headers="$edit_headers"',
        f'set editor="{editor}"',
        "unset edit_headers",
        f"set my_mdwn_postprocess_cmd_file={temppath}",
    ):
        cmds.cmd(line)

    cmds.push("<first-entry><edit-file>")
    cmds.flush()
935
936
def do_massage(
    draft_f,
    cmd_f,
    *,
    extensions=None,
    css_f=None,
    htmldump_f=None,
    converter=convert_markdown_to_html,
    related_to_html_only=True,
    only_build=False,
    max_other_attachments=20,
    tempdir=None,
    debug_commands=False,
    debug_walk=False,
):
    """
    Convert the draft into a MIME tree and write the NeoMutt commands that
    rebuild this tree in the compose window to cmd_f.

    Parameters:
      draft_f: the email draft; handed to the converter as first argument.
      cmd_f: file object the commands are written to. Its ``name``
        attribute, if present, is used in the command that removes the
        file again.
      extensions: comma-separated string of extension names; it is split on
        "," and the resulting list is handed to the converter.
      css_f, htmldump_f, related_to_html_only, tempdir: handed to the
        converter unchanged.
      converter: callable returning the tree of Part/Multipart nodes to
        walk.
      only_build: if true, <send-message> is not pushed at the end.
      max_other_attachments: number of <move-up> pushed for every part that
        is not at position 1, to bubble it to the top of the list.
      debug_commands: passed on to MuttCommands as its debug flag.
      debug_walk: passed on to MIMETreeDFWalker as its debug flag.

    The visitor raises NotImplementedError for a multipart subtype other
    than alternative, relative/related or multilingual, and RuntimeError
    for a node that is neither a Part nor a Multipart.
    """
    # Here's the big picture: we're being invoked as the editor on the email
    # draft, and whatever commands we write to the file given as cmdpath will
    # be run by the second source command in the macro definition.

    # The cleanup of what the setup did (see above) is queued at the very end
    # of this function, once the tree has been walked.
    cmds = MuttCommands(cmd_f, debug=debug_commands)

    extensions = extensions.split(",") if extensions else []
    tree = converter(
        draft_f,
        css_f=css_f,
        htmldump_f=htmldump_f,
        related_to_html_only=related_to_html_only,
        tempdir=tempdir,
        extensions=extensions,
    )

    mimetree = MIMETreeDFWalker(debug=debug_walk)

    # Bookkeeping shared with the visitor: "pos" is the (assumed) position of
    # the entry just handled in the NeoMutt compose list, "parts" counts the
    # entries handled so far, and "tags" maps a parent node to the list of
    # positions of its children.
    state = dict(pos=1, tags={}, parts=1)

    def visitor_fn(item, ancestry, descendents, *, debugprint=None):
        """
        Visitor function called for every node (part) of the MIME tree,
        depth-first, and responsible for telling NeoMutt how to assemble
        the tree.
        """
        # Typed before new text in a NeoMutt prompt; presumably Ctrl-A
        # followed by Ctrl-K, to clear whatever the prompt is prefilled
        # with (TODO confirm).
        KILL_LINE = r"\Ca\Ck"

        if isinstance(item, Part):
            # We've hit a leaf-node, i.e. an alternative or a related part
            # with actual content.

            # Let's add the part
            if item.orig:
                # The original source already exists in the NeoMutt tree, but
                # the underlying file may have been modified, so we need to
                # update the encoding, but that's it:
                cmds.push("<first-entry>")
                cmds.push("<update-encoding>")

                # We really just need to be able to assume that at this point,
                # NeoMutt is at position 1, and that we've processed only this
                # part so far. Never mind about actual attachments, we can
                # safely ignore those as they stay at the end.
                assert state["pos"] == 1
                assert state["parts"] == 1
            else:
                # … whereas all other parts need to be added, and they're all
                # considered to be temporary and inline:
                cmds.push(f"<attach-file>{item.path}<enter>")
                cmds.push("<toggle-unlink><toggle-disposition>")

                # This added a part at the end of the list of parts, and that's
                # just how many parts we've seen so far, so its position in
                # the NeoMutt compose list is the count of parts
                state["parts"] += 1
                state["pos"] = state["parts"]

            # If the item (including the original) comes with additional
            # information, then we might just as well update the NeoMutt
            # tree now:
            if item.cid:
                cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")

            # Now for the biggest hack in this script, which is to handle
            # attachments, such as PDFs, that aren't related or alternatives.
            # The problem is that when we add an inline image, it always gets
            # appended to the list, i.e. inserted *after* other attachments.
            # Since we don't know the number of attachments, we also cannot
            # infer the position of the new attachment. Therefore, we bubble
            # it all the way to the top, only to then move it down again:
            if state["pos"] > 1:  # skip for the first part
                for i in range(max_other_attachments):
                    # could use any number here, but has to be larger than the
                    # number of possible attachments. The performance
                    # difference of using a high number is negligible.
                    # Bubble up the new part
                    cmds.push("<move-up>")

                # As we push the part to the right position in the list (i.e.
                # the last of the subset of attachments this script added), we
                # must handle the situation that subtrees are skipped by
                # NeoMutt. Hence, the actual number of positions to move down
                # is decremented by the number of descendents so far
                # encountered.
                for i in range(1, state["pos"] - len(descendents)):
                    cmds.push("<move-down>")

        elif isinstance(item, Multipart):
            # This node has children, but we already visited them (see
            # above). The tags dictionary of State should contain a list of
            # their positions in the NeoMutt compose window, so iterate those
            # and tag the parts there:
            n_tags = len(state["tags"][item])
            for tag in state["tags"][item]:
                cmds.push(f"<jump>{tag}<enter><tag-entry>")

            if item.subtype == "alternative":
                cmds.push("<group-alternatives>")
            elif item.subtype in ("relative", "related"):
                cmds.push("<group-related>")
            elif item.subtype == "multilingual":
                cmds.push("<group-multilingual>")
            else:
                raise NotImplementedError(
                    f"Handling of multipart/{item.subtype} is not implemented"
                )

            # NOTE(review): presumably the group becomes an entry of its own
            # and takes the place of the first tagged child, hence one more
            # part and a position moved up by all but one of the children —
            # confirm against NeoMutt's behaviour.
            state["pos"] -= n_tags - 1
            state["parts"] += 1

        else:
            # We should never get here
            raise RuntimeError(f"Type {type(item)} is unexpected: {item}")

        # If the item has a description, we might just as well add it
        if item.desc:
            cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")

        if ancestry:
            # If there's an ancestry, record the current (assumed) position in
            # the NeoMutt compose window as needed-to-tag by our direct parent
            # (i.e. the last item of the ancestry)
            state["tags"].setdefault(ancestry[-1], []).append(state["pos"])

            lead = "│ " * (len(ancestry) + 1) + "* "
            debugprint(
                f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
                f"{lead}descendents={[d.subtype for d in descendents]}\n"
                f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
                f"{lead}pos={state['pos']}, parts={state['parts']}"
            )

    # -----------------
    # End of visitor_fn

    # Let's walk the tree and visit every node with our fancy visitor
    # function
    mimetree.walk(tree, visitor_fn=visitor_fn)

    if not only_build:
        cmds.push("<send-message>")

    # Finally, cleanup. Since we're responsible for removing the temporary
    # file, how's this for a little hack?
    try:
        filename = cmd_f.name
    except AttributeError:
        # File objects without a name attribute; going by the placeholder,
        # this is what the tests hand in.
        filename = "pytest_internal_file"
    cmds.cmd(f"source 'rm -f {filename}|'")
    # Undo what the setup did: restore the $editor and $edit_headers
    # variables from their saved copies, then unset those copies as well as
    # the variable used to identify the command file we're currently writing
    # to.
    cmds.cmd('set editor="$my_editor"')
    cmds.cmd('set edit_headers="$my_edit_headers"')
    cmds.cmd("unset my_editor")
    cmds.cmd("unset my_edit_headers")
    cmds.cmd("unset my_mdwn_postprocess_cmd_file")
    cmds.flush()
1112
1113
1114 # [ CLI ENTRY ] ###############################################################
1115
if __name__ == "__main__":
    # Two modes of operation: "massage" does the actual work on the draft,
    # while no mode at all emits the commands that set the massage run up.
    args = parse_cli_args()
    mode = args.mode

    if mode == "massage":
        with (
            File(args.MAILDRAFT, "r+") as draft_f,
            File(args.cmdpath, "w") as cmd_f,
            File(args.css_file, "r") as css_f,
            File(args.dump_html, "w") as htmldump_f,
        ):
            massage_options = dict(
                extensions=args.extensions,
                css_f=css_f,
                htmldump_f=htmldump_f,
                related_to_html_only=args.related_to_html_only,
                max_other_attachments=args.max_number_other_attachments,
                only_build=args.only_build,
                tempdir=args.tempdir,
                debug_commands=args.debug_commands,
                debug_walk=args.debug_walk,
            )
            do_massage(draft_f, cmd_f, **massage_options)

    elif mode is None:
        setup_options = dict(
            tempdir=args.tempdir,
            debug_commands=args.debug_commands,
        )
        do_setup(**setup_options)
1145
1146
1147 # [ TESTS ] ###################################################################
1148
1149 try:
1150     import pytest
1151
1152     class Tests:
1153         @pytest.fixture
1154         def const1(self):
1155             return "Curvature Vest Usher Dividing+T#iceps Senior"
1156
1157         @pytest.fixture
1158         def const2(self):
1159             return "Habitant Celestial 2litzy Resurf/ce Headpiece Harmonics"
1160
1161         @pytest.fixture
1162         def fakepath(self):
1163             return pathlib.Path("/does/not/exist")
1164
1165         @pytest.fixture
1166         def fakepath2(self):
1167             return pathlib.Path("/does/not/exist/either")
1168
1169         # NOTE: tests using the capsys fixture must specify sys.stdout to the
1170         # functions they call, else old stdout is used and not captured
1171
1172         @pytest.mark.muttctrl
1173         def test_MuttCommands_cmd(self, const1, const2, capsys):
1174             "Assert order of commands"
1175             cmds = MuttCommands(out_f=sys.stdout)
1176             cmds.cmd(const1)
1177             cmds.cmd(const2)
1178             cmds.flush()
1179             captured = capsys.readouterr()
1180             assert captured.out == "\n".join((const1, const2, ""))
1181
1182         @pytest.mark.muttctrl
1183         def test_MuttCommands_push(self, const1, const2, capsys):
1184             "Assert reverse order of pushes"
1185             cmds = MuttCommands(out_f=sys.stdout)
1186             cmds.push(const1)
1187             cmds.push(const2)
1188             cmds.flush()
1189             captured = capsys.readouterr()
1190             assert (
1191                 captured.out
1192                 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
1193             )
1194
1195         @pytest.mark.muttctrl
1196         def test_MuttCommands_push_escape(self, const1, const2, capsys):
1197             cmds = MuttCommands(out_f=sys.stdout)
1198             cmds.push(f'"{const1}"')
1199             cmds.flush()
1200             captured = capsys.readouterr()
1201             assert f'"\\"{const1}\\""' in captured.out
1202
1203         @pytest.mark.muttctrl
1204         def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
1205             "Assert reverse order of pushes"
1206             cmds = MuttCommands(out_f=sys.stdout)
1207             lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
1208             for i in range(2):
1209                 cmds.cmd(lines[4 * i + 0])
1210                 cmds.cmd(lines[4 * i + 1])
1211                 cmds.push(lines[4 * i + 2])
1212                 cmds.push(lines[4 * i + 3])
1213             cmds.flush()
1214
1215             captured = capsys.readouterr()
1216             lines_out = captured.out.splitlines()
1217             assert lines[0] in lines_out[0]
1218             assert lines[1] in lines_out[1]
1219             assert lines[7] in lines_out[2]
1220             assert lines[6] in lines_out[3]
1221             assert lines[3] in lines_out[4]
1222             assert lines[2] in lines_out[5]
1223             assert lines[4] in lines_out[6]
1224             assert lines[5] in lines_out[7]
1225
1226         @pytest.fixture
1227         def mime_tree_related_to_alternative(self):
1228             return Multipart(
1229                 "relative",
1230                 children=[
1231                     Multipart(
1232                         "alternative",
1233                         children=[
1234                             Part(
1235                                 "text",
1236                                 "plain",
1237                                 "part.txt",
1238                                 desc="Plain",
1239                                 orig=True,
1240                             ),
1241                             Part("text", "html", "part.html", desc="HTML"),
1242                         ],
1243                         desc="Alternative",
1244                     ),
1245                     Part(
1246                         "text", "png", "logo.png", cid="logo.png", desc="Logo"
1247                     ),
1248                 ],
1249                 desc="Related",
1250             )
1251
1252         @pytest.fixture
1253         def mime_tree_related_to_html(self):
1254             return Multipart(
1255                 "alternative",
1256                 children=[
1257                     Part(
1258                         "text",
1259                         "plain",
1260                         "part.txt",
1261                         desc="Plain",
1262                         orig=True,
1263                     ),
1264                     Multipart(
1265                         "relative",
1266                         children=[
1267                             Part("text", "html", "part.html", desc="HTML"),
1268                             Part(
1269                                 "text",
1270                                 "png",
1271                                 "logo.png",
1272                                 cid="logo.png",
1273                                 desc="Logo",
1274                             ),
1275                         ],
1276                         desc="Related",
1277                     ),
1278                 ],
1279                 desc="Alternative",
1280             )
1281
1282         @pytest.fixture
1283         def mime_tree_nested(self):
1284             return Multipart(
1285                 "relative",
1286                 children=[
1287                     Multipart(
1288                         "alternative",
1289                         children=[
1290                             Part(
1291                                 "text",
1292                                 "plain",
1293                                 "part.txt",
1294                                 desc="Plain",
1295                                 orig=True,
1296                             ),
1297                             Multipart(
1298                                 "alternative",
1299                                 children=[
1300                                     Part(
1301                                         "text",
1302                                         "plain",
1303                                         "part.txt",
1304                                         desc="Nested plain",
1305                                     ),
1306                                     Part(
1307                                         "text",
1308                                         "html",
1309                                         "part.html",
1310                                         desc="Nested HTML",
1311                                     ),
1312                                 ],
1313                                 desc="Nested alternative",
1314                             ),
1315                         ],
1316                         desc="Alternative",
1317                     ),
1318                     Part(
1319                         "text",
1320                         "png",
1321                         "logo.png",
1322                         cid="logo.png",
1323                         desc="Logo",
1324                     ),
1325                 ],
1326                 desc="Related",
1327             )
1328
1329         @pytest.mark.treewalk
1330         def test_MIMETreeDFWalker_depth_first_walk(
1331             self, mime_tree_related_to_alternative
1332         ):
1333             mimetree = MIMETreeDFWalker()
1334
1335             items = []
1336
1337             def visitor_fn(item, ancestry, descendents, debugprint):
1338                 items.append((item, len(ancestry), len(descendents)))
1339
1340             mimetree.walk(
1341                 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1342             )
1343             assert len(items) == 5
1344             assert items[0][0].subtype == "plain"
1345             assert items[0][1] == 2
1346             assert items[0][2] == 0
1347             assert items[1][0].subtype == "html"
1348             assert items[1][1] == 2
1349             assert items[1][2] == 0
1350             assert items[2][0].subtype == "alternative"
1351             assert items[2][1] == 1
1352             assert items[2][2] == 2
1353             assert items[3][0].subtype == "png"
1354             assert items[3][1] == 1
1355             assert items[3][2] == 2
1356             assert items[4][0].subtype == "relative"
1357             assert items[4][1] == 0
1358             assert items[4][2] == 4
1359
1360         @pytest.mark.treewalk
1361         def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1362             mimetree = MIMETreeDFWalker()
1363             items = []
1364
1365             def visitor_fn(item, ancestry, descendents, debugprint):
1366                 items.append(item)
1367
1368             p = Part("text", "plain", const1)
1369             mimetree.walk([p], visitor_fn=visitor_fn)
1370             assert items[-1].subtype == "plain"
1371             mimetree.walk([p, p], visitor_fn=visitor_fn)
1372             assert items[-1].subtype == "mixed"
1373
1374         @pytest.mark.treewalk
1375         def test_MIMETreeDFWalker_visitor_in_constructor(
1376             self, mime_tree_related_to_alternative
1377         ):
1378             items = []
1379
1380             def visitor_fn(item, ancestry, descendents, debugprint):
1381                 items.append(item)
1382
1383             mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1384             mimetree.walk(mime_tree_related_to_alternative)
1385             assert len(items) == 5
1386
1387         @pytest.fixture
1388         def string_io(self, const1, text=None):
1389             return StringIO(text or const1)
1390
1391         @pytest.mark.massage
1392         def test_do_massage_basic(self):
1393             def converter(draft_f, **kwargs):
1394                 return Part("text", "plain", draft_f.path, orig=True)
1395
1396             with File() as draft_f, File() as cmd_f:
1397                 do_massage(
1398                     draft_f=draft_f,
1399                     cmd_f=cmd_f,
1400                     converter=converter,
1401                 )
1402                 lines = cmd_f.read().splitlines()
1403
1404             assert "send-message" in lines.pop(0)
1405             assert "update-encoding" in lines.pop(0)
1406             assert "first-entry" in lines.pop(0)
1407             assert "source 'rm -f " in lines.pop(0)
1408             assert '="$my_editor"' in lines.pop(0)
1409             assert '="$my_edit_headers"' in lines.pop(0)
1410             assert "unset my_editor" == lines.pop(0)
1411             assert "unset my_edit_headers" == lines.pop(0)
1412             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1413
1414         @pytest.mark.massage
1415         def test_do_massage_fulltree(self, mime_tree_related_to_alternative):
1416             def converter(draft_f, **kwargs):
1417                 return mime_tree_related_to_alternative
1418
1419             max_attachments = 5
1420
1421             with File() as draft_f, File() as cmd_f:
1422                 do_massage(
1423                     draft_f=draft_f,
1424                     cmd_f=cmd_f,
1425                     max_other_attachments=max_attachments,
1426                     converter=converter,
1427                 )
1428                 lines = cmd_f.read().splitlines()[:-6]
1429
1430             assert "first-entry" in lines.pop()
1431             assert "update-encoding" in lines.pop()
1432             assert "Plain" in lines.pop()
1433             assert "part.html" in lines.pop()
1434             assert "toggle-unlink" in lines.pop()
1435             for i in range(max_attachments):
1436                 assert "move-up" in lines.pop()
1437             assert "move-down" in lines.pop()
1438             assert "HTML" in lines.pop()
1439             assert "jump>1" in lines.pop()
1440             assert "jump>2" in lines.pop()
1441             assert "group-alternatives" in lines.pop()
1442             assert "Alternative" in lines.pop()
1443             assert "logo.png" in lines.pop()
1444             assert "toggle-unlink" in lines.pop()
1445             assert "content-id" in lines.pop()
1446             for i in range(max_attachments):
1447                 assert "move-up" in lines.pop()
1448             assert "move-down" in lines.pop()
1449             assert "Logo" in lines.pop()
1450             assert "jump>1" in lines.pop()
1451             assert "jump>4" in lines.pop()
1452             assert "group-related" in lines.pop()
1453             assert "Related" in lines.pop()
1454             assert "send-message" in lines.pop()
1455             assert len(lines) == 0
1456
1457         @pytest.mark.massage
1458         def test_mime_tree_relative_within_alternative(
1459             self, mime_tree_related_to_html
1460         ):
1461             def converter(draft_f, **kwargs):
1462                 return mime_tree_related_to_html
1463
1464             with File() as draft_f, File() as cmd_f:
1465                 do_massage(
1466                     draft_f=draft_f,
1467                     cmd_f=cmd_f,
1468                     converter=converter,
1469                 )
1470                 lines = cmd_f.read().splitlines()[:-6]
1471
1472             assert "first-entry" in lines.pop()
1473             assert "update-encoding" in lines.pop()
1474             assert "Plain" in lines.pop()
1475             assert "part.html" in lines.pop()
1476             assert "toggle-unlink" in lines.pop()
1477             assert "move-up" in lines.pop()
1478             while True:
1479                 top = lines.pop()
1480                 if "move-up" not in top:
1481                     break
1482             assert "move-down" in top
1483             assert "HTML" in lines.pop()
1484             assert "logo.png" in lines.pop()
1485             assert "toggle-unlink" in lines.pop()
1486             assert "content-id" in lines.pop()
1487             assert "move-up" in lines.pop()
1488             while True:
1489                 top = lines.pop()
1490                 if "move-up" not in top:
1491                     break
1492             assert "move-down" in top
1493             assert "move-down" in lines.pop()
1494             assert "Logo" in lines.pop()
1495             assert "jump>2" in lines.pop()
1496             assert "jump>3" in lines.pop()
1497             assert "group-related" in lines.pop()
1498             assert "Related" in lines.pop()
1499             assert "jump>1" in lines.pop()
1500             assert "jump>2" in lines.pop()
1501             assert "group-alternative" in lines.pop()
1502             assert "Alternative" in lines.pop()
1503             assert "send-message" in lines.pop()
1504             assert len(lines) == 0
1505
1506         @pytest.mark.massage
1507         def test_mime_tree_nested_trees_does_not_break_positioning(
1508             self, mime_tree_nested
1509         ):
1510             def converter(draft_f, **kwargs):
1511                 return mime_tree_nested
1512
1513             with File() as draft_f, File() as cmd_f:
1514                 do_massage(
1515                     draft_f=draft_f,
1516                     cmd_f=cmd_f,
1517                     converter=converter,
1518                 )
1519                 lines = cmd_f.read().splitlines()
1520
1521             while "logo.png" not in lines.pop():
1522                 pass
1523             lines.pop()
1524             assert "content-id" in lines.pop()
1525             assert "move-up" in lines.pop()
1526             while True:
1527                 top = lines.pop()
1528                 if "move-up" not in top:
1529                     break
1530             assert "move-down" in top
1531             # Due to the nested trees, the number of descendents of the sibling
1532             # actually needs to be considered, not just the nieces. So to move
1533             # from position 1 to position 6, it only needs one <move-down>
1534             # because that jumps over the entire sibling tree. Thus what
1535             # follows next must not be another <move-down>
1536             assert "Logo" in lines.pop()
1537
1538         @pytest.mark.converter
1539         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
1540             draft_f = fakefilefactory(fakepath, content=const1)
1541             tree = convert_markdown_to_html(
1542                 draft_f, filefactory=fakefilefactory
1543             )
1544
1545             assert tree.subtype == "alternative"
1546             assert len(tree.children) == 2
1547             assert tree.children[0].subtype == "plain"
1548             assert tree.children[0].path == draft_f.path
1549             assert tree.children[0].orig
1550             assert tree.children[1].subtype == "html"
1551             assert tree.children[1].path == fakepath.with_suffix(".html")
1552
1553         @pytest.mark.converter
1554         def test_converter_writes(
1555             self, fakepath, fakefilefactory, const1, monkeypatch
1556         ):
1557             draft_f = fakefilefactory(fakepath, content=const1)
1558             convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1559
1560             html = fakefilefactory.pop()
1561             assert fakepath.with_suffix(".html") == html[0]
1562             assert const1 in html[1].read()
1563             text = fakefilefactory.pop()
1564             assert fakepath == text[0]
1565             assert const1 == text[1].read()
1566
1567         @pytest.mark.imgproc
1568         def test_markdown_inline_image_processor(self):
1569             imgpath1 = "file:/path/to/image.png"
1570             imgpath2 = "file:///path/to/image.png?url=params"
1571             imgpath3 = "/path/to/image.png"
1572             text = f"""![inline local image]({imgpath1})
1573                        ![image inlined
1574                          with newline]({imgpath2})
1575                        ![image local path]({imgpath3})"""
1576             text, html, images, mdwn = markdown_with_inline_image_support(text)
1577
1578             # local paths have been normalised to URLs:
1579             imgpath3 = f"file://{imgpath3}"
1580
1581             assert 'src="cid:' in html
1582             assert "](cid:" in text
1583             assert len(images) == 3
1584             assert imgpath1 in images
1585             assert imgpath2 in images
1586             assert imgpath3 in images
1587             assert images[imgpath1].cid != images[imgpath2].cid
1588             assert images[imgpath1].cid != images[imgpath3].cid
1589             assert images[imgpath2].cid != images[imgpath3].cid
1590
1591         @pytest.mark.imgproc
1592         def test_markdown_inline_image_processor_title_to_desc(self, const1):
1593             imgpath = "file:///path/to/image.png"
1594             text = f'![inline local image]({imgpath} "{const1}")'
1595             text, html, images, mdwn = markdown_with_inline_image_support(text)
1596             assert images[imgpath].desc == const1
1597
1598         @pytest.mark.imgproc
1599         def test_markdown_inline_image_processor_alt_to_desc(self, const1):
1600             imgpath = "file:///path/to/image.png"
1601             text = f"![{const1}]({imgpath})"
1602             text, html, images, mdwn = markdown_with_inline_image_support(text)
1603             assert images[imgpath].desc == const1
1604
1605         @pytest.mark.imgproc
1606         def test_markdown_inline_image_processor_title_over_alt_desc(
1607             self, const1, const2
1608         ):
1609             imgpath = "file:///path/to/image.png"
1610             text = f'![{const1}]({imgpath} "{const2}")'
1611             text, html, images, mdwn = markdown_with_inline_image_support(text)
1612             assert images[imgpath].desc == const2
1613
1614         @pytest.mark.imgproc
1615         def test_markdown_inline_image_not_external(self):
1616             imgpath = "https://path/to/image.png"
1617             text = f"![inline image]({imgpath})"
1618             text, html, images, mdwn = markdown_with_inline_image_support(text)
1619
1620             assert 'src="cid:' not in html
1621             assert "](cid:" not in text
1622             assert len(images) == 0
1623
1624         @pytest.mark.imgproc
1625         def test_markdown_inline_image_local_file(self):
1626             imgpath = "/path/to/image.png"
1627             text = f"![inline image]({imgpath})"
1628             text, html, images, mdwn = markdown_with_inline_image_support(text)
1629
1630             for k, v in images.items():
1631                 assert k == f"file://{imgpath}"
1632                 break
1633
1634         @pytest.mark.imgproc
1635         def test_markdown_inline_image_expanduser(self):
1636             imgpath = pathlib.Path("~/image.png")
1637             text = f"![inline image]({imgpath})"
1638             text, html, images, mdwn = markdown_with_inline_image_support(text)
1639
1640             for k, v in images.items():
1641                 assert k == f"file://{imgpath.expanduser()}"
1642                 break
1643
1644         @pytest.fixture
1645         def test_png(self):
1646             return (
1647                 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAE"
1648                 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1649             )
1650
1651         @pytest.mark.imgproc
1652         def test_markdown_inline_image_processor_base64(self, test_png):
1653             text = f"![1px white inlined]({test_png})"
1654             text, html, images, mdwn = markdown_with_inline_image_support(text)
1655
1656             assert 'src="cid:' in html
1657             assert "](cid:" in text
1658             assert len(images) == 1
1659             assert test_png in images
1660
1661         @pytest.mark.converter
1662         def test_converter_tree_inline_image_base64(
1663             self, test_png, fakefilefactory
1664         ):
1665             text = f"![inline base64 image]({test_png})"
1666             with fakefilefactory(content=text) as draft_f:
1667                 tree = convert_markdown_to_html(
1668                     draft_f,
1669                     filefactory=fakefilefactory,
1670                     related_to_html_only=False,
1671                 )
1672             assert tree.subtype == "relative"
1673             assert tree.children[0].subtype == "alternative"
1674             assert tree.children[1].subtype == "png"
1675             written = fakefilefactory.pop()
1676             assert tree.children[1].path == written[0]
1677             assert b"PNG" in written[1].read()
1678
1679         @pytest.mark.converter
1680         def test_converter_tree_inline_image_base64_related_to_html(
1681             self, test_png, fakefilefactory
1682         ):
1683             text = f"![inline base64 image]({test_png})"
1684             with fakefilefactory(content=text) as draft_f:
1685                 tree = convert_markdown_to_html(
1686                     draft_f,
1687                     filefactory=fakefilefactory,
1688                     related_to_html_only=True,
1689                 )
1690             assert tree.subtype == "alternative"
1691             assert tree.children[1].subtype == "relative"
1692             assert tree.children[1].children[1].subtype == "png"
1693             written = fakefilefactory.pop()
1694             assert tree.children[1].children[1].path == written[0]
1695             assert b"PNG" in written[1].read()
1696
1697         @pytest.mark.converter
1698         def test_converter_tree_inline_image_cid(
1699             self, const1, fakefilefactory
1700         ):
1701             text = f"![inline base64 image](cid:{const1})"
1702             with fakefilefactory(content=text) as draft_f:
1703                 tree = convert_markdown_to_html(
1704                     draft_f,
1705                     filefactory=fakefilefactory,
1706                     related_to_html_only=False,
1707                 )
1708             assert len(tree.children) == 2
1709             assert tree.children[0].cid != const1
1710             assert tree.children[0].type != "image"
1711             assert tree.children[1].cid != const1
1712             assert tree.children[1].type != "image"
1713
1714         @pytest.fixture
1715         def fakefilefactory(self):
1716             return FakeFileFactory()
1717
1718         @pytest.mark.imgcoll
1719         def test_inline_image_collection(
1720             self, test_png, const1, const2, fakefilefactory
1721         ):
1722             test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
1723             relparts = collect_inline_images(
1724                 test_images, filefactory=fakefilefactory
1725             )
1726
1727             written = fakefilefactory.pop()
1728             assert b"PNG" in written[1].read()
1729
1730             assert relparts[0].subtype == "png"
1731             assert relparts[0].path == written[0]
1732             assert relparts[0].cid == const1
1733             assert const2 in relparts[0].desc
1734
1735         if _PYNLINER:
1736
1737             @pytest.mark.styling
1738             def test_apply_stylesheet(self):
1739                 html = "<p>Hello, world!</p>"
1740                 css = "p { color:red }"
1741                 out = apply_styling(html, css)
1742                 assert 'p style="color' in out
1743
1744             @pytest.mark.styling
1745             def test_apply_no_stylesheet(self, const1):
1746                 out = apply_styling(const1, None)
1747
1748             @pytest.mark.massage
1749             @pytest.mark.styling
1750             def test_massage_styling_to_converter(self):
1751                 css = "p { color:red }"
1752                 css_f = File(content=css)
1753                 css_applied = []
1754
1755                 def converter(draft_f, css_f, **kwargs):
1756                     css = css_f.read()
1757                     css_applied.append(css)
1758                     return Part("text", "plain", draft_f.path, orig=True)
1759
1760                 do_massage(
1761                     draft_f=File(),
1762                     cmd_f=File(),
1763                     css_f=css_f,
1764                     converter=converter,
1765                 )
1766                 assert css_applied[0] == css
1767
1768             @pytest.mark.converter
1769             @pytest.mark.styling
1770             def test_converter_apply_styles(
1771                 self, const1, monkeypatch, fakepath, fakefilefactory
1772             ):
1773                 css = "p { color:red }"
1774                 with (
1775                     monkeypatch.context() as m,
1776                     fakefilefactory(fakepath, content=const1) as draft_f,
1777                     fakefilefactory(content=css) as css_f,
1778                 ):
1779                     m.setattr(
1780                         markdown.Markdown,
1781                         "convert",
1782                         lambda s, t: f"<p>{t}</p>",
1783                     )
1784                     convert_markdown_to_html(
1785                         draft_f, css_f=css_f, filefactory=fakefilefactory
1786                     )
1787                 assert re.search(
1788                     r"color:.*red",
1789                     fakefilefactory[fakepath.with_suffix(".html")].read(),
1790                 )
1791
1792         if _PYGMENTS_CSS:
1793
1794             @pytest.mark.styling
1795             def test_apply_stylesheet_pygments(self):
1796                 html = (
1797                     f'<div class="{_CODEHILITE_CLASS}">'
1798                     "<pre>def foo():\n    return</pre></div>"
1799                 )
1800                 out = apply_styling(html, _PYGMENTS_CSS)
1801                 assert f'{_CODEHILITE_CLASS}" style="' in out
1802
1803         @pytest.mark.sig
1804         def test_signature_extraction_no_signature(self, const1):
1805             assert (const1, None, None) == extract_signature(const1)
1806
1807         @pytest.mark.sig
1808         def test_signature_extraction_just_text(self, const1, const2):
1809             origtext, textsig, htmlsig = extract_signature(
1810                 f"{const1}{EMAIL_SIG_SEP}{const2}"
1811             )
1812             assert origtext == const1
1813             assert textsig == const2
1814             assert htmlsig is None
1815
1816         @pytest.mark.sig
1817         def test_signature_extraction_html(
1818             self, fakepath, fakefilefactory, const1, const2
1819         ):
1820             sigconst = "HTML signature from {path} but as a string"
1821             sig = f'<div id="signature">{sigconst.format(path=fakepath)}</div>'
1822
1823             sig_f = fakefilefactory(fakepath, content=sig)
1824
1825             origtext, textsig, htmlsig = extract_signature(
1826                 f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {fakepath}\n{const2}",
1827                 filefactory=fakefilefactory,
1828             )
1829             assert origtext == const1
1830             assert textsig == const2
1831             assert htmlsig == sigconst.format(path=fakepath)
1832
1833         @pytest.mark.sig
1834         def test_signature_extraction_file_not_found(self, const1):
1835             path = pathlib.Path("/does/not/exist")
1836             with pytest.raises(FileNotFoundError):
1837                 origtext, textsig, htmlsig = extract_signature(
1838                     f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
1839                 )
1840
1841         @pytest.mark.imgproc
1842         def test_image_registry(self, const1):
1843             reg = ImageRegistry()
1844             cid = reg.register(const1)
1845             assert "@" in cid
1846             assert not cid.startswith("<")
1847             assert not cid.endswith(">")
1848             assert const1 in reg
1849
1850         @pytest.mark.imgproc
1851         def test_image_registry_file_uri(self, const1):
1852             reg = ImageRegistry()
1853             reg.register("/some/path")
1854             for path in reg:
1855                 assert path.startswith("file://")
1856                 break
1857
1858         @pytest.mark.converter
1859         @pytest.mark.sig
1860         def test_converter_signature_handling(
1861             self, fakepath, fakefilefactory, monkeypatch
1862         ):
1863             mailparts = (
1864                 "This is the mail body\n",
1865                 f"{EMAIL_SIG_SEP}",
1866                 "This is a plain-text signature only",
1867             )
1868
1869             with (
1870                 fakefilefactory(
1871                     fakepath, content="".join(mailparts)
1872                 ) as draft_f,
1873                 monkeypatch.context() as m,
1874             ):
1875                 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1876                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1877
1878             soup = bs4.BeautifulSoup(
1879                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1880                 "html.parser",
1881             )
1882             body = soup.body.contents
1883
1884             assert mailparts[0] in body.pop(0)
1885
1886             sig = soup.select_one("#signature")
1887             assert sig == body.pop(0)
1888
1889             sep = sig.select_one("span.sig_separator")
1890             assert sep == sig.contents[0]
1891             assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1892
1893             assert mailparts[2] in sig.contents[1]
1894
1895         @pytest.mark.converter
1896         @pytest.mark.sig
1897         def test_converter_signature_handling_htmlsig(
1898             self, fakepath, fakepath2, fakefilefactory, monkeypatch
1899         ):
1900             mailparts = (
1901                 "This is the mail body",
1902                 f"{EMAIL_SIG_SEP}",
1903                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1904                 "This is the plain-text version",
1905             )
1906             htmlsig = "HTML Signature from {path} but as a string"
1907             html = (
1908                 f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
1909             )
1910
1911             sig_f = fakefilefactory(fakepath2, content=html)
1912
1913             def mdwn_fn(t):
1914                 return t.upper()
1915
1916             with (
1917                 fakefilefactory(
1918                     fakepath, content="".join(mailparts)
1919                 ) as draft_f,
1920                 monkeypatch.context() as m,
1921             ):
1922                 m.setattr(
1923                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1924                 )
1925                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1926
1927             soup = bs4.BeautifulSoup(
1928                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1929                 "html.parser",
1930             )
1931             sig = soup.select_one("#signature")
1932             sig.span.extract()
1933
1934             assert HTML_SIG_MARKER not in sig.text
1935             assert htmlsig.format(path=fakepath2) == sig.text.strip()
1936
1937             plaintext = fakefilefactory[fakepath].read()
1938             assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
1939
1940         @pytest.mark.converter
1941         @pytest.mark.sig
1942         def test_converter_signature_handling_htmlsig_with_image(
1943             self, fakepath, fakepath2, fakefilefactory, monkeypatch, test_png
1944         ):
1945             mailparts = (
1946                 "This is the mail body",
1947                 f"{EMAIL_SIG_SEP}",
1948                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1949                 "This is the plain-text version",
1950             )
1951             htmlsig = (
1952                 "HTML Signature from {path} with image\n"
1953                 f'<img src="{test_png}">\n'
1954             )
1955             html = (
1956                 f'<div id="signature">{htmlsig.format(path=fakepath2)}</div>'
1957             )
1958
1959             sig_f = fakefilefactory(fakepath2, content=html)
1960
1961             def mdwn_fn(t):
1962                 return t.upper()
1963
1964             with (
1965                 fakefilefactory(
1966                     fakepath, content="".join(mailparts)
1967                 ) as draft_f,
1968                 monkeypatch.context() as m,
1969             ):
1970                 m.setattr(
1971                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1972                 )
1973                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1974
1975             assert fakefilefactory.pop()[0].suffix == ".png"
1976
1977             soup = bs4.BeautifulSoup(
1978                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1979                 "html.parser",
1980             )
1981             assert soup.img.attrs["src"].startswith("cid:")
1982
1983         @pytest.mark.converter
1984         @pytest.mark.sig
1985         def test_converter_signature_handling_textsig_with_image(
1986             self, fakepath, fakefilefactory, test_png
1987         ):
1988             mailparts = (
1989                 "This is the mail body",
1990                 f"{EMAIL_SIG_SEP}",
1991                 "This is the plain-text version with image\n",
1992                 f"![Inline]({test_png})",
1993             )
1994             with (
1995                 fakefilefactory(
1996                     fakepath, content="".join(mailparts)
1997                 ) as draft_f,
1998             ):
1999                 tree = convert_markdown_to_html(
2000                     draft_f, filefactory=fakefilefactory
2001                 )
2002
2003             assert tree.subtype == "relative"
2004             assert tree.children[0].subtype == "alternative"
2005             assert tree.children[1].subtype == "png"
2006             written = fakefilefactory.pop()
2007             assert tree.children[1].path == written[0]
2008             assert written[1].read() == request.urlopen(test_png).read()
2009
2010         @pytest.mark.converter
2011         def test_converter_attribution_to_admonition(
2012             self, fakepath, fakefilefactory
2013         ):
2014             mailparts = (
2015                 "Regarding whatever",
2016                 "> blockquote line1",
2017                 "> blockquote line2",
2018                 "> ",
2019                 "> new para with **bold** text",
2020             )
2021             with fakefilefactory(
2022                 fakepath, content="\n".join(mailparts)
2023             ) as draft_f:
2024                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2025
2026             soup = bs4.BeautifulSoup(
2027                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2028                 "html.parser",
2029             )
2030             quote = soup.select_one("div.admonition.quote")
2031             assert quote
2032             assert (
2033                 soup.select_one("p.admonition-title").extract().text.strip()
2034                 == mailparts[0]
2035             )
2036
2037             p = quote.p.extract()
2038             assert p.text.strip() == "\n".join(p[2:] for p in mailparts[1:3])
2039
2040             p = quote.p.extract()
2041             assert p.contents[1].name == "strong"
2042
2043         @pytest.mark.converter
2044         def test_converter_attribution_to_admonition_multiple(
2045             self, fakepath, fakefilefactory
2046         ):
2047             mailparts = (
2048                 "Regarding whatever",
2049                 "> blockquote line1",
2050                 "> blockquote line2",
2051                 "",
2052                 "Normal text",
2053                 "",
2054                 "> continued emailquote",
2055                 "",
2056                 "Another email-quote",
2057                 "> something",
2058             )
2059             with fakefilefactory(
2060                 fakepath, content="\n".join(mailparts)
2061             ) as draft_f:
2062                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2063
2064             soup = bs4.BeautifulSoup(
2065                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2066                 "html.parser",
2067             )
2068             quote = soup.select_one("div.admonition.quote.continued").extract()
2069             assert quote
2070             assert (
2071                 quote.select_one("p.admonition-title").extract().text.strip()
2072                 == mailparts[0]
2073             )
2074
2075             p = quote.p.extract()
2076             assert p
2077
2078             quote = soup.select_one("div.admonition.quote.continued").extract()
2079             assert quote
2080             assert (
2081                 quote.select_one("p.admonition-title").extract().text.strip()
2082                 == mailparts[-2]
2083             )
2084
2085         @pytest.mark.fileio
2086         def test_file_class_contextmanager(self, const1, monkeypatch):
2087             state = dict(o=False, c=False)
2088
2089             def fn(t):
2090                 state[t] = True
2091
2092             with monkeypatch.context() as m:
2093                 m.setattr(File, "open", lambda s: fn("o"))
2094                 m.setattr(File, "close", lambda s: fn("c"))
2095                 with File() as f:
2096                     assert state["o"]
2097                     assert not state["c"]
2098             assert state["c"]
2099
2100         @pytest.mark.fileio
2101         def test_file_class_no_path(self, const1):
2102             with File(mode="w+") as f:
2103                 f.write(const1, cache=False)
2104                 assert f.read(cache=False) == const1
2105
2106         @pytest.mark.fileio
2107         def test_file_class_path(self, const1, tmp_path):
2108             with File(tmp_path / "file", mode="w+") as f:
2109                 f.write(const1, cache=False)
2110                 assert f.read(cache=False) == const1
2111
2112         @pytest.mark.fileio
2113         def test_file_class_cache(self, tmp_path, const1, const2):
2114             path = tmp_path / "file"
2115             file = File(path, mode="w+")
2116             with file as f:
2117                 f.write(const1, cache=True)
2118             with open(path, mode="w") as f:
2119                 f.write(const2)
2120             with file as f:
2121                 assert f.read(cache=True) == const1
2122
2123         @pytest.mark.fileio
2124         def test_file_class_cache_init(self, const1):
2125             file = File(path=None, mode="r", content=const1)
2126             with file as f:
2127                 assert f.read() == const1
2128
2129         @pytest.mark.fileio
2130         def test_file_class_content_or_path(self, fakepath, const1):
2131             with pytest.raises(RuntimeError):
2132                 file = File(path=fakepath, content=const1)
2133
2134         @pytest.mark.fileio
2135         def test_file_class_content_needs_read(self, const1):
2136             with pytest.raises(RuntimeError):
2137                 file = File(mode="w", content=const1)
2138
2139         @pytest.mark.fileio
2140         def test_file_class_write_persists_close(self, const1):
2141             f = File(mode="w+")
2142             with f:
2143                 f.write(const1)
2144             with f:
2145                 assert f.read() == const1
2146
2147         @pytest.mark.fileio
2148         def test_file_class_write_resets_read_cache(self, const1, const2):
2149             with File(mode="w+", content=const1) as f:
2150                 assert f.read() == const1
2151                 f.write(const2)
2152                 assert f.read() == const2
2153
2154         @pytest.mark.fileio
2155         def test_file_factory(self):
2156             fact = FileFactory()
2157             f = fact()
2158             assert isinstance(f, File)
2159             assert len(fact) == 1
2160             assert f in fact
2161             assert f == fact[0]
2162
2163         @pytest.mark.fileio
2164         def test_fake_file_factory(self, fakepath, fakefilefactory):
2165             fact = FakeFileFactory()
2166             f = fakefilefactory(fakepath)
2167             assert f.path == fakepath
2168             assert f == fakefilefactory[fakepath]
2169
2170         @pytest.mark.fileio
2171         def test_fake_file_factory_path_persistence(
2172             self, fakepath, fakefilefactory
2173         ):
2174             f1 = fakefilefactory(fakepath)
2175             assert f1 == fakefilefactory(fakepath)
2176
2177 except ImportError:
2178     pass