]> git.madduck.net Git - etc/neomutt.git/blob - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

2ad671a839d632a87eae622358efab1dccaa086d
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
1 #!/usr/bin/python3
2 #
3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
4 # alternative conversion, and handling of inline images, using NeoMutt's
5 # ability to manually craft MIME trees, but automating this process.
6 #
7 # Configuration:
8 #   neomuttrc (needs to be a single line):
9 #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
10 #     macro compose B "\
11 #       <enter-command> source '$my_confdir/buildmimetree.py \
12 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
13 #       --css-file $my_confdir/htmlmail.css |'<enter>\
14 #       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
15 #     " "Convert message into a modern MIME tree with inline images"
16 #
17 #     (Yes, we need to call source twice, as mutt only starts to process output
18 #     from a source command when the command exits, and since we need to react
19 #     to the output, we need to be invoked again, using a $my_ variable to pass
20 #     information)
21 #
22 # Requirements:
23 #   - python3
24 #   - python3-markdown
25 #   - python3-beautifulsoup4
26 # Optional:
27 #   - pytest
28 #   - Pynliner, provides --css-file and thus inline styling of HTML output
29 #   - Pygments, then syntax highlighting for fenced code is enabled
30 #
31 # Latest version:
32 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
33 #
34 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
35 # Released under the GPL-2+ licence, just like Mutt itself.
36 #
37
38 import sys
39 import os.path
40 import pathlib
41 import markdown
42 import tempfile
43 import argparse
44 import re
45 import mimetypes
46 import bs4
47 import xml.etree.ElementTree as etree
48 import io
49 import enum
50 from collections import namedtuple, OrderedDict
51 from markdown.extensions import Extension
52 from markdown.blockprocessors import BlockProcessor
53 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
54 from email.utils import make_msgid
55 from urllib import request
56
57
def parse_cli_args(*args, **kwargs):
    """
    Build the command-line parser and parse the given arguments.

    All positional and keyword arguments are handed unchanged to
    ``ArgumentParser.parse_args()``.

    Returns the resulting ``argparse.Namespace``. Its ``mode`` attribute
    holds the name of the chosen sub-command ("massage"), if any. The
    ``--css-file`` option is only offered when Pynliner could be imported;
    otherwise ``css_file`` is preset to ``None``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "NeoMutt helper to turn text/markdown email parts "
            "into full-fledged MIME trees"
        )
    )
    # NOTE(review): the header of this file states GPL-2+, whereas this
    # epilog states MIT — confirm which licence is intended.
    parser.epilog = (
        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
        "Released under the MIT licence"
    )

    parser.add_argument(
        "--extensions",
        metavar="EXT[,EXT[,EXT]]",
        type=str,
        default="",
        help="Markdown extension to use (comma-separated list)",
    )

    if _PYNLINER:
        parser.add_argument(
            "--css-file",
            metavar="FILE",
            type=pathlib.Path,
            default=os.devnull,
            help="CSS file to merge with the final HTML",
        )
    else:
        parser.set_defaults(css_file=None)

    parser.add_argument(
        "--related-to-html-only",
        action="store_true",
        help="Make related content be sibling to HTML parts only",
    )

    def positive_integer(value):
        """
        argparse type converter accepting integers greater than zero only.
        """
        try:
            number = int(value)
        except ValueError:
            number = 0

        if number > 0:
            return number

        # ArgumentTypeError (unlike a plain ValueError) makes argparse show
        # this message to the user instead of a generic "invalid value" one.
        raise argparse.ArgumentTypeError("Must be a positive integer")

    parser.add_argument(
        "--max-number-other-attachments",
        metavar="INTEGER",
        type=positive_integer,
        default=20,
        help="Maximum number of other attachments to expect",
    )

    parser.add_argument(
        "--only-build",
        "--just-build",
        action="store_true",
        help="Only build, don't send the message",
    )

    parser.add_argument(
        "--tempdir",
        metavar="DIR",
        type=pathlib.Path,
        help="Specify temporary directory to use for attachments",
    )

    parser.add_argument(
        "--debug-commands",
        action="store_true",
        help="Turn on debug logging of commands generated to stderr",
    )

    parser.add_argument(
        "--debug-walk",
        action="store_true",
        help="Turn on debugging to stderr of the MIME tree walk",
    )

    parser.add_argument(
        "--dump-html",
        metavar="FILE",
        type=pathlib.Path,
        help="Write the generated HTML to the file",
    )

    subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
    massage_p = subp.add_parser(
        "massage", help="Massaging phase (internal use)"
    )

    massage_p.add_argument(
        "--write-commands-to",
        "-o",
        metavar="FILE",
        dest="cmdpath",
        type=pathlib.Path,
        required=True,
        help="Temporary file path to write commands to",
    )

    massage_p.add_argument(
        "MAILDRAFT",
        nargs="?",
        type=pathlib.Path,
        help="If provided, the script is invoked as editor on the mail draft",
    )

    return parser.parse_args(*args, **kwargs)
169
170
171 # [ FILE I/O HANDLING ] #######################################################
172
173
class File:
    """
    Wrapper around either a file on disk or an in-memory buffer, keeping a
    cache of what has been read and written.

    If no path is given, ``open()`` creates an ``io.BytesIO`` (binary modes)
    or ``io.StringIO`` buffer instead of opening a real file. ``read()`` and
    ``write()`` open and close the underlying file themselves when called
    outside of a ``with`` block.
    """

    class Op(enum.Enum):
        R = enum.auto()
        W = enum.auto()

    def __init__(self, path=None, mode="r", content=None, **kwargs):
        """
        ``path`` and ``content`` are mutually exclusive; ``content``
        pre-seeds the read cache and requires a mode containing "r" or "+".
        Additional keyword arguments are passed on to ``open()``.

        Raises RuntimeError on an invalid combination of arguments.
        """
        if path:
            if content:
                raise RuntimeError("Cannot specify path and content for File")

            self._path = (
                path if isinstance(path, pathlib.Path) else pathlib.Path(path)
            )
        else:
            self._path = None

        if content and not re.search(r"[r+]", mode):
            raise RuntimeError("Cannot specify content without read mode")

        self._cache = {
            File.Op.R: [content] if content else [],
            File.Op.W: []
        }
        self._lastop = None
        self._mode = mode
        self._kwargs = kwargs
        self._file = None

    def open(self):
        """
        Open the file at the path, or create an in-memory buffer.
        """
        if self._path:
            self._file = open(self._path, self._mode, **self._kwargs)
        elif "b" in self._mode:
            self._file = io.BytesIO()
        else:
            self._file = io.StringIO()

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """
        Close the underlying file; afterwards the read cache is whatever
        has been written (and cached) so far.
        """
        self._file.close()
        self._file = None
        self._cache[File.Op.R] = self._cache[File.Op.W]
        self._lastop = None

    def _get_cache(self, op):
        return (b"" if "b" in self._mode else "").join(self._cache[op])

    def _add_to_cache(self, op, s):
        self._cache[op].append(s)

    def read(self, *, cache=True):
        """
        Return the file content, served from the read cache if ``cache`` is
        true and the cache is not empty.
        """
        if cache and self._cache[File.Op.R]:
            return self._get_cache(File.Op.R)

        if not self._file:
            with self as f:
                return f.read(cache=cache)

        if self._lastop == File.Op.W:
            # Rewind after a write so that the read starts at the beginning
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        self._lastop = File.Op.R

        if cache:
            self._add_to_cache(File.Op.R, self._file.read())
            return self._get_cache(File.Op.R)
        else:
            return self._file.read()

    def write(self, s, *, cache=True):
        """
        Write ``s`` to the file and flush; return what the underlying
        ``write()`` returned. With ``cache`` true, ``s`` is also remembered
        so that later cached reads return it.
        """

        if not self._file:
            with self as f:
                return f.write(s, cache=cache)

        if self._lastop == File.Op.R:
            # Rewind after a read so that the write starts at the beginning
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        if cache:
            self._add_to_cache(File.Op.W, s)

        self._cache[File.Op.R] = self._cache[File.Op.W]

        written = self._file.write(s)
        self._file.flush()
        self._lastop = File.Op.W
        return written

    path = property(lambda s: s._path)

    def __repr__(self):
        # The caches live in self._cache, keyed by operation
        rcache = sum(len(c) for c in self._cache[File.Op.R])
        wcache = sum(len(c) for c in self._cache[File.Op.W])
        return (
            f'<File path={self._path or "(buffered)"} open={bool(self._file)} '
            f"rcache={rcache} wcache={wcache}>"
        )
282
283
class FileFactory:
    """
    Callable that creates File objects and keeps track of every File it has
    handed out, offering list-like access to them.
    """

    def __init__(self):
        self._files = []

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """
        Create a File from the given arguments, record it, and return it.
        """
        created = File(path, mode, content, **kwargs)
        self._files.append(created)
        return created

    def __len__(self):
        return len(self._files)

    def pop(self, idx=-1):
        """
        Remove and return the recorded File at ``idx`` (default: the last).
        """
        return self._files.pop(idx)

    def __getitem__(self, idx):
        return self._files[idx]

    def __contains__(self, f):
        return f in self._files
304
305
class FakeFileFactory(FileFactory):
    """
    FileFactory variant that never touches the disk: every path maps to a
    buffered File, and asking for the same path again yields the same File.
    """

    def __init__(self):
        super().__init__()
        self._paths2files = OrderedDict()

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """
        Return the File already associated with ``path``, or create a new
        path-less (buffered) one and associate it with ``path``.
        """
        try:
            return self._paths2files[path]
        except KeyError:
            pass

        fake = super().__call__(None, mode, content, **kwargs)
        self._paths2files[path] = fake

        # A class body cannot see the enclosing function's ``path`` under
        # the same name it is about to define, hence the alias.
        pretend_path = path

        class FakeFile(File):
            path = pretend_path

        # Swap the class of the instance so that the buffered file, which
        # has no path of its own, reports the requested path for testing
        # purposes.
        fake.__class__ = FakeFile
        return fake

    def __getitem__(self, path):
        return self._paths2files[path]

    def get(self, path, default):
        """
        Return the File registered for ``path``, or ``default``.
        """
        return self._paths2files.get(path, default)

    def pop(self, last=True):
        """
        Remove and return a (path, File) pair; the newest one if ``last``
        is true, else the oldest.
        """
        return self._paths2files.popitem(last)

    def __repr__(self):
        nfiles = len(self._files)
        npaths = len(self._paths2files)
        return f"<FakeFileFactory nfiles={nfiles} paths={npaths}>"
343
344
345 # [ IMAGE HANDLING ] ##########################################################
346
347
InlineImageInfo = namedtuple(
    "InlineImageInfo", ["cid", "desc"], defaults=[None]
)


class ImageRegistry:
    """
    Ordered mapping of image locations to InlineImageInfo (content ID and
    description) for images that are to be attached inline.
    """

    def __init__(self):
        self._images = OrderedDict()

    def register(self, path, description=None):
        """
        Register the image at ``path`` and return its content ID.

        A leading "~" is expanded, and absolute paths are turned into
        file:// URLs before being used as key. Registering the same
        location again returns the content ID handed out the first time, so
        that references created earlier stay valid; a description is only
        filled in if none was recorded before.
        """
        # path = str(pathlib.Path(path).expanduser())
        path = os.path.expanduser(path)
        if path.startswith("/"):
            path = f"file://{path}"

        known = self._images.get(path)
        if known is not None:
            if known.desc is None and description is not None:
                self._images[path] = known._replace(desc=description)
            return known.cid

        # make_msgid() wraps the ID in angle brackets, which we strip
        cid = make_msgid()[1:-1]
        self._images[path] = InlineImageInfo(cid, description)
        return cid

    def __iter__(self):
        return self._images.__iter__()

    def __contains__(self, path):
        return path in self._images

    def __getitem__(self, idx):
        return self._images.__getitem__(idx)

    def __len__(self):
        return self._images.__len__()

    def items(self):
        """
        Return the (location, InlineImageInfo) pairs in registration order.
        """
        return self._images.items()

    def __repr__(self):
        return f"<ImageRegistry(items={len(self._images)})>"

    def __str__(self):
        return self._images.__str__()
383
384
class InlineImageExtension(Extension):
    """
    Markdown extension that registers local images with an image registry
    and rewrites their ``src`` attribute to the resulting ``cid:`` URL.
    """

    class RelatedImageInlineProcessor(ImageInlineProcessor):
        def __init__(self, re, md, registry):
            super().__init__(re, md)
            self._registry = registry

        def handleMatch(self, m, data):
            el, start, end = super().handleMatch(m, data)
            # The parent processor hands back None instead of an element
            # when the text only looked like an image link; pass that on
            # untouched.
            if el is not None and "src" in el.attrib:
                src = el.attrib["src"]
                if "://" not in src or src.startswith("file://"):
                    # We only inline local content
                    cid = self._registry.register(
                        el.attrib["src"],
                        el.attrib.get("title", el.attrib.get("alt")),
                    )
                    el.attrib["src"] = f"cid:{cid}"
            return el, start, end

    def __init__(self, registry):
        super().__init__()
        self._image_registry = registry

    INLINE_PATTERN_NAME = "image_link"

    def extendMarkdown(self, md):
        """
        Register the inline image processor with the Markdown instance.
        """
        md.registerExtension(self)
        inline_image_proc = self.RelatedImageInlineProcessor(
            IMAGE_LINK_RE, md, self._image_registry
        )
        md.inlinePatterns.register(
            inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
        )
418
419
def markdown_with_inline_image_support(
    text,
    *,
    mdwn=None,
    image_registry=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert Markdown ``text`` to HTML, registering local images for inline
    attachment.

    ``image_registry`` is used to record the images if given, otherwise a
    new ImageRegistry is created. ``extensions`` and ``extension_configs``
    are passed to ``markdown.Markdown``; the caller's list is not modified.
    The ``mdwn`` argument is accepted but not used: a new Markdown instance
    is created on every call.

    Returns a tuple ``(text, htmltext, registry, mdwn)``, where ``text`` is
    the input with parenthesised references to registered images replaced
    by their ``cid:`` URLs.
    """
    registry = (
        image_registry if image_registry is not None else ImageRegistry()
    )
    inline_image_handler = InlineImageExtension(registry=registry)
    # Work on a copy, so that repeated calls with the same list do not pile
    # up image handlers in the caller's list
    extensions = list(extensions or [])
    extensions.append(inline_image_handler)
    mdwn = markdown.Markdown(
        extensions=extensions, extension_configs=extension_configs or {}
    )

    htmltext = mdwn.convert(text)

    def replace_image_with_cid(matchobj):
        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
            if m in registry:
                return f"(cid:{registry[m].cid}"
        return matchobj.group(0)

    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
    return text, htmltext, registry, mdwn
448
449
450 # [ CSS STYLING ] #############################################################
451
452
# Pynliner is optional; _PYNLINER records whether it is available.
try:
    import pynliner

    _PYNLINER = True

except ImportError:
    _PYNLINER = False

# Defined outside of the try block below, because code elsewhere in this
# file reads the class name regardless of whether Pygments is installed.
_CODEHILITE_CLASS = "codehilite"

# Pygments is optional, too; without it there is no highlighting CSS.
try:
    from pygments.formatters import get_formatter_by_name

    _PYGMENTS_CSS = get_formatter_by_name(
        "html", style="default"
    ).get_style_defs(f".{_CODEHILITE_CLASS}")

except ImportError:
    _PYGMENTS_CSS = None
472
473
def apply_styling(html, css):
    """
    Return ``html`` with the Pygments CSS (if available) and ``css`` (if
    given) applied inline by Pynliner.

    Without Pynliner, styles cannot be inlined, and ``html`` is returned
    unchanged.
    """
    if not _PYNLINER:
        return html

    return (
        pynliner.Pynliner()
        .from_string(html)
        .with_cssString("\n".join(s for s in [_PYGMENTS_CSS, css] if s))
        .run()
    )
481
482
483 # [ QUOTE HANDLING ] ##########################################################
484
485
class QuoteToAdmonitionExtension(Extension):
    """
    Markdown extension that renders e-mail quotes, i.e. an attribution
    line followed by ">"-prefixed lines, as admonition-style <div>
    elements.
    """

    class EmailQuoteBlockProcessor(BlockProcessor):
        RE = re.compile(r"(?:^|\n)>\s*(.*)")

        def __init__(self, parser):
            super().__init__(parser)
            # The title line seen last, reused for follow-up quote blocks
            self._title = None

        def test(self, parent, blocks):
            """
            Tell whether the given block should be handled by run().
            """
            if markdown.util.nearing_recursion_limit():
                return False

            lines = blocks.splitlines()
            if len(lines) < 2:
                # A short block only qualifies as the continuation of a
                # quote whose title we already know
                if not self._title:
                    return False

                if self.RE.search(lines[0]) is None:
                    return False

                return len(lines) > 0

            first_is_quote = self.RE.search(lines[0]) is not None
            second_is_quote = self.RE.search(lines[1]) is not None
            if not second_is_quote:
                return False

            # Either a title followed by a quote, or any block with a
            # quoted second line once a title is known
            return (not first_is_quote) or bool(self._title)

        def run(self, parent, blocks):
            """
            Consume the first block and append the admonition <div> built
            from it to ``parent``.
            """
            quotelines = blocks.pop(0).splitlines()

            continued = bool(self._title)
            if self.RE.search(quotelines[0]) is None:
                self._title = quotelines.pop(0)

            css_class = "admonition quote"
            if continued:
                css_class += " continued"

            admonition = etree.SubElement(parent, "div")
            admonition.set("class", css_class)
            self.parser.parseChunk(admonition, self._title)

            admonition[0].set("class", "admonition-title")
            unquoted = [self.clean(line) for line in quotelines]
            self.parser.parseChunk(admonition, "\n".join(unquoted))

        @classmethod
        def clean(cls, line):
            """
            Strip the quote prefix off ``line``, if it has one.
            """
            found = cls.RE.match(line)
            if found:
                return found.group(1)
            return line

    def extendMarkdown(self, md):
        """
        Register the quote block processor with the Markdown instance.
        """
        md.registerExtension(self)
        md.parser.blockprocessors.register(
            self.EmailQuoteBlockProcessor(md.parser), "emailquote", 25
        )
543
544
545 # [ PARTS GENERATION ] ########################################################
546
547
class Part(
    namedtuple(
        "Part",
        ["type", "subtype", "path", "desc", "cid", "orig"],
        defaults=[None, None, False],
    )
):
    """
    Leaf of the MIME tree: content type and subtype, the path of the file
    holding the content, and optionally a description, a content ID, and a
    flag marking the original part.
    """

    def __str__(self):
        pieces = [f"<{self.type}/{self.subtype}>"]
        if self.cid:
            pieces.append(f"cid:{self.cid}")
        if self.orig:
            pieces.append("ORIGINAL")
        return " ".join(pieces)
562
563
class Multipart(
    namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
):
    """
    Container node of the MIME tree: a multipart subtype, the list of
    child nodes, and an optional description.
    """

    def __str__(self):
        count = len(self.children)
        return f"<multipart/{self.subtype}> children={count}"

    def __hash__(self):
        # The children are usually held in a (unhashable) list, so the hash
        # is derived from the string representations instead
        return hash(f"{str(self.subtype)}{str(self.children)}")
572
573
def collect_inline_images(
    image_registry, *, tempdir=None, filefactory=None
):
    """
    Fetch every image in the registry into a temporary file, and return
    the list of Parts describing those files.

    ``image_registry`` needs to provide ``items()`` yielding pairs of image
    URL and InlineImageInfo; entries whose URL starts with "cid:" are
    skipped. Temporary files are created in ``tempdir`` and written via
    ``filefactory`` (a new FileFactory if not given).
    """
    if filefactory is None:
        filefactory = FileFactory()

    relparts = []
    for url, info in image_registry.items():
        if url.startswith("cid:"):
            continue

        # Make sure the response is closed again once we have its content
        with request.urlopen(url) as data:
            # get_content_type() yields "type/subtype" without parameters
            # such as "; charset=…", which would otherwise end up in the
            # subtype of the part
            mimetype = data.headers.get_content_type()
            content = data.read()

        ext = mimetypes.guess_extension(mimetype)
        fd, tempfilename = tempfile.mkstemp(
            prefix="img", suffix=ext, dir=tempdir
        )
        # mkstemp() returns an open file descriptor, which we do not use
        os.close(fd)
        path = pathlib.Path(tempfilename)

        with filefactory(path, "w+b") as out_f:
            out_f.write(content)

        desc = (
            f'Inline image: "{info.desc}"'
            if info.desc
            else f"Inline image {str(len(relparts)+1)}"
        )
        relparts.append(
            Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
        )

    return relparts
604
605
# Line separating the body of an email from its signature
EMAIL_SIG_SEP = "\n-- \n"
# Prefix of a signature line naming a file with the HTML signature
HTML_SIG_MARKER = "=htmlsig "


def make_html_doc(body, sig=None):
    """
    Wrap the HTML ``body`` into a complete HTML document, with ``sig``, if
    given, appended in a signature <div>.
    """
    pieces = [
        "<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n",
        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n',  # noqa: E501
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n',  # noqa: E501
        "</head>\n",
        "<body>\n",
        f"{body}\n",
    ]

    if sig:
        separator = EMAIL_SIG_SEP.strip("\n")
        pieces.extend(
            (
                f'<div id="signature"><span class="sig_separator">{separator}</span>\n',  # noqa: E501
                f"{sig}\n",
                "</div>",
            )
        )

    pieces.append("\n  </body>\n</html>")
    return "".join(pieces)


def make_text_mail(text, sig=None):
    """
    Return ``text``, followed by the signature separator and ``sig`` if a
    signature is given.
    """
    if not sig:
        return text
    return EMAIL_SIG_SEP.join((text, sig))
635
636
def extract_signature(text, *, filefactory=None):
    """
    Split ``text`` at the first signature separator.

    Returns a tuple ``(body, textsig, htmlsig)``. Both signatures are None
    if there is no separator. If the first line of the signature starts
    with the HTML signature marker, the rest of that line names a file,
    which is read via ``filefactory`` (a new FileFactory if not given) and
    from which the HTML signature is extracted; otherwise ``htmlsig`` is
    None.
    """
    if filefactory is None:
        filefactory = FileFactory()

    parts = text.split(EMAIL_SIG_SEP, 1)
    if len(parts) == 1:
        return text, None, None

    lines = parts[1].splitlines()
    # ``lines`` is empty if nothing follows the separator
    if not lines or not lines[0].startswith(HTML_SIG_MARKER):
        return parts[0], parts[1], None

    path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
    textsig = "\n".join(lines)

    sig_input = filefactory(path.expanduser()).read()
    soup = bs4.BeautifulSoup(sig_input, "html.parser")

    style = str(soup.style.extract()) if soup.style else ""
    # Use the first element matched by any of these selectors, tried in
    # order from the most to the least specific
    for sig_selector in (
        "#signature",
        "#signatur",
        "#emailsig",
        ".signature",
        ".signatur",
        ".emailsig",
        "body",
        "div",
    ):
        sig = soup.select_one(sig_selector)
        if sig:
            break

    if not sig:
        return parts[0], textsig, style + sig_input

    if sig.attrs.get("id") == "signature":
        # Only keep the content of the element in this case
        sig = "".join(str(c) for c in sig.children)

    return parts[0], textsig, style + str(sig)
674
675
def convert_markdown_to_html(
    draft_f,
    *,
    related_to_html_only=False,
    css_f=None,
    htmldump_f=None,
    filefactory=None,
    tempdir=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert the Markdown draft in ``draft_f`` to HTML, and return the MIME
    tree made up of the plain-text part, the HTML part, and the parts for
    any inline images.

    The plain text, with image references replaced by ``cid:`` URLs, is
    written back to ``draft_f``. The HTML is written next to the draft (or
    to a temporary file in ``tempdir`` if the draft has no path) using
    ``filefactory`` (a new FileFactory if not given), and also to
    ``htmldump_f`` if given. ``css_f``, if given, is read for the CSS to
    apply. The caller's ``extensions`` and ``extension_configs`` are left
    unmodified.

    With ``related_to_html_only``, the images are grouped with the HTML
    part only; otherwise they are grouped with the container of the
    alternatives.
    """
    if filefactory is None:
        filefactory = FileFactory()

    # TODO extension_configs need to be handled differently
    extension_configs = dict(extension_configs or {})
    highlight_config = dict(extension_configs.get("pymdownx.highlight", {}))
    highlight_config["css_class"] = _CODEHILITE_CLASS
    extension_configs["pymdownx.highlight"] = highlight_config

    extensions = list(extensions or [])
    extensions.append(QuoteToAdmonitionExtension())

    draft = draft_f.read()
    origtext, textsig, htmlsig = extract_signature(
        draft, filefactory=filefactory
    )

    # Every conversion gets its own copy of the extension list, so that
    # additions made during one conversion do not leak into the next
    (
        origtext,
        htmltext,
        image_registry,
        mdwn,
    ) = markdown_with_inline_image_support(
        origtext,
        extensions=list(extensions),
        extension_configs=extension_configs,
    )

    if htmlsig:
        if not textsig:
            # TODO: decide what to do if there is no plain-text version
            raise NotImplementedError("HTML signature but no text alternative")

        soup = bs4.BeautifulSoup(htmlsig, "html.parser")
        for img in soup.find_all("img"):
            uri = img.attrs["src"]
            desc = img.attrs.get("title", img.attrs.get("alt"))
            cid = image_registry.register(uri, desc)
            img.attrs["src"] = f"cid:{cid}"

        htmlsig = str(soup)

    elif textsig:
        (
            textsig,
            htmlsig,
            image_registry,
            mdwn,
        ) = markdown_with_inline_image_support(
            textsig,
            extensions=list(extensions),
            extension_configs=extension_configs,
            image_registry=image_registry,
            mdwn=mdwn,
        )

    origtext = make_text_mail(origtext, textsig)
    draft_f.write(origtext)
    textpart = Part(
        "text", "plain", draft_f.path, "Plain-text version", orig=True
    )

    htmltext = make_html_doc(htmltext, htmlsig)
    htmltext = apply_styling(htmltext, css_f.read() if css_f else None)

    if draft_f.path:
        htmlpath = draft_f.path.with_suffix(".html")
    else:
        fd, htmlname = tempfile.mkstemp(suffix=".html", dir=tempdir)
        # mkstemp() returns an open file descriptor, which we do not use
        os.close(fd)
        htmlpath = pathlib.Path(htmlname)
    with filefactory(
        htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
    ) as out_f:
        out_f.write(htmltext)
    htmlpart = Part("text", "html", htmlpath, "HTML version")

    if htmldump_f:
        htmldump_f.write(htmltext)

    imgparts = collect_inline_images(
        image_registry, tempdir=tempdir, filefactory=filefactory
    )

    # NOTE(review): the containers below use the subtype "relative", while
    # the comments speak of multipart/related. Whatever consumes the tree
    # may depend on the exact string — confirm before changing it.
    if related_to_html_only:
        # If there are inline image part, they will be contained within a
        # multipart/related part along with the HTML part only
        if imgparts:
            # replace htmlpart with a multipart/related container of the HTML
            # parts and the images
            htmlpart = Multipart(
                "relative", [htmlpart] + imgparts, "Group of related content"
            )

        return Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )

    else:
        # If there are inline image part, they will be siblings to the
        # multipart/alternative tree within a multipart/related part
        altpart = Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )
        if imgparts:
            return Multipart(
                "relative", [altpart] + imgparts, "Group of related content"
            )
        else:
            return altpart
792
793
class MIMETreeDFWalker:
    """
    Depth-first walker of a MIME tree made up of Multipart and other
    (leaf) nodes, calling a visitor function for every node after its
    children have been visited.
    """

    def __init__(self, *, visitor_fn=None, debug=False):
        """
        ``visitor_fn`` is the default visitor, called as
        ``visitor_fn(node, ancestry, descendents, debugprint=…)``; if not
        given, a visitor that only emits debug output is used. ``debug``
        turns on debug output to stderr.
        """
        self._visitor_fn = visitor_fn or self._echovisit
        self._debug = debug

    def _echovisit(self, node, ancestry, descendents, *, debugprint):
        # Has to accept exactly what _walk() passes to a visitor
        debugprint(f"node={node} ancestry={ancestry}")

    def walk(self, root, *, visitor_fn=None):
        """
        Recursive function to implement a depth-first walk of the MIME-tree
        rooted at `root`.

        A list of more than one node is wrapped in a multipart/mixed
        container first; of a list of one, the only item is the root.
        `visitor_fn` overrides the default visitor for this walk.
        """
        if isinstance(root, list):
            if len(root) > 1:
                root = Multipart("mixed", children=root)
            else:
                root = root[0]

        self._walk(
            root,
            ancestry=[],
            descendents=[],
            visitor_fn=visitor_fn or self._visitor_fn,
        )

    def _walk(self, node, *, ancestry, descendents, visitor_fn):
        # Let's start by enumerating the parts at the current level. At the
        # root level, ancestry will be the empty list, and we expect a
        # multipart/* container at this level. Later, e.g. within a
        # multipart/alternative container, the subtree will just be the
        # alternative parts, while the top of the ancestry will be the
        # multipart/alternative container, which we will process after the
        # following loop.

        lead = f"{'│ '*len(ancestry)}"
        if isinstance(node, Multipart):
            self.debugprint(
                f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
            )

            # Depth-first, so push the current container onto the ancestry
            # stack, then descend …
            ancestry.append(node)
            self.debugprint(lead + "│ " * 2)
            for child in node.children:
                self._walk(
                    child,
                    ancestry=ancestry,
                    descendents=descendents,
                    visitor_fn=visitor_fn,
                )
            # Pop outside of the assert statement, or else the container
            # would stay on the stack when assertions are disabled (-O)
            popped = ancestry.pop()
            assert popped == node
            # Note that this is the same list object, so the visitor gets
            # to see the children added in the next line, too
            sibling_descendents = descendents
            descendents.extend(node.children)

        else:
            self.debugprint(f"{lead}├{node}")
            sibling_descendents = descendents

        if visitor_fn:
            visitor_fn(
                node, ancestry, sibling_descendents, debugprint=self.debugprint
            )

    def debugprint(self, s, **kwargs):
        """
        Print `s` to stderr if debugging is turned on.
        """
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
865
866
867 # [ RUN MODES ] ###############################################################
868
869
class MuttCommands:
    """
    Collector for the Mutt commands to be written out. Mutt runs "push"
    commands in reverse order, which makes mixing them with regular
    commands awkward, hence this helper.

    Three lists are kept: regular commands go to the first list for as long
    as no push has been added, and to the second list from then on. Pushes
    go to a list of their own, newest first.

    flush() prints the first list, then the pushes, then the second list,
    and empties all three.
    """

    def __init__(self, out_f=sys.stdout, *, debug=False):
        self._cmd1 = []
        self._push = []
        self._cmd2 = []
        self._out_f = out_f
        self._debug = debug

    def cmd(self, s):
        """
        Queue the regular command `s`.
        """
        self.debugprint(s)
        target = self._cmd2 if self._push else self._cmd1
        target.append(s)

    def push(self, s):
        """
        Queue a push of `s`, with double quotes escaped, ahead of all
        pushes queued before.
        """
        escaped = s.replace('"', r"\"")
        pushcmd = f'push "{escaped}"'
        self.debugprint(pushcmd)
        self._push.insert(0, pushcmd)

    def flush(self):
        """
        Write all queued commands to the output file, one per line, and
        start over.
        """
        queued = [*self._cmd1, *self._push, *self._cmd2]
        print("\n".join(queued), file=self._out_f)
        self._cmd1 = []
        self._push = []
        self._cmd2 = []

    def debugprint(self, s, **kwargs):
        """
        Print `s` to stderr if debugging is turned on.
        """
        if not self._debug:
            return
        print(s, file=sys.stderr, **kwargs)
912
913
def do_setup(
    *,
    out_f=sys.stdout,
    temppath=None,
    tempdir=None,
    debug_commands=False,
):
    """
    Write the Mutt commands of the setup phase to ``out_f``.

    The commands save $editor and $edit_headers into $my_ variables, make
    this script (in "massage" mode) the editor, store the path of the
    command file in $my_mdwn_postprocess_cmd_file, and finally invoke
    <edit-file> on the first entry. The command file is ``temppath``, or
    else a temporary file created in ``tempdir``.
    """
    if not temppath:
        fd, tempname = tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)
        # mkstemp() returns an open file descriptor, which we do not use
        os.close(fd)
        temppath = pathlib.Path(tempname)

    cmds = MuttCommands(out_f, debug=debug_commands)

    editor = f"{' '.join(sys.argv)} massage --write-commands-to {temppath}"

    cmds.cmd('set my_editor="$editor"')
    cmds.cmd('set my_edit_headers="$edit_headers"')
    cmds.cmd(f'set editor="{editor}"')
    cmds.cmd("unset edit_headers")
    cmds.cmd(f"set my_mdwn_postprocess_cmd_file={temppath}")
    cmds.push("<first-entry><edit-file>")
    cmds.flush()
935
936
def do_massage(
    draft_f,
    cmd_f,
    *,
    extensions=None,
    css_f=None,
    htmldump_f=None,
    converter=convert_markdown_to_html,
    related_to_html_only=True,
    only_build=False,
    max_other_attachments=20,
    tempdir=None,
    debug_commands=False,
    debug_walk=False,
):
    """
    Turn the draft into a MIME tree and write the NeoMutt commands to
    build it.

    Runs `converter` on the draft, walks the resulting tree depth-first,
    and queues — through a MuttCommands instance writing to `cmd_f` — the
    commands and key presses that make NeoMutt assemble that tree.

    Parameters:
      draft_f: the email draft; handed to `converter` as is.
      cmd_f: file object the commands are written to. If it has a `name`
        attribute, a command removing that file is written at the end.
      extensions: comma-separated string, split into a list for
        `converter`; None or empty yields an empty list.
      css_f, htmldump_f, related_to_html_only, tempdir: passed through to
        `converter` unchanged.
      converter: callable returning the tree (of Part/Multipart) to walk.
      only_build: if true, no <send-message> is pushed.
      max_other_attachments: number of <move-up> presses pushed for every
        part that is not in first position.
      debug_commands: passed to MuttCommands as its debug flag.
      debug_walk: passed to MIMETreeDFWalker as its debug flag.

    Raises:
      NotImplementedError: for a Multipart whose subtype is none of
        alternative, relative/related, multilingual.
      RuntimeError: for a tree node that is neither Part nor Multipart.
    """
    # Here's the big picture: we're being invoked as the editor on the email
    # draft, and whatever commands we write to cmd_f (the file given as
    # cmdpath on the command line) will be run by the second source command
    # in the macro definition.

    # Let's start by cleaning up what the setup did (see above), i.e. we
    # restore the $editor and $edit_headers variables, and also unset the
    # variable used to identify the command file we're currently writing
    # to.
    cmds = MuttCommands(cmd_f, debug=debug_commands)
    cmds.cmd('set editor="$my_editor"')
    cmds.cmd('set edit_headers="$my_edit_headers"')
    cmds.cmd("unset my_editor")
    cmds.cmd("unset my_edit_headers")

    # let's flush those commands, as there'll be a lot of pushes from now
    # on, which need to be run in reverse order
    cmds.flush()

    extensions = extensions.split(",") if extensions else []
    tree = converter(
        draft_f,
        css_f=css_f,
        htmldump_f=htmldump_f,
        related_to_html_only=related_to_html_only,
        tempdir=tempdir,
        extensions=extensions,
    )

    mimetree = MIMETreeDFWalker(debug=debug_walk)

    # Bookkeeping shared across visitor calls:
    #   pos:   the (assumed) position of the current item in the NeoMutt
    #          compose list
    #   tags:  for each Multipart, the positions of its direct children
    #   parts: count of parts processed so far
    state = dict(pos=1, tags={}, parts=1)

    def visitor_fn(item, ancestry, descendents, *, debugprint=None):
        """
        Visitor function called for every node (part) of the MIME tree,
        depth-first, and responsible for telling NeoMutt how to assemble
        the tree.
        """
        # Key sequence pushed in front of new field content to clear
        # whatever the input line already holds
        KILL_LINE = r"\Ca\Ck"

        if isinstance(item, Part):
            # We've hit a leaf-node, i.e. an alternative or a related part
            # with actual content.

            # Let's add the part
            if item.orig:
                # The original source already exists in the NeoMutt tree, but
                # the underlying file may have been modified, so we need to
                # update the encoding, but that's it:
                cmds.push("<first-entry>")
                cmds.push("<update-encoding>")

                # We really just need to be able to assume that at this point,
                # NeoMutt is at position 1, and that we've processed only this
                # part so far. Never mind about actual attachments, we can
                # safely ignore those as they stay at the end.
                assert state["pos"] == 1
                assert state["parts"] == 1
            else:
                # … whereas all other parts need to be added, and they're all
                # considered to be temporary and inline:
                cmds.push(f"<attach-file>{item.path}<enter>")
                cmds.push("<toggle-unlink><toggle-disposition>")

                # This added a part at the end of the list of parts, and that's
                # just how many parts we've seen so far, so its position in
                # the NeoMutt compose list is the count of parts
                state["parts"] += 1
                state["pos"] = state["parts"]

            # If the item (including the original) comes with additional
            # information, then we might just as well update the NeoMutt
            # tree now:
            if item.cid:
                cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")

            # Now for the biggest hack in this script, which is to handle
            # attachments, such as PDFs, that aren't related or alternatives.
            # The problem is that when we add an inline image, it always gets
            # appended to the list, i.e. inserted *after* other attachments.
            # Since we don't know the number of attachments, we also cannot
            # infer the position of the new attachment. Therefore, we bubble
            # it all the way to the top, only to then move it down again:
            if state["pos"] > 1:  # skip for the first part
                for i in range(max_other_attachments):
                    # could use any number here, but has to be larger than the
                    # number of possible attachments. The performance
                    # difference of using a high number is negligible.
                    # Bubble up the new part
                    cmds.push("<move-up>")

                # As we push the part to the right position in the list (i.e.
                # the last of the subset of attachments this script added), we
                # must handle the situation that subtrees are skipped by
                # NeoMutt. Hence, the actual number of positions to move down
                # is decremented by the number of descendents so far
                # encountered.
                for i in range(1, state["pos"] - len(descendents)):
                    cmds.push("<move-down>")

        elif isinstance(item, Multipart):
            # This node has children, but we already visited them (see
            # above). The tags dictionary of State should contain a list of
            # their positions in the NeoMutt compose window, so iterate those
            # and tag the parts there:
            n_tags = len(state["tags"][item])
            for tag in state["tags"][item]:
                cmds.push(f"<jump>{tag}<enter><tag-entry>")

            if item.subtype == "alternative":
                cmds.push("<group-alternatives>")
            elif item.subtype in ("relative", "related"):
                cmds.push("<group-related>")
            elif item.subtype == "multilingual":
                cmds.push("<group-multilingual>")
            else:
                raise NotImplementedError(
                    f"Handling of multipart/{item.subtype} is not implemented"
                )

            # The tagged children are now grouped under one new entry, so
            # the position moves back by all but one of them, while the
            # group itself counts as one more part
            state["pos"] -= n_tags - 1
            state["parts"] += 1

        else:
            # We should never get here
            raise RuntimeError(f"Type {type(item)} is unexpected: {item}")

        # If the item has a description, we might just as well add it
        if item.desc:
            cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")

        if ancestry:
            # If there's an ancestry, record the current (assumed) position in
            # the NeoMutt compose window as needed-to-tag by our direct parent
            # (i.e. the last item of the ancestry)
            state["tags"].setdefault(ancestry[-1], []).append(state["pos"])

            lead = "│ " * (len(ancestry) + 1) + "* "
            debugprint(
                f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
                f"{lead}descendents={[d.subtype for d in descendents]}\n"
                f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
                f"{lead}pos={state['pos']}, parts={state['parts']}"
            )

    # -----------------
    # End of visitor_fn

    # Let's walk the tree and visit every node with our fancy visitor
    # function
    mimetree.walk(tree, visitor_fn=visitor_fn)

    if not only_build:
        cmds.push("<send-message>")

    # Finally, cleanup. Since we're responsible for removing the temporary
    # file, how's this for a little hack?
    try:
        filename = cmd_f.name
    except AttributeError:
        # File objects without a name get a placeholder instead
        filename = "pytest_internal_file"
    cmds.cmd(f"source 'rm -f {filename}|'")
    cmds.cmd("unset my_mdwn_postprocess_cmd_file")
    cmds.flush()
1116
1117
1118 # [ CLI ENTRY ] ###############################################################
1119
if __name__ == "__main__":
    args = parse_cli_args()

    if args.mode is None:
        # No mode given: run the setup step
        do_setup(debug_commands=args.debug_commands, tempdir=args.tempdir)

    elif args.mode == "massage":
        # All four files are opened up front and closed again when the
        # massage step is done
        with (
            File(args.MAILDRAFT, "r+") as draft_f,
            File(args.cmdpath, "w") as cmd_f,
            File(args.css_file, "r") as css_f,
            File(args.dump_html, "w") as htmldump_f,
        ):
            massage_options = dict(
                extensions=args.extensions,
                css_f=css_f,
                htmldump_f=htmldump_f,
                related_to_html_only=args.related_to_html_only,
                max_other_attachments=args.max_number_other_attachments,
                only_build=args.only_build,
                tempdir=args.tempdir,
                debug_commands=args.debug_commands,
                debug_walk=args.debug_walk,
            )
            do_massage(draft_f, cmd_f, **massage_options)
1149
1150
1151 # [ TESTS ] ###################################################################
1152
1153 try:
1154     import pytest
1155
1156     class Tests:
1157         @pytest.fixture
1158         def const1(self):
1159             return "Curvature Vest Usher Dividing+T#iceps Senior"
1160
1161         @pytest.fixture
1162         def const2(self):
1163             return "Habitant Celestial 2litzy Resurf/ce Headpiece Harmonics"
1164
1165         @pytest.fixture
1166         def fakepath(self):
1167             return pathlib.Path("/does/not/exist")
1168
1169         @pytest.fixture
1170         def fakepath2(self):
1171             return pathlib.Path("/does/not/exist/either")
1172
1173         # NOTE: tests using the capsys fixture must specify sys.stdout to the
1174         # functions they call, else old stdout is used and not captured
1175
1176         @pytest.mark.muttctrl
1177         def test_MuttCommands_cmd(self, const1, const2, capsys):
1178             "Assert order of commands"
1179             cmds = MuttCommands(out_f=sys.stdout)
1180             cmds.cmd(const1)
1181             cmds.cmd(const2)
1182             cmds.flush()
1183             captured = capsys.readouterr()
1184             assert captured.out == "\n".join((const1, const2, ""))
1185
1186         @pytest.mark.muttctrl
1187         def test_MuttCommands_push(self, const1, const2, capsys):
1188             "Assert reverse order of pushes"
1189             cmds = MuttCommands(out_f=sys.stdout)
1190             cmds.push(const1)
1191             cmds.push(const2)
1192             cmds.flush()
1193             captured = capsys.readouterr()
1194             assert (
1195                 captured.out
1196                 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
1197             )
1198
1199         @pytest.mark.muttctrl
1200         def test_MuttCommands_push_escape(self, const1, const2, capsys):
1201             cmds = MuttCommands(out_f=sys.stdout)
1202             cmds.push(f'"{const1}"')
1203             cmds.flush()
1204             captured = capsys.readouterr()
1205             assert f'"\\"{const1}\\""' in captured.out
1206
1207         @pytest.mark.muttctrl
1208         def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
1209             "Assert reverse order of pushes"
1210             cmds = MuttCommands(out_f=sys.stdout)
1211             lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
1212             for i in range(2):
1213                 cmds.cmd(lines[4 * i + 0])
1214                 cmds.cmd(lines[4 * i + 1])
1215                 cmds.push(lines[4 * i + 2])
1216                 cmds.push(lines[4 * i + 3])
1217             cmds.flush()
1218
1219             captured = capsys.readouterr()
1220             lines_out = captured.out.splitlines()
1221             assert lines[0] in lines_out[0]
1222             assert lines[1] in lines_out[1]
1223             assert lines[7] in lines_out[2]
1224             assert lines[6] in lines_out[3]
1225             assert lines[3] in lines_out[4]
1226             assert lines[2] in lines_out[5]
1227             assert lines[4] in lines_out[6]
1228             assert lines[5] in lines_out[7]
1229
1230         @pytest.fixture
1231         def mime_tree_related_to_alternative(self):
1232             return Multipart(
1233                 "relative",
1234                 children=[
1235                     Multipart(
1236                         "alternative",
1237                         children=[
1238                             Part(
1239                                 "text",
1240                                 "plain",
1241                                 "part.txt",
1242                                 desc="Plain",
1243                                 orig=True,
1244                             ),
1245                             Part("text", "html", "part.html", desc="HTML"),
1246                         ],
1247                         desc="Alternative",
1248                     ),
1249                     Part(
1250                         "text", "png", "logo.png", cid="logo.png", desc="Logo"
1251                     ),
1252                 ],
1253                 desc="Related",
1254             )
1255
1256         @pytest.fixture
1257         def mime_tree_related_to_html(self):
1258             return Multipart(
1259                 "alternative",
1260                 children=[
1261                     Part(
1262                         "text",
1263                         "plain",
1264                         "part.txt",
1265                         desc="Plain",
1266                         orig=True,
1267                     ),
1268                     Multipart(
1269                         "relative",
1270                         children=[
1271                             Part("text", "html", "part.html", desc="HTML"),
1272                             Part(
1273                                 "text",
1274                                 "png",
1275                                 "logo.png",
1276                                 cid="logo.png",
1277                                 desc="Logo",
1278                             ),
1279                         ],
1280                         desc="Related",
1281                     ),
1282                 ],
1283                 desc="Alternative",
1284             )
1285
1286         @pytest.fixture
1287         def mime_tree_nested(self):
1288             return Multipart(
1289                 "relative",
1290                 children=[
1291                     Multipart(
1292                         "alternative",
1293                         children=[
1294                             Part(
1295                                 "text",
1296                                 "plain",
1297                                 "part.txt",
1298                                 desc="Plain",
1299                                 orig=True,
1300                             ),
1301                             Multipart(
1302                                 "alternative",
1303                                 children=[
1304                                     Part(
1305                                         "text",
1306                                         "plain",
1307                                         "part.txt",
1308                                         desc="Nested plain",
1309                                     ),
1310                                     Part(
1311                                         "text",
1312                                         "html",
1313                                         "part.html",
1314                                         desc="Nested HTML",
1315                                     ),
1316                                 ],
1317                                 desc="Nested alternative",
1318                             ),
1319                         ],
1320                         desc="Alternative",
1321                     ),
1322                     Part(
1323                         "text",
1324                         "png",
1325                         "logo.png",
1326                         cid="logo.png",
1327                         desc="Logo",
1328                     ),
1329                 ],
1330                 desc="Related",
1331             )
1332
1333         @pytest.mark.treewalk
1334         def test_MIMETreeDFWalker_depth_first_walk(
1335             self, mime_tree_related_to_alternative
1336         ):
1337             mimetree = MIMETreeDFWalker()
1338
1339             items = []
1340
1341             def visitor_fn(item, ancestry, descendents, debugprint):
1342                 items.append((item, len(ancestry), len(descendents)))
1343
1344             mimetree.walk(
1345                 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1346             )
1347             assert len(items) == 5
1348             assert items[0][0].subtype == "plain"
1349             assert items[0][1] == 2
1350             assert items[0][2] == 0
1351             assert items[1][0].subtype == "html"
1352             assert items[1][1] == 2
1353             assert items[1][2] == 0
1354             assert items[2][0].subtype == "alternative"
1355             assert items[2][1] == 1
1356             assert items[2][2] == 2
1357             assert items[3][0].subtype == "png"
1358             assert items[3][1] == 1
1359             assert items[3][2] == 2
1360             assert items[4][0].subtype == "relative"
1361             assert items[4][1] == 0
1362             assert items[4][2] == 4
1363
1364         @pytest.mark.treewalk
1365         def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1366             mimetree = MIMETreeDFWalker()
1367             items = []
1368
1369             def visitor_fn(item, ancestry, descendents, debugprint):
1370                 items.append(item)
1371
1372             p = Part("text", "plain", const1)
1373             mimetree.walk([p], visitor_fn=visitor_fn)
1374             assert items[-1].subtype == "plain"
1375             mimetree.walk([p, p], visitor_fn=visitor_fn)
1376             assert items[-1].subtype == "mixed"
1377
1378         @pytest.mark.treewalk
1379         def test_MIMETreeDFWalker_visitor_in_constructor(
1380             self, mime_tree_related_to_alternative
1381         ):
1382             items = []
1383
1384             def visitor_fn(item, ancestry, descendents, debugprint):
1385                 items.append(item)
1386
1387             mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1388             mimetree.walk(mime_tree_related_to_alternative)
1389             assert len(items) == 5
1390
1391         @pytest.fixture
1392         def string_io(self, const1, text=None):
1393             return StringIO(text or const1)
1394
1395         @pytest.mark.massage
1396         def test_do_massage_basic(self):
1397             def converter(draft_f, **kwargs):
1398                 return Part("text", "plain", draft_f.path, orig=True)
1399
1400             with File() as draft_f, File() as cmd_f:
1401                 do_massage(
1402                     draft_f=draft_f,
1403                     cmd_f=cmd_f,
1404                     converter=converter,
1405                 )
1406                 lines = cmd_f.read().splitlines()
1407
1408             assert '="$my_editor"' in lines.pop(0)
1409             assert '="$my_edit_headers"' in lines.pop(0)
1410             assert "unset my_editor" == lines.pop(0)
1411             assert "unset my_edit_headers" == lines.pop(0)
1412             assert "send-message" in lines.pop(0)
1413             assert "update-encoding" in lines.pop(0)
1414             assert "first-entry" in lines.pop(0)
1415             assert "source 'rm -f " in lines.pop(0)
1416             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1417
1418         @pytest.mark.massage
1419         def test_do_massage_fulltree(self, mime_tree_related_to_alternative):
1420             def converter(draft_f, **kwargs):
1421                 return mime_tree_related_to_alternative
1422
1423             max_attachments = 5
1424
1425             with File() as draft_f, File() as cmd_f:
1426                 do_massage(
1427                     draft_f=draft_f,
1428                     cmd_f=cmd_f,
1429                     max_other_attachments=max_attachments,
1430                     converter=converter,
1431                 )
1432                 lines = cmd_f.read().splitlines()[4:-2]
1433
1434             assert "first-entry" in lines.pop()
1435             assert "update-encoding" in lines.pop()
1436             assert "Plain" in lines.pop()
1437             assert "part.html" in lines.pop()
1438             assert "toggle-unlink" in lines.pop()
1439             for i in range(max_attachments):
1440                 assert "move-up" in lines.pop()
1441             assert "move-down" in lines.pop()
1442             assert "HTML" in lines.pop()
1443             assert "jump>1" in lines.pop()
1444             assert "jump>2" in lines.pop()
1445             assert "group-alternatives" in lines.pop()
1446             assert "Alternative" in lines.pop()
1447             assert "logo.png" in lines.pop()
1448             assert "toggle-unlink" in lines.pop()
1449             assert "content-id" in lines.pop()
1450             for i in range(max_attachments):
1451                 assert "move-up" in lines.pop()
1452             assert "move-down" in lines.pop()
1453             assert "Logo" in lines.pop()
1454             assert "jump>1" in lines.pop()
1455             assert "jump>4" in lines.pop()
1456             assert "group-related" in lines.pop()
1457             assert "Related" in lines.pop()
1458             assert "send-message" in lines.pop()
1459             assert len(lines) == 0
1460
1461         @pytest.mark.massage
1462         def test_mime_tree_relative_within_alternative(
1463             self, mime_tree_related_to_html
1464         ):
1465             def converter(draft_f, **kwargs):
1466                 return mime_tree_related_to_html
1467
1468             with File() as draft_f, File() as cmd_f:
1469                 do_massage(
1470                     draft_f=draft_f,
1471                     cmd_f=cmd_f,
1472                     converter=converter,
1473                 )
1474                 lines = cmd_f.read().splitlines()[4:-2]
1475
1476             assert "first-entry" in lines.pop()
1477             assert "update-encoding" in lines.pop()
1478             assert "Plain" in lines.pop()
1479             assert "part.html" in lines.pop()
1480             assert "toggle-unlink" in lines.pop()
1481             assert "move-up" in lines.pop()
1482             while True:
1483                 top = lines.pop()
1484                 if "move-up" not in top:
1485                     break
1486             assert "move-down" in top
1487             assert "HTML" in lines.pop()
1488             assert "logo.png" in lines.pop()
1489             assert "toggle-unlink" in lines.pop()
1490             assert "content-id" in lines.pop()
1491             assert "move-up" in lines.pop()
1492             while True:
1493                 top = lines.pop()
1494                 if "move-up" not in top:
1495                     break
1496             assert "move-down" in top
1497             assert "move-down" in lines.pop()
1498             assert "Logo" in lines.pop()
1499             assert "jump>2" in lines.pop()
1500             assert "jump>3" in lines.pop()
1501             assert "group-related" in lines.pop()
1502             assert "Related" in lines.pop()
1503             assert "jump>1" in lines.pop()
1504             assert "jump>2" in lines.pop()
1505             assert "group-alternative" in lines.pop()
1506             assert "Alternative" in lines.pop()
1507             assert "send-message" in lines.pop()
1508             assert len(lines) == 0
1509
1510         @pytest.mark.massage
1511         def test_mime_tree_nested_trees_does_not_break_positioning(
1512             self, mime_tree_nested
1513         ):
1514             def converter(draft_f, **kwargs):
1515                 return mime_tree_nested
1516
1517             with File() as draft_f, File() as cmd_f:
1518                 do_massage(
1519                     draft_f=draft_f,
1520                     cmd_f=cmd_f,
1521                     converter=converter,
1522                 )
1523                 lines = cmd_f.read().splitlines()
1524
1525             while "logo.png" not in lines.pop():
1526                 pass
1527             lines.pop()
1528             assert "content-id" in lines.pop()
1529             assert "move-up" in lines.pop()
1530             while True:
1531                 top = lines.pop()
1532                 if "move-up" not in top:
1533                     break
1534             assert "move-down" in top
1535             # Due to the nested trees, the number of descendents of the sibling
1536             # actually needs to be considered, not just the nieces. So to move
1537             # from position 1 to position 6, it only needs one <move-down>
1538             # because that jumps over the entire sibling tree. Thus what
1539             # follows next must not be another <move-down>
1540             assert "Logo" in lines.pop()
1541
1542         @pytest.mark.converter
1543         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
1544             draft_f = fakefilefactory(fakepath, content=const1)
1545             tree = convert_markdown_to_html(
1546                 draft_f, filefactory=fakefilefactory
1547             )
1548
1549             assert tree.subtype == "alternative"
1550             assert len(tree.children) == 2
1551             assert tree.children[0].subtype == "plain"
1552             assert tree.children[0].path == draft_f.path
1553             assert tree.children[0].orig
1554             assert tree.children[1].subtype == "html"
1555             assert tree.children[1].path == fakepath.with_suffix(".html")
1556
1557         @pytest.mark.converter
1558         def test_converter_writes(
1559             self, fakepath, fakefilefactory, const1, monkeypatch
1560         ):
1561             draft_f = fakefilefactory(fakepath, content=const1)
1562             convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1563
1564             html = fakefilefactory.pop()
1565             assert fakepath.with_suffix(".html") == html[0]
1566             assert const1 in html[1].read()
1567             text = fakefilefactory.pop()
1568             assert fakepath == text[0]
1569             assert const1 == text[1].read()
1570
1571         @pytest.mark.imgproc
1572         def test_markdown_inline_image_processor(self):
1573             imgpath1 = "file:/path/to/image.png"
1574             imgpath2 = "file:///path/to/image.png?url=params"
1575             imgpath3 = "/path/to/image.png"
1576             text = f"""![inline local image]({imgpath1})
1577                        ![image inlined
1578                          with newline]({imgpath2})
1579                        ![image local path]({imgpath3})"""
1580             text, html, images, mdwn = markdown_with_inline_image_support(text)
1581
1582             # local paths have been normalised to URLs:
1583             imgpath3 = f"file://{imgpath3}"
1584
1585             assert 'src="cid:' in html
1586             assert "](cid:" in text
1587             assert len(images) == 3
1588             assert imgpath1 in images
1589             assert imgpath2 in images
1590             assert imgpath3 in images
1591             assert images[imgpath1].cid != images[imgpath2].cid
1592             assert images[imgpath1].cid != images[imgpath3].cid
1593             assert images[imgpath2].cid != images[imgpath3].cid
1594
1595         @pytest.mark.imgproc
1596         def test_markdown_inline_image_processor_title_to_desc(self, const1):
1597             imgpath = "file:///path/to/image.png"
1598             text = f'![inline local image]({imgpath} "{const1}")'
1599             text, html, images, mdwn = markdown_with_inline_image_support(text)
1600             assert images[imgpath].desc == const1
1601
1602         @pytest.mark.imgproc
1603         def test_markdown_inline_image_processor_alt_to_desc(self, const1):
1604             imgpath = "file:///path/to/image.png"
1605             text = f"![{const1}]({imgpath})"
1606             text, html, images, mdwn = markdown_with_inline_image_support(text)
1607             assert images[imgpath].desc == const1
1608
1609         @pytest.mark.imgproc
1610         def test_markdown_inline_image_processor_title_over_alt_desc(
1611             self, const1, const2
1612         ):
1613             imgpath = "file:///path/to/image.png"
1614             text = f'![{const1}]({imgpath} "{const2}")'
1615             text, html, images, mdwn = markdown_with_inline_image_support(text)
1616             assert images[imgpath].desc == const2
1617
1618         @pytest.mark.imgproc
1619         def test_markdown_inline_image_not_external(self):
1620             imgpath = "https://path/to/image.png"
1621             text = f"![inline image]({imgpath})"
1622             text, html, images, mdwn = markdown_with_inline_image_support(text)
1623
1624             assert 'src="cid:' not in html
1625             assert "](cid:" not in text
1626             assert len(images) == 0
1627
1628         @pytest.mark.imgproc
1629         def test_markdown_inline_image_local_file(self):
1630             imgpath = "/path/to/image.png"
1631             text = f"![inline image]({imgpath})"
1632             text, html, images, mdwn = markdown_with_inline_image_support(text)
1633
1634             for k, v in images.items():
1635                 assert k == f"file://{imgpath}"
1636                 break
1637
1638         @pytest.mark.imgproc
1639         def test_markdown_inline_image_expanduser(self):
1640             imgpath = pathlib.Path("~/image.png")
1641             text = f"![inline image]({imgpath})"
1642             text, html, images, mdwn = markdown_with_inline_image_support(text)
1643
1644             for k, v in images.items():
1645                 assert k == f"file://{imgpath.expanduser()}"
1646                 break
1647
1648         @pytest.fixture
1649         def test_png(self):
1650             return (
1651                 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAE"
1652                 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1653             )
1654
1655         @pytest.mark.imgproc
1656         def test_markdown_inline_image_processor_base64(self, test_png):
1657             text = f"![1px white inlined]({test_png})"
1658             text, html, images, mdwn = markdown_with_inline_image_support(text)
1659
1660             assert 'src="cid:' in html
1661             assert "](cid:" in text
1662             assert len(images) == 1
1663             assert test_png in images
1664
1665         @pytest.mark.converter
1666         def test_converter_tree_inline_image_base64(
1667             self, test_png, fakefilefactory
1668         ):
1669             text = f"![inline base64 image]({test_png})"
1670             with fakefilefactory(content=text) as draft_f:
1671                 tree = convert_markdown_to_html(
1672                     draft_f,
1673                     filefactory=fakefilefactory,
1674                     related_to_html_only=False,
1675                 )
1676             assert tree.subtype == "relative"
1677             assert tree.children[0].subtype == "alternative"
1678             assert tree.children[1].subtype == "png"
1679             written = fakefilefactory.pop()
1680             assert tree.children[1].path == written[0]
1681             assert b"PNG" in written[1].read()
1682
1683         @pytest.mark.converter
1684         def test_converter_tree_inline_image_base64_related_to_html(
1685             self, test_png, fakefilefactory
1686         ):
1687             text = f"![inline base64 image]({test_png})"
1688             with fakefilefactory(content=text) as draft_f:
1689                 tree = convert_markdown_to_html(
1690                     draft_f,
1691                     filefactory=fakefilefactory,
1692                     related_to_html_only=True,
1693                 )
1694             assert tree.subtype == "alternative"
1695             assert tree.children[1].subtype == "relative"
1696             assert tree.children[1].children[1].subtype == "png"
1697             written = fakefilefactory.pop()
1698             assert tree.children[1].children[1].path == written[0]
1699             assert b"PNG" in written[1].read()
1700
1701         @pytest.mark.converter
1702         def test_converter_tree_inline_image_cid(
1703             self, const1, fakefilefactory
1704         ):
1705             text = f"![inline base64 image](cid:{const1})"
1706             with fakefilefactory(content=text) as draft_f:
1707                 tree = convert_markdown_to_html(
1708                     draft_f,
1709                     filefactory=fakefilefactory,
1710                     related_to_html_only=False,
1711                 )
1712             assert len(tree.children) == 2
1713             assert tree.children[0].cid != const1
1714             assert tree.children[0].type != "image"
1715             assert tree.children[1].cid != const1
1716             assert tree.children[1].type != "image"
1717
1718         @pytest.fixture
1719         def fakefilefactory(self):
1720             return FakeFileFactory()
1721
1722         @pytest.mark.imgcoll
1723         def test_inline_image_collection(
1724             self, test_png, const1, const2, fakefilefactory
1725         ):
1726             test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
1727             relparts = collect_inline_images(
1728                 test_images, filefactory=fakefilefactory
1729             )
1730
1731             written = fakefilefactory.pop()
1732             assert b"PNG" in written[1].read()
1733
1734             assert relparts[0].subtype == "png"
1735             assert relparts[0].path == written[0]
1736             assert relparts[0].cid == const1
1737             assert const2 in relparts[0].desc
1738
1739         if _PYNLINER:
1740
1741             @pytest.mark.styling
1742             def test_apply_stylesheet(self):
1743                 html = "<p>Hello, world!</p>"
1744                 css = "p { color:red }"
1745                 out = apply_styling(html, css)
1746                 assert 'p style="color' in out
1747
1748             @pytest.mark.styling
1749             def test_apply_no_stylesheet(self, const1):
1750                 out = apply_styling(const1, None)
1751
1752             @pytest.mark.massage
1753             @pytest.mark.styling
1754             def test_massage_styling_to_converter(self):
1755                 css = "p { color:red }"
1756                 css_f = File(content=css)
1757                 css_applied = []
1758
1759                 def converter(draft_f, css_f, **kwargs):
1760                     css = css_f.read()
1761                     css_applied.append(css)
1762                     return Part("text", "plain", draft_f.path, orig=True)
1763
1764                 do_massage(
1765                     draft_f=File(),
1766                     cmd_f=File(),
1767                     css_f=css_f,
1768                     converter=converter,
1769                 )
1770                 assert css_applied[0] == css
1771
1772             @pytest.mark.converter
1773             @pytest.mark.styling
1774             def test_converter_apply_styles(
1775                 self, const1, monkeypatch, fakepath, fakefilefactory
1776             ):
1777                 css = "p { color:red }"
1778                 with (
1779                     monkeypatch.context() as m,
1780                     fakefilefactory(fakepath, content=const1) as draft_f,
1781                     fakefilefactory(content=css) as css_f,
1782                 ):
1783                     m.setattr(
1784                         markdown.Markdown,
1785                         "convert",
1786                         lambda s, t: f"<p>{t}</p>",
1787                     )
1788                     convert_markdown_to_html(
1789                         draft_f, css_f=css_f, filefactory=fakefilefactory
1790                     )
1791                 assert re.search(
1792                     r"color:.*red",
1793                     fakefilefactory[fakepath.with_suffix(".html")].read(),
1794                 )
1795
1796         if _PYGMENTS_CSS:
1797
1798             @pytest.mark.styling
1799             def test_apply_stylesheet_pygments(self):
1800                 html = (
1801                     f'<div class="{_CODEHILITE_CLASS}">'
1802                     "<pre>def foo():\n    return</pre></div>"
1803                 )
1804                 out = apply_styling(html, _PYGMENTS_CSS)
1805                 assert f'{_CODEHILITE_CLASS}" style="' in out
1806
1807         @pytest.mark.sig
1808         def test_signature_extraction_no_signature(self, const1):
1809             assert (const1, None, None) == extract_signature(const1)
1810
1811         @pytest.mark.sig
1812         def test_signature_extraction_just_text(self, const1, const2):
1813             origtext, textsig, htmlsig = extract_signature(
1814                 f"{const1}{EMAIL_SIG_SEP}{const2}"
1815             )
1816             assert origtext == const1
1817             assert textsig == const2
1818             assert htmlsig is None
1819
1820         @pytest.mark.sig
1821         def test_signature_extraction_html(
1822             self, fakepath, fakefilefactory, const1, const2
1823         ):
1824             sigconst = "HTML signature from {path} but as a string"
1825             sig = f'<div id="signature">{sigconst.format(path=fakepath)}</div>'
1826
1827             sig_f = fakefilefactory(fakepath, content=sig)
1828
1829             origtext, textsig, htmlsig = extract_signature(
1830                 f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {fakepath}\n{const2}",
1831                 filefactory=fakefilefactory,
1832             )
1833             assert origtext == const1
1834             assert textsig == const2
1835             assert htmlsig == sigconst.format(path=fakepath)
1836
1837         @pytest.mark.sig
1838         def test_signature_extraction_file_not_found(self, const1):
1839             path = pathlib.Path("/does/not/exist")
1840             with pytest.raises(FileNotFoundError):
1841                 origtext, textsig, htmlsig = extract_signature(
1842                     f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
1843                 )
1844
1845         @pytest.mark.imgproc
1846         def test_image_registry(self, const1):
1847             reg = ImageRegistry()
1848             cid = reg.register(const1)
1849             assert "@" in cid
1850             assert not cid.startswith("<")
1851             assert not cid.endswith(">")
1852             assert const1 in reg
1853
1854         @pytest.mark.imgproc
1855         def test_image_registry_file_uri(self, const1):
1856             reg = ImageRegistry()
1857             reg.register("/some/path")
1858             for path in reg:
1859                 assert path.startswith("file://")
1860                 break
1861
1862         @pytest.mark.converter
1863         @pytest.mark.sig
1864         def test_converter_signature_handling(
1865             self, fakepath, fakefilefactory, monkeypatch
1866         ):
1867             mailparts = (
1868                 "This is the mail body\n",
1869                 f"{EMAIL_SIG_SEP}",
1870                 "This is a plain-text signature only",
1871             )
1872
1873             with (
1874                 fakefilefactory(
1875                     fakepath, content="".join(mailparts)
1876                 ) as draft_f,
1877                 monkeypatch.context() as m,
1878             ):
1879                 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1880                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1881
1882             soup = bs4.BeautifulSoup(
1883                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1884                 "html.parser",
1885             )
1886             body = soup.body.contents
1887
1888             assert mailparts[0] in body.pop(0)
1889
1890             sig = soup.select_one("#signature")
1891             assert sig == body.pop(0)
1892
1893             sep = sig.select_one("span.sig_separator")
1894             assert sep == sig.contents[0]
1895             assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1896
1897             assert mailparts[2] in sig.contents[1]
1898
1899         @pytest.mark.converter
1900         @pytest.mark.sig
1901         def test_converter_signature_handling_htmlsig(
1902             self, fakepath, fakepath2, fakefilefactory, monkeypatch
1903         ):
1904             mailparts = (
1905                 "This is the mail body",
1906                 f"{EMAIL_SIG_SEP}",
1907                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1908                 "This is the plain-text version",
1909             )
1910             htmlsig = "HTML Signature from {path} but as a string"
1911             html = (
1912                 f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
1913             )
1914
1915             sig_f = fakefilefactory(fakepath2, content=html)
1916
1917             def mdwn_fn(t):
1918                 return t.upper()
1919
1920             with (
1921                 fakefilefactory(
1922                     fakepath, content="".join(mailparts)
1923                 ) as draft_f,
1924                 monkeypatch.context() as m,
1925             ):
1926                 m.setattr(
1927                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1928                 )
1929                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1930
1931             soup = bs4.BeautifulSoup(
1932                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1933                 "html.parser",
1934             )
1935             sig = soup.select_one("#signature")
1936             sig.span.extract()
1937
1938             assert HTML_SIG_MARKER not in sig.text
1939             assert htmlsig.format(path=fakepath2) == sig.text.strip()
1940
1941             plaintext = fakefilefactory[fakepath].read()
1942             assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
1943
1944         @pytest.mark.converter
1945         @pytest.mark.sig
1946         def test_converter_signature_handling_htmlsig_with_image(
1947             self, fakepath, fakepath2, fakefilefactory, monkeypatch, test_png
1948         ):
1949             mailparts = (
1950                 "This is the mail body",
1951                 f"{EMAIL_SIG_SEP}",
1952                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1953                 "This is the plain-text version",
1954             )
1955             htmlsig = (
1956                 "HTML Signature from {path} with image\n"
1957                 f'<img src="{test_png}">\n'
1958             )
1959             html = (
1960                 f'<div id="signature">{htmlsig.format(path=fakepath2)}</div>'
1961             )
1962
1963             sig_f = fakefilefactory(fakepath2, content=html)
1964
1965             def mdwn_fn(t):
1966                 return t.upper()
1967
1968             with (
1969                 fakefilefactory(
1970                     fakepath, content="".join(mailparts)
1971                 ) as draft_f,
1972                 monkeypatch.context() as m,
1973             ):
1974                 m.setattr(
1975                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1976                 )
1977                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1978
1979             assert fakefilefactory.pop()[0].suffix == ".png"
1980
1981             soup = bs4.BeautifulSoup(
1982                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1983                 "html.parser",
1984             )
1985             assert soup.img.attrs["src"].startswith("cid:")
1986
1987         @pytest.mark.converter
1988         @pytest.mark.sig
1989         def test_converter_signature_handling_textsig_with_image(
1990             self, fakepath, fakefilefactory, test_png
1991         ):
1992             mailparts = (
1993                 "This is the mail body",
1994                 f"{EMAIL_SIG_SEP}",
1995                 "This is the plain-text version with image\n",
1996                 f"![Inline]({test_png})",
1997             )
1998             with (
1999                 fakefilefactory(
2000                     fakepath, content="".join(mailparts)
2001                 ) as draft_f,
2002             ):
2003                 tree = convert_markdown_to_html(
2004                     draft_f, filefactory=fakefilefactory
2005                 )
2006
2007             assert tree.subtype == "relative"
2008             assert tree.children[0].subtype == "alternative"
2009             assert tree.children[1].subtype == "png"
2010             written = fakefilefactory.pop()
2011             assert tree.children[1].path == written[0]
2012             assert written[1].read() == request.urlopen(test_png).read()
2013
2014         @pytest.mark.converter
2015         def test_converter_attribution_to_admonition(
2016             self, fakepath, fakefilefactory
2017         ):
2018             mailparts = (
2019                 "Regarding whatever",
2020                 "> blockquote line1",
2021                 "> blockquote line2",
2022                 "> ",
2023                 "> new para with **bold** text",
2024             )
2025             with fakefilefactory(
2026                 fakepath, content="\n".join(mailparts)
2027             ) as draft_f:
2028                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2029
2030             soup = bs4.BeautifulSoup(
2031                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2032                 "html.parser",
2033             )
2034             quote = soup.select_one("div.admonition.quote")
2035             assert quote
2036             assert (
2037                 soup.select_one("p.admonition-title").extract().text.strip()
2038                 == mailparts[0]
2039             )
2040
2041             p = quote.p.extract()
2042             assert p.text.strip() == "\n".join(p[2:] for p in mailparts[1:3])
2043
2044             p = quote.p.extract()
2045             assert p.contents[1].name == "strong"
2046
2047         @pytest.mark.converter
2048         def test_converter_attribution_to_admonition_multiple(
2049             self, fakepath, fakefilefactory
2050         ):
2051             mailparts = (
2052                 "Regarding whatever",
2053                 "> blockquote line1",
2054                 "> blockquote line2",
2055                 "",
2056                 "Normal text",
2057                 "",
2058                 "> continued emailquote",
2059                 "",
2060                 "Another email-quote",
2061                 "> something",
2062             )
2063             with fakefilefactory(
2064                 fakepath, content="\n".join(mailparts)
2065             ) as draft_f:
2066                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2067
2068             soup = bs4.BeautifulSoup(
2069                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2070                 "html.parser",
2071             )
2072             quote = soup.select_one("div.admonition.quote.continued").extract()
2073             assert quote
2074             assert (
2075                 quote.select_one("p.admonition-title").extract().text.strip()
2076                 == mailparts[0]
2077             )
2078
2079             p = quote.p.extract()
2080             assert p
2081
2082             quote = soup.select_one("div.admonition.quote.continued").extract()
2083             assert quote
2084             assert (
2085                 quote.select_one("p.admonition-title").extract().text.strip()
2086                 == mailparts[-2]
2087             )
2088
2089         @pytest.mark.fileio
2090         def test_file_class_contextmanager(self, const1, monkeypatch):
2091             state = dict(o=False, c=False)
2092
2093             def fn(t):
2094                 state[t] = True
2095
2096             with monkeypatch.context() as m:
2097                 m.setattr(File, "open", lambda s: fn("o"))
2098                 m.setattr(File, "close", lambda s: fn("c"))
2099                 with File() as f:
2100                     assert state["o"]
2101                     assert not state["c"]
2102             assert state["c"]
2103
2104         @pytest.mark.fileio
2105         def test_file_class_no_path(self, const1):
2106             with File(mode="w+") as f:
2107                 f.write(const1, cache=False)
2108                 assert f.read(cache=False) == const1
2109
2110         @pytest.mark.fileio
2111         def test_file_class_path(self, const1, tmp_path):
2112             with File(tmp_path / "file", mode="w+") as f:
2113                 f.write(const1, cache=False)
2114                 assert f.read(cache=False) == const1
2115
2116         @pytest.mark.fileio
2117         def test_file_class_cache(self, tmp_path, const1, const2):
2118             path = tmp_path / "file"
2119             file = File(path, mode="w+")
2120             with file as f:
2121                 f.write(const1, cache=True)
2122             with open(path, mode="w") as f:
2123                 f.write(const2)
2124             with file as f:
2125                 assert f.read(cache=True) == const1
2126
2127         @pytest.mark.fileio
2128         def test_file_class_cache_init(self, const1):
2129             file = File(path=None, mode="r", content=const1)
2130             with file as f:
2131                 assert f.read() == const1
2132
2133         @pytest.mark.fileio
2134         def test_file_class_content_or_path(self, fakepath, const1):
2135             with pytest.raises(RuntimeError):
2136                 file = File(path=fakepath, content=const1)
2137
2138         @pytest.mark.fileio
2139         def test_file_class_content_needs_read(self, const1):
2140             with pytest.raises(RuntimeError):
2141                 file = File(mode="w", content=const1)
2142
2143         @pytest.mark.fileio
2144         def test_file_class_write_persists_close(self, const1):
2145             f = File(mode="w+")
2146             with f:
2147                 f.write(const1)
2148             with f:
2149                 assert f.read() == const1
2150
2151         @pytest.mark.fileio
2152         def test_file_class_write_resets_read_cache(self, const1, const2):
2153             with File(mode="w+", content=const1) as f:
2154                 assert f.read() == const1
2155                 f.write(const2)
2156                 assert f.read() == const2
2157
2158         @pytest.mark.fileio
2159         def test_file_factory(self):
2160             fact = FileFactory()
2161             f = fact()
2162             assert isinstance(f, File)
2163             assert len(fact) == 1
2164             assert f in fact
2165             assert f == fact[0]
2166
2167         @pytest.mark.fileio
2168         def test_fake_file_factory(self, fakepath, fakefilefactory):
2169             fact = FakeFileFactory()
2170             f = fakefilefactory(fakepath)
2171             assert f.path == fakepath
2172             assert f == fakefilefactory[fakepath]
2173
2174         @pytest.mark.fileio
2175         def test_fake_file_factory_path_persistence(
2176             self, fakepath, fakefilefactory
2177         ):
2178             f1 = fakefilefactory(fakepath)
2179             assert f1 == fakefilefactory(fakepath)
2180
2181 except ImportError:
2182     pass