git.madduck.net Git - etc/neomutt.git/blob - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

buildmimetree.py: remove quickndirty file io hack
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
1 #!/usr/bin/python3
2 #
3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
4 # alternative conversion, and handling of inline images, using NeoMutt's
5 # ability to manually craft MIME trees, but automating this process.
6 #
7 # Configuration:
8 #   neomuttrc (needs to be a single line):
9 #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
10 #     macro compose B "\
11 #       <enter-command> source '$my_confdir/buildmimetree.py \
12 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
13 #       --css-file $my_confdir/htmlmail.css |'<enter>\
14 #       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
15 #     " "Convert message into a modern MIME tree with inline images"
16 #
17 #     (Yes, we need to call source twice, as mutt only starts to process output
18 #     from a source command when the command exits, and since we need to react
19 #     to the output, we need to be invoked again, using a $my_ variable to pass
20 #     information)
21 #
22 # Requirements:
23 #   - python3
24 #   - python3-markdown
25 #   - python3-beautifulsoup4
26 # Optional:
27 #   - pytest
28 #   - Pynliner, provides --css-file and thus inline styling of HTML output
29 #   - Pygments, then syntax highlighting for fenced code is enabled
30 #
31 # Latest version:
32 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
33 #
34 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
35 # Released under the GPL-2+ licence, just like Mutt itself.
36 #
37
38 import sys
39 import os.path
40 import pathlib
41 import markdown
42 import tempfile
43 import argparse
44 import re
45 import mimetypes
46 import bs4
47 import xml.etree.ElementTree as etree
48 import io
49 import enum
50 from collections import namedtuple, OrderedDict
51 from markdown.extensions import Extension
52 from markdown.blockprocessors import BlockProcessor
53 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
54 from email.utils import make_msgid
55 from urllib import request
56
57
def parse_cli_args(*args, **kwargs):
    """
    Build the command-line parser and parse the arguments.

    All positional and keyword arguments are handed through unchanged to
    `ArgumentParser.parse_args()`; the resulting namespace is returned.

    The `--css-file` option is only offered when `_PYNLINER` is true;
    otherwise `css_file` is always `None` in the namespace.
    """
    parser = argparse.ArgumentParser(
        description=(
            "NeoMutt helper to turn text/markdown email parts "
            "into full-fledged MIME trees"
        )
    )
    # NOTE(review): the header comment of this file names GPL-2+ as the
    # licence, whereas this epilog says MIT — confirm which one is intended.
    parser.epilog = (
        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
        "Released under the MIT licence"
    )

    parser.add_argument(
        "--extensions",
        metavar="EXT[,EXT[,EXT]]",
        type=str,
        default="",
        help="Markdown extension to use (comma-separated list)",
    )

    if _PYNLINER:
        parser.add_argument(
            "--css-file",
            metavar="FILE",
            type=pathlib.Path,
            default=os.devnull,
            help="CSS file to merge with the final HTML",
        )
    else:
        parser.set_defaults(css_file=None)

    parser.add_argument(
        "--related-to-html-only",
        action="store_true",
        help="Make related content be sibling to HTML parts only",
    )

    def positive_integer(value):
        """Convert `value` to an int, rejecting anything that is not > 0."""
        try:
            number = int(value)
        except ValueError:
            number = 0

        if number <= 0:
            # ArgumentTypeError (unlike a plain ValueError) makes argparse
            # show this message to the user instead of a generic one.
            raise argparse.ArgumentTypeError("Must be a positive integer")

        return number

    parser.add_argument(
        "--max-number-other-attachments",
        metavar="INTEGER",
        type=positive_integer,
        default=20,
        help="Maximum number of other attachments to expect",
    )

    parser.add_argument(
        "--only-build",
        "--just-build",
        action="store_true",
        help="Only build, don't send the message",
    )

    parser.add_argument(
        "--tempdir",
        metavar="DIR",
        type=pathlib.Path,
        help="Specify temporary directory to use for attachments",
    )

    parser.add_argument(
        "--debug-commands",
        action="store_true",
        help="Turn on debug logging of commands generated to stderr",
    )

    parser.add_argument(
        "--debug-walk",
        action="store_true",
        help="Turn on debugging to stderr of the MIME tree walk",
    )

    parser.add_argument(
        "--dump-html",
        metavar="FILE",
        type=pathlib.Path,
        help="Write the generated HTML to the file",
    )

    # The "massage" sub-command is what the script runs as when invoked as
    # the editor on the draft (see the help strings below).
    subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
    massage_p = subp.add_parser(
        "massage", help="Massaging phase (internal use)"
    )

    massage_p.add_argument(
        "--write-commands-to",
        "-o",
        metavar="FILE",
        dest="cmdpath",
        type=pathlib.Path,
        required=True,
        help="Temporary file path to write commands to",
    )

    massage_p.add_argument(
        "MAILDRAFT",
        nargs="?",
        type=pathlib.Path,
        help="If provided, the script is invoked as editor on the mail draft",
    )

    return parser.parse_args(*args, **kwargs)
169
170
171 # [ FILE I/O HANDLING ] #######################################################
172
173
class File:
    """
    Wrapper around a real file (when `path` is given) or an in-memory
    buffer (when it is not), which additionally keeps a cache of what has
    been read and written.

    The object is a context manager: entering opens the underlying file,
    leaving closes it.
    """

    class Op(enum.Enum):
        # Kinds of operation; used as cache keys and to remember what the
        # last operation on the underlying file was.
        R = enum.auto()
        W = enum.auto()

    def __init__(self, path=None, mode="r", content=None, **kwargs):
        """
        `path` and `content` are mutually exclusive; `content` pre-seeds the
        read cache and requires a mode containing "r" or "+". Remaining
        keyword arguments are passed to `open()` for real files.

        Raises RuntimeError on an invalid combination of arguments.
        """
        if path:
            if content:
                raise RuntimeError("Cannot specify path and content for File")

            self._path = (
                path if isinstance(path, pathlib.Path) else pathlib.Path(path)
            )
        else:
            self._path = None

        if content and not re.search(r"[r+]", mode):
            raise RuntimeError("Cannot specify content without read mode")

        self._cache = {
            File.Op.R: [content] if content else [],
            File.Op.W: []
        }
        self._lastop = None
        self._mode = mode
        self._kwargs = kwargs
        self._file = None

    def open(self):
        """Open the real file, or create a bytes/str buffer if pathless."""
        if self._path:
            self._file = open(self._path, self._mode, **self._kwargs)
        elif "b" in self._mode:
            self._file = io.BytesIO()
        else:
            self._file = io.StringIO()

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the underlying file; the write cache becomes the read cache."""
        self._file.close()
        self._file = None
        self._cache[File.Op.R] = self._cache[File.Op.W]
        self._lastop = None

    def _get_cache(self, op):
        # Join with the separator type matching the mode (bytes vs. str)
        return (b"" if "b" in self._mode else "").join(self._cache[op])

    def _add_to_cache(self, op, s):
        self._cache[op].append(s)

    def read(self, *, cache=True):
        """
        Return the file content. With `cache` enabled, a non-empty read
        cache is returned without touching the underlying file, and fresh
        reads are added to the cache.
        """
        if cache and self._cache[File.Op.R]:
            return self._get_cache(File.Op.R)

        # Rewind after a write so that the read starts at the beginning
        if self._lastop == File.Op.W:
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        self._lastop = File.Op.R

        if cache:
            self._add_to_cache(File.Op.R, self._file.read())
            return self._get_cache(File.Op.R)
        else:
            return self._file.read()

    def write(self, s, *, cache=True):
        """
        Write `s` to the underlying file and flush; returns whatever the
        underlying `write()` returned. The read cache is replaced by the
        write cache on every call.
        """

        # Rewind after a read so that the write starts at the beginning
        if self._lastop == File.Op.R:
            try:
                self._file.seek(0)
            except io.UnsupportedOperation:
                pass

        if cache:
            self._add_to_cache(File.Op.W, s)

        self._cache[File.Op.R] = self._cache[File.Op.W]

        written = self._file.write(s)
        self._file.flush()
        self._lastop = File.Op.W
        return written

    path = property(lambda s: s._path)

    def __repr__(self):
        # The caches live in self._cache, keyed by operation; report the
        # total number of cached characters/bytes for each.
        rcache = sum(len(c) for c in self._cache[File.Op.R])
        wcache = sum(len(c) for c in self._cache[File.Op.W])
        return (
            f'<File path={self._path or "(buffered)"} open={bool(self._file)} '
            f"rcache={rcache} wcache={wcache}>"
        )
274
275
class FileFactory:
    """
    Callable that creates `File` objects and keeps track of every one it
    has handed out, offering list-like access to them.
    """

    def __init__(self):
        self._files = []

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """Create a new `File`, remember it, and return it."""
        created = File(path, mode, content, **kwargs)
        self._files.append(created)
        return created

    def __len__(self):
        return len(self._files)

    def pop(self, idx=-1):
        """Remove and return the file at `idx` (the most recent by default)."""
        return self._files.pop(idx)

    def __getitem__(self, idx):
        return self._files[idx]

    def __contains__(self, f):
        return f in self._files
296
297
class FakeFileFactory(FileFactory):
    """
    File factory for testing purposes: every file is a pathless (buffered)
    `File`, but is looked up by, and reports, the path it was requested for.
    """

    def __init__(self):
        super().__init__()
        self._paths2files = OrderedDict()

    def __call__(self, path=None, mode="r", content=None, **kwargs):
        """Return the file already created for `path`, or create a new one."""
        try:
            return self._paths2files[path]
        except KeyError:
            pass

        fake = super().__call__(None, mode, content, **kwargs)
        self._paths2files[path] = fake

        requested_path = path

        # The buffered file has no path of its own, so swap in a subclass
        # whose `path` attribute reports the path that was asked for.
        class FakeFile(File):
            path = requested_path

        fake.__class__ = FakeFile
        return fake

    def __getitem__(self, path):
        return self._paths2files[path]

    def get(self, path, default):
        """Return the file registered for `path`, or `default`."""
        return self._paths2files.get(path, default)

    def pop(self, last=True):
        """Remove and return a `(path, file)` pair; the newest if `last`."""
        return self._paths2files.popitem(last)

    def __repr__(self):
        nfiles = len(self._files)
        npaths = len(self._paths2files)
        return f"<FakeFileFactory nfiles={nfiles} paths={npaths}>"
335
336
337 # [ IMAGE HANDLING ] ##########################################################
338
339
# Record stored for each registered inline image: its content ID and a
# description, the latter defaulting to None.
InlineImageInfo = namedtuple("InlineImageInfo", "cid desc", defaults=(None,))
343
344
class ImageRegistry:
    """
    Ordered mapping from image location to `InlineImageInfo`, handing out a
    fresh content ID for every image registered.
    """

    def __init__(self):
        self._images = OrderedDict()

    def register(self, path, description=None):
        """
        Record the image at `path` and return the content ID assigned to it.

        A leading "~" is expanded, and absolute paths are stored as
        file:// URLs.
        """
        location = os.path.expanduser(path)
        if location.startswith("/"):
            location = f"file://{location}"

        # make_msgid() wraps the ID in angle brackets; drop those
        content_id = make_msgid()[1:-1]
        self._images[location] = InlineImageInfo(content_id, description)
        return content_id

    def __iter__(self):
        return iter(self._images)

    def __getitem__(self, idx):
        return self._images[idx]

    def __len__(self):
        return len(self._images)

    def items(self):
        """Return the `(location, info)` pairs in registration order."""
        return self._images.items()

    def __repr__(self):
        return f"<ImageRegistry(items={len(self._images)})>"

    def __str__(self):
        return str(self._images)
375
376
class InlineImageExtension(Extension):
    """
    Markdown extension that registers local images with an image registry
    and rewrites their `src` attribute to a `cid:` reference.
    """

    class RelatedImageInlineProcessor(ImageInlineProcessor):
        """Image inline processor that records local images in a registry."""

        def __init__(self, re, md, registry):
            super().__init__(re, md)
            self._registry = registry

        def handleMatch(self, m, data):
            el, start, end = super().handleMatch(m, data)
            attrs = el.attrib
            if "src" not in attrs:
                return el, start, end

            src = attrs["src"]
            # We only inline local content, i.e. sources without a scheme,
            # or with the file:// scheme
            if src.startswith("file://") or "://" not in src:
                description = attrs.get("title", attrs.get("alt"))
                cid = self._registry.register(src, description)
                attrs["src"] = f"cid:{cid}"

            return el, start, end

    def __init__(self, registry):
        super().__init__()
        self._image_registry = registry

    INLINE_PATTERN_NAME = "image_link"

    def extendMarkdown(self, md):
        """Register the registry-aware image processor with `md`."""
        md.registerExtension(self)
        processor = self.RelatedImageInlineProcessor(
            IMAGE_LINK_RE, md, self._image_registry
        )
        md.inlinePatterns.register(
            processor, InlineImageExtension.INLINE_PATTERN_NAME, 150
        )
410
411
def markdown_with_inline_image_support(
    text,
    *,
    mdwn=None,
    image_registry=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert Markdown `text` to HTML, registering local inline images.

    Returns a tuple `(text, htmltext, registry, mdwn)`: the source text
    with references to registered images replaced by their `cid:` URLs, the
    generated HTML, the image registry used, and the Markdown instance.

    `image_registry` is reused if given, else a new one is created.
    `extensions` is not modified. The `mdwn` argument is accepted, but a
    new Markdown instance is always created.
    """
    registry = (
        image_registry if image_registry is not None else ImageRegistry()
    )
    inline_image_handler = InlineImageExtension(registry=registry)

    # Build a new list rather than appending to the caller's, which would
    # otherwise grow by one handler with every call.
    extensions = [*(extensions or []), inline_image_handler]
    mdwn = markdown.Markdown(
        extensions=extensions, extension_configs=extension_configs or {}
    )

    htmltext = mdwn.convert(text)

    def replace_image_with_cid(matchobj):
        # Images are registered under their path, or the file:// URL of it
        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
            if m in registry:
                return f"(cid:{registry[m].cid}"
        return matchobj.group(0)

    # Rewrite the link targets "(target" in the source text as well, so
    # that the plain-text version refers to the same content IDs.
    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
    return text, htmltext, registry, mdwn
440
441
442 # [ CSS STYLING ] #############################################################
443
444
# Pynliner is optional; _PYNLINER records whether it could be imported.
try:
    import pynliner

    _PYNLINER = True

except ImportError:
    _PYNLINER = False

# Defined unconditionally, as convert_markdown_to_html() refers to it
# regardless of whether Pygments is available.
_CODEHILITE_CLASS = "codehilite"

# Pygments is optional too; without it there is no highlighting CSS.
try:
    from pygments.formatters import get_formatter_by_name

    _PYGMENTS_CSS = get_formatter_by_name(
        "html", style="default"
    ).get_style_defs(f".{_CODEHILITE_CLASS}")

except ImportError:
    _PYGMENTS_CSS = None
464
465
def apply_styling(html, css):
    """
    Return `html` with the Pygments CSS (if any) and `css` (if any) inlined
    using Pynliner. Without Pynliner, `html` is returned unchanged.
    """
    if not _PYNLINER:
        # Pynliner is an optional dependency; there is nothing to inline
        # styles with, so hand back the document as it is.
        return html

    stylesheet = "\n".join(s for s in [_PYGMENTS_CSS, css] if s)
    return (
        pynliner.Pynliner()
        .from_string(html)
        .with_cssString(stylesheet)
        .run()
    )
473
474
475 # [ QUOTE HANDLING ] ##########################################################
476
477
class QuoteToAdmonitionExtension(Extension):
    """
    Markdown extension turning e-mail quotes (a title line followed by
    ">"-prefixed lines) into `div.admonition.quote` elements.
    """

    class EmailQuoteBlockProcessor(BlockProcessor):
        # A quoted line: ">" at the start of a line, capturing the rest
        RE = re.compile(r"(?:^|\n)>\s*(.*)")

        def __init__(self, parser):
            super().__init__(parser)
            # Title of the most recent quote; kept across blocks so that
            # later quote blocks can be rendered as continuations.
            self._title = None

        def test(self, parent, blocks):
            """Decide whether the block `blocks` should be handled here."""
            if markdown.util.nearing_recursion_limit():
                return False

            lines = blocks.splitlines()
            if not lines:
                # Nothing to inspect; avoids indexing into an empty list
                return False

            if len(lines) < 2:
                # A single line only qualifies as the continuation of an
                # earlier quote, and only if it is itself a quoted line
                return bool(self._title) and bool(self.RE.search(lines[0]))

            elif not self.RE.search(lines[0]) and self.RE.search(lines[1]):
                # A title line followed by a quoted line
                return True

            elif self._title and self.RE.search(lines[1]):
                # Continuation of an earlier quote
                return True

            return False

        def run(self, parent, blocks):
            """Consume the first block and append the admonition to `parent`."""
            quotelines = blocks.pop(0).splitlines()

            # If a title is already remembered when we get here, this block
            # is marked as a continuation
            cont = bool(self._title)
            if not self.RE.search(quotelines[0]):
                self._title = quotelines.pop(0)

            admonition = etree.SubElement(parent, "div")
            admonition.set(
                "class", f"admonition quote{' continued' if cont else ''}"
            )
            self.parser.parseChunk(admonition, self._title)

            # The first child is what the title was just parsed into
            admonition[0].set("class", "admonition-title")
            self.parser.parseChunk(
                admonition, "\n".join(self.clean(line) for line in quotelines)
            )

        @classmethod
        def clean(klass, line):
            """Strip the quote marker off `line`, if it has one."""
            m = klass.RE.match(line)
            return m.group(1) if m else line

    def extendMarkdown(self, md):
        """Register the e-mail quote block processor with `md`."""
        md.registerExtension(self)
        email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
        md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
535
536
537 # [ PARTS GENERATION ] ########################################################
538
539
class Part(
    namedtuple(
        "Part",
        ["type", "subtype", "path", "desc", "cid", "orig"],
        defaults=[None, None, False],
    )
):
    """
    Leaf of the MIME tree: MIME type and subtype, path of the content, and
    optionally a description, a content ID, and a flag marking the original
    part.
    """

    def __str__(self):
        pieces = [f"<{self.type}/{self.subtype}>"]
        if self.cid:
            pieces.append(f"cid:{self.cid}")
        if self.orig:
            pieces.append("ORIGINAL")
        return " ".join(pieces)
554
555
class Multipart(
    namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
):
    """
    Container node of the MIME tree: a multipart subtype, the list of
    child nodes, and an optional description.
    """

    def __str__(self):
        count = len(self.children)
        return f"<multipart/{self.subtype}> children={count}"

    def __hash__(self):
        # Hash the textual form of subtype and children
        return hash(str(self.subtype) + str(self.children))
564
565
def collect_inline_images(
    image_registry, *, tempdir=None, filefactory=None
):
    """
    Fetch every image in `image_registry` into a temporary file and return
    the list of `Part` objects describing them.

    Entries whose location already is a `cid:` reference are skipped.
    Temporary files are created in `tempdir`, and written through
    `filefactory` (a new `FileFactory` if not given).
    """
    if filefactory is None:
        # Create the default per call instead of sharing one instance
        # across all calls via the function signature.
        filefactory = FileFactory()

    relparts = []
    for path, info in image_registry.items():
        if path.startswith("cid:"):
            continue

        # Make sure the response is closed again once it has been read
        with request.urlopen(path) as data:
            # get_content_type() leaves out parameters such as a charset,
            # which would otherwise end up in the subtype below
            mimetype = data.headers.get_content_type()
            payload = data.read()

        ext = mimetypes.guess_extension(mimetype)
        # mkstemp() hands back an open file descriptor along with the name;
        # we only need the name, so close the descriptor right away.
        fd, tempfilename = tempfile.mkstemp(
            prefix="img", suffix=ext, dir=tempdir
        )
        os.close(fd)
        imgpath = pathlib.Path(tempfilename)

        with filefactory(imgpath, "w+b") as out_f:
            out_f.write(payload)

        desc = (
            f'Inline image: "{info.desc}"'
            if info.desc
            else f"Inline image {str(len(relparts)+1)}"
        )
        relparts.append(
            Part(*mimetype.split("/"), imgpath, cid=info.cid, desc=desc)
        )

    return relparts
596
597
# Separator between the body of a mail and its signature
EMAIL_SIG_SEP = "\n-- \n"
# Prefix of a signature's first line naming the file with its HTML version
HTML_SIG_MARKER = "=htmlsig "


def make_html_doc(body, sig=None):
    """
    Wrap the HTML fragment `body` into a complete HTML document, followed
    by the HTML fragment `sig` in a signature container, if given.
    """
    pieces = [
        "<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n",
        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n',  # noqa: E501
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n',  # noqa: E501
        "</head>\n",
        "<body>\n",
        f"{body}\n",
    ]

    if sig:
        # The separator is shown without its surrounding newlines
        separator = EMAIL_SIG_SEP.strip("\n")
        pieces.extend(
            (
                f'<div id="signature"><span class="sig_separator">{separator}</span>\n',  # noqa: E501
                f"{sig}\n",
                "</div>",
            )
        )

    pieces.append("\n  </body>\n</html>")
    return "".join(pieces)
623
624
def make_text_mail(text, sig=None):
    """Return `text`, followed by separator and `sig` if a `sig` is given."""
    if not sig:
        return text

    return text + EMAIL_SIG_SEP + sig
627
628
def extract_signature(text, *, filefactory=None):
    """
    Split `text` at the first signature separator.

    Returns a tuple `(body, textsig, htmlsig)`. Both signatures are `None`
    if there is no separator. If the first line of the signature starts
    with `HTML_SIG_MARKER`, the rest of that line names a file with the
    HTML signature, which is read through `filefactory` (a new
    `FileFactory` if not given); otherwise `htmlsig` is `None`.
    """
    parts = text.split(EMAIL_SIG_SEP, 1)
    if len(parts) == 1:
        return text, None, None

    lines = parts[1].splitlines()
    # An empty signature has no lines at all, hence no marker either
    if not lines or not lines[0].startswith(HTML_SIG_MARKER):
        return parts[0], parts[1], None

    if filefactory is None:
        # Create the default per call instead of sharing one instance
        # across all calls via the function signature.
        filefactory = FileFactory()

    path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
    textsig = "\n".join(lines)

    with filefactory(path.expanduser()) as sig_f:
        sig_input = sig_f.read()

    soup = bs4.BeautifulSoup(sig_input, "html.parser")

    # Take any <style> element out of the tree, to be put in front of the
    # signature that is returned
    style = str(soup.style.extract()) if soup.style else ""

    # Use the first element matching any of these selectors, in this order
    sig = None
    for sig_selector in (
        "#signature",
        "#signatur",
        "#emailsig",
        ".signature",
        ".signatur",
        ".emailsig",
        "body",
        "div",
    ):
        sig = soup.select_one(sig_selector)
        if sig:
            break

    if not sig:
        return parts[0], textsig, style + sig_input

    if sig.attrs.get("id") == "signature":
        # Only keep the content of an element with this ID, not the
        # element itself
        sig = "".join(str(c) for c in sig.children)

    return parts[0], textsig, style + str(sig)
668
669
def convert_markdown_to_html(
    draft_f,
    *,
    related_to_html_only=False,
    css_f=None,
    htmldump_f=None,
    filefactory=None,
    tempdir=None,
    extensions=None,
    extension_configs=None,
):
    """
    Convert the Markdown draft in `draft_f` to a MIME tree.

    The draft is read from `draft_f`, and the plain-text version (with
    references to inline images rewritten) is written back to it. The HTML
    version is written next to the draft, or to a temporary file in
    `tempdir` if the draft has no path. If given, `css_f` provides the CSS
    to apply and `htmldump_f` receives a copy of the HTML.

    Returns a `Multipart` holding the text and HTML alternatives. Inline
    images are grouped with the HTML part only if `related_to_html_only`,
    else with the group of alternatives as a whole.

    `extensions` and `extension_configs` are not modified. Files are
    created through `filefactory` (a new `FileFactory` if not given).

    Raises NotImplementedError for an HTML signature without text version.
    """
    if filefactory is None:
        # Create the default per call instead of sharing one instance
        # across all calls via the function signature.
        filefactory = FileFactory()

    # TODO extension_configs need to be handled differently
    # Work on copies, so that the caller's objects stay untouched
    extension_configs = dict(extension_configs or {})
    highlight_config = dict(extension_configs.get("pymdownx.highlight", {}))
    highlight_config["css_class"] = _CODEHILITE_CLASS
    extension_configs["pymdownx.highlight"] = highlight_config

    extensions = [*(extensions or []), QuoteToAdmonitionExtension()]

    draft = draft_f.read()
    origtext, textsig, htmlsig = extract_signature(
        draft, filefactory=filefactory
    )

    (
        origtext,
        htmltext,
        image_registry,
        mdwn,
    ) = markdown_with_inline_image_support(
        origtext, extensions=extensions, extension_configs=extension_configs
    )

    if htmlsig:
        if not textsig:
            # TODO: decide what to do if there is no plain-text version
            raise NotImplementedError("HTML signature but no text alternative")

        # Register the images of the HTML signature, and refer to them by
        # their content IDs
        soup = bs4.BeautifulSoup(htmlsig, "html.parser")
        for img in soup.find_all("img"):
            uri = img.attrs["src"]
            desc = img.attrs.get("title", img.attrs.get("alt"))
            cid = image_registry.register(uri, desc)
            img.attrs["src"] = f"cid:{cid}"

        htmlsig = str(soup)

    elif textsig:
        # No HTML signature, so convert the text signature like the body
        (
            textsig,
            htmlsig,
            image_registry,
            mdwn,
        ) = markdown_with_inline_image_support(
            textsig,
            extensions=extensions,
            extension_configs=extension_configs,
            image_registry=image_registry,
            mdwn=mdwn,
        )

    origtext = make_text_mail(origtext, textsig)
    draft_f.write(origtext)
    textpart = Part(
        "text", "plain", draft_f.path, "Plain-text version", orig=True
    )

    htmltext = make_html_doc(htmltext, htmlsig)
    htmltext = apply_styling(htmltext, css_f.read() if css_f else None)

    if draft_f.path:
        htmlpath = draft_f.path.with_suffix(".html")
    else:
        # mkstemp() hands back an open file descriptor along with the name;
        # we only need the name, so close the descriptor right away.
        fd, htmlname = tempfile.mkstemp(suffix=".html", dir=tempdir)
        os.close(fd)
        htmlpath = pathlib.Path(htmlname)

    with filefactory(
        htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
    ) as out_f:
        out_f.write(htmltext)
    htmlpart = Part("text", "html", htmlpath, "HTML version")

    if htmldump_f:
        htmldump_f.write(htmltext)

    imgparts = collect_inline_images(
        image_registry, tempdir=tempdir, filefactory=filefactory
    )

    if related_to_html_only:
        # If there are inline image parts, they will be contained within a
        # multipart/related part along with the HTML part only
        if imgparts:
            # replace htmlpart with a multipart/related container of the HTML
            # parts and the images
            htmlpart = Multipart(
                "relative", [htmlpart] + imgparts, "Group of related content"
            )

        return Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )

    else:
        # If there are inline image parts, they will be siblings to the
        # multipart/alternative tree within a multipart/related part
        altpart = Multipart(
            "alternative", [textpart, htmlpart], "Group of alternative content"
        )
        if imgparts:
            return Multipart(
                "relative", [altpart] + imgparts, "Group of related content"
            )
        else:
            return altpart
786
787
class MIMETreeDFWalker:
    """
    Depth-first walker of a MIME tree made of `Multipart` containers and
    leaf nodes, calling a visitor function for every node.

    The visitor is called as `visitor_fn(node, ancestry, descendents,
    debugprint=...)`, for a container only after all of its children.
    """

    def __init__(self, *, visitor_fn=None, debug=False):
        self._visitor_fn = visitor_fn or self._echovisit
        self._debug = debug

    def _echovisit(self, node, ancestry, descendents, *, debugprint):
        # Default visitor; takes the same arguments every visitor is
        # called with, and only reports the node and its ancestry.
        debugprint(f"node={node} ancestry={ancestry}")

    def walk(self, root, *, visitor_fn=None):
        """
        Recursive function to implement a depth-first walk of the MIME-tree
        rooted at `root`.

        A list given as `root` is wrapped into a multipart/mixed container,
        unless it holds a single node only, which then is the root. The
        visitor passed to the constructor is used if `visitor_fn` is not
        given.
        """
        if isinstance(root, list):
            if len(root) > 1:
                root = Multipart("mixed", children=root)
            else:
                root = root[0]

        self._walk(
            root,
            ancestry=[],
            descendents=[],
            visitor_fn=visitor_fn or self._visitor_fn,
        )

    def _walk(self, node, *, ancestry, descendents, visitor_fn):
        # Let's start by enumerating the parts at the current level. At the
        # root level, ancestry will be the empty list, and we expect a
        # multipart/* container at this level. Later, e.g. within a
        # multipart/alternative container, the subtree will just be the
        # alternative parts, while the top of the ancestry will be the
        # multipart/alternative container, which we will process after the
        # following loop.

        lead = f"{'│ '*len(ancestry)}"
        if isinstance(node, Multipart):
            self.debugprint(
                f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
            )

            # Depth-first, so push the current container onto the ancestry
            # stack, then descend …
            ancestry.append(node)
            self.debugprint(lead + "│ " * 2)
            for child in node.children:
                self._walk(
                    child,
                    ancestry=ancestry,
                    descendents=descendents,
                    visitor_fn=visitor_fn,
                )

            # … and pop it again. This must not happen within the assert
            # statement, which is not executed when running with -O.
            popped = ancestry.pop()
            assert popped == node
            descendents.extend(node.children)

        else:
            self.debugprint(f"{lead}├{node}")

        if visitor_fn:
            visitor_fn(
                node, ancestry, descendents, debugprint=self.debugprint
            )

    def debugprint(self, s, **kwargs):
        """Print `s` to stderr, but only if debugging was turned on."""
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
859
860
861 # [ RUN MODES ] ###############################################################
862
863
class MuttCommands:
    """
    Collector of Mutt commands, written out in one go by flush().

    Mutt runs "push" commands in reverse order, which makes mixing them
    with "real" commands complicated. Therefore two lists of commands and
    one list of pushes are kept: commands go to the first list until a push
    has been added, and to the second list from then on.

    flush() prints the first list of commands, then the pushes, most
    recently added first, then the second list of commands, and finally
    empties all three lists.
    """

    def __init__(self, out_f=sys.stdout, *, debug=False):
        self._cmd1 = []
        self._push = []
        self._cmd2 = []
        self._out_f = out_f
        self._debug = debug

    def cmd(self, s):
        """Queue the command `s`, before or after the pushes."""
        self.debugprint(s)
        target = self._cmd2 if self._push else self._cmd1
        target.append(s)

    def push(self, s):
        """Queue `s` as a push command, ahead of earlier pushes."""
        escaped = s.replace('"', r"\"")
        line = f'push "{escaped}"'
        self.debugprint(line)
        self._push = [line, *self._push]

    def flush(self):
        """Write everything queued to the output file, and start afresh."""
        lines = [*self._cmd1, *self._push, *self._cmd2]
        print("\n".join(lines), file=self._out_f)
        self._cmd1 = []
        self._push = []
        self._cmd2 = []

    def debugprint(self, s, **kwargs):
        """Print `s` to stderr, but only if debugging was turned on."""
        if self._debug:
            print(s, file=sys.stderr, **kwargs)
906
907
def do_setup(
    *,
    out_f=sys.stdout,
    temppath=None,
    tempdir=None,
    debug_commands=False,
):
    """
    Write the Mutt commands of the setup phase to `out_f`.

    The commands save $editor and $edit_headers to $my_ variables, make
    this script (in "massage" mode) the editor, unset $edit_headers, store
    `temppath` in $my_mdwn_postprocess_cmd_file, and push the keys to edit
    the first entry. If `temppath` is not given, a temporary file is
    created in `tempdir`.
    """
    if not temppath:
        # mkstemp() hands back an open file descriptor along with the name;
        # we only need the name, so close the descriptor right away.
        fd, tempname = tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)
        os.close(fd)
        temppath = pathlib.Path(tempname)

    cmds = MuttCommands(out_f, debug=debug_commands)

    # The editor is this very invocation plus the massage sub-command
    editor = f"{' '.join(sys.argv)} massage --write-commands-to {temppath}"

    cmds.cmd('set my_editor="$editor"')
    cmds.cmd('set my_edit_headers="$edit_headers"')
    cmds.cmd(f'set editor="{editor}"')
    cmds.cmd("unset edit_headers")
    cmds.cmd(f"set my_mdwn_postprocess_cmd_file={temppath}")
    cmds.push("<first-entry><edit-file>")
    cmds.flush()
929
930
def do_massage(
    draft_f,
    cmd_f,
    *,
    extensions=None,
    css_f=None,
    htmldump_f=None,
    converter=convert_markdown_to_html,
    related_to_html_only=True,
    only_build=False,
    max_other_attachments=20,
    tempdir=None,
    debug_commands=False,
    debug_walk=False,
):
    """
    Convert the draft and write the NeoMutt commands assembling its MIME tree.

    draft_f: the email draft, handed to the converter.
    cmd_f: file object the NeoMutt commands are written to.
    extensions: comma-separated string of extension names, split into a list
        and handed to the converter.
    css_f, htmldump_f, related_to_html_only, tempdir: handed to the converter
        unchanged.
    converter: callable turning the draft into a tree of Part/Multipart.
    only_build: if true, do not push <send-message> at the end.
    max_other_attachments: number of <move-up> keys pushed to bubble a newly
        attached part to the top of the compose list; has to be larger than
        the number of attachments the message already has.
    debug_commands: passed on to MuttCommands as its debug flag.
    debug_walk: passed on to MIMETreeDFWalker as its debug flag.

    Raises NotImplementedError for a multipart subtype other than
    alternative, relative/related or multilingual, and RuntimeError if the
    tree contains something that is neither a Part nor a Multipart.
    """
    # Here's the big picture: we're being invoked as the editor on the email
    # draft, and whatever commands we write to the file given as cmdpath will
    # be run by the second source command in the macro definition.
    cmds = MuttCommands(cmd_f, debug=debug_commands)

    extensions = extensions.split(",") if extensions else []
    tree = converter(
        draft_f,
        css_f=css_f,
        htmldump_f=htmldump_f,
        related_to_html_only=related_to_html_only,
        tempdir=tempdir,
        extensions=extensions,
    )

    mimetree = MIMETreeDFWalker(debug=debug_walk)

    # pos:   the (assumed) position of the current item in NeoMutt's compose
    #        list
    # tags:  for every multipart, the positions of its direct children
    # parts: the number of entries this script has dealt with so far
    state = dict(pos=1, tags={}, parts=1)

    def visitor_fn(item, ancestry, descendents, *, debugprint=None):
        """
        Visitor function called for every node (part) of the MIME tree,
        depth-first, and responsible for telling NeoMutt how to assemble
        the tree.
        """
        KILL_LINE = r"\Ca\Ck"

        if isinstance(item, Part):
            # We've hit a leaf-node, i.e. an alternative or a related part
            # with actual content.

            # Let's add the part
            if item.orig:
                # The original source already exists in the NeoMutt tree, but
                # the underlying file may have been modified, so we need to
                # update the encoding, but that's it:
                cmds.push("<first-entry>")
                cmds.push("<update-encoding>")

                # We really just need to be able to assume that at this point,
                # NeoMutt is at position 1, and that we've processed only this
                # part so far. Never mind about actual attachments, we can
                # safely ignore those as they stay at the end.
                assert state["pos"] == 1
                assert state["parts"] == 1
            else:
                # … whereas all other parts need to be added, and they're all
                # considered to be temporary and inline:
                cmds.push(f"<attach-file>{item.path}<enter>")
                cmds.push("<toggle-unlink><toggle-disposition>")

                # This added a part at the end of the list of parts, and that's
                # just how many parts we've seen so far, so its position in
                # the NeoMutt compose list is the count of parts
                state["parts"] += 1
                state["pos"] = state["parts"]

            # If the item (including the original) comes with additional
            # information, then we might just as well update the NeoMutt
            # tree now:
            if item.cid:
                cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")

            # Now for the biggest hack in this script, which is to handle
            # attachments, such as PDFs, that aren't related or alternatives.
            # The problem is that when we add an inline image, it always gets
            # appended to the list, i.e. inserted *after* other attachments.
            # Since we don't know the number of attachments, we also cannot
            # infer the position of the new attachment. Therefore, we bubble
            # it all the way to the top, only to then move it down again:
            if state["pos"] > 1:  # skip for the first part
                for _ in range(max_other_attachments):
                    # could use any number here, but has to be larger than the
                    # number of possible attachments. The performance
                    # difference of using a high number is negligible.
                    # Bubble up the new part
                    cmds.push("<move-up>")

                # As we push the part to the right position in the list (i.e.
                # the last of the subset of attachments this script added), we
                # must handle the situation that subtrees are skipped by
                # NeoMutt. Hence, the actual number of positions to move down
                # is decremented by the number of descendents so far
                # encountered.
                for _ in range(1, state["pos"] - len(descendents)):
                    cmds.push("<move-down>")

        elif isinstance(item, Multipart):
            # This node has children, but we already visited them (see
            # above). The tags dictionary of State should contain a list of
            # their positions in the NeoMutt compose window, so iterate those
            # and tag the parts there:
            n_tags = len(state["tags"][item])
            for tag in state["tags"][item]:
                cmds.push(f"<jump>{tag}<enter><tag-entry>")

            if item.subtype == "alternative":
                cmds.push("<group-alternatives>")
            elif item.subtype in ("relative", "related"):
                cmds.push("<group-related>")
            elif item.subtype == "multilingual":
                cmds.push("<group-multilingual>")
            else:
                raise NotImplementedError(
                    f"Handling of multipart/{item.subtype} is not implemented"
                )

            # The tagged entries have been folded into a single group entry,
            # so the position shrinks accordingly, while the group itself
            # counts as one more part:
            state["pos"] -= n_tags - 1
            state["parts"] += 1

        else:
            # We should never get here
            raise RuntimeError(f"Type {type(item)} is unexpected: {item}")

        # If the item has a description, we might just as well add it
        if item.desc:
            cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")

        if ancestry:
            # If there's an ancestry, record the current (assumed) position in
            # the NeoMutt compose window as needed-to-tag by our direct parent
            # (i.e. the last item of the ancestry)
            state["tags"].setdefault(ancestry[-1], []).append(state["pos"])

            # debugprint is optional in the signature, so don't blow up if
            # the walker did not hand one in:
            if debugprint is not None:
                lead = "│ " * (len(ancestry) + 1) + "* "
                debugprint(
                    f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
                    f"{lead}descendents={[d.subtype for d in descendents]}\n"
                    f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
                    f"{lead}pos={state['pos']}, parts={state['parts']}"
                )

    # -----------------
    # End of visitor_fn

    # Let's walk the tree and visit every node with our fancy visitor
    # function
    mimetree.walk(tree, visitor_fn=visitor_fn)

    if not only_build:
        cmds.push("<send-message>")

    # Finally, cleanup. Since we're responsible for removing the temporary
    # file, how's this for a little hack?
    try:
        filename = cmd_f.name
    except AttributeError:
        filename = "pytest_internal_file"
    cmds.cmd(f"source 'rm -f {filename}|'")

    # Also undo what the setup step did, i.e. restore the $editor and
    # $edit_headers variables, and unset the variable used to identify the
    # command file we have just been writing to.
    cmds.cmd('set editor="$my_editor"')
    cmds.cmd('set edit_headers="$my_edit_headers"')
    cmds.cmd("unset my_editor")
    cmds.cmd("unset my_edit_headers")
    cmds.cmd("unset my_mdwn_postprocess_cmd_file")
    cmds.flush()
1106
1107
1108 # [ CLI ENTRY ] ###############################################################
1109
if __name__ == "__main__":
    # Without a mode, we are in the first stage of the macro and emit the
    # setup commands; in "massage" mode, we have been invoked as the editor
    # on the draft and do the actual work.
    args = parse_cli_args()

    if args.mode == "massage":
        with (
            File(args.MAILDRAFT, "r+") as draft_f,
            File(args.cmdpath, "w") as cmd_f,
            File(args.css_file, "r") as css_f,
            File(args.dump_html, "w") as htmldump_f,
        ):
            massage_options = dict(
                extensions=args.extensions,
                css_f=css_f,
                htmldump_f=htmldump_f,
                related_to_html_only=args.related_to_html_only,
                max_other_attachments=args.max_number_other_attachments,
                only_build=args.only_build,
                tempdir=args.tempdir,
                debug_commands=args.debug_commands,
                debug_walk=args.debug_walk,
            )
            do_massage(draft_f, cmd_f, **massage_options)

    elif args.mode is None:
        setup_options = dict(
            tempdir=args.tempdir,
            debug_commands=args.debug_commands,
        )
        do_setup(**setup_options)
1139
1140
1141 # [ TESTS ] ###################################################################
1142
1143 try:
1144     import pytest
1145
1146     class Tests:
1147         @pytest.fixture
1148         def const1(self):
1149             return "Curvature Vest Usher Dividing+T#iceps Senior"
1150
1151         @pytest.fixture
1152         def const2(self):
1153             return "Habitant Celestial 2litzy Resurf/ce Headpiece Harmonics"
1154
1155         @pytest.fixture
1156         def fakepath(self):
1157             return pathlib.Path("/does/not/exist")
1158
1159         @pytest.fixture
1160         def fakepath2(self):
1161             return pathlib.Path("/does/not/exist/either")
1162
1163         # NOTE: tests using the capsys fixture must specify sys.stdout to the
1164         # functions they call, else old stdout is used and not captured
1165
1166         @pytest.mark.muttctrl
1167         def test_MuttCommands_cmd(self, const1, const2, capsys):
1168             "Assert order of commands"
1169             cmds = MuttCommands(out_f=sys.stdout)
1170             cmds.cmd(const1)
1171             cmds.cmd(const2)
1172             cmds.flush()
1173             captured = capsys.readouterr()
1174             assert captured.out == "\n".join((const1, const2, ""))
1175
1176         @pytest.mark.muttctrl
1177         def test_MuttCommands_push(self, const1, const2, capsys):
1178             "Assert reverse order of pushes"
1179             cmds = MuttCommands(out_f=sys.stdout)
1180             cmds.push(const1)
1181             cmds.push(const2)
1182             cmds.flush()
1183             captured = capsys.readouterr()
1184             assert (
1185                 captured.out
1186                 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
1187             )
1188
1189         @pytest.mark.muttctrl
1190         def test_MuttCommands_push_escape(self, const1, const2, capsys):
1191             cmds = MuttCommands(out_f=sys.stdout)
1192             cmds.push(f'"{const1}"')
1193             cmds.flush()
1194             captured = capsys.readouterr()
1195             assert f'"\\"{const1}\\""' in captured.out
1196
1197         @pytest.mark.muttctrl
1198         def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
1199             "Assert reverse order of pushes"
1200             cmds = MuttCommands(out_f=sys.stdout)
1201             lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
1202             for i in range(2):
1203                 cmds.cmd(lines[4 * i + 0])
1204                 cmds.cmd(lines[4 * i + 1])
1205                 cmds.push(lines[4 * i + 2])
1206                 cmds.push(lines[4 * i + 3])
1207             cmds.flush()
1208
1209             captured = capsys.readouterr()
1210             lines_out = captured.out.splitlines()
1211             assert lines[0] in lines_out[0]
1212             assert lines[1] in lines_out[1]
1213             assert lines[7] in lines_out[2]
1214             assert lines[6] in lines_out[3]
1215             assert lines[3] in lines_out[4]
1216             assert lines[2] in lines_out[5]
1217             assert lines[4] in lines_out[6]
1218             assert lines[5] in lines_out[7]
1219
1220         @pytest.fixture
1221         def mime_tree_related_to_alternative(self):
1222             return Multipart(
1223                 "relative",
1224                 children=[
1225                     Multipart(
1226                         "alternative",
1227                         children=[
1228                             Part(
1229                                 "text",
1230                                 "plain",
1231                                 "part.txt",
1232                                 desc="Plain",
1233                                 orig=True,
1234                             ),
1235                             Part("text", "html", "part.html", desc="HTML"),
1236                         ],
1237                         desc="Alternative",
1238                     ),
1239                     Part(
1240                         "text", "png", "logo.png", cid="logo.png", desc="Logo"
1241                     ),
1242                 ],
1243                 desc="Related",
1244             )
1245
1246         @pytest.fixture
1247         def mime_tree_related_to_html(self):
1248             return Multipart(
1249                 "alternative",
1250                 children=[
1251                     Part(
1252                         "text",
1253                         "plain",
1254                         "part.txt",
1255                         desc="Plain",
1256                         orig=True,
1257                     ),
1258                     Multipart(
1259                         "relative",
1260                         children=[
1261                             Part("text", "html", "part.html", desc="HTML"),
1262                             Part(
1263                                 "text",
1264                                 "png",
1265                                 "logo.png",
1266                                 cid="logo.png",
1267                                 desc="Logo",
1268                             ),
1269                         ],
1270                         desc="Related",
1271                     ),
1272                 ],
1273                 desc="Alternative",
1274             )
1275
1276         @pytest.fixture
1277         def mime_tree_nested(self):
1278             return Multipart(
1279                 "relative",
1280                 children=[
1281                     Multipart(
1282                         "alternative",
1283                         children=[
1284                             Part(
1285                                 "text",
1286                                 "plain",
1287                                 "part.txt",
1288                                 desc="Plain",
1289                                 orig=True,
1290                             ),
1291                             Multipart(
1292                                 "alternative",
1293                                 children=[
1294                                     Part(
1295                                         "text",
1296                                         "plain",
1297                                         "part.txt",
1298                                         desc="Nested plain",
1299                                     ),
1300                                     Part(
1301                                         "text",
1302                                         "html",
1303                                         "part.html",
1304                                         desc="Nested HTML",
1305                                     ),
1306                                 ],
1307                                 desc="Nested alternative",
1308                             ),
1309                         ],
1310                         desc="Alternative",
1311                     ),
1312                     Part(
1313                         "text",
1314                         "png",
1315                         "logo.png",
1316                         cid="logo.png",
1317                         desc="Logo",
1318                     ),
1319                 ],
1320                 desc="Related",
1321             )
1322
1323         @pytest.mark.treewalk
1324         def test_MIMETreeDFWalker_depth_first_walk(
1325             self, mime_tree_related_to_alternative
1326         ):
1327             mimetree = MIMETreeDFWalker()
1328
1329             items = []
1330
1331             def visitor_fn(item, ancestry, descendents, debugprint):
1332                 items.append((item, len(ancestry), len(descendents)))
1333
1334             mimetree.walk(
1335                 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1336             )
1337             assert len(items) == 5
1338             assert items[0][0].subtype == "plain"
1339             assert items[0][1] == 2
1340             assert items[0][2] == 0
1341             assert items[1][0].subtype == "html"
1342             assert items[1][1] == 2
1343             assert items[1][2] == 0
1344             assert items[2][0].subtype == "alternative"
1345             assert items[2][1] == 1
1346             assert items[2][2] == 2
1347             assert items[3][0].subtype == "png"
1348             assert items[3][1] == 1
1349             assert items[3][2] == 2
1350             assert items[4][0].subtype == "relative"
1351             assert items[4][1] == 0
1352             assert items[4][2] == 4
1353
1354         @pytest.mark.treewalk
1355         def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1356             mimetree = MIMETreeDFWalker()
1357             items = []
1358
1359             def visitor_fn(item, ancestry, descendents, debugprint):
1360                 items.append(item)
1361
1362             p = Part("text", "plain", const1)
1363             mimetree.walk([p], visitor_fn=visitor_fn)
1364             assert items[-1].subtype == "plain"
1365             mimetree.walk([p, p], visitor_fn=visitor_fn)
1366             assert items[-1].subtype == "mixed"
1367
1368         @pytest.mark.treewalk
1369         def test_MIMETreeDFWalker_visitor_in_constructor(
1370             self, mime_tree_related_to_alternative
1371         ):
1372             items = []
1373
1374             def visitor_fn(item, ancestry, descendents, debugprint):
1375                 items.append(item)
1376
1377             mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1378             mimetree.walk(mime_tree_related_to_alternative)
1379             assert len(items) == 5
1380
1381         @pytest.fixture
1382         def string_io(self, const1, text=None):
1383             return StringIO(text or const1)
1384
1385         @pytest.mark.massage
1386         def test_do_massage_basic(self):
1387             def converter(draft_f, **kwargs):
1388                 return Part("text", "plain", draft_f.path, orig=True)
1389
1390             with File() as draft_f, File() as cmd_f:
1391                 do_massage(
1392                     draft_f=draft_f,
1393                     cmd_f=cmd_f,
1394                     converter=converter,
1395                 )
1396                 lines = cmd_f.read().splitlines()
1397
1398             assert "send-message" in lines.pop(0)
1399             assert "update-encoding" in lines.pop(0)
1400             assert "first-entry" in lines.pop(0)
1401             assert "source 'rm -f " in lines.pop(0)
1402             assert '="$my_editor"' in lines.pop(0)
1403             assert '="$my_edit_headers"' in lines.pop(0)
1404             assert "unset my_editor" == lines.pop(0)
1405             assert "unset my_edit_headers" == lines.pop(0)
1406             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1407
1408         @pytest.mark.massage
1409         def test_do_massage_fulltree(self, mime_tree_related_to_alternative):
1410             def converter(draft_f, **kwargs):
1411                 return mime_tree_related_to_alternative
1412
1413             max_attachments = 5
1414
1415             with File() as draft_f, File() as cmd_f:
1416                 do_massage(
1417                     draft_f=draft_f,
1418                     cmd_f=cmd_f,
1419                     max_other_attachments=max_attachments,
1420                     converter=converter,
1421                 )
1422                 lines = cmd_f.read().splitlines()[:-6]
1423
1424             assert "first-entry" in lines.pop()
1425             assert "update-encoding" in lines.pop()
1426             assert "Plain" in lines.pop()
1427             assert "part.html" in lines.pop()
1428             assert "toggle-unlink" in lines.pop()
1429             for i in range(max_attachments):
1430                 assert "move-up" in lines.pop()
1431             assert "move-down" in lines.pop()
1432             assert "HTML" in lines.pop()
1433             assert "jump>1" in lines.pop()
1434             assert "jump>2" in lines.pop()
1435             assert "group-alternatives" in lines.pop()
1436             assert "Alternative" in lines.pop()
1437             assert "logo.png" in lines.pop()
1438             assert "toggle-unlink" in lines.pop()
1439             assert "content-id" in lines.pop()
1440             for i in range(max_attachments):
1441                 assert "move-up" in lines.pop()
1442             assert "move-down" in lines.pop()
1443             assert "Logo" in lines.pop()
1444             assert "jump>1" in lines.pop()
1445             assert "jump>4" in lines.pop()
1446             assert "group-related" in lines.pop()
1447             assert "Related" in lines.pop()
1448             assert "send-message" in lines.pop()
1449             assert len(lines) == 0
1450
1451         @pytest.mark.massage
1452         def test_mime_tree_relative_within_alternative(
1453             self, mime_tree_related_to_html
1454         ):
1455             def converter(draft_f, **kwargs):
1456                 return mime_tree_related_to_html
1457
1458             with File() as draft_f, File() as cmd_f:
1459                 do_massage(
1460                     draft_f=draft_f,
1461                     cmd_f=cmd_f,
1462                     converter=converter,
1463                 )
1464                 lines = cmd_f.read().splitlines()[:-6]
1465
1466             assert "first-entry" in lines.pop()
1467             assert "update-encoding" in lines.pop()
1468             assert "Plain" in lines.pop()
1469             assert "part.html" in lines.pop()
1470             assert "toggle-unlink" in lines.pop()
1471             assert "move-up" in lines.pop()
1472             while True:
1473                 top = lines.pop()
1474                 if "move-up" not in top:
1475                     break
1476             assert "move-down" in top
1477             assert "HTML" in lines.pop()
1478             assert "logo.png" in lines.pop()
1479             assert "toggle-unlink" in lines.pop()
1480             assert "content-id" in lines.pop()
1481             assert "move-up" in lines.pop()
1482             while True:
1483                 top = lines.pop()
1484                 if "move-up" not in top:
1485                     break
1486             assert "move-down" in top
1487             assert "move-down" in lines.pop()
1488             assert "Logo" in lines.pop()
1489             assert "jump>2" in lines.pop()
1490             assert "jump>3" in lines.pop()
1491             assert "group-related" in lines.pop()
1492             assert "Related" in lines.pop()
1493             assert "jump>1" in lines.pop()
1494             assert "jump>2" in lines.pop()
1495             assert "group-alternative" in lines.pop()
1496             assert "Alternative" in lines.pop()
1497             assert "send-message" in lines.pop()
1498             assert len(lines) == 0
1499
1500         @pytest.mark.massage
1501         def test_mime_tree_nested_trees_does_not_break_positioning(
1502             self, mime_tree_nested
1503         ):
1504             def converter(draft_f, **kwargs):
1505                 return mime_tree_nested
1506
1507             with File() as draft_f, File() as cmd_f:
1508                 do_massage(
1509                     draft_f=draft_f,
1510                     cmd_f=cmd_f,
1511                     converter=converter,
1512                 )
1513                 lines = cmd_f.read().splitlines()
1514
1515             while "logo.png" not in lines.pop():
1516                 pass
1517             lines.pop()
1518             assert "content-id" in lines.pop()
1519             assert "move-up" in lines.pop()
1520             while True:
1521                 top = lines.pop()
1522                 if "move-up" not in top:
1523                     break
1524             assert "move-down" in top
1525             # Due to the nested trees, the number of descendents of the sibling
1526             # actually needs to be considered, not just the nieces. So to move
1527             # from position 1 to position 6, it only needs one <move-down>
1528             # because that jumps over the entire sibling tree. Thus what
1529             # follows next must not be another <move-down>
1530             assert "Logo" in lines.pop()
1531
1532         @pytest.mark.converter
1533         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
1534             with fakefilefactory(fakepath, content=const1) as draft_f:
1535                 tree = convert_markdown_to_html(
1536                     draft_f, filefactory=fakefilefactory
1537                 )
1538
1539             assert tree.subtype == "alternative"
1540             assert len(tree.children) == 2
1541             assert tree.children[0].subtype == "plain"
1542             assert tree.children[0].path == draft_f.path
1543             assert tree.children[0].orig
1544             assert tree.children[1].subtype == "html"
1545             assert tree.children[1].path == fakepath.with_suffix(".html")
1546
1547         @pytest.mark.converter
1548         def test_converter_writes(
1549             self, fakepath, fakefilefactory, const1, monkeypatch
1550         ):
1551             with fakefilefactory(fakepath, content=const1) as draft_f:
1552                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1553
1554             html = fakefilefactory.pop()
1555             assert fakepath.with_suffix(".html") == html[0]
1556             assert const1 in html[1].read()
1557             text = fakefilefactory.pop()
1558             assert fakepath == text[0]
1559             assert const1 == text[1].read()
1560
1561         @pytest.mark.imgproc
1562         def test_markdown_inline_image_processor(self):
1563             imgpath1 = "file:/path/to/image.png"
1564             imgpath2 = "file:///path/to/image.png?url=params"
1565             imgpath3 = "/path/to/image.png"
1566             text = f"""![inline local image]({imgpath1})
1567                        ![image inlined
1568                          with newline]({imgpath2})
1569                        ![image local path]({imgpath3})"""
1570             text, html, images, mdwn = markdown_with_inline_image_support(text)
1571
1572             # local paths have been normalised to URLs:
1573             imgpath3 = f"file://{imgpath3}"
1574
1575             assert 'src="cid:' in html
1576             assert "](cid:" in text
1577             assert len(images) == 3
1578             assert imgpath1 in images
1579             assert imgpath2 in images
1580             assert imgpath3 in images
1581             assert images[imgpath1].cid != images[imgpath2].cid
1582             assert images[imgpath1].cid != images[imgpath3].cid
1583             assert images[imgpath2].cid != images[imgpath3].cid
1584
1585         @pytest.mark.imgproc
1586         def test_markdown_inline_image_processor_title_to_desc(self, const1):
1587             imgpath = "file:///path/to/image.png"
1588             text = f'![inline local image]({imgpath} "{const1}")'
1589             text, html, images, mdwn = markdown_with_inline_image_support(text)
1590             assert images[imgpath].desc == const1
1591
1592         @pytest.mark.imgproc
1593         def test_markdown_inline_image_processor_alt_to_desc(self, const1):
1594             imgpath = "file:///path/to/image.png"
1595             text = f"![{const1}]({imgpath})"
1596             text, html, images, mdwn = markdown_with_inline_image_support(text)
1597             assert images[imgpath].desc == const1
1598
1599         @pytest.mark.imgproc
1600         def test_markdown_inline_image_processor_title_over_alt_desc(
1601             self, const1, const2
1602         ):
1603             imgpath = "file:///path/to/image.png"
1604             text = f'![{const1}]({imgpath} "{const2}")'
1605             text, html, images, mdwn = markdown_with_inline_image_support(text)
1606             assert images[imgpath].desc == const2
1607
1608         @pytest.mark.imgproc
1609         def test_markdown_inline_image_not_external(self):
1610             imgpath = "https://path/to/image.png"
1611             text = f"![inline image]({imgpath})"
1612             text, html, images, mdwn = markdown_with_inline_image_support(text)
1613
1614             assert 'src="cid:' not in html
1615             assert "](cid:" not in text
1616             assert len(images) == 0
1617
1618         @pytest.mark.imgproc
1619         def test_markdown_inline_image_local_file(self):
1620             imgpath = "/path/to/image.png"
1621             text = f"![inline image]({imgpath})"
1622             text, html, images, mdwn = markdown_with_inline_image_support(text)
1623
1624             for k, v in images.items():
1625                 assert k == f"file://{imgpath}"
1626                 break
1627
1628         @pytest.mark.imgproc
1629         def test_markdown_inline_image_expanduser(self):
1630             imgpath = pathlib.Path("~/image.png")
1631             text = f"![inline image]({imgpath})"
1632             text, html, images, mdwn = markdown_with_inline_image_support(text)
1633
1634             for k, v in images.items():
1635                 assert k == f"file://{imgpath.expanduser()}"
1636                 break
1637
1638         @pytest.fixture
1639         def test_png(self):
1640             return (
1641                 ""
1642                 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1643             )
1644
1645         @pytest.mark.imgproc
1646         def test_markdown_inline_image_processor_base64(self, test_png):
1647             text = f"![1px white inlined]({test_png})"
1648             text, html, images, mdwn = markdown_with_inline_image_support(text)
1649
1650             assert 'src="cid:' in html
1651             assert "](cid:" in text
1652             assert len(images) == 1
1653             assert test_png in images
1654
1655         @pytest.mark.converter
1656         def test_converter_tree_inline_image_base64(
1657             self, test_png, fakefilefactory
1658         ):
1659             text = f"![inline base64 image]({test_png})"
1660             with fakefilefactory(content=text) as draft_f:
1661                 tree = convert_markdown_to_html(
1662                     draft_f,
1663                     filefactory=fakefilefactory,
1664                     related_to_html_only=False,
1665                 )
1666             assert tree.subtype == "relative"
1667             assert tree.children[0].subtype == "alternative"
1668             assert tree.children[1].subtype == "png"
1669             written = fakefilefactory.pop()
1670             assert tree.children[1].path == written[0]
1671             assert b"PNG" in written[1].read()
1672
1673         @pytest.mark.converter
1674         def test_converter_tree_inline_image_base64_related_to_html(
1675             self, test_png, fakefilefactory
1676         ):
1677             text = f"![inline base64 image]({test_png})"
1678             with fakefilefactory(content=text) as draft_f:
1679                 tree = convert_markdown_to_html(
1680                     draft_f,
1681                     filefactory=fakefilefactory,
1682                     related_to_html_only=True,
1683                 )
1684             assert tree.subtype == "alternative"
1685             assert tree.children[1].subtype == "relative"
1686             assert tree.children[1].children[1].subtype == "png"
1687             written = fakefilefactory.pop()
1688             assert tree.children[1].children[1].path == written[0]
1689             assert b"PNG" in written[1].read()
1690
1691         @pytest.mark.converter
1692         def test_converter_tree_inline_image_cid(
1693             self, const1, fakefilefactory
1694         ):
1695             text = f"![inline base64 image](cid:{const1})"
1696             with fakefilefactory(content=text) as draft_f:
1697                 tree = convert_markdown_to_html(
1698                     draft_f,
1699                     filefactory=fakefilefactory,
1700                     related_to_html_only=False,
1701                 )
1702             assert len(tree.children) == 2
1703             assert tree.children[0].cid != const1
1704             assert tree.children[0].type != "image"
1705             assert tree.children[1].cid != const1
1706             assert tree.children[1].type != "image"
1707
1708         @pytest.fixture
1709         def fakefilefactory(self):
1710             return FakeFileFactory()
1711
1712         @pytest.mark.imgcoll
1713         def test_inline_image_collection(
1714             self, test_png, const1, const2, fakefilefactory
1715         ):
1716             test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
1717             relparts = collect_inline_images(
1718                 test_images, filefactory=fakefilefactory
1719             )
1720
1721             written = fakefilefactory.pop()
1722             assert b"PNG" in written[1].read()
1723
1724             assert relparts[0].subtype == "png"
1725             assert relparts[0].path == written[0]
1726             assert relparts[0].cid == const1
1727             assert const2 in relparts[0].desc
1728
1729         if _PYNLINER:
1730
1731             @pytest.mark.styling
1732             def test_apply_stylesheet(self):
1733                 html = "<p>Hello, world!</p>"
1734                 css = "p { color:red }"
1735                 out = apply_styling(html, css)
1736                 assert 'p style="color' in out
1737
1738             @pytest.mark.styling
1739             def test_apply_no_stylesheet(self, const1):
1740                 out = apply_styling(const1, None)
1741
1742             @pytest.mark.massage
1743             @pytest.mark.styling
1744             def test_massage_styling_to_converter(self):
1745                 css = "p { color:red }"
1746                 css_applied = []
1747
1748                 def converter(draft_f, css_f, **kwargs):
1749                     css = css_f.read()
1750                     css_applied.append(css)
1751                     return Part("text", "plain", draft_f.path, orig=True)
1752
1753                 with (
1754                     File() as draft_f,
1755                     File(mode="w") as cmd_f,
1756                     File(content=css) as css_f
1757                 ):
1758                     do_massage(
1759                         draft_f=draft_f,
1760                         cmd_f=cmd_f,
1761                         css_f=css_f,
1762                         converter=converter,
1763                     )
1764                 assert css_applied[0] == css
1765
1766             @pytest.mark.converter
1767             @pytest.mark.styling
1768             def test_converter_apply_styles(
1769                 self, const1, monkeypatch, fakepath, fakefilefactory
1770             ):
1771                 css = "p { color:red }"
1772                 with (
1773                     monkeypatch.context() as m,
1774                     fakefilefactory(fakepath, content=const1) as draft_f,
1775                     fakefilefactory(content=css) as css_f,
1776                 ):
1777                     m.setattr(
1778                         markdown.Markdown,
1779                         "convert",
1780                         lambda s, t: f"<p>{t}</p>",
1781                     )
1782                     convert_markdown_to_html(
1783                         draft_f, css_f=css_f, filefactory=fakefilefactory
1784                     )
1785                 assert re.search(
1786                     r"color:.*red",
1787                     fakefilefactory[fakepath.with_suffix(".html")].read(),
1788                 )
1789
1790         if _PYGMENTS_CSS:
1791
1792             @pytest.mark.styling
1793             def test_apply_stylesheet_pygments(self):
1794                 html = (
1795                     f'<div class="{_CODEHILITE_CLASS}">'
1796                     "<pre>def foo():\n    return</pre></div>"
1797                 )
1798                 out = apply_styling(html, _PYGMENTS_CSS)
1799                 assert f'{_CODEHILITE_CLASS}" style="' in out
1800
1801         @pytest.mark.sig
1802         def test_signature_extraction_no_signature(self, const1):
1803             assert (const1, None, None) == extract_signature(const1)
1804
1805         @pytest.mark.sig
1806         def test_signature_extraction_just_text(self, const1, const2):
1807             origtext, textsig, htmlsig = extract_signature(
1808                 f"{const1}{EMAIL_SIG_SEP}{const2}"
1809             )
1810             assert origtext == const1
1811             assert textsig == const2
1812             assert htmlsig is None
1813
1814         @pytest.mark.sig
1815         def test_signature_extraction_html(
1816             self, fakepath, fakefilefactory, const1, const2
1817         ):
1818             sigconst = "HTML signature from {path} but as a string"
1819             sig = f'<div id="signature">{sigconst.format(path=fakepath)}</div>'
1820
1821             sig_f = fakefilefactory(fakepath, content=sig)
1822
1823             origtext, textsig, htmlsig = extract_signature(
1824                 f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {fakepath}\n{const2}",
1825                 filefactory=fakefilefactory,
1826             )
1827             assert origtext == const1
1828             assert textsig == const2
1829             assert htmlsig == sigconst.format(path=fakepath)
1830
1831         @pytest.mark.sig
1832         def test_signature_extraction_file_not_found(self, fakepath, const1):
1833             with pytest.raises(FileNotFoundError):
1834                 origtext, textsig, htmlsig = extract_signature(
1835                     f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}"
1836                 )
1837
1838         @pytest.mark.imgproc
1839         def test_image_registry(self, const1):
1840             reg = ImageRegistry()
1841             cid = reg.register(const1)
1842             assert "@" in cid
1843             assert not cid.startswith("<")
1844             assert not cid.endswith(">")
1845             assert const1 in reg
1846
1847         @pytest.mark.imgproc
1848         def test_image_registry_file_uri(self, const1):
1849             reg = ImageRegistry()
1850             reg.register("/some/path")
1851             for path in reg:
1852                 assert path.startswith("file://")
1853                 break
1854
1855         @pytest.mark.converter
1856         @pytest.mark.sig
1857         def test_converter_signature_handling(
1858             self, fakepath, fakefilefactory, monkeypatch
1859         ):
1860             mailparts = (
1861                 "This is the mail body\n",
1862                 f"{EMAIL_SIG_SEP}",
1863                 "This is a plain-text signature only",
1864             )
1865
1866             with (
1867                 fakefilefactory(
1868                     fakepath, content="".join(mailparts)
1869                 ) as draft_f,
1870                 monkeypatch.context() as m,
1871             ):
1872                 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1873                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1874
1875             soup = bs4.BeautifulSoup(
1876                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1877                 "html.parser",
1878             )
1879             body = soup.body.contents
1880
1881             assert mailparts[0] in body.pop(0)
1882
1883             sig = soup.select_one("#signature")
1884             assert sig == body.pop(0)
1885
1886             sep = sig.select_one("span.sig_separator")
1887             assert sep == sig.contents[0]
1888             assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1889
1890             assert mailparts[2] in sig.contents[1]
1891
1892         @pytest.mark.converter
1893         @pytest.mark.sig
1894         def test_converter_signature_handling_htmlsig(
1895             self, fakepath, fakepath2, fakefilefactory, monkeypatch
1896         ):
1897             mailparts = (
1898                 "This is the mail body",
1899                 f"{EMAIL_SIG_SEP}",
1900                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1901                 "This is the plain-text version",
1902             )
1903             htmlsig = "HTML Signature from {path} but as a string"
1904             html = (
1905                 f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
1906             )
1907
1908             sig_f = fakefilefactory(fakepath2, content=html)
1909
1910             def mdwn_fn(t):
1911                 return t.upper()
1912
1913             with (
1914                 fakefilefactory(
1915                     fakepath, content="".join(mailparts)
1916                 ) as draft_f,
1917                 monkeypatch.context() as m,
1918             ):
1919                 m.setattr(
1920                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1921                 )
1922                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1923
1924             soup = bs4.BeautifulSoup(
1925                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1926                 "html.parser",
1927             )
1928             sig = soup.select_one("#signature")
1929             sig.span.extract()
1930
1931             assert HTML_SIG_MARKER not in sig.text
1932             assert htmlsig.format(path=fakepath2) == sig.text.strip()
1933
1934             plaintext = fakefilefactory[fakepath].read()
1935             assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
1936
1937         @pytest.mark.converter
1938         @pytest.mark.sig
1939         def test_converter_signature_handling_htmlsig_with_image(
1940             self, fakepath, fakepath2, fakefilefactory, monkeypatch, test_png
1941         ):
1942             mailparts = (
1943                 "This is the mail body",
1944                 f"{EMAIL_SIG_SEP}",
1945                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1946                 "This is the plain-text version",
1947             )
1948             htmlsig = (
1949                 "HTML Signature from {path} with image\n"
1950                 f'<img src="{test_png}">\n'
1951             )
1952             html = (
1953                 f'<div id="signature">{htmlsig.format(path=fakepath2)}</div>'
1954             )
1955
1956             sig_f = fakefilefactory(fakepath2, content=html)
1957
1958             def mdwn_fn(t):
1959                 return t.upper()
1960
1961             with (
1962                 fakefilefactory(
1963                     fakepath, content="".join(mailparts)
1964                 ) as draft_f,
1965                 monkeypatch.context() as m,
1966             ):
1967                 m.setattr(
1968                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1969                 )
1970                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1971
1972             assert fakefilefactory.pop()[0].suffix == ".png"
1973
1974             soup = bs4.BeautifulSoup(
1975                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1976                 "html.parser",
1977             )
1978             assert soup.img.attrs["src"].startswith("cid:")
1979
1980         @pytest.mark.converter
1981         @pytest.mark.sig
1982         def test_converter_signature_handling_textsig_with_image(
1983             self, fakepath, fakefilefactory, test_png
1984         ):
1985             mailparts = (
1986                 "This is the mail body",
1987                 f"{EMAIL_SIG_SEP}",
1988                 "This is the plain-text version with image\n",
1989                 f"![Inline]({test_png})",
1990             )
1991             with (
1992                 fakefilefactory(
1993                     fakepath, content="".join(mailparts)
1994                 ) as draft_f,
1995             ):
1996                 tree = convert_markdown_to_html(
1997                     draft_f, filefactory=fakefilefactory
1998                 )
1999
2000             assert tree.subtype == "relative"
2001             assert tree.children[0].subtype == "alternative"
2002             assert tree.children[1].subtype == "png"
2003             written = fakefilefactory.pop()
2004             assert tree.children[1].path == written[0]
2005             assert written[1].read() == request.urlopen(test_png).read()
2006
2007         @pytest.mark.converter
2008         def test_converter_attribution_to_admonition(
2009             self, fakepath, fakefilefactory
2010         ):
2011             mailparts = (
2012                 "Regarding whatever",
2013                 "> blockquote line1",
2014                 "> blockquote line2",
2015                 "> ",
2016                 "> new para with **bold** text",
2017             )
2018             with fakefilefactory(
2019                 fakepath, content="\n".join(mailparts)
2020             ) as draft_f:
2021                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2022
2023             soup = bs4.BeautifulSoup(
2024                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2025                 "html.parser",
2026             )
2027             quote = soup.select_one("div.admonition.quote")
2028             assert quote
2029             assert (
2030                 soup.select_one("p.admonition-title").extract().text.strip()
2031                 == mailparts[0]
2032             )
2033
2034             p = quote.p.extract()
2035             assert p.text.strip() == "\n".join(p[2:] for p in mailparts[1:3])
2036
2037             p = quote.p.extract()
2038             assert p.contents[1].name == "strong"
2039
2040         @pytest.mark.converter
2041         def test_converter_attribution_to_admonition_multiple(
2042             self, fakepath, fakefilefactory
2043         ):
2044             mailparts = (
2045                 "Regarding whatever",
2046                 "> blockquote line1",
2047                 "> blockquote line2",
2048                 "",
2049                 "Normal text",
2050                 "",
2051                 "> continued emailquote",
2052                 "",
2053                 "Another email-quote",
2054                 "> something",
2055             )
2056             with fakefilefactory(
2057                 fakepath, content="\n".join(mailparts)
2058             ) as draft_f:
2059                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2060
2061             soup = bs4.BeautifulSoup(
2062                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2063                 "html.parser",
2064             )
2065             quote = soup.select_one("div.admonition.quote.continued").extract()
2066             assert quote
2067             assert (
2068                 quote.select_one("p.admonition-title").extract().text.strip()
2069                 == mailparts[0]
2070             )
2071
2072             p = quote.p.extract()
2073             assert p
2074
2075             quote = soup.select_one("div.admonition.quote.continued").extract()
2076             assert quote
2077             assert (
2078                 quote.select_one("p.admonition-title").extract().text.strip()
2079                 == mailparts[-2]
2080             )
2081
2082         @pytest.mark.fileio
2083         def test_file_class_contextmanager(self, const1, monkeypatch):
2084             state = dict(o=False, c=False)
2085
2086             def fn(t):
2087                 state[t] = True
2088
2089             with monkeypatch.context() as m:
2090                 m.setattr(File, "open", lambda s: fn("o"))
2091                 m.setattr(File, "close", lambda s: fn("c"))
2092                 with File() as f:
2093                     assert state["o"]
2094                     assert not state["c"]
2095             assert state["c"]
2096
2097         @pytest.mark.fileio
2098         def test_file_class_no_path(self, const1):
2099             with File(mode="w+") as f:
2100                 f.write(const1, cache=False)
2101                 assert f.read(cache=False) == const1
2102
2103         @pytest.mark.fileio
2104         def test_file_class_path(self, const1, tmp_path):
2105             with File(tmp_path / "file", mode="w+") as f:
2106                 f.write(const1, cache=False)
2107                 assert f.read(cache=False) == const1
2108
2109         @pytest.mark.fileio
2110         def test_file_class_path_no_exists(self, fakepath):
2111             with pytest.raises(FileNotFoundError):
2112                 File(fakepath, mode="r").open()
2113
2114         @pytest.mark.fileio
2115         def test_file_class_cache(self, tmp_path, const1, const2):
2116             path = tmp_path / "file"
2117             file = File(path, mode="w+")
2118             with file as f:
2119                 f.write(const1, cache=True)
2120             with open(path, mode="w") as f:
2121                 f.write(const2)
2122             with file as f:
2123                 assert f.read(cache=True) == const1
2124
2125         @pytest.mark.fileio
2126         def test_file_class_cache_init(self, const1):
2127             file = File(path=None, mode="r", content=const1)
2128             with file as f:
2129                 assert f.read() == const1
2130
2131         @pytest.mark.fileio
2132         def test_file_class_content_or_path(self, fakepath, const1):
2133             with pytest.raises(RuntimeError):
2134                 file = File(path=fakepath, content=const1)
2135
2136         @pytest.mark.fileio
2137         def test_file_class_content_needs_read(self, const1):
2138             with pytest.raises(RuntimeError):
2139                 file = File(mode="w", content=const1)
2140
2141         @pytest.mark.fileio
2142         def test_file_class_write_persists_close(self, const1):
2143             f = File(mode="w+")
2144             with f:
2145                 f.write(const1)
2146             with f:
2147                 assert f.read() == const1
2148
2149         @pytest.mark.fileio
2150         def test_file_class_write_resets_read_cache(self, const1, const2):
2151             with File(mode="w+", content=const1) as f:
2152                 assert f.read() == const1
2153                 f.write(const2)
2154                 assert f.read() == const2
2155
2156         @pytest.mark.fileio
2157         def test_file_factory(self):
2158             fact = FileFactory()
2159             f = fact()
2160             assert isinstance(f, File)
2161             assert len(fact) == 1
2162             assert f in fact
2163             assert f == fact[0]
2164
2165         @pytest.mark.fileio
2166         def test_fake_file_factory(self, fakepath, fakefilefactory):
2167             fact = FakeFileFactory()
2168             f = fakefilefactory(fakepath)
2169             assert f.path == fakepath
2170             assert f == fakefilefactory[fakepath]
2171
2172         @pytest.mark.fileio
2173         def test_fake_file_factory_path_persistence(
2174             self, fakepath, fakefilefactory
2175         ):
2176             f1 = fakefilefactory(fakepath)
2177             assert f1 == fakefilefactory(fakepath)
2178
2179 except ImportError:
2180     pass