All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
4 # alternative conversion, and handling of inline images, using NeoMutt's
5 # ability to manually craft MIME trees, but automating this process.
8 # neomuttrc (needs to be a single line):
9 # set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
11 # <enter-command> source '$my_confdir/buildmimetree.py \
12 # --tempdir $tempdir --extensions $my_mdwn_extensions \
13 # --css-file $my_confdir/htmlmail.css |'<enter>\
14 # <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
15 # " "Convert message into a modern MIME tree with inline images"
17 # (Yes, we need to call source twice, as mutt only starts to process output
18 # from a source command when the command exits, and since we need to react
19 # to the output, we need to be invoked again, using a $my_ variable to pass
25 # - python3-beautifulsoup4
28 # - Pynliner, provides --css-file and thus inline styling of HTML output
29 # - Pygments, then syntax highlighting for fenced code is enabled
32 # https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
34 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
35 # Released under the GPL-2+ licence, just like Mutt itself.
48 from collections import namedtuple, OrderedDict
49 from markdown.extensions import Extension
50 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
51 from email.utils import make_msgid
52 from urllib import request
55 def parse_cli_args(*args, **kwargs):
56 parser = argparse.ArgumentParser(
58 "NeoMutt helper to turn text/markdown email parts "
59 "into full-fledged MIME trees"
63 "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
64 "Released under the MIT licence"
69 metavar="EXT[,EXT[,EXT]]",
72 help="Markdown extension to use (comma-separated list)",
81 help="CSS file to merge with the final HTML",
84 parser.set_defaults(css_file=None)
87 "--related-to-html-only",
89 help="Make related content be sibling to HTML parts only",
92 def positive_integer(value):
100 raise ValueError("Must be a positive integer")
103 "--max-number-other-attachments",
105 type=positive_integer,
107 help="Maximum number of other attachments to expect",
114 help="Only build, don't send the message",
121 help="Specify temporary directory to use for attachments",
127 help="Turn on debug logging of commands generated to stderr",
133 help="Turn on debugging to stderr of the MIME tree walk",
136 subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
137 massage_p = subp.add_parser(
138 "massage", help="Massaging phase (internal use)"
141 massage_p.add_argument(
142 "--write-commands-to",
148 help="Temporary file path to write commands to",
151 massage_p.add_argument(
155 help="If provided, the script is invoked as editor on the mail draft",
158 return parser.parse_args(*args, **kwargs)
161 # [ IMAGE HANDLING ] ##########################################################
164 InlineImageInfo = namedtuple(
165 "InlineImageInfo", ["cid", "desc"], defaults=[None]
171 self._images = OrderedDict()
173 def register(self, path, description=None):
174 path = os.path.expanduser(path)
175 if path.startswith("/"):
176 path = f"file://{path}"
177 cid = make_msgid()[1:-1]
178 self._images[path] = InlineImageInfo(cid, description)
182 return self._images.__iter__()
184 def __getitem__(self, idx):
185 return self._images.__getitem__(idx)
188 return self._images.__len__()
191 return self._images.items()
194 return f"<ImageRegistry(items={len(self._images)})>"
197 return self._images.__str__()
200 class InlineImageExtension(Extension):
201 class RelatedImageInlineProcessor(ImageInlineProcessor):
202 def __init__(self, re, md, registry):
203 super().__init__(re, md)
204 self._registry = registry
206 def handleMatch(self, m, data):
207 el, start, end = super().handleMatch(m, data)
208 if "src" in el.attrib:
209 src = el.attrib["src"]
210 if "://" not in src or src.startswith("file://"):
211 # We only inline local content
212 cid = self._registry.register(
214 el.attrib.get("title", el.attrib.get("alt")),
216 el.attrib["src"] = f"cid:{cid}"
217 return el, start, end
219 def __init__(self, registry):
221 self._image_registry = registry
223 INLINE_PATTERN_NAME = "image_link"
225 def extendMarkdown(self, md):
226 md.registerExtension(self)
227 inline_image_proc = self.RelatedImageInlineProcessor(
228 IMAGE_LINK_RE, md, self._image_registry
230 md.inlinePatterns.register(
231 inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
235 def markdown_with_inline_image_support(
241 extension_configs=None,
244 image_registry if image_registry is not None else ImageRegistry()
246 inline_image_handler = InlineImageExtension(registry=registry)
247 extensions = extensions or []
248 extensions.append(inline_image_handler)
249 mdwn = markdown.Markdown(
250 extensions=extensions, extension_configs=extension_configs
253 htmltext = mdwn.convert(text)
255 def replace_image_with_cid(matchobj):
256 for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
258 return f"(cid:{registry[m].cid}"
259 return matchobj.group(0)
261 text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
262 return text, htmltext, registry, mdwn
265 # [ CSS STYLING ] #############################################################
277 from pygments.formatters import get_formatter_by_name
279 _CODEHILITE_CLASS = "codehilite"
281 _PYGMENTS_CSS = get_formatter_by_name(
282 "html", style="default"
283 ).get_style_defs(f".{_CODEHILITE_CLASS}")
289 def apply_styling(html, css):
293 .with_cssString("\n".join(s for s in [_PYGMENTS_CSS, css] if s))
298 # [ PARTS GENERATION ] ########################################################
304 ["type", "subtype", "path", "desc", "cid", "orig"],
305 defaults=[None, None, False],
309 ret = f"<{self.type}/{self.subtype}>"
311 ret = f"{ret} cid:{self.cid}"
313 ret = f"{ret} ORIGINAL"
318 namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
321 return f"<multipart/{self.subtype}> children={len(self.children)}"
324 return hash(str(self.subtype) + "".join(str(self.children)))
327 def filereader_fn(path, mode="r", **kwargs):
328 with open(path, mode, **kwargs) as in_f:
332 def filewriter_fn(path, content, mode="w", **kwargs):
333 with open(path, mode, **kwargs) as out_f:
337 def collect_inline_images(
338 image_registry, *, tempdir=None, filewriter_fn=filewriter_fn
341 for path, info in image_registry.items():
342 if path.startswith("cid:"):
345 data = request.urlopen(path)
347 mimetype = data.headers["Content-Type"]
348 ext = mimetypes.guess_extension(mimetype)
349 tempfilename = tempfile.mkstemp(prefix="img", suffix=ext, dir=tempdir)
350 path = pathlib.Path(tempfilename[1])
352 filewriter_fn(path, data.read(), "w+b")
355 f'Inline image: "{info.desc}"'
357 else f"Inline image {str(len(relparts)+1)}"
360 Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
366 EMAIL_SIG_SEP = "\n-- \n"
367 HTML_SIG_MARKER = "=htmlsig "
370 def make_html_doc(body, sig=None):
375 '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n' # noqa: E501
376 '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n' # noqa: E501
385 f'{ret}<div id="signature"><span class="sig_separator">{EMAIL_SIG_SEP.strip(nl)}</span>\n' # noqa: E501
390 return f"{ret}\n </body>\n</html>"
def make_text_mail(text, sig=None):
    """
    Assemble the plain-text mail body.

    When `sig` is falsy (None or empty), `text` is returned unchanged.
    Otherwise `text` and `sig` are joined with EMAIL_SIG_SEP between them.
    """
    if not sig:
        return text
    return EMAIL_SIG_SEP.join([text, sig])
397 def extract_signature(text, *, filereader_fn=filereader_fn):
398 parts = text.split(EMAIL_SIG_SEP, 1)
400 return text, None, None
402 lines = parts[1].splitlines()
403 if lines[0].startswith(HTML_SIG_MARKER):
404 path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
405 textsig = "\n".join(lines)
407 sig_input = filereader_fn(path.expanduser())
408 soup = bs4.BeautifulSoup(sig_input, "html.parser")
410 style = str(soup.style.extract()) if soup.style else ""
411 for sig_selector in (
421 sig = soup.select_one(sig_selector)
426 return parts[0], textsig, style + sig_input
428 if sig.attrs.get("id") == "signature":
429 sig = "".join(str(c) for c in sig.children)
431 return parts[0], textsig, style + str(sig)
433 return parts[0], parts[1], None
436 def convert_markdown_to_html(
440 related_to_html_only=False,
442 filewriter_fn=filewriter_fn,
443 filereader_fn=filereader_fn,
446 extension_configs=None,
448 # TODO extension_configs need to be handled differently
449 extension_configs = extension_configs or {}
450 extension_configs.setdefault("pymdownx.highlight", {})
451 extension_configs["pymdownx.highlight"]["css_class"] = _CODEHILITE_CLASS
453 origtext, textsig, htmlsig = extract_signature(
454 origtext, filereader_fn=filereader_fn
462 ) = markdown_with_inline_image_support(
463 origtext, extensions=extensions, extension_configs=extension_configs
468 # TODO: decide what to do if there is no plain-text version
469 raise NotImplementedError("HTML signature but no text alternative")
471 soup = bs4.BeautifulSoup(htmlsig, "html.parser")
472 for img in soup.find_all("img"):
473 uri = img.attrs["src"]
474 desc = img.attrs.get("title", img.attrs.get("alt"))
475 cid = image_registry.register(uri, desc)
476 img.attrs["src"] = f"cid:{cid}"
486 ) = markdown_with_inline_image_support(
488 extensions=extensions,
489 extension_configs=extension_configs,
490 image_registry=image_registry,
494 origtext = make_text_mail(origtext, textsig)
496 filewriter_fn(draftpath, origtext, encoding="utf-8")
498 "text", "plain", draftpath, "Plain-text version", orig=True
501 htmltext = make_html_doc(htmltext, htmlsig)
502 htmltext = apply_styling(htmltext, css)
504 htmlpath = draftpath.with_suffix(".html")
506 htmlpath, htmltext, encoding="utf-8", errors="xmlcharrefreplace"
508 htmlpart = Part("text", "html", htmlpath, "HTML version")
510 imgparts = collect_inline_images(
511 image_registry, tempdir=tempdir, filewriter_fn=filewriter_fn
514 if related_to_html_only:
515 # If there are inline image parts, they will be contained within a
516 # multipart/related part along with the HTML part only
518 # replace htmlpart with a multipart/related container of the HTML
519 # parts and the images
520 htmlpart = Multipart(
521 "relative", [htmlpart] + imgparts, "Group of related content"
525 "alternative", [textpart, htmlpart], "Group of alternative content"
529 # If there are inline image parts, they will be siblings to the
530 # multipart/alternative tree within a multipart/related part
532 "alternative", [textpart, htmlpart], "Group of alternative content"
536 "relative", [altpart] + imgparts, "Group of related content"
542 class MIMETreeDFWalker:
543 def __init__(self, *, visitor_fn=None, debug=False):
544 self._visitor_fn = visitor_fn or self._echovisit
547 def _echovisit(self, node, ancestry, debugprint):
548 debugprint(f"node={node} ancestry={ancestry}")
550 def walk(self, root, *, visitor_fn=None):
552 Recursive function to implement a depth-first walk of the MIME-tree
555 if isinstance(root, list):
557 root = Multipart("mixed", children=root)
565 visitor_fn=visitor_fn or self._visitor_fn,
568 def _walk(self, node, *, ancestry, descendents, visitor_fn):
569 # Let's start by enumerating the parts at the current level. At the
570 # root level, ancestry will be the empty list, and we expect a
571 # multipart/* container at this level. Later, e.g. within a
572 # multipart/alternative container, the subtree will just be the
573 # alternative parts, while the top of the ancestry will be the
574 # multipart/alternative container, which we will process after the
577 lead = f"{'│ '*len(ancestry)}"
578 if isinstance(node, Multipart):
580 f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
583 # Depth-first, so push the current container onto the ancestry
584 # stack, then descend …
585 ancestry.append(node)
586 self.debugprint(lead + "│ " * 2)
587 for child in node.children:
591 descendents=descendents,
592 visitor_fn=visitor_fn,
594 assert ancestry.pop() == node
595 sibling_descendents = descendents
596 descendents.extend(node.children)
599 self.debugprint(f"{lead}├{node}")
600 sibling_descendents = descendents
602 if False and ancestry:
603 self.debugprint(lead[:-1] + " │")
607 node, ancestry, sibling_descendents, debugprint=self.debugprint
610 def debugprint(self, s, **kwargs):
612 print(s, file=sys.stderr, **kwargs)
615 # [ RUN MODES ] ###############################################################
620 Stupid class to interface writing out Mutt commands. This is quite a hack
621 to deal with the fact that Mutt runs "push" commands in reverse order, so
622 all of a sudden, things become very complicated when mixing with "real"
625 Hence we keep two sets of commands, and one set of pushes. Commands are
626 added to the first until a push is added, after which commands are added to
627 the second set of commands.
629 On flush(), the first set is printed, followed by the pushes in reverse,
630 and then the second set is printed. All 3 sets are then cleared.
633 def __init__(self, out_f=sys.stdout, *, debug=False):
634 self._cmd1, self._push, self._cmd2 = [], [], []
646 s = s.replace('"', r"\"")
649 self._push.insert(0, s)
653 "\n".join(self._cmd1 + self._push + self._cmd2), file=self._out_f
655 self._cmd1, self._push, self._cmd2 = [], [], []
657 def debugprint(self, s, **kwargs):
659 print(s, file=sys.stderr, **kwargs)
667 debug_commands=False,
669 temppath = temppath or pathlib.Path(
670 tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)[1]
672 cmds = MuttCommands(out_f, debug=debug_commands)
674 editor = f"{' '.join(sys.argv)} massage --write-commands-to {temppath}"
676 cmds.cmd('set my_editor="$editor"')
677 cmds.cmd('set my_edit_headers="$edit_headers"')
678 cmds.cmd(f'set editor="{editor}"')
679 cmds.cmd("unset edit_headers")
680 cmds.cmd(f"set my_mdwn_postprocess_cmd_file={temppath}")
681 cmds.push("<first-entry><edit-file>")
692 converter=convert_markdown_to_html,
693 related_to_html_only=True,
695 max_other_attachments=20,
697 debug_commands=False,
700 # Here's the big picture: we're being invoked as the editor on the email
701 # draft, and whatever commands we write to the file given as cmdpath will
702 # be run by the second source command in the macro definition.
704 # Let's start by cleaning up what the setup did (see above), i.e. we
705 # restore the $editor and $edit_headers variables, and also unset the
706 # variable used to identify the command file we're currently writing
708 cmds = MuttCommands(cmd_f, debug=debug_commands)
709 cmds.cmd('set editor="$my_editor"')
710 cmds.cmd('set edit_headers="$my_edit_headers"')
711 cmds.cmd("unset my_editor")
712 cmds.cmd("unset my_edit_headers")
714 # let's flush those commands, as there'll be a lot of pushes from now
715 # on, which need to be run in reverse order
718 extensions = extensions.split(",") if extensions else []
722 css=css_f.read() if css_f else None,
723 related_to_html_only=related_to_html_only,
725 extensions=extensions,
728 mimetree = MIMETreeDFWalker(debug=debug_walk)
730 state = dict(pos=1, tags={}, parts=1)
732 def visitor_fn(item, ancestry, descendents, *, debugprint=None):
734 Visitor function called for every node (part) of the MIME tree,
735 depth-first, and responsible for telling NeoMutt how to assemble
738 KILL_LINE = r"\Ca\Ck"
740 if isinstance(item, Part):
741 # We've hit a leaf-node, i.e. an alternative or a related part
742 # with actual content.
746 # The original source already exists in the NeoMutt tree, but
747 # the underlying file may have been modified, so we need to
748 # update the encoding, but that's it:
749 cmds.push("<first-entry>")
750 cmds.push("<update-encoding>")
752 # We really just need to be able to assume that at this point,
753 # NeoMutt is at position 1, and that we've processed only this
754 # part so far. Nevermind about actual attachments, we can
755 # safely ignore those as they stay at the end.
756 assert state["pos"] == 1
757 assert state["parts"] == 1
759 # … whereas all other parts need to be added, and they're all
760 # considered to be temporary and inline:
761 cmds.push(f"<attach-file>{item.path}<enter>")
762 cmds.push("<toggle-unlink><toggle-disposition>")
764 # This added a part at the end of the list of parts, and that's
765 # just how many parts we've seen so far, so its position in
766 # the NeoMutt compose list is the count of parts
768 state["pos"] = state["parts"]
770 # If the item (including the original) comes with additional
771 # information, then we might just as well update the NeoMutt
774 cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")
776 # Now for the biggest hack in this script, which is to handle
777 # attachments, such as PDFs, that aren't related or alternatives.
778 # The problem is that when we add an inline image, it always gets
779 # appended to the list, i.e. inserted *after* other attachments.
780 # Since we don't know the number of attachments, we also cannot
781 # infer the position of the new attachment. Therefore, we bubble
782 # it all the way to the top, only to then move it down again:
783 if state["pos"] > 1: # skip for the first part
784 for i in range(max_other_attachments):
785 # could use any number here, but has to be larger than the
786 # number of possible attachments. The performance
787 # difference of using a high number is negligible.
788 # Bubble up the new part
789 cmds.push("<move-up>")
791 # As we push the part to the right position in the list (i.e.
792 # the last of the subset of attachments this script added), we
793 # must handle the situation that subtrees are skipped by
794 # NeoMutt. Hence, the actual number of positions to move down
795 # is decremented by the number of descendents so far
797 for i in range(1, state["pos"] - len(descendents)):
798 cmds.push("<move-down>")
800 elif isinstance(item, Multipart):
801 # This node has children, but we already visited them (see
802 # above). The tags dictionary of State should contain a list of
803 # their positions in the NeoMutt compose window, so iterate those
804 # and tag the parts there:
805 n_tags = len(state["tags"][item])
806 for tag in state["tags"][item]:
807 cmds.push(f"<jump>{tag}<enter><tag-entry>")
809 if item.subtype == "alternative":
810 cmds.push("<group-alternatives>")
811 elif item.subtype in ("relative", "related"):
812 cmds.push("<group-related>")
813 elif item.subtype == "multilingual":
814 cmds.push("<group-multilingual>")
816 raise NotImplementedError(
817 f"Handling of multipart/{item.subtype} is not implemented"
820 state["pos"] -= n_tags - 1
824 # We should never get here
825 raise RuntimeError(f"Type {type(item)} is unexpected: {item}")
827 # If the item has a description, we might just as well add it
829 cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")
832 # If there's an ancestry, record the current (assumed) position in
833 # the NeoMutt compose window as needed-to-tag by our direct parent
834 # (i.e. the last item of the ancestry)
835 state["tags"].setdefault(ancestry[-1], []).append(state["pos"])
837 lead = "│ " * (len(ancestry) + 1) + "* "
839 f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
840 f"{lead}descendents={[d.subtype for d in descendents]}\n"
841 f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
842 f"{lead}pos={state['pos']}, parts={state['parts']}"
848 # Let's walk the tree and visit every node with our fancy visitor
850 mimetree.walk(tree, visitor_fn=visitor_fn)
853 cmds.push("<send-message>")
855 # Finally, cleanup. Since we're responsible for removing the temporary
856 # file, how's this for a little hack?
858 filename = cmd_f.name
859 except AttributeError:
860 filename = "pytest_internal_file"
861 cmds.cmd(f"source 'rm -f {filename}|'")
862 cmds.cmd("unset my_mdwn_postprocess_cmd_file")
866 # [ CLI ENTRY ] ###############################################################
868 if __name__ == "__main__":
869 args = parse_cli_args()
871 if args.mode is None:
873 tempdir=args.tempdir,
874 debug_commands=args.debug_commands,
877 elif args.mode == "massage":
878 with open(args.MAILDRAFT, "r") as draft_f, open(
880 ) as cmd_f, open(args.css_file, "r") as css_f:
885 extensions=args.extensions,
887 related_to_html_only=args.related_to_html_only,
888 max_other_attachments=args.max_number_other_attachments,
889 only_build=args.only_build,
890 tempdir=args.tempdir,
891 debug_commands=args.debug_commands,
892 debug_walk=args.debug_walk,
896 # [ TESTS ] ###################################################################
900 from io import StringIO
905 return "CONSTANT STRING 1"
909 return "CONSTANT STRING 2"
911 # NOTE: tests using the capsys fixture must specify sys.stdout to the
912 # functions they call, else old stdout is used and not captured
914 @pytest.mark.muttctrl
915 def test_MuttCommands_cmd(self, const1, const2, capsys):
916 "Assert order of commands"
917 cmds = MuttCommands(out_f=sys.stdout)
921 captured = capsys.readouterr()
922 assert captured.out == "\n".join((const1, const2, ""))
924 @pytest.mark.muttctrl
925 def test_MuttCommands_push(self, const1, const2, capsys):
926 "Assert reverse order of pushes"
927 cmds = MuttCommands(out_f=sys.stdout)
931 captured = capsys.readouterr()
934 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
937 @pytest.mark.muttctrl
938 def test_MuttCommands_push_escape(self, const1, const2, capsys):
939 cmds = MuttCommands(out_f=sys.stdout)
940 cmds.push(f'"{const1}"')
942 captured = capsys.readouterr()
943 assert f'"\\"{const1}\\""' in captured.out
945 @pytest.mark.muttctrl
946 def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
947 "Assert reverse order of pushes"
948 cmds = MuttCommands(out_f=sys.stdout)
949 lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
951 cmds.cmd(lines[4 * i + 0])
952 cmds.cmd(lines[4 * i + 1])
953 cmds.push(lines[4 * i + 2])
954 cmds.push(lines[4 * i + 3])
957 captured = capsys.readouterr()
958 lines_out = captured.out.splitlines()
959 assert lines[0] in lines_out[0]
960 assert lines[1] in lines_out[1]
961 assert lines[7] in lines_out[2]
962 assert lines[6] in lines_out[3]
963 assert lines[3] in lines_out[4]
964 assert lines[2] in lines_out[5]
965 assert lines[4] in lines_out[6]
966 assert lines[5] in lines_out[7]
969 def mime_tree_related_to_alternative(self):
983 Part("text", "html", "part.html", desc="HTML"),
988 "text", "png", "logo.png", cid="logo.png", desc="Logo"
995 def mime_tree_related_to_html(self):
1009 Part("text", "html", "part.html", desc="HTML"),
1024 @pytest.mark.treewalk
1025 def test_MIMETreeDFWalker_depth_first_walk(
1026 self, mime_tree_related_to_alternative
1028 mimetree = MIMETreeDFWalker()
1032 def visitor_fn(item, ancestry, descendents, debugprint):
1033 items.append((item, len(ancestry), len(descendents)))
1036 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1038 assert len(items) == 5
1039 assert items[0][0].subtype == "plain"
1040 assert items[0][1] == 2
1041 assert items[0][2] == 0
1042 assert items[1][0].subtype == "html"
1043 assert items[1][1] == 2
1044 assert items[1][2] == 0
1045 assert items[2][0].subtype == "alternative"
1046 assert items[2][1] == 1
1047 assert items[2][2] == 2
1048 assert items[3][0].subtype == "png"
1049 assert items[3][1] == 1
1050 assert items[3][2] == 2
1051 assert items[4][0].subtype == "relative"
1052 assert items[4][1] == 0
1053 assert items[4][2] == 4
1055 @pytest.mark.treewalk
1056 def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1057 mimetree = MIMETreeDFWalker()
1060 def visitor_fn(item, ancestry, descendents, debugprint):
1063 p = Part("text", "plain", const1)
1064 mimetree.walk([p], visitor_fn=visitor_fn)
1065 assert items[-1].subtype == "plain"
1066 mimetree.walk([p, p], visitor_fn=visitor_fn)
1067 assert items[-1].subtype == "mixed"
1069 @pytest.mark.treewalk
1070 def test_MIMETreeDFWalker_visitor_in_constructor(
1071 self, mime_tree_related_to_alternative
1075 def visitor_fn(item, ancestry, descendents, debugprint):
1078 mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1079 mimetree.walk(mime_tree_related_to_alternative)
1080 assert len(items) == 5
def string_io(self, const1, text=None):
    """Return a StringIO holding `text`, or `const1` when `text` is falsy."""
    initial = text if text else const1
    return StringIO(initial)
1086 @pytest.mark.massage
1087 def test_do_massage_basic(self, const1, string_io, capsys):
1092 related_to_html_only,
1096 return Part("text", "plain", draftpath, orig=True)
1102 converter=converter,
1105 captured = capsys.readouterr()
1106 lines = captured.out.splitlines()
1107 assert '="$my_editor"' in lines.pop(0)
1108 assert '="$my_edit_headers"' in lines.pop(0)
1109 assert "unset my_editor" == lines.pop(0)
1110 assert "unset my_edit_headers" == lines.pop(0)
1111 assert "send-message" in lines.pop(0)
1112 assert "update-encoding" in lines.pop(0)
1113 assert "first-entry" in lines.pop(0)
1114 assert "source 'rm -f " in lines.pop(0)
1115 assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1117 @pytest.mark.massage
1118 def test_do_massage_fulltree(
1119 self, string_io, const1, mime_tree_related_to_alternative, capsys
1125 related_to_html_only,
1129 return mime_tree_related_to_alternative
1136 max_other_attachments=max_attachments,
1137 converter=converter,
1140 captured = capsys.readouterr()
1141 lines = captured.out.splitlines()[4:-2]
1142 assert "first-entry" in lines.pop()
1143 assert "update-encoding" in lines.pop()
1144 assert "Plain" in lines.pop()
1145 assert "part.html" in lines.pop()
1146 assert "toggle-unlink" in lines.pop()
1147 for i in range(max_attachments):
1148 assert "move-up" in lines.pop()
1149 assert "move-down" in lines.pop()
1150 assert "HTML" in lines.pop()
1151 assert "jump>1" in lines.pop()
1152 assert "jump>2" in lines.pop()
1153 assert "group-alternatives" in lines.pop()
1154 assert "Alternative" in lines.pop()
1155 assert "logo.png" in lines.pop()
1156 assert "toggle-unlink" in lines.pop()
1157 assert "content-id" in lines.pop()
1158 for i in range(max_attachments):
1159 assert "move-up" in lines.pop()
1160 assert "move-down" in lines.pop()
1161 assert "Logo" in lines.pop()
1162 assert "jump>1" in lines.pop()
1163 assert "jump>4" in lines.pop()
1164 assert "group-related" in lines.pop()
1165 assert "Related" in lines.pop()
1166 assert "send-message" in lines.pop()
1167 assert len(lines) == 0
1170 def fake_filewriter(self):
1175 def __call__(self, path, content, mode="w", **kwargs):
1176 self._writes.append((path, content))
1178 def pop(self, index=-1):
1179 return self._writes.pop(index)
def markdown_non_converter(self, const1, const2):
    """
    Build a two-argument callable (s, text) that performs no conversion and
    simply returns `text` wrapped between `const1` and `const2`.
    """

    def wrap_only(s, text):
        return f"{const1}{text}{const2}"

    return wrap_only
1187 @pytest.mark.converter
1188 def test_converter_tree_basic(self, const1, const2, fake_filewriter):
1189 path = pathlib.Path(const2)
1190 tree = convert_markdown_to_html(
1191 const1, path, filewriter_fn=fake_filewriter
1194 assert tree.subtype == "alternative"
1195 assert len(tree.children) == 2
1196 assert tree.children[0].subtype == "plain"
1197 assert tree.children[0].path == path
1198 assert tree.children[0].orig
1199 assert tree.children[1].subtype == "html"
1200 assert tree.children[1].path == path.with_suffix(".html")
1202 def test_converter_writes(
1208 markdown_non_converter,
1210 path = pathlib.Path(const2)
1212 with monkeypatch.context() as m:
1213 m.setattr(markdown.Markdown, "convert", markdown_non_converter)
1214 convert_markdown_to_html(
1215 const1, path, filewriter_fn=fake_filewriter
1218 assert (path, const1) == fake_filewriter.pop(0)
1219 written = fake_filewriter.pop(0)
1220 assert path.with_suffix(".html") == written[0]
1221 assert const1 in written[1]
1223 @pytest.mark.imgproc
1224 def test_markdown_inline_image_processor(self):
1225 imgpath1 = "file:/path/to/image.png"
1226 imgpath2 = "file:///path/to/image.png?url=params"
1227 imgpath3 = "/path/to/image.png"
1228 text = f"""![inline local image]({imgpath1})
1230 with newline]({imgpath2})
1231 ![image local path]({imgpath3})"""
1232 text, html, images, mdwn = markdown_with_inline_image_support(text)
1234 # local paths have been normalised to URLs:
1235 imgpath3 = f"file://{imgpath3}"
1237 assert 'src="cid:' in html
1238 assert "](cid:" in text
1239 assert len(images) == 3
1240 assert imgpath1 in images
1241 assert imgpath2 in images
1242 assert imgpath3 in images
1243 assert images[imgpath1].cid != images[imgpath2].cid
1244 assert images[imgpath1].cid != images[imgpath3].cid
1245 assert images[imgpath2].cid != images[imgpath3].cid
@pytest.mark.imgproc
def test_markdown_inline_image_processor_title_to_desc(self, const1):
    """The title given in the image link is recorded as the description."""
    image_uri = "file:///path/to/image.png"
    source = f'![inline local image]({image_uri} "{const1}")'
    _, _, registry, _ = markdown_with_inline_image_support(source)
    assert registry[image_uri].desc == const1
@pytest.mark.imgproc
def test_markdown_inline_image_processor_alt_to_desc(self, const1):
    """Without a title, the alt text is recorded as the description."""
    image_uri = "file:///path/to/image.png"
    source = f"![{const1}]({image_uri})"
    _, _, registry, _ = markdown_with_inline_image_support(source)
    assert registry[image_uri].desc == const1
1261 @pytest.mark.imgproc
1262 def test_markdown_inline_image_processor_title_over_alt_desc(
1263 self, const1, const2
1265 imgpath = "file:///path/to/image.png"
1266 text = f'![{const1}]({imgpath} "{const2}")'
1267 text, html, images, mdwn = markdown_with_inline_image_support(text)
1268 assert images[imgpath].desc == const2
1270 @pytest.mark.imgproc
1271 def test_markdown_inline_image_not_external(self):
1272 imgpath = "https://path/to/image.png"
1273 text = f"![inline image]({imgpath})"
1274 text, html, images, mdwn = markdown_with_inline_image_support(text)
1276 assert 'src="cid:' not in html
1277 assert "](cid:" not in text
1278 assert len(images) == 0
1280 @pytest.mark.imgproc
1281 def test_markdown_inline_image_local_file(self):
1282 imgpath = "/path/to/image.png"
1283 text = f"![inline image]({imgpath})"
1284 text, html, images, mdwn = markdown_with_inline_image_support(text)
1286 for k, v in images.items():
1287 assert k == f"file://{imgpath}"
1290 @pytest.mark.imgproc
1291 def test_markdown_inline_image_expanduser(self):
1292 imgpath = pathlib.Path("~/image.png")
1293 text = f"![inline image]({imgpath})"
1294 text, html, images, mdwn = markdown_with_inline_image_support(text)
1296 for k, v in images.items():
1297 assert k == f"file://{imgpath.expanduser()}"
1303 ""
1304 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1307 @pytest.mark.imgproc
1308 def test_markdown_inline_image_processor_base64(self, test_png):
1309 text = f"![1px white inlined]({test_png})"
1310 text, html, images, mdwn = markdown_with_inline_image_support(text)
1312 assert 'src="cid:' in html
1313 assert "](cid:" in text
1314 assert len(images) == 1
1315 assert test_png in images
1317 @pytest.mark.converter
1318 def test_converter_tree_inline_image_base64(
1319 self, test_png, const1, fake_filewriter
1321 text = f"![inline base64 image]({test_png})"
1322 path = pathlib.Path(const1)
1323 tree = convert_markdown_to_html(
1326 filewriter_fn=fake_filewriter,
1327 related_to_html_only=False,
1329 assert tree.subtype == "relative"
1330 assert tree.children[0].subtype == "alternative"
1331 assert tree.children[1].subtype == "png"
1332 written = fake_filewriter.pop()
1333 assert tree.children[1].path == written[0]
1334 assert written[1] == request.urlopen(test_png).read()
@pytest.mark.converter
def test_converter_tree_inline_image_base64_related_to_html(
    self, test_png, const1, fake_filewriter
):
    """With related_to_html_only on, the image nests under the HTML side."""
    tree = convert_markdown_to_html(
        f"![inline base64 image]({test_png})",
        pathlib.Path(const1),
        filewriter_fn=fake_filewriter,
        related_to_html_only=True,
    )

    # Expected layout: alternative(<first child>, relative(<html>, png))
    assert tree.subtype == "alternative"
    related = tree.children[1]
    assert related.subtype == "relative"
    image_part = related.children[1]
    assert image_part.subtype == "png"

    # The last thing written is the image file itself; its payload must
    # equal what the source URI resolves to.
    last_write = fake_filewriter.pop()
    assert image_part.path == last_write[0]
    assert last_write[1] == request.urlopen(test_png).read()
@pytest.mark.converter
def test_converter_tree_inline_image_cid(
    self, const1, fake_filewriter
):
    """An image already given as cid: does not become an image part."""
    tree = convert_markdown_to_html(
        f"![inline base64 image](cid:{const1})",
        pathlib.Path(const1),
        filewriter_fn=fake_filewriter,
        related_to_html_only=False,
    )

    parts = tree.children
    assert len(parts) == 2

    # Neither of the two children may be an image part carrying our cid.
    for part in parts:
        assert part.cid != const1
        assert part.type != "image"
@pytest.mark.imgcoll
def test_inline_image_collection(
    self, test_png, const1, const2, fake_filewriter
):
    """Collecting one registered image writes it and yields a png part."""
    registry = {test_png: InlineImageInfo(cid=const1, desc=const2)}
    collected = collect_inline_images(registry, filewriter_fn=fake_filewriter)

    # What went to the file writer has to be actual PNG data.
    last_write = fake_filewriter.pop()
    assert b"PNG" in last_write[1]

    # The resulting part describes that very file and keeps cid and
    # description from the registry entry.
    image_part = collected[0]
    assert image_part.subtype == "png"
    assert image_part.path == last_write[0]
    assert image_part.cid == const1
    assert image_part.desc.endswith(const2)
@pytest.mark.styling
def test_apply_stylesheet(self):
    """A CSS rule for <p> ends up as an inline style attribute on the tag."""
    styled = apply_styling("<p>Hello, world!</p>", "p { color:red }")
    assert 'p style="color' in styled
# Verifies that the stylesheet text reaches the converter callback: a stub
# converter records the css it is called with, and the final assertion
# compares the first recorded value with the original CSS string.
# NOTE(review): only part of this test is visible in this view (the stub's
# signature, the creation of css_applied and the call that receives
# converter=converter are missing), so which function is being exercised
# is presumably the "massage" entry point named in the test title; confirm.
1399 @pytest.mark.styling
1400 def test_massage_styling_to_converter(self, string_io, const1):
1401 css = "p { color:red }"
# The CSS is wrapped in a file-like object rather than passed as a string.
1402 css_f = StringIO(css)
1410 related_to_html_only,
# Record what the stub was handed so it can be inspected after the call.
1414 css_applied.append(css)
1415 return Part("text", "plain", draftpath, orig=True)
1422 converter=converter,
# The stub must have seen exactly the stylesheet text defined above.
1424 assert css_applied[0] == css
# Verifies that convert_markdown_to_html applies the given CSS: after the
# conversion, the content most recently handed to the file writer must
# contain the normalised declaration "color: red".
# NOTE(review): the statement that installs the lambda below is not visible
# in this view; presumably m.setattr(markdown.Markdown, "convert", ...) as
# in the signature-handling tests, so the HTML is a predictable <p> wrapper.
1426 @pytest.mark.converter
1427 def test_converter_apply_styles(
1428 self, const1, fake_filewriter, monkeypatch
1430 path = pathlib.Path(const1)
1431 text = "Hello, world!"
1432 css = "p { color:red }"
# monkeypatch.context() scopes the patch to this block.
1433 with monkeypatch.context() as m:
# Replacement callable: wraps the input text in a single paragraph.
1437 lambda s, t: f"<p>{t}</p>",
1439 convert_markdown_to_html(
1440 text, path, css=css, filewriter_fn=fake_filewriter
# pop() yields the last write; element [1] is the written content.
1442 assert "color: red" in fake_filewriter.pop()[1]
@pytest.mark.styling
def test_apply_stylesheet_pygments(self):
    """The Pygments stylesheet is inlined onto the codehilite wrapper div."""
    markup = (
        f'<div class="{_CODEHILITE_CLASS}">'
        "<pre>def foo():\n return</pre></div>"
    )
    styled = apply_styling(markup, _PYGMENTS_CSS)

    # The class attribute must now be followed directly by a style attribute.
    assert f'{_CODEHILITE_CLASS}" style="' in styled
# Checks the commands printed to stdout when the converter stub returns the
# mime_tree_related_to_html fixture, i.e. a tree in which the related
# ("relative") group sits inside the alternative group.
# NOTE(review): the converter stub's signature, the call under test and the
# statements that bind `top` are not visible in this view.
1455 @pytest.mark.massage
1456 def test_mime_tree_relative_within_alternative(
1457 self, string_io, const1, capsys, mime_tree_related_to_html
1463 related_to_html_only,
# The stub ignores its input and hands back the prepared fixture tree.
1467 return mime_tree_related_to_html
1473 converter=converter,
1476 captured = capsys.readouterr()
# Drop the first four and last two output lines; only the commands in
# between are inspected.
1477 lines = captured.out.splitlines()[4:-2]
# list.pop() takes from the END, so the assertions below walk the emitted
# commands in reverse order (last printed line is checked first).
1478 assert "first-entry" in lines.pop()
1479 assert "update-encoding" in lines.pop()
1480 assert "Plain" in lines.pop()
1481 assert "part.html" in lines.pop()
1482 assert "toggle-unlink" in lines.pop()
1483 assert "move-up" in lines.pop()
# NOTE(review): `top` is bound on a line not shown here; presumably a loop
# consumes a variable number of move-up lines until a different line turns
# up, which must then be a move-down. Confirm against the full file.
1486 if "move-up" not in top:
1488 assert "move-down" in top
1489 assert "HTML" in lines.pop()
1490 assert "logo.png" in lines.pop()
1491 assert "toggle-unlink" in lines.pop()
1492 assert "content-id" in lines.pop()
1493 assert "move-up" in lines.pop()
# Same pattern as above for the image part.
1496 if "move-up" not in top:
1498 assert "move-down" in top
1499 assert "move-down" in lines.pop()
1500 assert "Logo" in lines.pop()
1501 assert "jump>2" in lines.pop()
1502 assert "jump>3" in lines.pop()
1503 assert "group-related" in lines.pop()
1504 assert "Related" in lines.pop()
1505 assert "jump>1" in lines.pop()
1506 assert "jump>2" in lines.pop()
1507 assert "group-alternative" in lines.pop()
1508 assert "Alternative" in lines.pop()
1509 assert "send-message" in lines.pop()
# Nothing unexpected may be left once every expected command is consumed.
1510 assert len(lines) == 0
# Regression check for the positioning of the logo image when the tree
# contains nested sub-trees (parts described as "Nested plain" and
# "Nested alternative"): after the image has been moved, the next inspected
# line must be the "Logo" description rather than a further move command.
# NOTE(review): the tree construction, the converter stub and the statements
# that bind `top` are not visible in this view.
1512 @pytest.mark.massage
1513 def test_mime_tree_nested_trees_does_not_break_positioning(
1514 self, string_io, const1, capsys
1520 related_to_html_only,
1544 desc="Nested plain",
1553 desc="Nested alternative",
1573 converter=converter,
1576 captured = capsys.readouterr()
1577 lines = captured.out.splitlines()
# lines.pop() consumes from the end; discard lines until the one that
# mentions logo.png has been popped.
1578 while "logo.png" not in lines.pop():
1581 assert "content-id" in lines.pop()
1582 assert "move-up" in lines.pop()
# NOTE(review): `top` is bound on a line not shown here; presumably further
# move-up lines are skipped until a different line appears. Confirm.
1585 if "move-up" not in top:
1587 assert "move-down" in top
1588 # Due to the nested trees, the number of descendents of the sibling
1589 # actually needs to be considered, not just the nieces. So to move
1590 # from position 1 to position 6, it only needs one <move-down>
1591 # because that jumps over the entire sibling tree. Thus what
1592 # follows next must not be another <move-down>
1593 assert "Logo" in lines.pop()
def test_signature_extraction_no_signature(self, const1):
    """Text without a signature separator comes back with no signatures."""
    expected = (const1, None, None)
    assert expected == extract_signature(const1)
def test_signature_extraction_just_text(self, const1, const2):
    """A plain signature is split off at the separator; no HTML variant."""
    message = f"{const1}{EMAIL_SIG_SEP}{const2}"
    body, plain_sig, html_sig = extract_signature(message)

    assert body == const1
    assert plain_sig == const2
    assert html_sig is None
def test_signature_extraction_html(self, const1, const2):
    """A marker line in the signature pulls the HTML variant from a file."""
    path = pathlib.Path("somepath")
    sig_template = "HTML signature from {path} but as a string"

    def fake_reader(path):
        # Stand-in for reading the signature file: the content depends only
        # on the path that was asked for.
        inner = sig_template.format(path=path)
        return f'<div id="signature">{inner}</div>'

    message = f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {path}\n{const2}"
    body, plain_sig, html_sig = extract_signature(
        message, filereader_fn=fake_reader
    )

    assert body == const1
    assert plain_sig == const2
    # Only the content of the #signature element is expected back.
    assert html_sig == sig_template.format(path=path)
def test_signature_extraction_file_not_found(self, const1):
    """A marker that names a missing file raises FileNotFoundError."""
    missing = pathlib.Path("/does/not/exist")
    message = f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{missing}\n{const1}"

    # No filereader_fn is passed, so the default reader is used.
    with pytest.raises(FileNotFoundError):
        extract_signature(message)
@pytest.mark.imgproc
def test_image_registry(self, const1):
    """register() returns a bare Content-ID and records the given key."""
    registry = ImageRegistry()
    content_id = registry.register(const1)

    # The returned id must not be wrapped in angle brackets.
    has_opening_bracket = content_id.startswith("<")
    assert not has_opening_bracket
    has_closing_bracket = content_id.endswith(">")
    assert not has_closing_bracket

    assert const1 in registry
# Registers a plain filesystem path and checks that the value inspected
# afterwards carries a file:// scheme.
# NOTE(review): the statement that binds `path` is not visible in this view;
# presumably it iterates over the registry's keys. Confirm.
1643 @pytest.mark.imgproc
1644 def test_image_registry_file_uri(self, const1):
1645 reg = ImageRegistry()
# The return value of register() is deliberately not used here.
1646 reg.register("/some/path")
1648 assert path.startswith("file://")
# Checks how a plain-text-only signature is rendered into the HTML part:
# the body comes first, followed by a #signature element whose first child
# is the separator span and whose second child contains the signature text.
# NOTE(review): the start of the mailparts sequence, the body of
# filereader_fn and the positional arguments of the conversion call are not
# visible in this view.
1651 @pytest.mark.converter
1653 def test_converter_signature_handling(
1654 self, const1, fake_filewriter, monkeypatch
1656 path = pathlib.Path(const1)
1659 "This is the mail body\n",
1661 "This is a plain-text signature only",
1664 def filereader_fn(path):
1667 with monkeypatch.context() as m:
# Make Markdown conversion the identity so the HTML holds the raw text.
1668 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1669 convert_markdown_to_html(
1672 filewriter_fn=fake_filewriter,
1673 filereader_fn=filereader_fn,
# Parse the content most recently given to the file writer.
1676 soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser")
1677 body = soup.body.contents
# body.pop(0) consumes the children of <body> front to back.
1679 assert mailparts[0] in body.pop(0)
1681 sig = soup.select_one("#signature")
1682 assert sig == body.pop(0)
1684 sep = sig.select_one("span.sig_separator")
1685 assert sep == sig.contents[0]
# The span holds the separator without its surrounding newlines.
1686 assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1688 assert mailparts[2] in sig.contents[1]
# Checks signature handling when the plain-text signature begins with an
# HTML_SIG_MARKER line naming a file: the HTML part must carry the content
# supplied by filereader_fn without the marker, and the plain-text part
# must end with the separator followed by the last mailparts element.
# NOTE(review): the start of the mailparts sequence, the definition of
# mdwn_fn and the positional arguments of the conversion call are not
# visible in this view.
1690 @pytest.mark.converter
1692 def test_converter_signature_handling_htmlsig(
1693 self, const1, fake_filewriter, monkeypatch
1695 path = pathlib.Path(const1)
1698 "This is the mail body",
1700 f"{HTML_SIG_MARKER}{path}\n",
1701 "This is the plain-text version",
# Template; {path} is filled in via str.format by the stub and assertion.
1704 htmlsig = "HTML Signature from {path}"
# Stub reader: wraps the formatted signature in a #signature element.
1706 def filereader_fn(path):
1707 return f'<div id="signature">{htmlsig.format(path=path)}</div>'
1712 with monkeypatch.context() as m:
1714 markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1716 convert_markdown_to_html(
1719 filewriter_fn=fake_filewriter,
1720 filereader_fn=filereader_fn,
# The last write is parsed as HTML; the write before it is the plain text.
1723 soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser")
1724 sig = soup.select_one("#signature")
1727 assert HTML_SIG_MARKER not in sig.text
1728 assert htmlsig.format(path=path) == sig.text.strip()
1730 plaintext = fake_filewriter.pop()[1]
1731 assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
# Checks that an <img> inside an HTML signature is handled like an inline
# image: a file with a .png suffix is written, and the <img> in the HTML
# part points at a cid: URI.
# NOTE(review): the start of the mailparts sequence, the assignment that
# wraps the htmlsig string pieces, the definition of mdwn_fn and the
# positional arguments of the conversion call are not visible in this view.
1733 @pytest.mark.converter
1735 def test_converter_signature_handling_htmlsig_with_image(
1736 self, const1, fake_filewriter, monkeypatch, test_png
1738 path = pathlib.Path(const1)
1741 "This is the mail body",
1743 f"{HTML_SIG_MARKER}{path}\n",
1744 "This is the plain-text version",
# The first piece is a plain string, so {path} stays a placeholder for the
# .format() call below; the second is an f-string that embeds test_png.
1748 "HTML Signature from {path} with image\n"
1749 f'<img src="{test_png}">\n'
1752 def filereader_fn(path):
1753 return f'<div id="signature">{htmlsig.format(path=path)}</div>'
1758 with monkeypatch.context() as m:
1760 markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1762 convert_markdown_to_html(
1765 filewriter_fn=fake_filewriter,
1766 filereader_fn=filereader_fn,
# The last write is the image (element [0] is its path); the one before
# it is parsed as the HTML part.
1769 assert fake_filewriter.pop()[0].suffix == ".png"
1771 soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser")
1772 assert soup.img.attrs["src"].startswith("cid:")
@pytest.mark.converter
def test_converter_signature_handling_textsig_with_image(
    self, const1, fake_filewriter, test_png
):
    """An inline image in a plain-text signature becomes an image part.

    The resulting tree must be relative(alternative, png), and the bytes
    handed to the file writer must equal what the image URI resolves to.
    """
    # NOTE(review): the tuple scaffolding, the separator element and the
    # first call argument were not visible in the reviewed excerpt. They
    # are reconstructed from the sibling signature tests, where
    # mailparts[2] is the signature text and the plain text ends with
    # EMAIL_SIG_SEP + mailparts[-1]. Confirm against the full file.
    mailparts = (
        "This is the mail body",
        EMAIL_SIG_SEP,
        "This is the plain-text version with image\n",
        f"![Inline]({test_png})",
    )

    # The call needs its opening parenthesis: without it the name `tree`
    # would be bound to the function itself and the argument lines that
    # follow would not parse.
    tree = convert_markdown_to_html(
        "".join(mailparts),
        pathlib.Path(const1),
        filewriter_fn=fake_filewriter,
    )

    assert tree.subtype == "relative"
    assert tree.children[0].subtype == "alternative"
    assert tree.children[1].subtype == "png"

    # The last write is the image: element [0] is the path, [1] the bytes.
    written = fake_filewriter.pop()
    assert tree.children[1].path == written[0]
    assert written[1] == request.urlopen(test_png).read()
1799 def test_converter_attribution_to_admonition(self, fake_filewriter):