import pathlib
import markdown
import tempfile
import argparse
import re
import mimetypes
from collections import namedtuple, OrderedDict
from markdown.extensions import Extension
from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
from email.utils import make_msgid
from urllib import request


# [ MARKDOWN WRAPPING ] #######################################################


# Record kept for every inlined image: the Content-ID used to reference it
# from the HTML, and an optional description (defaults to None).
InlineImageInfo = namedtuple(
    "InlineImageInfo", ["cid", "desc"], defaults=[None]
)


class InlineImageExtension(Extension):
    """Markdown extension that points local images at ``cid:`` URLs.

    Every image whose ``src`` is considered local is registered in an
    ordered mapping (normalised source -> ``InlineImageInfo``), and the
    ``src`` attribute of the generated element is replaced with
    ``cid:<content-id>``. The mapping is available via ``get_images()``.
    """

    class RelatedImageInlineProcessor(ImageInlineProcessor):
        """Image processor that delegates Content-ID allocation to *ext*."""

        def __init__(self, re, md, ext):
            super().__init__(re, md)
            self._ext = ext

        def handleMatch(self, m, data):
            """Build the image element, rewriting a local ``src`` to a CID."""
            el, start, end = super().handleMatch(m, data)
            # The parent processor hands back (None, None, None) when it
            # cannot parse the link; pass that through untouched instead
            # of dereferencing ``el``.
            if el is not None and "src" in el.attrib:
                src = el.attrib["src"]
                if "://" not in src or src.startswith("file://"):
                    # We only inline local content
                    cid = self._ext.get_cid_for_image(el.attrib)
                    el.attrib["src"] = f"cid:{cid}"
            return el, start, end

    def __init__(self):
        super().__init__()
        self._images = OrderedDict()

    def extendMarkdown(self, md):
        """Register this extension and its image processor with *md*."""
        md.registerExtension(self)
        inline_image_proc = self.RelatedImageInlineProcessor(
            IMAGE_LINK_RE, md, self
        )
        # Registered as "image_link" at priority 150; presumably this
        # takes the place of the stock processor of the same name.
        md.inlinePatterns.register(inline_image_proc, "image_link", 150)

    def get_cid_for_image(self, attrib):
        """Return the Content-ID for the image described by *attrib*.

        *attrib* is the attribute mapping of the image element; ``src`` is
        required, ``title`` (preferred) or ``alt`` becomes the description.
        Absolute paths are normalised to ``file://`` URLs. An image that
        was seen before keeps its original Content-ID, so that every
        reference in the HTML resolves to the one part that gets attached.
        """
        path = attrib["src"]
        if path.startswith("/"):
            path = f"file://{path}"

        known = self._images.get(path)
        if known is not None:
            return known.cid

        # make_msgid() wraps the id in angle brackets; strip them.
        msgid = make_msgid()[1:-1]
        self._images[path] = InlineImageInfo(
            msgid, attrib.get("title", attrib.get("alt"))
        )
        return msgid

    def get_images(self):
        """Return the mapping of image source to ``InlineImageInfo``."""
        return self._images


def markdown_with_inline_image_support(text, *, extensions=None):
    """Convert *text* to HTML, redirecting local images to ``cid:`` URLs.

    Returns a tuple ``(text, htmltext, images)``: the plain text with
    image targets replaced by their ``cid:`` URL, the rendered HTML, and
    the mapping of image source to ``InlineImageInfo``.

    *extensions* is an optional list of additional Markdown extensions;
    it is copied, never modified.
    """
    inline_image_handler = InlineImageExtension()
    # Build a new list so the caller's list is left alone.
    extensions = [*(extensions or []), inline_image_handler]
    mdwn = markdown.Markdown(extensions=extensions)
    htmltext = mdwn.convert(text)

    images = inline_image_handler.get_images()

    def replace_image_with_cid(matchobj):
        # Try the target as written and in its normalised file:// form.
        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
            if m in images:
                return f"(cid:{images[m].cid}"
        return matchobj.group(0)

    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
    return text, htmltext, images


def filewriter_fn(path, content, mode="w", **kwargs):
    """Write *content* to *path*; *mode* and *kwargs* go to ``open()``."""
    with open(path, mode, **kwargs) as out_f:
        out_f.write(content)


def _ensure_url(path):
    """Return *path* as something ``urlopen`` accepts.

    Anything that already starts with a URL scheme is returned unchanged.
    A bare (relative) path is resolved against the current working
    directory and turned into a ``file://`` URL.
    """
    if re.match(r"[A-Za-z][A-Za-z0-9+.-]*:", path):
        return path
    return pathlib.Path(path).absolute().as_uri()


def collect_inline_images(
    images, *, tempdir=None, filewriter_fn=filewriter_fn
):
    """Fetch every image in *images* and return one ``Part`` per image.

    *images* maps an image source to its ``InlineImageInfo``. The content
    of each image is read, a temporary file named ``img*<ext>`` is created
    in *tempdir*, and the data is handed to *filewriter_fn* in binary
    mode. The returned parts carry the MIME type reported for the image
    together with its Content-ID and description.
    """
    relparts = []
    for source, info in images.items():
        # Close the response as soon as the payload has been read.
        with request.urlopen(_ensure_url(source)) as data:
            # Ask the header object for the bare type so that parameters
            # such as ";charset=..." cannot leak into the subtype.
            maintype = data.headers.get_content_maintype()
            subtype = data.headers.get_content_subtype()
            content = data.read()

        ext = mimetypes.guess_extension(f"{maintype}/{subtype}")
        # Only the unique name is needed here; the context manager closes
        # the descriptor straight away and delete=False keeps the file.
        with tempfile.NamedTemporaryFile(
            prefix="img", suffix=ext, dir=tempdir, delete=False
        ) as tmp:
            imgpath = pathlib.Path(tmp.name)

        filewriter_fn(imgpath, content, "w+b")

        relparts.append(
            Part(maintype, subtype, imgpath, cid=info.cid, desc=info.desc)
        )

    return relparts
def convert_markdown_to_html(
    origtext, draftpath, *, filewriter_fn=filewriter_fn, extensions=None
):
    """Render the Markdown draft and return the resulting MIME tree.

    The plain text (with image targets rewritten) is written to
    *draftpath* and the HTML rendering to the same path with an ``.html``
    suffix, both through *filewriter_fn*. The two are grouped as a
    multipart "alternative". If inline images were collected, that group
    and the image parts are wrapped in an outer multipart; otherwise the
    "alternative" group itself is returned.

    *extensions* is passed on to the Markdown conversion.
    """
    plaintext, html, inline_images = markdown_with_inline_image_support(
        origtext, extensions=extensions
    )

    # Plain-text alternative: this is the original draft file.
    filewriter_fn(draftpath, plaintext, encoding="utf-8")
    alternatives = [
        Part("text", "plain", draftpath, "Plain-text version", orig=True)
    ]

    # HTML alternative, stored next to the draft.
    html_file = draftpath.with_suffix(".html")
    filewriter_fn(
        html_file, html, encoding="utf-8", errors="xmlcharrefreplace"
    )
    alternatives.append(Part("text", "html", html_file, "HTML version"))

    body = Multipart(
        "alternative", alternatives, "Group of alternative content"
    )

    attachments = collect_inline_images(
        inline_images, filewriter_fn=filewriter_fn
    )
    if not attachments:
        return body

    return Multipart(
        "relative", [body, *attachments], "Group of related content"
    )
lines.pop(0) + assert "tag-entry" in lines.pop(0) + assert "Logo" in lines.pop(0) + assert "content-id" in lines.pop(0) + assert "toggle-unlink" in lines.pop(0) + assert "logo.png" in lines.pop(0) + assert "tag-entry" in lines.pop(0) + assert "Alternative" in lines.pop(0) + assert "group-alternatives" in lines.pop(0) + assert "tag-entry" in lines.pop(0) + assert "HTML" in lines.pop(0) + assert "toggle-unlink" in lines.pop(0) + assert "part.html" in lines.pop(0) + assert "tag-entry" in lines.pop(0) + assert "Plain" in lines.pop(0) + assert "update-encoding" in lines.pop(0) + assert len(lines) == 2 + + @pytest.fixture + def fake_filewriter(self): + class FileWriter: + def __init__(self): + self._writes = [] + + def __call__(self, path, content, mode="w", **kwargs): + self._writes.append((path, content)) + + def pop(self, index=-1): + return self._writes.pop(index) + + return FileWriter() + + @pytest.fixture + def markdown_non_converter(self, const1, const2): + return lambda s, text: f"{const1}{text}{const2}" + + def test_converter_tree_basic( + self, const1, const2, fake_filewriter, markdown_non_converter + ): + path = pathlib.Path(const2) + tree = convert_markdown_to_html( + const1, path, filewriter_fn=fake_filewriter + ) + + assert tree.subtype == "alternative" + assert len(tree.children) == 2 + assert tree.children[0].subtype == "plain" + assert tree.children[0].path == path + assert tree.children[0].orig + assert tree.children[1].subtype == "html" + assert tree.children[1].path == path.with_suffix(".html") + + def test_converter_writes( + self, + const1, + const2, + fake_filewriter, + monkeypatch, + markdown_non_converter, + ): + path = pathlib.Path(const2) + + with monkeypatch.context() as m: + m.setattr(markdown.Markdown, "convert", markdown_non_converter) + convert_markdown_to_html( + const1, path, filewriter_fn=fake_filewriter + ) + + assert (path, const1) == fake_filewriter.pop(0) + assert ( + path.with_suffix(".html"), + markdown_non_converter(None, 
const1), + ) == fake_filewriter.pop(0) + + def test_markdown_inline_image_processor(self): + imgpath1 = "file:/path/to/image.png" + imgpath2 = "file:///path/to/image.png?url=params" + imgpath3 = "/path/to/image.png" + text = f"""![inline local image]({imgpath1}) + ![image inlined + with newline]({imgpath2}) + ![image local path]({imgpath3})""" + text, html, images = markdown_with_inline_image_support(text) + + # local paths have been normalised to URLs: + imgpath3 = f"file://{imgpath3}" + + assert 'src="cid:' in html + assert "](cid:" in text + assert len(images) == 3 + assert imgpath1 in images + assert imgpath2 in images + assert imgpath3 in images + assert images[imgpath1].cid != images[imgpath2].cid + assert images[imgpath1].cid != images[imgpath3].cid + assert images[imgpath2].cid != images[imgpath3].cid + + def test_markdown_inline_image_processor_title_to_desc(self, const1): + imgpath = "file:///path/to/image.png" + text = f'![inline local image]({imgpath} "{const1}")' + text, html, images = markdown_with_inline_image_support(text) + assert images[imgpath].desc == const1 + + def test_markdown_inline_image_processor_alt_to_desc(self, const1): + imgpath = "file:///path/to/image.png" + text = f"![{const1}]({imgpath})" + text, html, images = markdown_with_inline_image_support(text) + assert images[imgpath].desc == const1 + + def test_markdown_inline_image_processor_title_over_alt_desc( + self, const1, const2 + ): + imgpath = "file:///path/to/image.png" + text = f'![{const1}]({imgpath} "{const2}")' + text, html, images = markdown_with_inline_image_support(text) + assert images[imgpath].desc == const2 + + def test_markdown_inline_image_not_external(self): + imgpath = "https://path/to/image.png" + text = f"![inline image]({imgpath})" + text, html, images = markdown_with_inline_image_support(text) + + assert 'src="cid:' not in html + assert "](cid:" not in text + assert len(images) == 0 + + def test_markdown_inline_image_local_file(self): + imgpath = 
"/path/to/image.png" + text = f"![inline image]({imgpath})" + text, html, images = markdown_with_inline_image_support(text) + + for k, v in images.items(): + assert k == f"file://{imgpath}" + break + + def test_markdown_inline_image_processor_base64(self): + img = ( + "" + "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA" + ) + text = f"![1px white inlined]({img})" + text, html, images = markdown_with_inline_image_support(text) + + assert 'src="cid:' in html + assert "](cid:" in text + assert len(images) == 1 + assert img in images + + def test_converter_tree_inline_image_base64( + self, const1, fake_filewriter + ): + img = ( + "" + "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA" + ) + text = f"![inline base64 image]({img})" + path = pathlib.Path(const1) + tree = convert_markdown_to_html( + text, path, filewriter_fn=fake_filewriter + ) + + assert tree.subtype == "relative" + assert tree.children[1].subtype == "png" + written = fake_filewriter.pop() + assert tree.children[1].path == written[0] + assert written[1] == request.urlopen(img).read() except ImportError: pass -- 2.39.2