import markdown
import tempfile
import argparse
-from collections import namedtuple
+import re
+import mimetypes
+from collections import namedtuple, OrderedDict
+from markdown.extensions import Extension
+from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
+from email.utils import make_msgid
+from urllib import request
def parse_cli_args(*args, **kwargs):
return parser.parse_args(*args, **kwargs)
+# [ MARKDOWN WRAPPING ] #######################################################
+# Record kept for every inlined image: the Content-ID it is referenced by
+# and an optional description (``desc`` defaults to None).
+InlineImageInfo = namedtuple(
+    "InlineImageInfo", ["cid", "desc"], defaults=[None]
+)
+class InlineImageExtension(Extension):
+    """Markdown extension turning local image sources into ``cid:`` URLs.
+
+    An image is rewritten when its ``src`` contains no ``://`` or starts
+    with ``file://``.  Each rewritten image is recorded in an ordered
+    mapping from its source to an ``InlineImageInfo``, which callers
+    retrieve with ``get_images()``.
+    """
+
+    class RelatedImageInlineProcessor(ImageInlineProcessor):
+        """Image processor that reports rewritten images to the extension."""
+
+        def __init__(self, re, md, ext):
+            # NOTE: the ``re`` parameter is the image-link pattern; it
+            # shadows the ``re`` module inside this method only.
+            super().__init__(re, md)
+            self._ext = ext
+
+        def handleMatch(self, m, data):
+            el, start, end = super().handleMatch(m, data)
+            if el is None:
+                # The base processor yields (None, None, None) when the
+                # candidate does not parse as an image link.
+                return el, start, end
+
+            src = el.attrib.get("src")
+            if src is not None and (
+                "://" not in src or src.startswith("file://")
+            ):
+                # We only inline local content
+                cid = self._ext.get_cid_for_image(el.attrib)
+                el.attrib["src"] = f"cid:{cid}"
+
+            return el, start, end
+
+    def __init__(self):
+        super().__init__()
+        self._images = OrderedDict()
+
+    def extendMarkdown(self, md):
+        """Register this extension and its image processor with ``md``."""
+        md.registerExtension(self)
+        inline_image_proc = self.RelatedImageInlineProcessor(
+            IMAGE_LINK_RE, md, self
+        )
+        md.inlinePatterns.register(inline_image_proc, "image_link", 150)
+
+    def get_cid_for_image(self, attrib):
+        """Return the Content-ID for the image described by ``attrib``.
+
+        Sources starting with ``/`` are stored with a ``file://`` prefix.
+        The description is the ``title`` attribute, falling back to
+        ``alt``.
+        """
+        path = attrib["src"]
+        if path.startswith("/"):
+            path = f"file://{path}"
+
+        known = self._images.get(path)
+        if known is not None:
+            # The same image is referenced again: reuse its Content-ID so
+            # that earlier ``cid:`` references stay resolvable.
+            return known.cid
+
+        # make_msgid() wraps the ID in angle brackets; strip them.
+        msgid = make_msgid()[1:-1]
+        self._images[path] = InlineImageInfo(
+            msgid, attrib.get("title", attrib.get("alt"))
+        )
+        return msgid
+
+    def get_images(self):
+        """Return the mapping of image source to ``InlineImageInfo``."""
+        return self._images
+def markdown_with_inline_image_support(text, *, extensions=None):
+    """Convert Markdown ``text`` to HTML with ``cid:`` image references.
+
+    The same substitution is applied to the Markdown source itself, so
+    that both renderings refer to the images by Content-ID.
+
+    Returns a tuple ``(text, htmltext, images)``: the rewritten Markdown,
+    the generated HTML, and the mapping of image source to
+    ``InlineImageInfo`` collected during conversion.
+    """
+    inline_image_handler = InlineImageExtension()
+    # Work on a copy so the caller's list is not modified.
+    extensions = list(extensions or [])
+    extensions.append(inline_image_handler)
+    mdwn = markdown.Markdown(extensions=extensions)
+    htmltext = mdwn.convert(text)
+
+    images = inline_image_handler.get_images()
+
+    def replace_image_with_cid(matchobj):
+        target = matchobj.group(1)
+        # Images are keyed by source; absolute paths are stored with a
+        # ``file://`` prefix, so try both spellings.
+        for m in (target, f"file://{target}"):
+            if m in images:
+                return f"(cid:{images[m].cid}"
+        # Not a collected image: leave the matched text untouched.
+        return matchobj.group(0)
+
+    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
+    return text, htmltext, images
# [ PARTS GENERATION ] ########################################################
return f"<multipart/{self.subtype}> children={len(self.children)}"
+def filewriter_fn(path, content, mode="w", **kwargs):
+    """Write ``content`` to the file at ``path``.
+
+    ``mode`` and any further keyword arguments are handed to ``open()``.
+    """
+    with open(path, mode, **kwargs) as sink:
+        sink.write(content)
-def convert_markdown_to_html(
- origtext, draftpath, *, filewriter_fn=None, extensions=None
+def collect_inline_images(
+    images, *, tempdir=None, filewriter_fn=filewriter_fn
 ):
-    mdwn = markdown.Markdown(extensions=extensions)
+    """Fetch the given images and describe each one as a ``Part``.
+
+    ``images`` maps an image URL to an ``InlineImageInfo``.  Every URL is
+    opened, a temporary file named ``img*`` is created in ``tempdir``,
+    and the image data is handed to ``filewriter_fn`` for writing in
+    binary mode.
+
+    Returns the list of ``Part`` objects, in the order of ``images``.
+    """
+    relparts = []
+    for source, info in images.items():
+        # Close the response as soon as header and payload are read.
+        with request.urlopen(source) as data:
+            # get_content_type() drops parameters such as "; charset=",
+            # which would otherwise end up in the MIME subtype below.
+            mimetype = data.headers.get_content_type()
+            payload = data.read()
+
+        ext = mimetypes.guess_extension(mimetype)
+        # Reserve a file name; leaving the ``with`` block closes the
+        # handle, so no file descriptor is leaked.
+        with tempfile.NamedTemporaryFile(
+            prefix="img", suffix=ext, dir=tempdir, delete=False
+        ) as tmp:
+            tempfilename = tmp.name
+
+        path = pathlib.Path(tempfilename)
+        filewriter_fn(path, payload, "w+b")
+        relparts.append(
+            Part(
+                *mimetype.split("/"), path, cid=info.cid, desc=info.desc
+            )
+        )
+
+    return relparts
- if not filewriter_fn:
- def filewriter_fn(path, content, mode="w", **kwargs):
- with open(path, mode, **kwargs) as out_f:
- out_f.write(content)
+def convert_markdown_to_html(
+ origtext, draftpath, *, filewriter_fn=filewriter_fn, extensions=None
+ origtext, htmltext, images = markdown_with_inline_image_support(
+ origtext, extensions=extensions
+ )
filewriter_fn(draftpath, origtext, encoding="utf-8")
textpart = Part(
"text", "plain", draftpath, "Plain-text version", orig=True
- htmltext = mdwn.convert(origtext)
htmlpath = draftpath.with_suffix(".html")
htmlpath, htmltext, encoding="utf-8", errors="xmlcharrefreplace"
htmlpart = Part("text", "html", htmlpath, "HTML version")
- logopart = Part(
- "image",
- "png",
- "/usr/share/doc/neomutt/logo/neomutt-256.png",
- "Logo",
- "neomutt-256.png",
+ altpart = Multipart(
+ "alternative", [textpart, htmlpart], "Group of alternative content"
- return Multipart(
- "relative",
- [
- Multipart(
- "alternative",
- [textpart, htmlpart],
- "Group of alternative content",
- ),
- logopart,
- ],
- "Group of related content",
- )
+ imgparts = collect_inline_images(images, filewriter_fn=filewriter_fn)
+ if imgparts:
+ return Multipart(
+ "relative", [altpart] + imgparts, "Group of related content"
+ )
+ else:
+ return altpart
class MIMETreeDFWalker:
captured = capsys.readouterr()
- lines = captured.out.splitlines()[4:][::-1]
- assert "Related" in lines.pop()
- assert "group-related" in lines.pop()
- assert "tag-entry" in lines.pop()
- assert "Logo" in lines.pop()
- assert "content-id" in lines.pop()
- assert "toggle-unlink" in lines.pop()
- assert "logo.png" in lines.pop()
- assert "tag-entry" in lines.pop()
- assert "Alternative" in lines.pop()
- assert "group-alternatives" in lines.pop()
- assert "tag-entry" in lines.pop()
- assert "HTML" in lines.pop()
- assert "toggle-unlink" in lines.pop()
- assert "part.html" in lines.pop()
- assert "tag-entry" in lines.pop()
- assert "Plain" in lines.pop()
+ lines = captured.out.splitlines()[4:]
+ assert "Related" in lines.pop(0)
+ assert "group-related" in lines.pop(0)
+ assert "tag-entry" in lines.pop(0)
+ assert "Logo" in lines.pop(0)
+ assert "content-id" in lines.pop(0)
+ assert "toggle-unlink" in lines.pop(0)
+ assert "logo.png" in lines.pop(0)
+ assert "tag-entry" in lines.pop(0)
+ assert "Alternative" in lines.pop(0)
+ assert "group-alternatives" in lines.pop(0)
+ assert "tag-entry" in lines.pop(0)
+ assert "HTML" in lines.pop(0)
+ assert "toggle-unlink" in lines.pop(0)
+ assert "part.html" in lines.pop(0)
+ assert "tag-entry" in lines.pop(0)
+ assert "Plain" in lines.pop(0)
+ assert "update-encoding" in lines.pop(0)
+ assert len(lines) == 2
+    @pytest.fixture
+    def fake_filewriter(self):
+        """Provide a file-writer stand-in that records instead of writing."""
+
+        class FileWriter:
+            """Callable collecting ``(path, content)`` pairs in call order."""
+
+            def __init__(self):
+                self._writes = []
+
+            def __call__(self, path, content, mode="w", **kwargs):
+                # ``mode`` and ``kwargs`` are accepted but not recorded.
+                record = (path, content)
+                self._writes.append(record)
+
+            def pop(self, index=-1):
+                """Remove and return the recorded write at ``index``."""
+                return self._writes.pop(index)
+
+        writer = FileWriter()
+        return writer
+    @pytest.fixture
+    def markdown_non_converter(self, const1, const2):
+        """Provide a fake converter wrapping its input in the constants."""
+
+        def fake_convert(s, text):
+            # ``s`` fills the ``self`` slot when this function is patched
+            # in as a method.
+            return f"{const1}{text}{const2}"
+
+        return fake_convert
+    def test_converter_tree_basic(
+        self, const1, const2, fake_filewriter, markdown_non_converter
+    ):
+        """Without images the result is a text/HTML alternative group."""
+        path = pathlib.Path(const2)
+        tree = convert_markdown_to_html(
+            const1, path, filewriter_fn=fake_filewriter
+        )
+
+        assert tree.subtype == "alternative"
+        assert len(tree.children) == 2
+
+        textpart, htmlpart = tree.children
+        assert textpart.subtype == "plain"
+        assert textpart.path == path
+        assert textpart.orig
+        assert htmlpart.subtype == "html"
+        assert htmlpart.path == path.with_suffix(".html")
+    def test_converter_writes(
+        self,
+        const1,
+        const2,
+        fake_filewriter,
+        monkeypatch,
+        markdown_non_converter,
+    ):
+        """The draft is written first, then the converted HTML."""
+        path = pathlib.Path(const2)
+        htmlpath = path.with_suffix(".html")
+
+        with monkeypatch.context() as m:
+            m.setattr(markdown.Markdown, "convert", markdown_non_converter)
+            convert_markdown_to_html(
+                const1, path, filewriter_fn=fake_filewriter
+            )
+
+        first_write = fake_filewriter.pop(0)
+        assert (path, const1) == first_write
+
+        second_write = fake_filewriter.pop(0)
+        expected_html = markdown_non_converter(None, const1)
+        assert (htmlpath, expected_html) == second_write
+    def test_markdown_inline_image_processor(self):
+        """Three spellings of a local image each get their own cid."""
+        imgpath1 = "file:/path/to/image.png"
+        imgpath2 = "file:///path/to/image.png?url=params"
+        imgpath3 = "/path/to/image.png"
+        # One Markdown image per spelling; the alt texts are arbitrary.
+        text = "\n".join(
+            (
+                f"![inline local image]({imgpath1})",
+                f"![image inlined via file URL]({imgpath2})",
+                f"![image local path]({imgpath3})",
+            )
+        )
+        text, html, images = markdown_with_inline_image_support(text)
+
+        # local paths have been normalised to URLs:
+        imgpath3 = f"file://{imgpath3}"
+
+        assert 'src="cid:' in html
+        assert "](cid:" in text
+        assert len(images) == 3
+        assert imgpath1 in images
+        assert imgpath2 in images
+        assert imgpath3 in images
+        assert images[imgpath1].cid != images[imgpath2].cid
+        assert images[imgpath1].cid != images[imgpath3].cid
+        assert images[imgpath2].cid != images[imgpath3].cid
+    def test_markdown_inline_image_processor_title_to_desc(self, const1):
+        """The image title becomes the description."""
+        imgpath = "file:///path/to/image.png"
+        # Single quotes because the Markdown title is double-quoted.
+        text = f'![inline local image]({imgpath} "{const1}")'
+        text, html, images = markdown_with_inline_image_support(text)
+        assert images[imgpath].desc == const1
+    def test_markdown_inline_image_processor_alt_to_desc(self, const1):
+        """Without a title, the alt text becomes the description."""
+        imgpath = "file:///path/to/image.png"
+        text = f"![{const1}]({imgpath})"
+        text, html, images = markdown_with_inline_image_support(text)
+        assert images[imgpath].desc == const1
+    def test_markdown_inline_image_processor_title_over_alt_desc(
+        self, const1, const2
+    ):
+        """When both are given, the title wins over the alt text."""
+        imgpath = "file:///path/to/image.png"
+        # ``const1`` is the alt text, ``const2`` the title.
+        text = f'![{const1}]({imgpath} "{const2}")'
+        text, html, images = markdown_with_inline_image_support(text)
+        assert images[imgpath].desc == const2
+    def test_markdown_inline_image_not_external(self):
+        """Remote images are left alone and not collected."""
+        imgpath = "https://path/to/image.png"
+        text = f"![remote image]({imgpath})"
+        text, html, images = markdown_with_inline_image_support(text)
+
+        assert 'src="cid:' not in html
+        assert "](cid:" not in text
+        assert len(images) == 0
+    def test_markdown_inline_image_local_file(self):
+        """An absolute path is collected under its ``file://`` URL."""
+        imgpath = "/path/to/image.png"
+        text = f"![local file]({imgpath})"
+        text, html, images = markdown_with_inline_image_support(text)
+
+        # Compare the full key list so the check cannot pass vacuously
+        # when no image was collected.
+        assert list(images) == [f"file://{imgpath}"]
+    def test_markdown_inline_image_processor_base64(self):
+        """A ``data:`` URI image is collected and replaced by a cid."""
+        # Small sample PNG as a base64 data URI; any valid one would do.
+        img = (
+            "data:image/png;base64,"
+            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAA"
+            "DUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
+        )
+        text = f"![inlined sample image]({img})"
+        text, html, images = markdown_with_inline_image_support(text)
+
+        assert 'src="cid:' in html
+        assert "](cid:" in text
+        assert len(images) == 1
+        assert img in images
+    def test_converter_tree_inline_image_base64(
+        self, const1, fake_filewriter
+    ):
+        """A ``data:`` URI image becomes a PNG part next to the text."""
+        # Small sample PNG as a base64 data URI; any valid one would do.
+        img = (
+            "data:image/png;base64,"
+            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAA"
+            "DUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
+        )
+        text = f"![inlined sample image]({img})"
+        path = pathlib.Path(const1)
+        tree = convert_markdown_to_html(
+            text, path, filewriter_fn=fake_filewriter
+        )
+
+        assert tree.subtype == "relative"
+        assert tree.children[1].subtype == "png"
+        # The image is the most recent write handed to the file writer.
+        written = fake_filewriter.pop()
+        assert tree.children[1].path == written[0]
+        assert written[1] == request.urlopen(img).read()
except ImportError: