git.madduck.net Git - etc/neomutt.git/blobdiff - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

buildmimetree.py: Provide --only-build switch to prevent sending
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
index ad4e988cb7eaef570d8fe29ec668e6cce2a33481..f4ee8216439436c3cc43310e30b31beffadaae73 100755 (executable)
@@ -7,7 +7,7 @@
 # Configuration:
 #   neomuttrc (needs to be a single line):
 #     macro compose B "\
-#       <enter-command> source '$my_confdir/buildmimetree.py setup|'<enter>\
+#       <enter-command> source '$my_confdir/buildmimetree.py setup --tempdir $tempdir|'<enter>\
 #       <enter-command> sourc e \$my_mdwn_postprocess_cmd_file<enter>\
 #     " "Convert message into a modern MIME tree with inline images"
 #
@@ -36,7 +36,13 @@ import pathlib
 import markdown
 import tempfile
 import argparse
-from collections import namedtuple
+import re
+import mimetypes
+from collections import namedtuple, OrderedDict
+from markdown.extensions import Extension
+from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
+from email.utils import make_msgid
+from urllib import request
 
 
 def parse_cli_args(*args, **kwargs):
@@ -55,12 +61,6 @@ def parse_cli_args(*args, **kwargs):
     parser_setup = subp.add_parser("setup", help="Setup phase")
     parser_massage = subp.add_parser("massage", help="Massaging phase")
 
-    parser_setup.add_argument(
-        "--debug-commands",
-        action="store_true",
-        help="Turn on debug logging of commands generated to stderr",
-    )
-
     parser_setup.add_argument(
         "--extension",
         "-x",
@@ -73,21 +73,28 @@ def parse_cli_args(*args, **kwargs):
     )
 
     parser_setup.add_argument(
-        "--send-message",
+        "--only-build",
         action="store_true",
-        help="Generate command(s) to send the message after processing",
+        help="Only build, don't send the message",
     )
 
-    parser_massage.add_argument(
+    parser_setup.add_argument(
+        "--tempdir",
+        default=None,
+        help="Specify temporary directory to use for attachments",
+    )
+
+    parser_setup.add_argument(
         "--debug-commands",
         action="store_true",
         help="Turn on debug logging of commands generated to stderr",
     )
 
     parser_massage.add_argument(
-        "--debug-walk",
-        action="store_true",
-        help="Turn on debugging to stderr of the MIME tree walk",
+        "--write-commands-to",
+        metavar="PATH",
+        dest="cmdpath",
+        help="Temporary file path to write commands to",
     )
 
     parser_massage.add_argument(
@@ -99,10 +106,27 @@ def parse_cli_args(*args, **kwargs):
     )
 
     parser_massage.add_argument(
-        "--write-commands-to",
-        metavar="PATH",
-        dest="cmdpath",
-        help="Temporary file path to write commands to",
+        "--only-build",
+        action="store_true",
+        help="Only build, don't send the message",
+    )
+
+    parser_massage.add_argument(
+        "--tempdir",
+        default=None,
+        help="Specify temporary directory to use for attachments",
+    )
+
+    parser_massage.add_argument(
+        "--debug-commands",
+        action="store_true",
+        help="Turn on debug logging of commands generated to stderr",
+    )
+
+    parser_massage.add_argument(
+        "--debug-walk",
+        action="store_true",
+        help="Turn on debugging to stderr of the MIME tree walk",
     )
 
     parser_massage.add_argument(
@@ -114,6 +138,74 @@ def parse_cli_args(*args, **kwargs):
     return parser.parse_args(*args, **kwargs)
 
 
+# [ MARKDOWN WRAPPING ] #######################################################
+
+
+InlineImageInfo = namedtuple(
+    "InlineImageInfo", ["cid", "desc"], defaults=[None]
+)
+
+
+class InlineImageExtension(Extension):
+    class RelatedImageInlineProcessor(ImageInlineProcessor):
+        def __init__(self, re, md, ext):
+            super().__init__(re, md)
+            self._ext = ext
+
+        def handleMatch(self, m, data):
+            el, start, end = super().handleMatch(m, data)
+            if "src" in el.attrib:
+                src = el.attrib["src"]
+                if "://" not in src or src.startswith("file://"):
+                    # We only inline local content
+                    cid = self._ext.get_cid_for_image(el.attrib)
+                    el.attrib["src"] = f"cid:{cid}"
+            return el, start, end
+
+    def __init__(self):
+        super().__init__()
+        self._images = OrderedDict()
+
+    def extendMarkdown(self, md):
+        md.registerExtension(self)
+        inline_image_proc = self.RelatedImageInlineProcessor(
+            IMAGE_LINK_RE, md, self
+        )
+        md.inlinePatterns.register(inline_image_proc, "image_link", 150)
+
+    def get_cid_for_image(self, attrib):
+        msgid = make_msgid()[1:-1]
+        path = attrib["src"]
+        if path.startswith("/"):
+            path = f"file://{path}"
+        self._images[path] = InlineImageInfo(
+            msgid, attrib.get("title", attrib.get("alt"))
+        )
+        return msgid
+
+    def get_images(self):
+        return self._images
+
+
+def markdown_with_inline_image_support(text, *, extensions=None):
+    inline_image_handler = InlineImageExtension()
+    extensions = extensions or []
+    extensions.append(inline_image_handler)
+    mdwn = markdown.Markdown(extensions=extensions)
+    htmltext = mdwn.convert(text)
+
+    images = inline_image_handler.get_images()
+
+    def replace_image_with_cid(matchobj):
+        for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
+            if m in images:
+                return f"(cid:{images[m].cid}"
+        return matchobj.group(0)
+
+    text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
+    return text, htmltext, images
+
+
 # [ PARTS GENERATION ] ########################################################
 
 
@@ -140,46 +232,68 @@ class Multipart(
         return f"<multipart/{self.subtype}> children={len(self.children)}"
 
 
-def convert_markdown_to_html(maildraft, *, extensions=None):
-    draftpath = pathlib.Path(maildraft)
+def filewriter_fn(path, content, mode="w", **kwargs):
+    with open(path, mode, **kwargs) as out_f:
+        out_f.write(content)
+
+
+def collect_inline_images(
+    images, *, tempdir=None, filewriter_fn=filewriter_fn
+):
+    relparts = []
+    for path, info in images.items():
+        data = request.urlopen(path)
+
+        mimetype = data.headers["Content-Type"]
+        ext = mimetypes.guess_extension(mimetype)
+        tempfilename = tempfile.mkstemp(prefix="img", suffix=ext, dir=tempdir)
+        path = pathlib.Path(tempfilename[1])
+
+        filewriter_fn(path, data.read(), "w+b")
+
+        relparts.append(
+            Part(*mimetype.split("/"), path, cid=info.cid, desc=f"Image: {info.desc}")
+        )
+
+    return relparts
+
+
+def convert_markdown_to_html(
+    origtext,
+    draftpath,
+    *,
+    filewriter_fn=filewriter_fn,
+    tempdir=None,
+    extensions=None,
+):
+    origtext, htmltext, images = markdown_with_inline_image_support(
+        origtext, extensions=extensions
+    )
+
+    filewriter_fn(draftpath, origtext, encoding="utf-8")
     textpart = Part(
         "text", "plain", draftpath, "Plain-text version", orig=True
     )
 
-    with open(draftpath, "r", encoding="utf-8") as textmarkdown:
-        text = textmarkdown.read()
-
-    mdwn = markdown.Markdown(extensions=extensions)
-    html = mdwn.convert(text)
-
     htmlpath = draftpath.with_suffix(".html")
+    filewriter_fn(
+        htmlpath, htmltext, encoding="utf-8", errors="xmlcharrefreplace"
+    )
     htmlpart = Part("text", "html", htmlpath, "HTML version")
 
-    with open(
-        htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
-    ) as texthtml:
-        texthtml.write(html)
-
-    logopart = Part(
-        "image",
-        "png",
-        "/usr/share/doc/neomutt/logo/neomutt-256.png",
-        "Logo",
-        "neomutt-256.png",
+    altpart = Multipart(
+        "alternative", [textpart, htmlpart], "Group of alternative content"
     )
 
-    return Multipart(
-        "relative",
-        [
-            Multipart(
-                "alternative",
-                [textpart, htmlpart],
-                "Group of alternative content",
-            ),
-            logopart,
-        ],
-        "Group of related content",
+    imgparts = collect_inline_images(
+        images, tempdir=tempdir, filewriter_fn=filewriter_fn
     )
+    if imgparts:
+        return Multipart(
+            "relative", [altpart] + imgparts, "Group of related content"
+        )
+    else:
+        return altpart
 
 
 class MIMETreeDFWalker:
@@ -288,17 +402,29 @@ class MuttCommands:
 
 
 def do_setup(
-    extensions=None, *, out_f=sys.stdout, temppath=None, debug_commands=False
+    extensions=None,
+    *,
+    out_f=sys.stdout,
+    only_build=False,
+    temppath=None,
+    tempdir=None,
+    debug_commands=False,
 ):
     extensions = extensions or []
     temppath = temppath or pathlib.Path(
-        tempfile.mkstemp(prefix="muttmdwn-")[1]
+        tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)[1]
     )
     cmds = MuttCommands(out_f, debug=debug_commands)
 
     editor = f"{sys.argv[0]} massage --write-commands-to {temppath}"
     if extensions:
         editor = f'{editor} --extensions {",".join(extensions)}'
+    if only_build:
+        editor = f'{editor} --only-build'
+    if tempdir:
+        editor = f"{editor} --tempdir {tempdir}"
+    if debug_commands:
+        editor = f"{editor} --debug-commands"
 
     cmds.cmd('set my_editor="$editor"')
     cmds.cmd('set my_edit_headers="$edit_headers"')
@@ -310,11 +436,14 @@ def do_setup(
 
 
 def do_massage(
-    maildraft,
+    draft_f,
+    draftpath,
     cmd_f,
     *,
     extensions=None,
     converter=convert_markdown_to_html,
+    only_build=False,
+    tempdir=None,
     debug_commands=False,
     debug_walk=False,
 ):
@@ -337,7 +466,7 @@ def do_massage(
     cmds.flush()
 
     extensions = extensions.split(",") if extensions else []
-    tree = converter(maildraft, extensions=extensions)
+    tree = converter(draft_f.read(), draftpath, tempdir=tempdir, extensions=extensions)
 
     mimetree = MIMETreeDFWalker(debug=debug_walk)
 
@@ -351,19 +480,23 @@ def do_massage(
             # We've hit a leaf-node, i.e. an alternative or a related part
             # with actual content.
 
-            # If the part is not an original part, i.e. doesn't already
-            # exist, we must first add it.
-            if not item.orig:
+            # Let's add the part
+            if item.orig:
+                # The original source already exists in the NeoMutt tree, but
+                # the underlying file may have been modified, so we need to
+                # update the encoding, but that's it:
+                cmds.push("<update-encoding>")
+            else:
+                # … whereas all other parts need to be added, and they're all
+                # considered to be temporary and inline:
                 cmds.push(f"<attach-file>{item.path}<enter>")
                 cmds.push("<toggle-unlink><toggle-disposition>")
-                if item.cid:
-                    cmds.push(f"<edit-content-id>\\Ca\\Ck{item.cid}<enter>")
 
-            # If the item (including the original) comes with a
-            # description, then we might just as well update the NeoMutt
+            # If the item (including the original) comes with additional
+            # information, then we might just as well update the NeoMutt
             # tree now:
-            if item.desc:
-                cmds.push(f"<edit-description>\\Ca\\Ck{item.desc}<enter>")
+            if item.cid:
+                cmds.push(f"<edit-content-id>\\Ca\\Ck{item.cid}<enter>")
 
         elif isinstance(item, Multipart):
             # This node has children, but we already visited them (see
@@ -372,19 +505,19 @@ def do_massage(
             # appropriate grouping:
             if item.subtype == "alternative":
                 cmds.push("<group-alternatives>")
-            elif item.subtype == "relative":
+            elif item.subtype in ("relative", "related"):
                 cmds.push("<group-related>")
             elif item.subtype == "multilingual":
                 cmds.push("<group-multilingual>")
 
-            # Again, if there is a description, we might just as well:
-            if item.desc:
-                cmds.push(f"<edit-description>\\Ca\\Ck{item.desc}<enter>")
-
         else:
             # We should never get here
             assert not "is valid part"
 
+        # If the item has a description, we might just as well add it
+        if item.desc:
+            cmds.push(f"<edit-description>\\Ca\\Ck{item.desc}<enter>")
+
         # Finally, if we're at non-root level, tag the new container,
         # as it might itself be part of a container, to be processed
         # one level up:
@@ -398,6 +531,9 @@ def do_massage(
     # function
     mimetree.walk(tree, visitor_fn=visitor_fn)
 
+    if not only_build:
+        cmds.push("<send-message>")
+
     # Finally, cleanup. Since we're responsible for removing the temporary
     # file, how's this for a little hack?
     try:
@@ -415,17 +551,24 @@ if __name__ == "__main__":
     args = parse_cli_args()
 
     if args.mode == "setup":
-        if args.send_message:
-            raise NotImplementedError()
-
-        do_setup(args.extensions, debug_commands=args.debug_commands)
+        do_setup(
+            args.extensions,
+            only_build=args.only_build,
+            tempdir=args.tempdir,
+            debug_commands=args.debug_commands,
+        )
 
     elif args.mode == "massage":
-        with open(args.cmdpath, "w") as cmd_f:
+        with open(args.MAILDRAFT, "r") as draft_f, open(
+            args.cmdpath, "w"
+        ) as cmd_f:
             do_massage(
-                args.MAILDRAFT,
+                draft_f,
+                pathlib.Path(args.MAILDRAFT),
                 cmd_f,
                 extensions=args.extensions,
+                only_build=args.only_build,
+                tempdir=args.tempdir,
                 debug_commands=args.debug_commands,
                 debug_walk=args.debug_walk,
             )
@@ -435,6 +578,7 @@ if __name__ == "__main__":
 
 try:
     import pytest
+    from io import StringIO
 
     class Tests:
         @pytest.fixture
@@ -581,49 +725,230 @@ try:
             assert lines[2].endswith(f'{const2},{const1}"')
             assert lines[4].endswith(const1)
 
-        def test_do_massage_basic(self, const1, capsys):
-            def converter(maildraft, extensions):
-                return Part("text", "plain", "/dev/null", orig=True)
+        @pytest.fixture
+        def string_io(self, const1, text=None):
+            return StringIO(text or const1)
+
+        def test_do_massage_basic(self, const1, string_io, capsys):
+            def converter(drafttext, draftpath, extensions, tempdir):
+                return Part("text", "plain", draftpath, orig=True)
 
-            do_massage(maildraft=const1, cmd_f=sys.stdout, converter=converter)
-            captured = capsys.readouterr()
-            assert (
-                captured.out.strip()
-                == """\
-            set editor="$my_editor"
-            set edit_headers="$my_edit_headers"
-            unset my_editor
-            unset my_edit_headers
-            source 'rm -f pytest_internal_file|'
-            unset my_mdwn_postprocess_cmd_file
-            """.replace(
-                    "            ", ""
-                ).strip()
+            do_massage(
+                draft_f=string_io,
+                draftpath=const1,
+                cmd_f=sys.stdout,
+                converter=converter,
             )
 
-        def test_do_massage_fulltree(self, const1, basic_mime_tree, capsys):
-            def converter(maildraft, extensions):
+            captured = capsys.readouterr()
+            lines = captured.out.splitlines()
+            assert '="$my_editor"' in lines.pop(0)
+            assert '="$my_edit_headers"' in lines.pop(0)
+            assert "unset my_editor" == lines.pop(0)
+            assert "unset my_edit_headers" == lines.pop(0)
+            assert "send-message" in lines.pop(0)
+            assert "update-encoding" in lines.pop(0)
+            assert "source 'rm -f " in lines.pop(0)
+            assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
+
+        def test_do_massage_fulltree(
+            self, string_io, const1, basic_mime_tree, capsys
+        ):
+            def converter(drafttext, draftpath, extensions, tempdir):
                 return basic_mime_tree
 
-            do_massage(maildraft=const1, cmd_f=sys.stdout, converter=converter)
+            do_massage(
+                draft_f=string_io,
+                draftpath=const1,
+                cmd_f=sys.stdout,
+                converter=converter,
+            )
+
             captured = capsys.readouterr()
-            lines = captured.out.splitlines()[4:][::-1]
-            assert "Related" in lines.pop()
-            assert "group-related" in lines.pop()
-            assert "tag-entry" in lines.pop()
-            assert "Logo" in lines.pop()
-            assert "content-id" in lines.pop()
-            assert "toggle-unlink" in lines.pop()
-            assert "logo.png" in lines.pop()
-            assert "tag-entry" in lines.pop()
-            assert "Alternative" in lines.pop()
-            assert "group-alternatives" in lines.pop()
-            assert "tag-entry" in lines.pop()
-            assert "HTML" in lines.pop()
-            assert "toggle-unlink" in lines.pop()
-            assert "part.html" in lines.pop()
-            assert "tag-entry" in lines.pop()
-            assert "Plain" in lines.pop()
+            lines = captured.out.splitlines()[4:]
+            assert "send-message" in lines.pop(0)
+            assert "Related" in lines.pop(0)
+            assert "group-related" in lines.pop(0)
+            assert "tag-entry" in lines.pop(0)
+            assert "Logo" in lines.pop(0)
+            assert "content-id" in lines.pop(0)
+            assert "toggle-unlink" in lines.pop(0)
+            assert "logo.png" in lines.pop(0)
+            assert "tag-entry" in lines.pop(0)
+            assert "Alternative" in lines.pop(0)
+            assert "group-alternatives" in lines.pop(0)
+            assert "tag-entry" in lines.pop(0)
+            assert "HTML" in lines.pop(0)
+            assert "toggle-unlink" in lines.pop(0)
+            assert "part.html" in lines.pop(0)
+            assert "tag-entry" in lines.pop(0)
+            assert "Plain" in lines.pop(0)
+            assert "update-encoding" in lines.pop(0)
+            assert len(lines) == 2
+
+        @pytest.fixture
+        def fake_filewriter(self):
+            class FileWriter:
+                def __init__(self):
+                    self._writes = []
+
+                def __call__(self, path, content, mode="w", **kwargs):
+                    self._writes.append((path, content))
+
+                def pop(self, index=-1):
+                    return self._writes.pop(index)
+
+            return FileWriter()
+
+        @pytest.fixture
+        def markdown_non_converter(self, const1, const2):
+            return lambda s, text: f"{const1}{text}{const2}"
+
+        def test_converter_tree_basic(
+            self, const1, const2, fake_filewriter, markdown_non_converter
+        ):
+            path = pathlib.Path(const2)
+            tree = convert_markdown_to_html(
+                const1, path, filewriter_fn=fake_filewriter
+            )
+
+            assert tree.subtype == "alternative"
+            assert len(tree.children) == 2
+            assert tree.children[0].subtype == "plain"
+            assert tree.children[0].path == path
+            assert tree.children[0].orig
+            assert tree.children[1].subtype == "html"
+            assert tree.children[1].path == path.with_suffix(".html")
+
+        def test_converter_writes(
+            self,
+            const1,
+            const2,
+            fake_filewriter,
+            monkeypatch,
+            markdown_non_converter,
+        ):
+            path = pathlib.Path(const2)
+
+            with monkeypatch.context() as m:
+                m.setattr(markdown.Markdown, "convert", markdown_non_converter)
+                convert_markdown_to_html(
+                    const1, path, filewriter_fn=fake_filewriter
+                )
+
+            assert (path, const1) == fake_filewriter.pop(0)
+            assert (
+                path.with_suffix(".html"),
+                markdown_non_converter(None, const1),
+            ) == fake_filewriter.pop(0)
+
+        def test_markdown_inline_image_processor(self):
+            imgpath1 = "file:/path/to/image.png"
+            imgpath2 = "file:///path/to/image.png?url=params"
+            imgpath3 = "/path/to/image.png"
+            text = f"""![inline local image]({imgpath1})
+                       ![image inlined
+                         with newline]({imgpath2})
+                       ![image local path]({imgpath3})"""
+            text, html, images = markdown_with_inline_image_support(text)
+
+            # local paths have been normalised to URLs:
+            imgpath3 = f"file://{imgpath3}"
+
+            assert 'src="cid:' in html
+            assert "](cid:" in text
+            assert len(images) == 3
+            assert imgpath1 in images
+            assert imgpath2 in images
+            assert imgpath3 in images
+            assert images[imgpath1].cid != images[imgpath2].cid
+            assert images[imgpath1].cid != images[imgpath3].cid
+            assert images[imgpath2].cid != images[imgpath3].cid
+
+        def test_markdown_inline_image_processor_title_to_desc(self, const1):
+            imgpath = "file:///path/to/image.png"
+            text = f'![inline local image]({imgpath} "{const1}")'
+            text, html, images = markdown_with_inline_image_support(text)
+            assert images[imgpath].desc == const1
+
+        def test_markdown_inline_image_processor_alt_to_desc(self, const1):
+            imgpath = "file:///path/to/image.png"
+            text = f"![{const1}]({imgpath})"
+            text, html, images = markdown_with_inline_image_support(text)
+            assert images[imgpath].desc == const1
+
+        def test_markdown_inline_image_processor_title_over_alt_desc(
+            self, const1, const2
+        ):
+            imgpath = "file:///path/to/image.png"
+            text = f'![{const1}]({imgpath} "{const2}")'
+            text, html, images = markdown_with_inline_image_support(text)
+            assert images[imgpath].desc == const2
+
+        def test_markdown_inline_image_not_external(self):
+            imgpath = "https://path/to/image.png"
+            text = f"![inline image]({imgpath})"
+            text, html, images = markdown_with_inline_image_support(text)
+
+            assert 'src="cid:' not in html
+            assert "](cid:" not in text
+            assert len(images) == 0
+
+        def test_markdown_inline_image_local_file(self):
+            imgpath = "/path/to/image.png"
+            text = f"![inline image]({imgpath})"
+            text, html, images = markdown_with_inline_image_support(text)
+
+            for k, v in images.items():
+                assert k == f"file://{imgpath}"
+                break
+
+        @pytest.fixture
+        def test_png(self):
+            return (
+                ""
+                "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
+            )
+
+        def test_markdown_inline_image_processor_base64(self, test_png):
+            text = f"![1px white inlined]({test_png})"
+            text, html, images = markdown_with_inline_image_support(text)
+
+            assert 'src="cid:' in html
+            assert "](cid:" in text
+            assert len(images) == 1
+            assert test_png in images
+
+        def test_converter_tree_inline_image_base64(
+            self, test_png, const1, fake_filewriter
+        ):
+            text = f"![inline base64 image]({test_png})"
+            path = pathlib.Path(const1)
+            tree = convert_markdown_to_html(
+                text, path, filewriter_fn=fake_filewriter
+            )
+
+            assert tree.subtype == "relative"
+            assert tree.children[1].subtype == "png"
+            written = fake_filewriter.pop()
+            assert tree.children[1].path == written[0]
+            assert written[1] == request.urlopen(test_png).read()
+
+        def test_inline_image_collection(
+            self, test_png, const1, const2, fake_filewriter
+        ):
+            test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
+            relparts = collect_inline_images(
+                test_images, filewriter_fn=fake_filewriter
+            )
+
+            written = fake_filewriter.pop()
+            assert b"PNG" in written[1]
+
+            assert relparts[0].subtype == "png"
+            assert relparts[0].path == written[0]
+            assert relparts[0].cid == const1
+            assert relparts[0].desc.endswith(const2)
 
 except ImportError:
     pass