X-Git-Url: https://git.madduck.net/etc/neomutt.git/blobdiff_plain/8727c8d5c6ab58f4f112bac53c1252862a485e19..1f3569f9da0c229553fbf50e683fc3828f55e7e5:/.config/neomutt/buildmimetree.py?ds=inline

diff --git a/.config/neomutt/buildmimetree.py b/.config/neomutt/buildmimetree.py
index 32fc17c..a27f64c 100755
--- a/.config/neomutt/buildmimetree.py
+++ b/.config/neomutt/buildmimetree.py
@@ -11,7 +11,7 @@
 #       <enter-command> source '$my_confdir/buildmimetree.py \
 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
 #       --css-file $my_confdir/htmlmail.css |'<enter>\
-#       <enter-command> sourc e \$my_mdwn_postprocess_cmd_file<enter>\
+#       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
 #     " "Convert message into a modern MIME tree with inline images"
 #
 #     (Yes, we need to call source twice, as mutt only starts to process output
@@ -28,11 +28,14 @@
 #   - Pynliner, provides --css-file and thus inline styling of HTML output
 #   - Pygments, then syntax highlighting for fenced code is enabled
 #
+# Running tests:
+#   pytest -x buildmimetree.py
+#
 # Latest version:
 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
 #
-# Copyright © 2023 martin f. krafft <madduck@madduck.net>
-# Released under the GPL-2+ licence, just like Mutt itself.
+# Copyright © 2023–24 martin f. krafft <madduck@madduck.net>
+# Released under the GPL-2+ licence, just like NeoMutt itself.
 #
 
 import sys
@@ -46,10 +49,17 @@ import mimetypes
 import bs4
 import xml.etree.ElementTree as etree
 import io
+import enum
+import warnings
+from contextlib import contextmanager
 from collections import namedtuple, OrderedDict
 from markdown.extensions import Extension
 from markdown.blockprocessors import BlockProcessor
-from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
+from markdown.inlinepatterns import (
+    SimpleTextInlineProcessor,
+    ImageInlineProcessor,
+    IMAGE_LINK_RE,
+)
 from email.utils import make_msgid
 from urllib import request
 
@@ -62,7 +72,7 @@ def parse_cli_args(*args, **kwargs):
         )
     )
     parser.epilog = (
-        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
+        "Copyright © 2023-24 martin f. krafft <madduck@madduck.net>.\n"
         "Released under the MIT licence"
     )
 
@@ -116,6 +126,11 @@ def parse_cli_args(*args, **kwargs):
         help="Only build, don't send the message",
     )
 
+    parser.add_argument(
+        "--domain",
+        help="Domain to use in content IDs",
+    )
+
     parser.add_argument(
         "--tempdir",
         metavar="DIR",
@@ -171,6 +186,10 @@ def parse_cli_args(*args, **kwargs):
 
 
 class File:
+    class Op(enum.Enum):
+        R = enum.auto()  # read: cache key and "last operation" marker
+        W = enum.auto()  # write: cache key and "last operation" marker
+
     def __init__(self, path=None, mode="r", content=None, **kwargs):
         if path:
             if content:
@@ -185,8 +204,8 @@ class File:
         if content and not re.search(r"[r+]", mode):
             raise RuntimeError("Cannot specify content without read mode")
 
-        self._rcache = [content] if content else []
-        self._wcache = []
+        self._cache = {File.Op.R: [content] if content else [], File.Op.W: []}
+        self._lastop = None
         self._mode = mode
         self._kwargs = kwargs
         self._file = None
@@ -209,48 +228,48 @@ class File:
     def close(self):
         self._file.close()
         self._file = None
-        self._rcache = self._wcache
-
-    def _get_rcache(self):
-        return (b"" if "b" in self._mode else "").join(self._rcache)
+        self._cache[File.Op.R] = self._cache[File.Op.W]
+        self._lastop = None
 
-    def _get_wcache(self):
-        return (b"" if "b" in self._mode else "").join(self._wcache)
+    def _get_cache(self, op):
+        return (b"" if "b" in self._mode else "").join(self._cache[op])
 
-    def _add_to_rcache(self, s):
-        self._rcache.append(s)
-
-    def _add_to_wcache(self, s):
-        self._wcache.append(s)
+    def _add_to_cache(self, op, s):
+        self._cache[op].append(s)
 
     def read(self, *, cache=True):
-        if cache and self._rcache:
-            return self._get_rcache()
+        if cache and self._cache[File.Op.R]:
+            return self._get_cache(File.Op.R)
+
+        if self._lastop == File.Op.W:
+            try:
+                self._file.seek(0)
+            except io.UnsupportedOperation:
+                pass
 
-        if not self._file:
-            with self as f:
-                return f.read(cache=cache)
+        self._lastop = File.Op.R
 
-        self._file.seek(0)
         if cache:
-            self._add_to_rcache(self._file.read())
-            return self._get_rcache()
+            self._add_to_cache(File.Op.R, self._file.read())
+            return self._get_cache(File.Op.R)
         else:
             return self._file.read()
 
     def write(self, s, *, cache=True):
-        if not self._file:
-            with self as f:
-                return f.write(s, cache=cache)
-
-        self._file.seek(0)
-        self._rcache = self._wcache
+        if self._lastop == File.Op.R:
+            try:
+                self._file.seek(0)
+            except io.UnsupportedOperation:
+                pass
 
         if cache:
-            self._add_to_wcache(s)
+            self._add_to_cache(File.Op.W, s)
+
+        self._cache[File.Op.R] = self._cache[File.Op.W]
 
         written = self._file.write(s)
         self._file.flush()
+        self._lastop = File.Op.W
         return written
 
     path = property(lambda s: s._path)
@@ -336,12 +355,12 @@ class ImageRegistry:
     def __init__(self):
         self._images = OrderedDict()
 
-    def register(self, path, description=None):
+    def register(self, path, description=None, *, domain=None):
         # path = str(pathlib.Path(path).expanduser())
         path = os.path.expanduser(path)
         if path.startswith("/"):
             path = f"file://{path}"
-        cid = make_msgid()[1:-1]
+        cid = make_msgid(domain=domain)[1:-1]
         self._images[path] = InlineImageInfo(cid, description)
         return cid
 
@@ -433,7 +452,10 @@ def markdown_with_inline_image_support(
 
 
 try:
-    import pynliner
+    with warnings.catch_warnings():
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1081037
+        warnings.filterwarnings("ignore", category=SyntaxWarning)
+        import pynliner
 
     _PYNLINER = True
 
@@ -462,18 +484,33 @@ def apply_styling(html, css):
     )
 
 
+# [ FORMAT=FLOWED HANDLING ] ##################################################
+
+
+class FormatFlowedNewlineExtension(Extension):
+    FFNL_RE = r"(?!\S)(\s)\n"  # one whitespace char directly before a newline
+
+    def extendMarkdown(self, md):
+        ffnl = SimpleTextInlineProcessor(self.FFNL_RE)  # emits group(1) only
+        md.inlinePatterns.register(ffnl, "ffnl", 125)
+
+
 # [ QUOTE HANDLING ] ##########################################################
 
 
 class QuoteToAdmonitionExtension(Extension):
-    class EmailQuoteBlockProcessor(BlockProcessor):
+    class BlockProcessor(BlockProcessor):
         RE = re.compile(r"(?:^|\n)>\s*(.*)")
 
         def __init__(self, parser):
             super().__init__(parser)
             self._title = None
+            self._disable = False
 
         def test(self, parent, blocks):
+            if self._disable:
+                return False
+
             if markdown.util.nearing_recursion_limit():
                 return False
 
@@ -509,9 +546,18 @@ class QuoteToAdmonitionExtension(Extension):
             self.parser.parseChunk(admonition, self._title)
 
             admonition[0].set("class", "admonition-title")
-            self.parser.parseChunk(
-                admonition, "\n".join(self.clean(line) for line in quotelines)
-            )
+            with self.disable():
+                self.parser.parseChunk(admonition, "\n".join(quotelines))
+
+        @contextmanager
+        def disable(self):
+            """Make test() return False while the with-block runs."""
+            self._disable = True
+            try:
+                yield True
+            finally:
+                # Re-enable even if the wrapped parsing raises.
+                self._disable = False
 
         @classmethod
         def clean(klass, line):
@@ -520,7 +562,7 @@ class QuoteToAdmonitionExtension(Extension):
 
     def extendMarkdown(self, md):
         md.registerExtension(self)
-        email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
+        email_quote_proc = self.BlockProcessor(md.parser)
         md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
 
 
@@ -626,7 +668,9 @@ def extract_signature(text, *, filefactory=FileFactory()):
         path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
         textsig = "\n".join(lines)
 
-        sig_input = filefactory(path.expanduser()).read()
+        with filefactory(path.expanduser()) as sig_f:
+            sig_input = sig_f.read()
+
         soup = bs4.BeautifulSoup(sig_input, "html.parser")
 
         style = str(soup.style.extract()) if soup.style else ""
@@ -665,6 +709,7 @@ def convert_markdown_to_html(
     tempdir=None,
     extensions=None,
     extension_configs=None,
+    domain=None,
 ):
     # TODO extension_configs need to be handled differently
     extension_configs = extension_configs or {}
@@ -673,6 +718,7 @@ def convert_markdown_to_html(
     ] = _CODEHILITE_CLASS
 
     extensions = extensions or []
+    extensions.append(FormatFlowedNewlineExtension())
     extensions.append(QuoteToAdmonitionExtension())
 
     draft = draft_f.read()
@@ -698,7 +744,7 @@ def convert_markdown_to_html(
         for img in soup.find_all("img"):
             uri = img.attrs["src"]
             desc = img.attrs.get("title", img.attrs.get("alt"))
-            cid = image_registry.register(uri, desc)
+            cid = image_registry.register(uri, desc, domain=domain)
             img.attrs["src"] = f"cid:{cid}"
 
         htmlsig = str(soup)
@@ -928,6 +974,7 @@ def do_massage(
     only_build=False,
     max_other_attachments=20,
     tempdir=None,
+    domain=None,
     debug_commands=False,
     debug_walk=False,
 ):
@@ -940,14 +987,6 @@ def do_massage(
     # variable used to identify the command file we're currently writing
     # to.
     cmds = MuttCommands(cmd_f, debug=debug_commands)
-    cmds.cmd('set editor="$my_editor"')
-    cmds.cmd('set edit_headers="$my_edit_headers"')
-    cmds.cmd("unset my_editor")
-    cmds.cmd("unset my_edit_headers")
-
-    # let's flush those commands, as there'll be a lot of pushes from now
-    # on, which need to be run in reverse order
-    cmds.flush()
 
     extensions = extensions.split(",") if extensions else []
     tree = converter(
@@ -957,6 +996,7 @@ def do_massage(
         related_to_html_only=related_to_html_only,
         tempdir=tempdir,
         extensions=extensions,
+        domain=domain,
     )
 
     mimetree = MIMETreeDFWalker(debug=debug_walk)
@@ -1093,6 +1133,10 @@ def do_massage(
     except AttributeError:
         filename = "pytest_internal_file"
     cmds.cmd(f"source 'rm -f {filename}|'")
+    cmds.cmd('set editor="$my_editor"')
+    cmds.cmd('set edit_headers="$my_edit_headers"')
+    cmds.cmd("unset my_editor")
+    cmds.cmd("unset my_edit_headers")
     cmds.cmd("unset my_mdwn_postprocess_cmd_file")
     cmds.flush()
 
@@ -1125,6 +1169,7 @@ if __name__ == "__main__":
                 max_other_attachments=args.max_number_other_attachments,
                 only_build=args.only_build,
                 tempdir=args.tempdir,
+                domain=args.domain,
                 debug_commands=args.debug_commands,
                 debug_walk=args.debug_walk,
             )
@@ -1387,14 +1432,14 @@ try:
                 )
                 lines = cmd_f.read().splitlines()
 
-            assert '="$my_editor"' in lines.pop(0)
-            assert '="$my_edit_headers"' in lines.pop(0)
-            assert "unset my_editor" == lines.pop(0)
-            assert "unset my_edit_headers" == lines.pop(0)
             assert "send-message" in lines.pop(0)
             assert "update-encoding" in lines.pop(0)
             assert "first-entry" in lines.pop(0)
             assert "source 'rm -f " in lines.pop(0)
+            assert '="$my_editor"' in lines.pop(0)
+            assert '="$my_edit_headers"' in lines.pop(0)
+            assert "unset my_editor" == lines.pop(0)
+            assert "unset my_edit_headers" == lines.pop(0)
             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
 
         @pytest.mark.massage
@@ -1411,7 +1456,7 @@ try:
                     max_other_attachments=max_attachments,
                     converter=converter,
                 )
-                lines = cmd_f.read().splitlines()[4:-2]
+                lines = cmd_f.read().splitlines()[:-6]
 
             assert "first-entry" in lines.pop()
             assert "update-encoding" in lines.pop()
@@ -1453,7 +1498,7 @@ try:
                     cmd_f=cmd_f,
                     converter=converter,
                 )
-                lines = cmd_f.read().splitlines()[4:-2]
+                lines = cmd_f.read().splitlines()[:-6]
 
             assert "first-entry" in lines.pop()
             assert "update-encoding" in lines.pop()
@@ -1523,10 +1568,10 @@ try:
 
         @pytest.mark.converter
         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
-            draft_f = fakefilefactory(fakepath, content=const1)
-            tree = convert_markdown_to_html(
-                draft_f, filefactory=fakefilefactory
-            )
+            with fakefilefactory(fakepath, content=const1) as draft_f:
+                tree = convert_markdown_to_html(
+                    draft_f, filefactory=fakefilefactory
+                )
 
             assert tree.subtype == "alternative"
             assert len(tree.children) == 2
@@ -1540,8 +1585,8 @@ try:
         def test_converter_writes(
             self, fakepath, fakefilefactory, const1, monkeypatch
         ):
-            draft_f = fakefilefactory(fakepath, content=const1)
-            convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+            with fakefilefactory(fakepath, content=const1) as draft_f:
+                convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
 
             html = fakefilefactory.pop()
             assert fakepath.with_suffix(".html") == html[0]
@@ -1735,7 +1780,6 @@ try:
             @pytest.mark.styling
             def test_massage_styling_to_converter(self):
                 css = "p { color:red }"
-                css_f = File(content=css)
                 css_applied = []
 
                 def converter(draft_f, css_f, **kwargs):
@@ -1743,12 +1787,17 @@ try:
                     css_applied.append(css)
                     return Part("text", "plain", draft_f.path, orig=True)
 
-                do_massage(
-                    draft_f=File(),
-                    cmd_f=File(),
-                    css_f=css_f,
-                    converter=converter,
-                )
+                with (
+                    File() as draft_f,
+                    File(mode="w") as cmd_f,
+                    File(content=css) as css_f,
+                ):
+                    do_massage(
+                        draft_f=draft_f,
+                        cmd_f=cmd_f,
+                        css_f=css_f,
+                        converter=converter,
+                    )
                 assert css_applied[0] == css
 
             @pytest.mark.converter
@@ -1817,11 +1866,10 @@ try:
             assert htmlsig == sigconst.format(path=fakepath)
 
         @pytest.mark.sig
-        def test_signature_extraction_file_not_found(self, const1):
-            path = pathlib.Path("/does/not/exist")
+        def test_signature_extraction_file_not_found(self, fakepath, const1):
             with pytest.raises(FileNotFoundError):
                 origtext, textsig, htmlsig = extract_signature(
-                    f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
+                    f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}"
                 )
 
         @pytest.mark.imgproc
@@ -1833,6 +1881,15 @@ try:
             assert not cid.endswith(">")
             assert const1 in reg
 
+        @pytest.mark.imgproc
+        def test_image_registry_domain(self, const1, const2):
+            reg = ImageRegistry()
+            cid = reg.register(const1, domain=const2)
+            assert f"@{const2}" in cid  # requested domain ends up in the CID
+            assert not cid.startswith("<")  # angle brackets must be stripped
+            assert not cid.endswith(">")
+            assert const1 in reg
+
         @pytest.mark.imgproc
         def test_image_registry_file_uri(self, const1):
             reg = ImageRegistry()
@@ -1890,9 +1947,7 @@ try:
                 "This is the plain-text version",
             )
             htmlsig = "HTML Signature from {path} but as a string"
-            html = (
-                f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
-            )
+            html = f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
 
             sig_f = fakefilefactory(fakepath2, content=html)
 
@@ -2026,6 +2081,29 @@ try:
             p = quote.p.extract()
             assert p.contents[1].name == "strong"
 
+        @pytest.mark.converter
+        def test_converter_attribution_to_admonition_with_blockquote(
+            self, fakepath, fakefilefactory
+        ):
+            mailparts = (
+                "Regarding whatever",
+                "> blockquote line1",
+                "> blockquote line2",
+                "> ",
+                "> new para with **bold** text",
+            )
+            with fakefilefactory(
+                fakepath, content="\n".join(mailparts)
+            ) as draft_f:
+                convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+
+            soup = bs4.BeautifulSoup(
+                fakefilefactory[fakepath.with_suffix(".html")].read(),
+                "html.parser",
+            )
+            quote = soup.select_one("div.admonition.quote")
+            assert quote.blockquote  # a <blockquote> is nested in the admonition
+
         @pytest.mark.converter
         def test_converter_attribution_to_admonition_multiple(
             self, fakepath, fakefilefactory
@@ -2068,6 +2146,46 @@ try:
                 == mailparts[-2]
             )
 
+        @pytest.mark.converter
+        def test_converter_format_flowed_with_nl2br(
+            self, fakepath, fakefilefactory
+        ):
+            mailparts = (
+                "This is format=flowed text ",
+                "with spaces at the end ",
+                "and there ought be no newlines.",
+                "",
+                "[link](https://example.org) ",
+                "and text.",
+                "",
+                "[link text ",
+                "broken up](https://example.org).",
+                "",
+                "This is on a new line with a hard break  ",
+                "due to the double space",
+            )
+            with fakefilefactory(
+                fakepath, content="\n".join(mailparts)
+            ) as draft_f:
+                convert_markdown_to_html(
+                    draft_f, extensions=["nl2br"], filefactory=fakefilefactory
+                )
+
+            soup = bs4.BeautifulSoup(
+                fakefilefactory[fakepath.with_suffix(".html")].read(),
+                "html.parser",
+            )
+            # Each extract() removes the first remaining <p>, so successive
+            # soup.p lookups walk the paragraphs in document order.
+            p = soup.p.extract().text
+            assert "".join(mailparts[0:3]) == p
+            p = "".join(map(str, soup.p.extract().contents))
+            assert p == '<a href="https://example.org">link</a> and text.'
+            p = "".join(map(str, soup.p.extract().contents))
+            assert (
+                p == '<a href="https://example.org">link text broken up</a>.'
+            )
+
         @pytest.mark.fileio
         def test_file_class_contextmanager(self, const1, monkeypatch):
             state = dict(o=False, c=False)
@@ -2095,6 +2213,11 @@ try:
                 f.write(const1, cache=False)
                 assert f.read(cache=False) == const1
 
+        @pytest.mark.fileio
+        def test_file_class_path_no_exists(self, fakepath):
+            with pytest.raises(FileNotFoundError):
+                File(fakepath, mode="r").open()  # assumes fakepath is absent
+
         @pytest.mark.fileio
         def test_file_class_cache(self, tmp_path, const1, const2):
             path = tmp_path / "file"