git.madduck.net Git - etc/neomutt.git/blobdiff - .config/neomutt/buildmimetree.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Ignore SyntaxWarnings for pynliner/cssutils for now
[etc/neomutt.git] / .config / neomutt / buildmimetree.py
index f5e3c17ae74512fada4032db862cb72cf29317f1..a27f64c1488bbf1417add4edae91f09a7127e992 100755 (executable)
@@ -11,7 +11,7 @@
 #       <enter-command> source '$my_confdir/buildmimetree.py \
 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
 #       --css-file $my_confdir/htmlmail.css |'<enter>\
-#       <enter-command> sourc e \$my_mdwn_postprocess_cmd_file<enter>\
+#       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
 #     " "Convert message into a modern MIME tree with inline images"
 #
 #     (Yes, we need to call source twice, as mutt only starts to process output
 #   - Pynliner, provides --css-file and thus inline styling of HTML output
 #   - Pygments, then syntax highlighting for fenced code is enabled
 #
+# Running tests:
+#   pytest -x buildmimetree.py
+#
 # Latest version:
 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
 #
-# Copyright © 2023 martin f. krafft <madduck@madduck.net>
-# Released under the GPL-2+ licence, just like Mutt itself.
+# Copyright © 2023–24 martin f. krafft <madduck@madduck.net>
+# Released under the GPL-2+ licence, just like NeoMutt itself.
 #
 
 import sys
@@ -47,10 +50,16 @@ import bs4
 import xml.etree.ElementTree as etree
 import io
 import enum
+import warnings
+from contextlib import contextmanager
 from collections import namedtuple, OrderedDict
 from markdown.extensions import Extension
 from markdown.blockprocessors import BlockProcessor
-from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
+from markdown.inlinepatterns import (
+    SimpleTextInlineProcessor,
+    ImageInlineProcessor,
+    IMAGE_LINK_RE,
+)
 from email.utils import make_msgid
 from urllib import request
 
@@ -63,7 +72,7 @@ def parse_cli_args(*args, **kwargs):
         )
     )
     parser.epilog = (
-        "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
+        "Copyright © 2023-24 martin f. krafft <madduck@madduck.net>.\n"
         "Released under the MIT licence"
     )
 
@@ -117,6 +126,11 @@ def parse_cli_args(*args, **kwargs):
         help="Only build, don't send the message",
     )
 
+    parser.add_argument(
+        "--domain",
+        help="Domain to use in content IDs",
+    )
+
     parser.add_argument(
         "--tempdir",
         metavar="DIR",
@@ -172,7 +186,6 @@ def parse_cli_args(*args, **kwargs):
 
 
 class File:
-
     class Op(enum.Enum):
         R = enum.auto()
         W = enum.auto()
@@ -191,10 +204,7 @@ class File:
         if content and not re.search(r"[r+]", mode):
             raise RuntimeError("Cannot specify content without read mode")
 
-        self._cache = {
-            File.Op.R: [content] if content else [],
-            File.Op.W: []
-        }
+        self._cache = {File.Op.R: [content] if content else [], File.Op.W: []}
         self._lastop = None
         self._mode = mode
         self._kwargs = kwargs
@@ -231,10 +241,6 @@ class File:
         if cache and self._cache[File.Op.R]:
             return self._get_cache(File.Op.R)
 
-        if not self._file:
-            with self as f:
-                return f.read(cache=cache)
-
         if self._lastop == File.Op.W:
             try:
                 self._file.seek(0)
@@ -250,11 +256,6 @@ class File:
             return self._file.read()
 
     def write(self, s, *, cache=True):
-
-        if not self._file:
-            with self as f:
-                return f.write(s, cache=cache)
-
         if self._lastop == File.Op.R:
             try:
                 self._file.seek(0)
@@ -354,12 +355,12 @@ class ImageRegistry:
     def __init__(self):
         self._images = OrderedDict()
 
-    def register(self, path, description=None):
+    def register(self, path, description=None, *, domain=None):
         # path = str(pathlib.Path(path).expanduser())
         path = os.path.expanduser(path)
         if path.startswith("/"):
             path = f"file://{path}"
-        cid = make_msgid()[1:-1]
+        cid = make_msgid(domain=domain)[1:-1]
         self._images[path] = InlineImageInfo(cid, description)
         return cid
 
@@ -451,7 +452,10 @@ def markdown_with_inline_image_support(
 
 
 try:
-    import pynliner
+    with warnings.catch_warnings():
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1081037
+        warnings.filterwarnings("ignore", category=SyntaxWarning)
+        import pynliner
 
     _PYNLINER = True
 
@@ -480,18 +484,33 @@ def apply_styling(html, css):
     )
 
 
+# [ FORMAT=FLOWED HANDLING ] ##################################################
+
+
+class FormatFlowedNewlineExtension(Extension):
+    FFNL_RE = r"(?!\S)(\s)\n"
+
+    def extendMarkdown(self, md):
+        ffnl = SimpleTextInlineProcessor(self.FFNL_RE)
+        md.inlinePatterns.register(ffnl, "ffnl", 125)
+
+
 # [ QUOTE HANDLING ] ##########################################################
 
 
 class QuoteToAdmonitionExtension(Extension):
-    class EmailQuoteBlockProcessor(BlockProcessor):
+    class BlockProcessor(BlockProcessor):
         RE = re.compile(r"(?:^|\n)>\s*(.*)")
 
         def __init__(self, parser):
             super().__init__(parser)
             self._title = None
+            self._disable = False
 
         def test(self, parent, blocks):
+            if self._disable:
+                return False
+
             if markdown.util.nearing_recursion_limit():
                 return False
 
@@ -527,9 +546,14 @@ class QuoteToAdmonitionExtension(Extension):
             self.parser.parseChunk(admonition, self._title)
 
             admonition[0].set("class", "admonition-title")
-            self.parser.parseChunk(
-                admonition, "\n".join(self.clean(line) for line in quotelines)
-            )
+            with self.disable():
+                self.parser.parseChunk(admonition, "\n".join(quotelines))
+
+        @contextmanager
+        def disable(self):
+            self._disable = True
+            yield True
+            self._disable = False
 
         @classmethod
         def clean(klass, line):
@@ -538,7 +562,7 @@ class QuoteToAdmonitionExtension(Extension):
 
     def extendMarkdown(self, md):
         md.registerExtension(self)
-        email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
+        email_quote_proc = self.BlockProcessor(md.parser)
         md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
 
 
@@ -644,7 +668,9 @@ def extract_signature(text, *, filefactory=FileFactory()):
         path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
         textsig = "\n".join(lines)
 
-        sig_input = filefactory(path.expanduser()).read()
+        with filefactory(path.expanduser()) as sig_f:
+            sig_input = sig_f.read()
+
         soup = bs4.BeautifulSoup(sig_input, "html.parser")
 
         style = str(soup.style.extract()) if soup.style else ""
@@ -683,6 +709,7 @@ def convert_markdown_to_html(
     tempdir=None,
     extensions=None,
     extension_configs=None,
+    domain=None,
 ):
     # TODO extension_configs need to be handled differently
     extension_configs = extension_configs or {}
@@ -691,6 +718,7 @@ def convert_markdown_to_html(
     ] = _CODEHILITE_CLASS
 
     extensions = extensions or []
+    extensions.append(FormatFlowedNewlineExtension())
     extensions.append(QuoteToAdmonitionExtension())
 
     draft = draft_f.read()
@@ -716,7 +744,7 @@ def convert_markdown_to_html(
         for img in soup.find_all("img"):
             uri = img.attrs["src"]
             desc = img.attrs.get("title", img.attrs.get("alt"))
-            cid = image_registry.register(uri, desc)
+            cid = image_registry.register(uri, desc, domain=domain)
             img.attrs["src"] = f"cid:{cid}"
 
         htmlsig = str(soup)
@@ -946,6 +974,7 @@ def do_massage(
     only_build=False,
     max_other_attachments=20,
     tempdir=None,
+    domain=None,
     debug_commands=False,
     debug_walk=False,
 ):
@@ -967,6 +996,7 @@ def do_massage(
         related_to_html_only=related_to_html_only,
         tempdir=tempdir,
         extensions=extensions,
+        domain=domain,
     )
 
     mimetree = MIMETreeDFWalker(debug=debug_walk)
@@ -1139,6 +1169,7 @@ if __name__ == "__main__":
                 max_other_attachments=args.max_number_other_attachments,
                 only_build=args.only_build,
                 tempdir=args.tempdir,
+                domain=args.domain,
                 debug_commands=args.debug_commands,
                 debug_walk=args.debug_walk,
             )
@@ -1537,10 +1568,10 @@ try:
 
         @pytest.mark.converter
         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
-            draft_f = fakefilefactory(fakepath, content=const1)
-            tree = convert_markdown_to_html(
-                draft_f, filefactory=fakefilefactory
-            )
+            with fakefilefactory(fakepath, content=const1) as draft_f:
+                tree = convert_markdown_to_html(
+                    draft_f, filefactory=fakefilefactory
+                )
 
             assert tree.subtype == "alternative"
             assert len(tree.children) == 2
@@ -1554,8 +1585,8 @@ try:
         def test_converter_writes(
             self, fakepath, fakefilefactory, const1, monkeypatch
         ):
-            draft_f = fakefilefactory(fakepath, content=const1)
-            convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+            with fakefilefactory(fakepath, content=const1) as draft_f:
+                convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
 
             html = fakefilefactory.pop()
             assert fakepath.with_suffix(".html") == html[0]
@@ -1749,7 +1780,6 @@ try:
             @pytest.mark.styling
             def test_massage_styling_to_converter(self):
                 css = "p { color:red }"
-                css_f = File(content=css)
                 css_applied = []
 
                 def converter(draft_f, css_f, **kwargs):
@@ -1757,12 +1787,17 @@ try:
                     css_applied.append(css)
                     return Part("text", "plain", draft_f.path, orig=True)
 
-                do_massage(
-                    draft_f=File(),
-                    cmd_f=File(),
-                    css_f=css_f,
-                    converter=converter,
-                )
+                with (
+                    File() as draft_f,
+                    File(mode="w") as cmd_f,
+                    File(content=css) as css_f,
+                ):
+                    do_massage(
+                        draft_f=draft_f,
+                        cmd_f=cmd_f,
+                        css_f=css_f,
+                        converter=converter,
+                    )
                 assert css_applied[0] == css
 
             @pytest.mark.converter
@@ -1831,11 +1866,10 @@ try:
             assert htmlsig == sigconst.format(path=fakepath)
 
         @pytest.mark.sig
-        def test_signature_extraction_file_not_found(self, const1):
-            path = pathlib.Path("/does/not/exist")
+        def test_signature_extraction_file_not_found(self, fakepath, const1):
             with pytest.raises(FileNotFoundError):
                 origtext, textsig, htmlsig = extract_signature(
-                    f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
+                    f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}"
                 )
 
         @pytest.mark.imgproc
@@ -1847,6 +1881,15 @@ try:
             assert not cid.endswith(">")
             assert const1 in reg
 
+        @pytest.mark.imgproc
+        def test_image_registry_domain(self, const1, const2):
+            reg = ImageRegistry()
+            cid = reg.register(const1, domain=const2)
+            assert f"@{const2}" in cid
+            assert not cid.startswith("<")
+            assert not cid.endswith(">")
+            assert const1 in reg
+
         @pytest.mark.imgproc
         def test_image_registry_file_uri(self, const1):
             reg = ImageRegistry()
@@ -1904,9 +1947,7 @@ try:
                 "This is the plain-text version",
             )
             htmlsig = "HTML Signature from {path} but as a string"
-            html = (
-                f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
-            )
+            html = f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
 
             sig_f = fakefilefactory(fakepath2, content=html)
 
@@ -2040,6 +2081,29 @@ try:
             p = quote.p.extract()
             assert p.contents[1].name == "strong"
 
+        @pytest.mark.converter
+        def test_converter_attribution_to_admonition_with_blockquote(
+            self, fakepath, fakefilefactory
+        ):
+            mailparts = (
+                "Regarding whatever",
+                "> blockquote line1",
+                "> blockquote line2",
+                "> ",
+                "> new para with **bold** text",
+            )
+            with fakefilefactory(
+                fakepath, content="\n".join(mailparts)
+            ) as draft_f:
+                convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+
+            soup = bs4.BeautifulSoup(
+                fakefilefactory[fakepath.with_suffix(".html")].read(),
+                "html.parser",
+            )
+            quote = soup.select_one("div.admonition.quote")
+            assert quote.blockquote
+
         @pytest.mark.converter
         def test_converter_attribution_to_admonition_multiple(
             self, fakepath, fakefilefactory
@@ -2082,6 +2146,46 @@ try:
                 == mailparts[-2]
             )
 
+        @pytest.mark.converter
+        def test_converter_format_flowed_with_nl2br(
+            self, fakepath, fakefilefactory
+        ):
+            mailparts = (
+                "This is format=flowed text ",
+                "with spaces at the end ",
+                "and there ought be no newlines.",
+                "",
+                "[link](https://example.org) ",
+                "and text.",
+                "",
+                "[link text ",
+                "broken up](https://example.org).",
+                "",
+                "This is on a new line with a hard break  ",
+                "due to the double space",
+            )
+            with fakefilefactory(
+                fakepath, content="\n".join(mailparts)
+            ) as draft_f:
+                convert_markdown_to_html(
+                    draft_f, extensions=["nl2br"], filefactory=fakefilefactory
+                )
+
+            soup = bs4.BeautifulSoup(
+                fakefilefactory[fakepath.with_suffix(".html")].read(),
+                "html.parser",
+            )
+            import ipdb
+
+            p = soup.p.extract().text
+            assert "".join(mailparts[0:3]) == p
+            p = ''.join(map(str, soup.p.extract().contents))
+            assert p == '<a href="https://example.org">link</a> and text.'
+            p = ''.join(map(str, soup.p.extract().contents))
+            assert (
+                p == '<a href="https://example.org">link text broken up</a>.'
+            )
+
         @pytest.mark.fileio
         def test_file_class_contextmanager(self, const1, monkeypatch):
             state = dict(o=False, c=False)
@@ -2109,6 +2213,11 @@ try:
                 f.write(const1, cache=False)
                 assert f.read(cache=False) == const1
 
+        @pytest.mark.fileio
+        def test_file_class_path_no_exists(self, fakepath):
+            with pytest.raises(FileNotFoundError):
+                File(fakepath, mode="r").open()
+
         @pytest.mark.fileio
         def test_file_class_cache(self, tmp_path, const1, const2):
             path = tmp_path / "file"