buildmimetree.py: extract images from html signatures

author martin f. krafft <madduck@madduck.net>

Sat, 2 Sep 2023 08:30:55 +0000 (10:30 +0200)

committer martin f. krafft <madduck@madduck.net>

Sat, 2 Sep 2023 10:07:19 +0000 (12:07 +0200)
author martin f. krafft <madduck@madduck.net>
Sat, 2 Sep 2023 08:30:55 +0000 (10:30 +0200)
committer martin f. krafft <madduck@madduck.net>
Sat, 2 Sep 2023 10:07:19 +0000 (12:07 +0200)
diff --git a/.config/neomutt/buildmimetree.py b/.config/neomutt/buildmimetree.py

index 00526327ddca23a14446c9901d214eb368b6b2fe..fbc8040061877b74cc0e9fbc4a8e605d21a8a33c 100755 (executable)
--- a/.config/neomutt/buildmimetree.py
+++ b/.config/neomutt/buildmimetree.py
@@ -9,7 +9,8 @@
  #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
  #     macro compose B "\
  #       <enter-command> source '$my_confdir/buildmimetree.py \
-#       --tempdir $tempdir --extensions $my_mdwn_extensions|'<enter>\
+#       --tempdir $tempdir --extensions $my_mdwn_extensions \
+#       --css-file $my_confdir/htmlmail.css |'<enter>\
  #       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
  #     " "Convert message into a modern MIME tree with inline images"
  #
@@ -21,6 +22,7 @@
  # Requirements:
  #   - python3
  #   - python3-markdown
+#   - python3-beautifulsoup4
  # Optional:
  #   - pytest
  #   - Pynliner, provides --css-file and thus inline styling of HTML output
@@ -34,12 +36,15 @@
  #
  
  import sys
+import os
+import os.path
  import pathlib
  import markdown
  import tempfile
  import argparse
  import re
  import mimetypes
+import bs4
  from collections import namedtuple, OrderedDict
  from markdown.extensions import Extension
  from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
@@ -61,6 +66,7 @@ def parse_cli_args(*args, **kwargs):
  
      parser.add_argument(
          "--extensions",
+        metavar="EXT[,EXT[,EXT]]",
          type=str,
          default="",
          help="Markdown extension to use (comma-separated list)",
@@ -69,7 +75,9 @@ def parse_cli_args(*args, **kwargs):
      if _PYNLINER:
          parser.add_argument(
              "--css-file",
+            metavar="FILE",
              type=pathlib.Path,
+            default=os.devnull,
              help="CSS file to merge with the final HTML",
          )
      else:
@@ -89,22 +97,26 @@ def parse_cli_args(*args, **kwargs):
          except ValueError:
              pass
  
-        raise ValueError(f"Must be a positive integer")
+        raise ValueError("Must be a positive integer")
  
      parser.add_argument(
          "--max-number-other-attachments",
+        metavar="INTEGER",
          type=positive_integer,
-        help="Make related content be sibling to HTML parts only",
+        default=20,
+        help="Maximum number of other attachments to expect",
      )
  
      parser.add_argument(
          "--only-build",
+        "--just-build",
          action="store_true",
          help="Only build, don't send the message",
      )
  
      parser.add_argument(
          "--tempdir",
+        metavar="DIR",
          type=pathlib.Path,
          help="Specify temporary directory to use for attachments",
      )
@@ -115,6 +127,12 @@ def parse_cli_args(*args, **kwargs):
          help="Turn on debug logging of commands generated to stderr",
      )
  
+    parser.add_argument(
+        "--debug-walk",
+        action="store_true",
+        help="Turn on debugging to stderr of the MIME tree walk",
+    )
+
      subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
      massage_p = subp.add_parser(
          "massage", help="Massaging phase (internal use)"
@@ -123,19 +141,13 @@ def parse_cli_args(*args, **kwargs):
      massage_p.add_argument(
          "--write-commands-to",
          "-o",
-        metavar="PATH",
+        metavar="FILE",
          dest="cmdpath",
          type=pathlib.Path,
          required=True,
          help="Temporary file path to write commands to",
      )
  
-    massage_p.add_argument(
-        "--debug-walk",
-        action="store_true",
-        help="Turn on debugging to stderr of the MIME tree walk",
-    )
-
      massage_p.add_argument(
          "MAILDRAFT",
          nargs="?",
@@ -146,7 +158,7 @@ def parse_cli_args(*args, **kwargs):
      return parser.parse_args(*args, **kwargs)
  
  
-# [ MARKDOWN WRAPPING ] #######################################################
+# [ IMAGE HANDLING ] ##########################################################
  
  
  InlineImageInfo = namedtuple(
@@ -154,11 +166,42 @@ InlineImageInfo = namedtuple(
  )
  
  
+class ImageRegistry:
+    def __init__(self):
+        self._images = OrderedDict()
+
+    def register(self, path, description=None):
+        path = os.path.expanduser(path)
+        if path.startswith("/"):
+            path = f"file://{path}"
+        cid = make_msgid()[1:-1]
+        self._images[path] = InlineImageInfo(cid, description)
+        return cid
+
+    def __iter__(self):
+        return self._images.__iter__()
+
+    def __getitem__(self, idx):
+        return self._images.__getitem__(idx)
+
+    def __len__(self):
+        return self._images.__len__()
+
+    def items(self):
+        return self._images.items()
+
+    def __repr__(self):
+        return f"<ImageRegistry(items={len(self._images)})>"
+
+    def __str__(self):
+        return self._images.__str__()
+
+
  class InlineImageExtension(Extension):
      class RelatedImageInlineProcessor(ImageInlineProcessor):
-        def __init__(self, re, md, ext):
+        def __init__(self, re, md, registry):
              super().__init__(re, md)
-            self._ext = ext
+            self._registry = registry
  
          def handleMatch(self, m, data):
              el, start, end = super().handleMatch(m, data)
@@ -166,60 +209,62 @@ class InlineImageExtension(Extension):
                  src = el.attrib["src"]
                  if "://" not in src or src.startswith("file://"):
                      # We only inline local content
-                    cid = self._ext.get_cid_for_image(el.attrib)
+                    cid = self._registry.register(
+                        el.attrib["src"],
+                        el.attrib.get("title", el.attrib.get("alt")),
+                    )
                      el.attrib["src"] = f"cid:{cid}"
              return el, start, end
  
-    def __init__(self):
+    def __init__(self, registry):
          super().__init__()
-        self._images = OrderedDict()
+        self._image_registry = registry
+
+    INLINE_PATTERN_NAME = "image_link"
  
      def extendMarkdown(self, md):
          md.registerExtension(self)
          inline_image_proc = self.RelatedImageInlineProcessor(
-            IMAGE_LINK_RE, md, self
+            IMAGE_LINK_RE, md, self._image_registry
          )
-        md.inlinePatterns.register(inline_image_proc, "image_link", 150)
-
-    def get_cid_for_image(self, attrib):
-        msgid = make_msgid()[1:-1]
-        path = attrib["src"]
-        if path.startswith("/"):
-            path = f"file://{path}"
-        self._images[path] = InlineImageInfo(
-            msgid, attrib.get("title", attrib.get("alt"))
+        md.inlinePatterns.register(
+            inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
          )
-        return msgid
-
-    def get_images(self):
-        return self._images
  
  
  def markdown_with_inline_image_support(
-    text, *, extensions=None, extension_configs=None
+    text,
+    *,
+    mdwn=None,
+    image_registry=None,
+    extensions=None,
+    extension_configs=None,
  ):
-    inline_image_handler = InlineImageExtension()
+    registry = (
+        image_registry if image_registry is not None else ImageRegistry()
+    )
+    inline_image_handler = InlineImageExtension(registry=registry)
      extensions = extensions or []
      extensions.append(inline_image_handler)
      mdwn = markdown.Markdown(
          extensions=extensions, extension_configs=extension_configs
      )
-    htmltext = mdwn.convert(text)
  
-    images = inline_image_handler.get_images()
+    htmltext = mdwn.convert(text)
  
      def replace_image_with_cid(matchobj):
          for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
-            if m in images:
-                return f"(cid:{images[m].cid}"
+            if m in registry:
+                return f"(cid:{registry[m].cid}"
          return matchobj.group(0)
  
      text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
-    return text, htmltext, images
+    return text, htmltext, registry, mdwn
  
  
  # [ CSS STYLING ] #############################################################
  
+
  try:
      import pynliner
  
@@ -279,16 +324,21 @@ class Multipart(
          return hash(str(self.subtype) + "".join(str(self.children)))
  
  
+def filereader_fn(path, mode="r", **kwargs):
+    with open(path, mode, **kwargs) as in_f:
+        return in_f.read()
+
+
  def filewriter_fn(path, content, mode="w", **kwargs):
      with open(path, mode, **kwargs) as out_f:
          out_f.write(content)
  
  
  def collect_inline_images(
-    images, *, tempdir=None, filewriter_fn=filewriter_fn
+    image_registry, *, tempdir=None, filewriter_fn=filewriter_fn
  ):
      relparts = []
-    for path, info in images.items():
+    for path, info in image_registry.items():
          if path.startswith("cid:"):
              continue
  
@@ -301,25 +351,96 @@ def collect_inline_images(
  
          filewriter_fn(path, data.read(), "w+b")
  
+        desc = (
+            f'Inline image: "{info.desc}"'
+            if info.desc
+            else f"Inline image {str(len(relparts)+1)}"
+        )
          relparts.append(
-            Part(
-                *mimetype.split("/"),
-                path,
-                cid=info.cid,
-                desc=f"Image: {info.desc}",
-            )
+            Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
          )
  
      return relparts
  
  
+EMAIL_SIG_SEP = "\n-- \n"
+HTML_SIG_MARKER = "=htmlsig "
+
+
+def make_html_doc(body, sig=None):
+    ret = (
+        "<!DOCTYPE html>\n"
+        "<html>\n"
+        "<head>\n"
+        '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n'  # noqa: E501
+        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'  # noqa: E501
+        "</head>\n"
+        "<body>\n"
+        f"{body}\n"
+    )
+
+    if sig:
+        nl = "\n"
+        ret = (
+            f'{ret}<div id="signature"><span class="sig_separator">{EMAIL_SIG_SEP.strip(nl)}</span>\n'  # noqa: E501
+            f"{sig}\n"
+            "</div>"
+        )
+
+    return f"{ret}\n  </body>\n</html>"
+
+
+def make_text_mail(text, sig=None):
+    return EMAIL_SIG_SEP.join((text, sig)) if sig else text
+
+
+def extract_signature(text, *, filereader_fn=filereader_fn):
+    parts = text.split(EMAIL_SIG_SEP, 1)
+    if len(parts) == 1:
+        return text, None, None
+
+    lines = parts[1].splitlines()
+    if lines[0].startswith(HTML_SIG_MARKER):
+        path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
+        textsig = "\n".join(lines)
+
+        sig_input = filereader_fn(path.expanduser())
+        soup = bs4.BeautifulSoup(sig_input, "html.parser")
+
+        style = str(soup.style.extract()) if soup.style else ""
+        for sig_selector in (
+            "#signature",
+            "#signatur",
+            "#emailsig",
+            ".signature",
+            ".signatur",
+            ".emailsig",
+            "body",
+            "div",
+        ):
+            sig = soup.select_one(sig_selector)
+            if sig:
+                break
+
+        if not sig:
+            return parts[0], textsig, style + sig_input
+
+        if sig.attrs.get("id") == "signature":
+            sig = "".join(str(c) for c in sig.children)
+
+        return parts[0], textsig, style + str(sig)
+
+    return parts[0], parts[1], None
+
+
  def convert_markdown_to_html(
      origtext,
      draftpath,
      *,
      related_to_html_only=False,
-    cssfile=None,
+    css=None,
      filewriter_fn=filewriter_fn,
+    filereader_fn=filereader_fn,
      tempdir=None,
      extensions=None,
      extension_configs=None,
@@ -329,16 +450,56 @@ def convert_markdown_to_html(
      extension_configs.setdefault("pymdownx.highlight", {})
      extension_configs["pymdownx.highlight"]["css_class"] = _CODEHILITE_CLASS
  
-    origtext, htmltext, images = markdown_with_inline_image_support(
+    origtext, textsig, htmlsig = extract_signature(
+        origtext, filereader_fn=filereader_fn
+    )
+
+    (
+        origtext,
+        htmltext,
+        image_registry,
+        mdwn,
+    ) = markdown_with_inline_image_support(
          origtext, extensions=extensions, extension_configs=extension_configs
      )
  
+    if htmlsig:
+        if not textsig:
+            # TODO: decide what to do if there is no plain-text version
+            raise NotImplementedError("HTML signature but no text alternative")
+
+        soup = bs4.BeautifulSoup(htmlsig, "html.parser")
+        for img in soup.find_all("img"):
+            uri = img.attrs["src"]
+            desc = img.attrs.get("title", img.attrs.get("alt"))
+            cid = image_registry.register(uri, desc)
+            img.attrs["src"] = f"cid:{cid}"
+
+        htmlsig = str(soup)
+
+    elif textsig:
+        (
+            textsig,
+            htmlsig,
+            image_registry,
+            mdwn,
+        ) = markdown_with_inline_image_support(
+            textsig,
+            extensions=extensions,
+            extension_configs=extension_configs,
+            image_registry=image_registry,
+            mdwn=mdwn,
+        )
+
+    origtext = make_text_mail(origtext, textsig)
+
      filewriter_fn(draftpath, origtext, encoding="utf-8")
      textpart = Part(
          "text", "plain", draftpath, "Plain-text version", orig=True
      )
  
-    htmltext = apply_styling(htmltext, cssfile)
+    htmltext = make_html_doc(htmltext, htmlsig)
+    htmltext = apply_styling(htmltext, css)
  
      htmlpath = draftpath.with_suffix(".html")
      filewriter_fn(
@@ -347,7 +508,7 @@ def convert_markdown_to_html(
      htmlpart = Part("text", "html", htmlpath, "HTML version")
  
      imgparts = collect_inline_images(
-        images, tempdir=tempdir, filewriter_fn=filewriter_fn
+        image_registry, tempdir=tempdir, filewriter_fn=filewriter_fn
      )
  
      if related_to_html_only:
@@ -482,7 +643,7 @@ class MuttCommands:
              self._cmd1.append(s)
  
      def push(self, s):
-        s = s.replace('"', '"')
+        s = s.replace('"', r"\"")
          s = f'push "{s}"'
          self.debugprint(s)
          self._push.insert(0, s)
@@ -527,7 +688,7 @@ def do_massage(
      cmd_f,
      *,
      extensions=None,
-    cssfile=None,
+    css_f=None,
      converter=convert_markdown_to_html,
      related_to_html_only=True,
      only_build=False,
@@ -558,7 +719,7 @@ def do_massage(
      tree = converter(
          draft_f.read(),
          draftpath,
-        cssfile=cssfile,
+        css=css_f.read() if css_f else None,
          related_to_html_only=related_to_html_only,
          tempdir=tempdir,
          extensions=extensions,
@@ -625,7 +786,7 @@ def do_massage(
                      # number of possible attachments. The performance
                      # difference of using a high number is negligible.
                      # Bubble up the new part
-                    cmds.push(f"<move-up>")
+                    cmds.push("<move-up>")
  
                  # As we push the part to the right position in the list (i.e.
                  # the last of the subset of attachments this script added), we
@@ -634,7 +795,7 @@ def do_massage(
                  # is decremented by the number of descendents so far
                  # encountered.
                  for i in range(1, state["pos"] - len(descendents)):
-                    cmds.push(f"<move-down>")
+                    cmds.push("<move-down>")
  
          elif isinstance(item, Multipart):
              # This node has children, but we already visited them (see
@@ -716,13 +877,13 @@ if __name__ == "__main__":
      elif args.mode == "massage":
          with open(args.MAILDRAFT, "r") as draft_f, open(
              args.cmdpath, "w"
-        ) as cmd_f:
+        ) as cmd_f, open(args.css_file, "r") as css_f:
              do_massage(
                  draft_f,
                  args.MAILDRAFT,
                  cmd_f,
                  extensions=args.extensions,
-                cssfile=args.css_file,
+                css_f=css_f,
                  related_to_html_only=args.related_to_html_only,
                  max_other_attachments=args.max_number_other_attachments,
                  only_build=args.only_build,
@@ -750,6 +911,7 @@ try:
          # NOTE: tests using the capsys fixture must specify sys.stdout to the
          # functions they call, else old stdout is used and not captured
  
+        @pytest.mark.muttctrl
          def test_MuttCommands_cmd(self, const1, const2, capsys):
              "Assert order of commands"
              cmds = MuttCommands(out_f=sys.stdout)
@@ -759,6 +921,7 @@ try:
              captured = capsys.readouterr()
              assert captured.out == "\n".join((const1, const2, ""))
  
+        @pytest.mark.muttctrl
          def test_MuttCommands_push(self, const1, const2, capsys):
              "Assert reverse order of pushes"
              cmds = MuttCommands(out_f=sys.stdout)
@@ -771,6 +934,15 @@ try:
                  == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
              )
  
+        @pytest.mark.muttctrl
+        def test_MuttCommands_push_escape(self, const1, const2, capsys):
+            cmds = MuttCommands(out_f=sys.stdout)
+            cmds.push(f'"{const1}"')
+            cmds.flush()
+            captured = capsys.readouterr()
+            assert f'"\\"{const1}\\""' in captured.out
+
+        @pytest.mark.muttctrl
          def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
              "Assert reverse order of pushes"
              cmds = MuttCommands(out_f=sys.stdout)
@@ -849,6 +1021,7 @@ try:
                  desc="Alternative",
              )
  
+        @pytest.mark.treewalk
          def test_MIMETreeDFWalker_depth_first_walk(
              self, mime_tree_related_to_alternative
          ):
@@ -879,6 +1052,7 @@ try:
              assert items[4][1] == 0
              assert items[4][2] == 4
  
+        @pytest.mark.treewalk
          def test_MIMETreeDFWalker_list_to_mixed(self, const1):
              mimetree = MIMETreeDFWalker()
              items = []
@@ -892,6 +1066,7 @@ try:
              mimetree.walk([p, p], visitor_fn=visitor_fn)
              assert items[-1].subtype == "mixed"
  
+        @pytest.mark.treewalk
          def test_MIMETreeDFWalker_visitor_in_constructor(
              self, mime_tree_related_to_alternative
          ):
@@ -908,11 +1083,12 @@ try:
          def string_io(self, const1, text=None):
              return StringIO(text or const1)
  
+        @pytest.mark.massage
          def test_do_massage_basic(self, const1, string_io, capsys):
              def converter(
                  drafttext,
                  draftpath,
-                cssfile,
+                css,
                  related_to_html_only,
                  extensions,
                  tempdir,
@@ -938,13 +1114,14 @@ try:
              assert "source 'rm -f " in lines.pop(0)
              assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
  
+        @pytest.mark.massage
          def test_do_massage_fulltree(
              self, string_io, const1, mime_tree_related_to_alternative, capsys
          ):
              def converter(
                  drafttext,
                  draftpath,
-                cssfile,
+                css,
                  related_to_html_only,
                  extensions,
                  tempdir,
@@ -1007,6 +1184,7 @@ try:
          def markdown_non_converter(self, const1, const2):
              return lambda s, text: f"{const1}{text}{const2}"
  
+        @pytest.mark.converter
          def test_converter_tree_basic(self, const1, const2, fake_filewriter):
              path = pathlib.Path(const2)
              tree = convert_markdown_to_html(
@@ -1038,11 +1216,11 @@ try:
                  )
  
              assert (path, const1) == fake_filewriter.pop(0)
-            assert (
-                path.with_suffix(".html"),
-                markdown_non_converter(None, const1),
-            ) == fake_filewriter.pop(0)
+            written = fake_filewriter.pop(0)
+            assert path.with_suffix(".html") == written[0]
+            assert const1 in written[1]
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_processor(self):
              imgpath1 = "file:/path/to/image.png"
              imgpath2 = "file:///path/to/image.png?url=params"
@@ -1051,7 +1229,7 @@ try:
                         ![image inlined
                           with newline]({imgpath2})
                         ![image local path]({imgpath3})"""
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
  
              # local paths have been normalised to URLs:
              imgpath3 = f"file://{imgpath3}"
@@ -1066,44 +1244,59 @@ try:
              assert images[imgpath1].cid != images[imgpath3].cid
              assert images[imgpath2].cid != images[imgpath3].cid
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_processor_title_to_desc(self, const1):
              imgpath = "file:///path/to/image.png"
              text = f'![inline local image]({imgpath} "{const1}")'
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
              assert images[imgpath].desc == const1
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_processor_alt_to_desc(self, const1):
              imgpath = "file:///path/to/image.png"
              text = f"![{const1}]({imgpath})"
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
              assert images[imgpath].desc == const1
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_processor_title_over_alt_desc(
              self, const1, const2
          ):
              imgpath = "file:///path/to/image.png"
              text = f'![{const1}]({imgpath} "{const2}")'
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
              assert images[imgpath].desc == const2
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_not_external(self):
              imgpath = "https://path/to/image.png"
              text = f"![inline image]({imgpath})"
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
  
              assert 'src="cid:' not in html
              assert "](cid:" not in text
              assert len(images) == 0
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_local_file(self):
              imgpath = "/path/to/image.png"
              text = f"![inline image]({imgpath})"
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
  
              for k, v in images.items():
                  assert k == f"file://{imgpath}"
                  break
  
+        @pytest.mark.imgproc
+        def test_markdown_inline_image_expanduser(self):
+            imgpath = pathlib.Path("~/image.png")
+            text = f"![inline image]({imgpath})"
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
+
+            for k, v in images.items():
+                assert k == f"file://{imgpath.expanduser()}"
+                break
+
          @pytest.fixture
          def test_png(self):
              return (
@@ -1111,15 +1304,17 @@ try:
                  "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
              )
  
+        @pytest.mark.imgproc
          def test_markdown_inline_image_processor_base64(self, test_png):
              text = f"![1px white inlined]({test_png})"
-            text, html, images = markdown_with_inline_image_support(text)
+            text, html, images, mdwn = markdown_with_inline_image_support(text)
  
              assert 'src="cid:' in html
              assert "](cid:" in text
              assert len(images) == 1
              assert test_png in images
  
+        @pytest.mark.converter
          def test_converter_tree_inline_image_base64(
              self, test_png, const1, fake_filewriter
          ):
@@ -1138,6 +1333,7 @@ try:
              assert tree.children[1].path == written[0]
              assert written[1] == request.urlopen(test_png).read()
  
+        @pytest.mark.converter
          def test_converter_tree_inline_image_base64_related_to_html(
              self, test_png, const1, fake_filewriter
          ):
@@ -1156,6 +1352,7 @@ try:
              assert tree.children[1].children[1].path == written[0]
              assert written[1] == request.urlopen(test_png).read()
  
+        @pytest.mark.converter
          def test_converter_tree_inline_image_cid(
              self, const1, fake_filewriter
          ):
@@ -1173,6 +1370,7 @@ try:
              assert tree.children[1].cid != const1
              assert tree.children[1].type != "image"
  
+        @pytest.mark.imgcoll
          def test_inline_image_collection(
              self, test_png, const1, const2, fake_filewriter
          ):
@@ -1189,15 +1387,64 @@ try:
              assert relparts[0].cid == const1
              assert relparts[0].desc.endswith(const2)
  
-        def test_apply_stylesheet(self):
-            if _PYNLINER:
+        if _PYNLINER:
+
+            @pytest.mark.styling
+            def test_apply_stylesheet(self):
                  html = "<p>Hello, world!</p>"
                  css = "p { color:red }"
                  out = apply_styling(html, css)
                  assert 'p style="color' in out
  
-        def test_apply_stylesheet_pygments(self):
-            if _PYGMENTS_CSS:
+            @pytest.mark.styling
+            def test_massage_styling_to_converter(self, string_io, const1):
+                css = "p { color:red }"
+                css_f = StringIO(css)
+                out_f = StringIO()
+                css_applied = []
+
+                def converter(
+                    drafttext,
+                    draftpath,
+                    css,
+                    related_to_html_only,
+                    extensions,
+                    tempdir,
+                ):
+                    css_applied.append(css)
+                    return Part("text", "plain", draftpath, orig=True)
+
+                do_massage(
+                    draft_f=string_io,
+                    draftpath=const1,
+                    cmd_f=out_f,
+                    css_f=css_f,
+                    converter=converter,
+                )
+                assert css_applied[0] == css
+
+            @pytest.mark.converter
+            def test_converter_apply_styles(
+                self, const1, fake_filewriter, monkeypatch
+            ):
+                path = pathlib.Path(const1)
+                text = "Hello, world!"
+                css = "p { color:red }"
+                with monkeypatch.context() as m:
+                    m.setattr(
+                        markdown.Markdown,
+                        "convert",
+                        lambda s, t: f"<p>{t}</p>",
+                    )
+                    convert_markdown_to_html(
+                        text, path, css=css, filewriter_fn=fake_filewriter
+                    )
+                assert "color: red" in fake_filewriter.pop()[1]
+
+        if _PYGMENTS_CSS:
+
+            @pytest.mark.styling
+            def test_apply_stylesheet_pygments(self):
                  html = (
                      f'<div class="{_CODEHILITE_CLASS}">'
                      "<pre>def foo():\n    return</pre></div>"
@@ -1205,13 +1452,14 @@ try:
                  out = apply_styling(html, _PYGMENTS_CSS)
                  assert f'{_CODEHILITE_CLASS}" style="' in out
  
+        @pytest.mark.massage
          def test_mime_tree_relative_within_alternative(
              self, string_io, const1, capsys, mime_tree_related_to_html
          ):
              def converter(
                  drafttext,
                  draftpath,
-                cssfile,
+                css,
                  related_to_html_only,
                  extensions,
                  tempdir,
@@ -1261,13 +1509,14 @@ try:
              assert "send-message" in lines.pop()
              assert len(lines) == 0
  
+        @pytest.mark.massage
          def test_mime_tree_nested_trees_does_not_break_positioning(
              self, string_io, const1, capsys
          ):
              def converter(
                  drafttext,
                  draftpath,
-                cssfile,
+                css,
                  related_to_html_only,
                  extensions,
                  tempdir,
@@ -1326,7 +1575,7 @@ try:
  
              captured = capsys.readouterr()
              lines = captured.out.splitlines()
-            while not "logo.png" in lines.pop():
+            while "logo.png" not in lines.pop():
                  pass
              lines.pop()
              assert "content-id" in lines.pop()
@@ -1343,5 +1592,212 @@ try:
              # follows next must not be another <move-down>
              assert "Logo" in lines.pop()
  
+        @pytest.mark.sig
+        def test_signature_extraction_no_signature(self, const1):
+            """Expect (text, None, None) when only const1 is passed in."""
+            extracted = extract_signature(const1)
+            assert (const1, None, None) == extracted
+
+        @pytest.mark.sig
+        def test_signature_extraction_just_text(self, const1, const2):
+            """Expect text after EMAIL_SIG_SEP as the plain-text signature.
+
+            The draft is const1 + EMAIL_SIG_SEP + const2; no HTML signature
+            is expected to be returned for it.
+            """
+            draft = f"{const1}{EMAIL_SIG_SEP}{const2}"
+            body, plain_sig, html_sig = extract_signature(draft)
+            assert body == const1
+            assert plain_sig == const2
+            assert html_sig is None
+
+        @pytest.mark.sig
+        def test_signature_extraction_html(self, const1, const2):
+            """Expect the HTML signature to be obtained through filereader_fn.
+
+            The signature part of the draft starts with HTML_SIG_MARKER and
+            a path. The fake reader wraps its payload in a div with the id
+            "signature"; the test expects just the payload back.
+            """
+            sigpath = pathlib.Path("somepath")
+            template = "HTML signature from {path} but as a string"
+            expected_html = template.format(path=sigpath)
+
+            def fake_reader(path):
+                payload = template.format(path=path)
+                return f'<div id="signature">{payload}</div>'
+
+            draft = (
+                f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {sigpath}\n{const2}"
+            )
+            body, plain_sig, html_sig = extract_signature(
+                draft, filereader_fn=fake_reader
+            )
+            assert body == const1
+            assert plain_sig == const2
+            assert html_sig == expected_html
+
+        @pytest.mark.sig
+        def test_signature_extraction_file_not_found(self, const1):
+            """Expect FileNotFoundError for a missing HTML signature file.
+
+            No filereader_fn is passed, so extract_signature uses whatever
+            reader it defaults to for the path following HTML_SIG_MARKER.
+            """
+            path = pathlib.Path("/does/not/exist")
+            with pytest.raises(FileNotFoundError):
+                # The return value is irrelevant: the call must raise.
+                extract_signature(
+                    f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
+                )
+
+        @pytest.mark.imgproc
+        def test_image_registry(self, const1):
+            """Check the ID returned by register() and registry membership."""
+            registry = ImageRegistry()
+            content_id = registry.register(const1)
+            assert "@" in content_id
+            # The returned ID must not be wrapped in angle brackets.
+            assert not content_id.startswith("<")
+            assert not content_id.endswith(">")
+            assert const1 in registry
+
+        @pytest.mark.imgproc
+        def test_image_registry_file_uri(self, const1):
+            """Expect iteration over the registry to yield a file:// URI."""
+            registry = ImageRegistry()
+            registry.register("/some/path")
+            for entry in registry:
+                # Only the first entry yielded is inspected.
+                assert entry.startswith("file://")
+                break
+
+        @pytest.mark.converter
+        @pytest.mark.sig
+        def test_converter_signature_handling(
+            self, const1, fake_filewriter, monkeypatch
+        ):
+            """Check the HTML layout produced for a plain-text signature.
+
+            Markdown conversion is patched to return its input unchanged.
+            In the HTML taken from fake_filewriter, the first child of
+            <body> must contain the mail body and the second must be the
+            #signature element, which starts with a span.sig_separator
+            followed by the signature text.
+            """
+            draftpath = pathlib.Path(const1)
+            body_text = "This is the mail body\n"
+            sig_text = "This is a plain-text signature only"
+            draft = "".join((body_text, f"{EMAIL_SIG_SEP}", sig_text))
+
+            with monkeypatch.context() as patcher:
+                patcher.setattr(
+                    markdown.Markdown, "convert", lambda s, t: t
+                )
+                convert_markdown_to_html(
+                    draft,
+                    draftpath,
+                    filewriter_fn=fake_filewriter,
+                    filereader_fn=lambda path: "",
+                )
+
+            written_html = fake_filewriter.pop()[1]
+            soup = bs4.BeautifulSoup(written_html, "html.parser")
+            children = soup.body.contents
+
+            first_child = children.pop(0)
+            assert body_text in first_child
+
+            signature = soup.select_one("#signature")
+            second_child = children.pop(0)
+            assert signature == second_child
+
+            separator = signature.select_one("span.sig_separator")
+            assert separator == signature.contents[0]
+            # The separator text, re-wrapped in newlines, is EMAIL_SIG_SEP.
+            assert f"\n{separator.text}\n" == EMAIL_SIG_SEP
+
+            assert sig_text in signature.contents[1]
+
+        @pytest.mark.converter
+        @pytest.mark.sig
+        def test_converter_signature_handling_htmlsig(
+            self, const1, fake_filewriter, monkeypatch
+        ):
+            """Check how a signature carrying HTML_SIG_MARKER is converted.
+
+            Markdown conversion is patched to upper-case its input. The
+            #signature element of the written HTML, minus its first span,
+            must hold exactly the text supplied by the fake reader, and
+            the plain-text output must end with the separator followed by
+            the plain-text signature.
+            """
+            draftpath = pathlib.Path(const1)
+            plain_sig = "This is the plain-text version"
+            draft = "".join(
+                (
+                    "This is the mail body",
+                    f"{EMAIL_SIG_SEP}",
+                    f"{HTML_SIG_MARKER}{draftpath}\n",
+                    plain_sig,
+                )
+            )
+            template = "HTML Signature from {path}"
+
+            def fake_reader(path):
+                payload = template.format(path=path)
+                return f'<div id="signature">{payload}</div>'
+
+            with monkeypatch.context() as patcher:
+                patcher.setattr(
+                    markdown.Markdown, "convert", lambda s, t: t.upper()
+                )
+                convert_markdown_to_html(
+                    draft,
+                    draftpath,
+                    filewriter_fn=fake_filewriter,
+                    filereader_fn=fake_reader,
+                )
+
+            written_html = fake_filewriter.pop()[1]
+            soup = bs4.BeautifulSoup(written_html, "html.parser")
+            signature = soup.select_one("#signature")
+            # Detach the first <span> before looking at the remaining text.
+            signature.span.extract()
+
+            assert HTML_SIG_MARKER not in signature.text
+            expected_text = template.format(path=draftpath)
+            assert expected_text == signature.text.strip()
+
+            plaintext = fake_filewriter.pop()[1]
+            assert plaintext.endswith(EMAIL_SIG_SEP + plain_sig)
+
+        @pytest.mark.converter
+        @pytest.mark.sig
+        def test_converter_signature_handling_htmlsig_with_image(
+            self, const1, fake_filewriter, monkeypatch, test_png
+        ):
+            """Check that an <img> inside an HTML signature is processed.
+
+            The fake reader returns a signature whose <img> points at
+            test_png. Afterwards the writer must have received a file with
+            a .png suffix, and the <img> in the written HTML must use a
+            "cid:" source.
+            """
+            draftpath = pathlib.Path(const1)
+            draft = "".join(
+                (
+                    "This is the mail body",
+                    f"{EMAIL_SIG_SEP}",
+                    f"{HTML_SIG_MARKER}{draftpath}\n",
+                    "This is the plain-text version",
+                )
+            )
+
+            sig_template = (
+                "HTML Signature from {path} with image\n"
+                f'<img src="{test_png}">\n'
+            )
+
+            def fake_reader(path):
+                payload = sig_template.format(path=path)
+                return f'<div id="signature">{payload}</div>'
+
+            with monkeypatch.context() as patcher:
+                patcher.setattr(
+                    markdown.Markdown, "convert", lambda s, t: t.upper()
+                )
+                convert_markdown_to_html(
+                    draft,
+                    draftpath,
+                    filewriter_fn=fake_filewriter,
+                    filereader_fn=fake_reader,
+                )
+
+            image_path = fake_filewriter.pop()[0]
+            assert image_path.suffix == ".png"
+
+            written_html = fake_filewriter.pop()[1]
+            soup = bs4.BeautifulSoup(written_html, "html.parser")
+            assert soup.img.attrs["src"].startswith("cid:")
+
+        @pytest.mark.converter
+        @pytest.mark.sig
+        def test_converter_signature_handling_textsig_with_image(
+            self, const1, fake_filewriter, test_png
+        ):
+            """Check that an image in a plain-text signature gets attached.
+
+            The signature contains a Markdown image referring to test_png.
+            The returned tree must have the "alternative" part as first
+            child and a "png" part as second child, and the data handed to
+            the writer must equal the content behind test_png.
+            """
+            mailparts = (
+                "This is the mail body",
+                f"{EMAIL_SIG_SEP}",
+                "This is the plain-text version with image\n",
+                f"![Inline]({test_png})",
+            )
+            tree = convert_markdown_to_html(
+                "".join(mailparts),
+                pathlib.Path(const1),
+                filewriter_fn=fake_filewriter,
+            )
+
+            assert tree.subtype == "relative"
+            assert tree.children[0].subtype == "alternative"
+            assert tree.children[1].subtype == "png"
+            written = fake_filewriter.pop()
+            assert tree.children[1].path == written[0]
+            # NOTE(review): `request` is presumably urllib.request, imported
+            # elsewhere in the file -- confirm.
+            assert written[1] == request.urlopen(test_png).read()
+
+        def test_converter_attribution_to_admonition(self, fake_filewriter):
+            """Placeholder: this test has no implementation yet.
+
+            A bare ``def`` without a body is a syntax error that prevents
+            the whole module from being compiled, hence the explicit skip.
+            """
+            # TODO: write the actual test.
+            pytest.skip("test not implemented yet")
+
+
  except ImportError:
      pass
diff --git a/.config/neomutt/pytest.ini b/.config/neomutt/pytest.ini

new file mode 100644 (file)

index 0000000..4dedcb2
--- /dev/null
+++ b/.config/neomutt/pytest.ini
@@ -0,0 +1,11 @@
+[pytest]
+markers =
+  sig: test related to signature extraction/handling
+  converter: test related to the text2html conversion
+  massage: test related to message massaging
+  imgproc: test related to inline image processing
+  imgcoll: test related to inline image collection
+  styling: test related to HTML styling
+  treewalk: test related to MIME tree generation/walking
+  muttctrl: test related to Mutt command interfacing
+  current: test currently being worked on
author	martin f. krafft <madduck@madduck.net>
	Sat, 2 Sep 2023 08:30:55 +0000 (10:30 +0200)
committer	martin f. krafft <madduck@madduck.net>
	Sat, 2 Sep 2023 10:07:19 +0000 (12:07 +0200)
.config/neomutt/buildmimetree.py		patch \| blob \| history
.config/neomutt/pytest.ini	[new file with mode: 0644]	patch \| blob