From: martin f. krafft Date: Sat, 2 Sep 2023 08:30:55 +0000 (+0200) Subject: buildmimetree.py: extract images from html signatures X-Git-Url: https://git.madduck.net/etc/neomutt.git/commitdiff_plain/504117eb47128edd45675783d9475f8c5c91dd5b?hp=2fa1e4c1aec3affdf63c08a2e1d7005829cab88f buildmimetree.py: extract images from html signatures --- diff --git a/.config/neomutt/buildmimetree.py b/.config/neomutt/buildmimetree.py index 0052632..fbc8040 100755 --- a/.config/neomutt/buildmimetree.py +++ b/.config/neomutt/buildmimetree.py @@ -9,7 +9,8 @@ # set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty" # macro compose B "\ # source '$my_confdir/buildmimetree.py \ -# --tempdir $tempdir --extensions $my_mdwn_extensions|'\ +# --tempdir $tempdir --extensions $my_mdwn_extensions \ +# --css-file $my_confdir/htmlmail.css |'\ # sourc e \$my_mdwn_postprocess_cmd_file\ # " "Convert message into a modern MIME tree with inline images" # @@ -21,6 +22,7 @@ # Requirements: # - python3 # - python3-markdown +# - python3-beautifulsoup4 # Optional: # - pytest # - Pynliner, provides --css-file and thus inline styling of HTML output @@ -34,12 +36,15 @@ # import sys +import os +import os.path import pathlib import markdown import tempfile import argparse import re import mimetypes +import bs4 from collections import namedtuple, OrderedDict from markdown.extensions import Extension from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE @@ -61,6 +66,7 @@ def parse_cli_args(*args, **kwargs): parser.add_argument( "--extensions", + metavar="EXT[,EXT[,EXT]]", type=str, default="", help="Markdown extension to use (comma-separated list)", @@ -69,7 +75,9 @@ def parse_cli_args(*args, **kwargs): if _PYNLINER: parser.add_argument( "--css-file", + metavar="FILE", type=pathlib.Path, + default=os.devnull, help="CSS file to merge with the final HTML", ) else: @@ -89,22 +97,26 @@ def parse_cli_args(*args, **kwargs): except ValueError: pass - raise ValueError(f"Must be a positive integer") + raise ValueError("Must be a positive integer") parser.add_argument( "--max-number-other-attachments", + metavar="INTEGER", type=positive_integer, - help="Make related content be sibling to HTML parts only", + default=20, + help="Maximum number of other attachments to expect", ) parser.add_argument( "--only-build", + "--just-build", action="store_true", help="Only build, don't send the message", ) parser.add_argument( "--tempdir", + metavar="DIR", type=pathlib.Path, help="Specify temporary directory to use for attachments", ) @@ -115,6 +127,12 @@ def parse_cli_args(*args, **kwargs): help="Turn on debug logging of commands generated to stderr", ) + parser.add_argument( + "--debug-walk", + action="store_true", + help="Turn on debugging to stderr of the MIME tree walk", + ) + subp = parser.add_subparsers(help="Sub-command parsers", dest="mode") massage_p = subp.add_parser( "massage", help="Massaging phase (internal use)" @@ -123,19 +141,13 @@ def parse_cli_args(*args, **kwargs): massage_p.add_argument( "--write-commands-to", "-o", - metavar="PATH", + metavar="FILE", dest="cmdpath", type=pathlib.Path, required=True, help="Temporary file path to write commands to", ) - massage_p.add_argument( - "--debug-walk", - action="store_true", - help="Turn on debugging to stderr of the MIME tree walk", - ) - massage_p.add_argument( "MAILDRAFT", nargs="?", @@ -146,7 +158,7 @@ def parse_cli_args(*args, **kwargs): return parser.parse_args(*args, **kwargs) -# [ MARKDOWN WRAPPING ] ####################################################### +# [ IMAGE HANDLING ] ########################################################## InlineImageInfo = namedtuple( @@ -154,11 +166,42 @@ InlineImageInfo = namedtuple( ) +class ImageRegistry: + def __init__(self): + self._images = OrderedDict() + + def register(self, path, description=None): + path = os.path.expanduser(path) + if path.startswith("/"): + path = f"file://{path}" + cid = make_msgid()[1:-1] + self._images[path] = InlineImageInfo(cid, description) + return cid + + def __iter__(self): + return self._images.__iter__() + + def __getitem__(self, idx): + return self._images.__getitem__(idx) + + def __len__(self): + return self._images.__len__() + + def items(self): + return self._images.items() + + def __repr__(self): + return f"" + + def __str__(self): + return self._images.__str__() + + class InlineImageExtension(Extension): class RelatedImageInlineProcessor(ImageInlineProcessor): - def __init__(self, re, md, ext): + def __init__(self, re, md, registry): super().__init__(re, md) - self._ext = ext + self._registry = registry def handleMatch(self, m, data): el, start, end = super().handleMatch(m, data) @@ -166,60 +209,62 @@ class InlineImageExtension(Extension): src = el.attrib["src"] if "://" not in src or src.startswith("file://"): # We only inline local content - cid = self._ext.get_cid_for_image(el.attrib) + cid = self._registry.register( + el.attrib["src"], + el.attrib.get("title", el.attrib.get("alt")), + ) el.attrib["src"] = f"cid:{cid}" return el, start, end - def __init__(self): + def __init__(self, registry): super().__init__() - self._images = OrderedDict() + self._image_registry = registry + + INLINE_PATTERN_NAME = "image_link" def extendMarkdown(self, md): md.registerExtension(self) inline_image_proc = self.RelatedImageInlineProcessor( - IMAGE_LINK_RE, md, self + IMAGE_LINK_RE, md, self._image_registry ) - md.inlinePatterns.register(inline_image_proc, "image_link", 150) - - def get_cid_for_image(self, attrib): - msgid = make_msgid()[1:-1] - path = attrib["src"] - if path.startswith("/"): - path = f"file://{path}" - self._images[path] = InlineImageInfo( - msgid, attrib.get("title", attrib.get("alt")) + md.inlinePatterns.register( + inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150 ) - return msgid - - def get_images(self): - return self._images def markdown_with_inline_image_support( - text, *, extensions=None, extension_configs=None + text, + *, + mdwn=None, + image_registry=None, + extensions=None, + extension_configs=None, ): - inline_image_handler = InlineImageExtension() + registry = ( + image_registry if image_registry is not None else ImageRegistry() + ) + inline_image_handler = InlineImageExtension(registry=registry) extensions = extensions or [] extensions.append(inline_image_handler) mdwn = markdown.Markdown( extensions=extensions, extension_configs=extension_configs ) - htmltext = mdwn.convert(text) - images = inline_image_handler.get_images() + htmltext = mdwn.convert(text) def replace_image_with_cid(matchobj): for m in (matchobj.group(1), f"file://{matchobj.group(1)}"): - if m in images: - return f"(cid:{images[m].cid}" + if m in registry: + return f"(cid:{registry[m].cid}" return matchobj.group(0) text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text) - return text, htmltext, images + return text, htmltext, registry, mdwn # [ CSS STYLING ] ############################################################# + try: import pynliner @@ -279,16 +324,21 @@ class Multipart( return hash(str(self.subtype) + "".join(str(self.children))) +def filereader_fn(path, mode="r", **kwargs): + with open(path, mode, **kwargs) as in_f: + return in_f.read() + + def filewriter_fn(path, content, mode="w", **kwargs): with open(path, mode, **kwargs) as out_f: out_f.write(content) def collect_inline_images( - images, *, tempdir=None, filewriter_fn=filewriter_fn + image_registry, *, tempdir=None, filewriter_fn=filewriter_fn ): relparts = [] - for path, info in images.items(): + for path, info in image_registry.items(): if path.startswith("cid:"): continue @@ -301,25 +351,96 @@ def collect_inline_images( filewriter_fn(path, data.read(), "w+b") + desc = ( + f'Inline image: "{info.desc}"' + if info.desc + else f"Inline image {str(len(relparts)+1)}" + ) relparts.append( - Part( - *mimetype.split("/"), - path, - cid=info.cid, - desc=f"Image: {info.desc}", - ) + Part(*mimetype.split("/"), path, cid=info.cid, desc=desc) ) return relparts +EMAIL_SIG_SEP = "\n-- \n" +HTML_SIG_MARKER = "=htmlsig " + + +def make_html_doc(body, sig=None): + ret = ( + "\n" + "\n" + "\n" + '\n' # noqa: E501 + '\n' # noqa: E501 + "\n" + "\n" + f"{body}\n" + ) + + if sig: + nl = "\n" + ret = ( + f'{ret}
{EMAIL_SIG_SEP.strip(nl)}\n' # noqa: E501 + f"{sig}\n" + "
" + ) + + return f"{ret}\n \n" + + +def make_text_mail(text, sig=None): + return EMAIL_SIG_SEP.join((text, sig)) if sig else text + + +def extract_signature(text, *, filereader_fn=filereader_fn): + parts = text.split(EMAIL_SIG_SEP, 1) + if len(parts) == 1: + return text, None, None + + lines = parts[1].splitlines() + if lines[0].startswith(HTML_SIG_MARKER): + path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1]) + textsig = "\n".join(lines) + + sig_input = filereader_fn(path.expanduser()) + soup = bs4.BeautifulSoup(sig_input, "html.parser") + + style = str(soup.style.extract()) if soup.style else "" + for sig_selector in ( + "#signature", + "#signatur", + "#emailsig", + ".signature", + ".signatur", + ".emailsig", + "body", + "div", + ): + sig = soup.select_one(sig_selector) + if sig: + break + + if not sig: + return parts[0], textsig, style + sig_input + + if sig.attrs.get("id") == "signature": + sig = "".join(str(c) for c in sig.children) + + return parts[0], textsig, style + str(sig) + + return parts[0], parts[1], None + + def convert_markdown_to_html( origtext, draftpath, *, related_to_html_only=False, - cssfile=None, + css=None, filewriter_fn=filewriter_fn, + filereader_fn=filereader_fn, tempdir=None, extensions=None, extension_configs=None, @@ -329,16 +450,56 @@ def convert_markdown_to_html( extension_configs.setdefault("pymdownx.highlight", {}) extension_configs["pymdownx.highlight"]["css_class"] = _CODEHILITE_CLASS - origtext, htmltext, images = markdown_with_inline_image_support( + origtext, textsig, htmlsig = extract_signature( + origtext, filereader_fn=filereader_fn + ) + + ( + origtext, + htmltext, + image_registry, + mdwn, + ) = markdown_with_inline_image_support( origtext, extensions=extensions, extension_configs=extension_configs ) + if htmlsig: + if not textsig: + # TODO: decide what to do if there is no plain-text version + raise NotImplementedError("HTML signature but no text alternative") + + soup = bs4.BeautifulSoup(htmlsig, "html.parser") + for img in soup.find_all("img"): + uri = img.attrs["src"] + desc = img.attrs.get("title", img.attrs.get("alt")) + cid = image_registry.register(uri, desc) + img.attrs["src"] = f"cid:{cid}" + + htmlsig = str(soup) + + elif textsig: + ( + textsig, + htmlsig, + image_registry, + mdwn, + ) = markdown_with_inline_image_support( + textsig, + extensions=extensions, + extension_configs=extension_configs, + image_registry=image_registry, + mdwn=mdwn, + ) + + origtext = make_text_mail(origtext, textsig) + filewriter_fn(draftpath, origtext, encoding="utf-8") textpart = Part( "text", "plain", draftpath, "Plain-text version", orig=True ) - htmltext = apply_styling(htmltext, cssfile) + htmltext = make_html_doc(htmltext, htmlsig) + htmltext = apply_styling(htmltext, css) htmlpath = draftpath.with_suffix(".html") filewriter_fn( @@ -347,7 +508,7 @@ def convert_markdown_to_html( htmlpart = Part("text", "html", htmlpath, "HTML version") imgparts = collect_inline_images( - images, tempdir=tempdir, filewriter_fn=filewriter_fn + image_registry, tempdir=tempdir, filewriter_fn=filewriter_fn ) if related_to_html_only: @@ -482,7 +643,7 @@ class MuttCommands: self._cmd1.append(s) def push(self, s): - s = s.replace('"', '"') + s = s.replace('"', r"\"") s = f'push "{s}"' self.debugprint(s) self._push.insert(0, s) @@ -527,7 +688,7 @@ def do_massage( cmd_f, *, extensions=None, - cssfile=None, + css_f=None, converter=convert_markdown_to_html, related_to_html_only=True, only_build=False, @@ -558,7 +719,7 @@ def do_massage( tree = converter( draft_f.read(), draftpath, - cssfile=cssfile, + css=css_f.read() if css_f else None, related_to_html_only=related_to_html_only, tempdir=tempdir, extensions=extensions, @@ -625,7 +786,7 @@ def do_massage( # number of possible attachments. The performance # difference of using a high number is negligible. # Bubble up the new part - cmds.push(f"") + cmds.push("") # As we push the part to the right position in the list (i.e. # the last of the subset of attachments this script added), we @@ -634,7 +795,7 @@ def do_massage( # is decremented by the number of descendents so far # encountered. for i in range(1, state["pos"] - len(descendents)): - cmds.push(f"") + cmds.push("") elif isinstance(item, Multipart): # This node has children, but we already visited them (see @@ -716,13 +877,13 @@ if __name__ == "__main__": elif args.mode == "massage": with open(args.MAILDRAFT, "r") as draft_f, open( args.cmdpath, "w" - ) as cmd_f: + ) as cmd_f, open(args.css_file, "r") as css_f: do_massage( draft_f, args.MAILDRAFT, cmd_f, extensions=args.extensions, - cssfile=args.css_file, + css_f=css_f, related_to_html_only=args.related_to_html_only, max_other_attachments=args.max_number_other_attachments, only_build=args.only_build, @@ -750,6 +911,7 @@ try: # NOTE: tests using the capsys fixture must specify sys.stdout to the # functions they call, else old stdout is used and not captured + @pytest.mark.muttctrl def test_MuttCommands_cmd(self, const1, const2, capsys): "Assert order of commands" cmds = MuttCommands(out_f=sys.stdout) @@ -759,6 +921,7 @@ try: captured = capsys.readouterr() assert captured.out == "\n".join((const1, const2, "")) + @pytest.mark.muttctrl def test_MuttCommands_push(self, const1, const2, capsys): "Assert reverse order of pushes" cmds = MuttCommands(out_f=sys.stdout) @@ -771,6 +934,15 @@ try: == ('"\npush "'.join(("", const2, const1, "")))[2:-6] ) + @pytest.mark.muttctrl + def test_MuttCommands_push_escape(self, const1, const2, capsys): + cmds = MuttCommands(out_f=sys.stdout) + cmds.push(f'"{const1}"') + cmds.flush() + captured = capsys.readouterr() + assert f'"\\"{const1}\\""' in captured.out + + @pytest.mark.muttctrl def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys): "Assert reverse order of pushes" cmds = MuttCommands(out_f=sys.stdout) @@ -849,6 +1021,7 @@ try: desc="Alternative", ) + @pytest.mark.treewalk def test_MIMETreeDFWalker_depth_first_walk( self, mime_tree_related_to_alternative ): @@ -879,6 +1052,7 @@ try: assert items[4][1] == 0 assert items[4][2] == 4 + @pytest.mark.treewalk def test_MIMETreeDFWalker_list_to_mixed(self, const1): mimetree = MIMETreeDFWalker() items = [] @@ -892,6 +1066,7 @@ try: mimetree.walk([p, p], visitor_fn=visitor_fn) assert items[-1].subtype == "mixed" + @pytest.mark.treewalk def test_MIMETreeDFWalker_visitor_in_constructor( self, mime_tree_related_to_alternative ): @@ -908,11 +1083,12 @@ try: def string_io(self, const1, text=None): return StringIO(text or const1) + @pytest.mark.massage def test_do_massage_basic(self, const1, string_io, capsys): def converter( drafttext, draftpath, - cssfile, + css, related_to_html_only, extensions, tempdir, @@ -938,13 +1114,14 @@ try: assert "source 'rm -f " in lines.pop(0) assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0) + @pytest.mark.massage def test_do_massage_fulltree( self, string_io, const1, mime_tree_related_to_alternative, capsys ): def converter( drafttext, draftpath, - cssfile, + css, related_to_html_only, extensions, tempdir, @@ -1007,6 +1184,7 @@ try: def markdown_non_converter(self, const1, const2): return lambda s, text: f"{const1}{text}{const2}" + @pytest.mark.converter def test_converter_tree_basic(self, const1, const2, fake_filewriter): path = pathlib.Path(const2) tree = convert_markdown_to_html( @@ -1038,11 +1216,11 @@ try: ) assert (path, const1) == fake_filewriter.pop(0) - assert ( - path.with_suffix(".html"), - markdown_non_converter(None, const1), - ) == fake_filewriter.pop(0) + written = fake_filewriter.pop(0) + assert path.with_suffix(".html") == written[0] + assert const1 in written[1] + @pytest.mark.imgproc def test_markdown_inline_image_processor(self): imgpath1 = "file:/path/to/image.png" imgpath2 = "file:///path/to/image.png?url=params" @@ -1051,7 +1229,7 @@ try: ![image inlined with newline]({imgpath2}) ![image local path]({imgpath3})""" - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) # local paths have been normalised to URLs: imgpath3 = f"file://{imgpath3}" @@ -1066,44 +1244,59 @@ try: assert images[imgpath1].cid != images[imgpath3].cid assert images[imgpath2].cid != images[imgpath3].cid + @pytest.mark.imgproc def test_markdown_inline_image_processor_title_to_desc(self, const1): imgpath = "file:///path/to/image.png" text = f'![inline local image]({imgpath} "{const1}")' - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) assert images[imgpath].desc == const1 + @pytest.mark.imgproc def test_markdown_inline_image_processor_alt_to_desc(self, const1): imgpath = "file:///path/to/image.png" text = f"![{const1}]({imgpath})" - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) assert images[imgpath].desc == const1 + @pytest.mark.imgproc def test_markdown_inline_image_processor_title_over_alt_desc( self, const1, const2 ): imgpath = "file:///path/to/image.png" text = f'![{const1}]({imgpath} "{const2}")' - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) assert images[imgpath].desc == const2 + @pytest.mark.imgproc def test_markdown_inline_image_not_external(self): imgpath = "https://path/to/image.png" text = f"![inline image]({imgpath})" - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) assert 'src="cid:' not in html assert "](cid:" not in text assert len(images) == 0 + @pytest.mark.imgproc def test_markdown_inline_image_local_file(self): imgpath = "/path/to/image.png" text = f"![inline image]({imgpath})" - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) for k, v in images.items(): assert k == f"file://{imgpath}" break + @pytest.mark.imgproc + def test_markdown_inline_image_expanduser(self): + imgpath = pathlib.Path("~/image.png") + text = f"![inline image]({imgpath})" + text, html, images, mdwn = markdown_with_inline_image_support(text) + + for k, v in images.items(): + assert k == f"file://{imgpath.expanduser()}" + break + @pytest.fixture def test_png(self): return ( @@ -1111,15 +1304,17 @@ try: "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA" ) + @pytest.mark.imgproc def test_markdown_inline_image_processor_base64(self, test_png): text = f"![1px white inlined]({test_png})" - text, html, images = markdown_with_inline_image_support(text) + text, html, images, mdwn = markdown_with_inline_image_support(text) assert 'src="cid:' in html assert "](cid:" in text assert len(images) == 1 assert test_png in images + @pytest.mark.converter def test_converter_tree_inline_image_base64( self, test_png, const1, fake_filewriter ): @@ -1138,6 +1333,7 @@ try: assert tree.children[1].path == written[0] assert written[1] == request.urlopen(test_png).read() + @pytest.mark.converter def test_converter_tree_inline_image_base64_related_to_html( self, test_png, const1, fake_filewriter ): @@ -1156,6 +1352,7 @@ try: assert tree.children[1].children[1].path == written[0] assert written[1] == request.urlopen(test_png).read() + @pytest.mark.converter def test_converter_tree_inline_image_cid( self, const1, fake_filewriter ): @@ -1173,6 +1370,7 @@ try: assert tree.children[1].cid != const1 assert tree.children[1].type != "image" + @pytest.mark.imgcoll def test_inline_image_collection( self, test_png, const1, const2, fake_filewriter ): @@ -1189,15 +1387,64 @@ try: assert relparts[0].cid == const1 assert relparts[0].desc.endswith(const2) - def test_apply_stylesheet(self): - if _PYNLINER: + if _PYNLINER: + + @pytest.mark.styling + def test_apply_stylesheet(self): html = "

Hello, world!

" css = "p { color:red }" out = apply_styling(html, css) assert 'p style="color' in out - def test_apply_stylesheet_pygments(self): - if _PYGMENTS_CSS: + @pytest.mark.styling + def test_massage_styling_to_converter(self, string_io, const1): + css = "p { color:red }" + css_f = StringIO(css) + out_f = StringIO() + css_applied = [] + + def converter( + drafttext, + draftpath, + css, + related_to_html_only, + extensions, + tempdir, + ): + css_applied.append(css) + return Part("text", "plain", draftpath, orig=True) + + do_massage( + draft_f=string_io, + draftpath=const1, + cmd_f=out_f, + css_f=css_f, + converter=converter, + ) + assert css_applied[0] == css + + @pytest.mark.converter + def test_converter_apply_styles( + self, const1, fake_filewriter, monkeypatch + ): + path = pathlib.Path(const1) + text = "Hello, world!" + css = "p { color:red }" + with monkeypatch.context() as m: + m.setattr( + markdown.Markdown, + "convert", + lambda s, t: f"

{t}

", + ) + convert_markdown_to_html( + text, path, css=css, filewriter_fn=fake_filewriter + ) + assert "color: red" in fake_filewriter.pop()[1] + + if _PYGMENTS_CSS: + + @pytest.mark.styling + def test_apply_stylesheet_pygments(self): html = ( f'
' "
def foo():\n    return
" @@ -1205,13 +1452,14 @@ try: out = apply_styling(html, _PYGMENTS_CSS) assert f'{_CODEHILITE_CLASS}" style="' in out + @pytest.mark.massage def test_mime_tree_relative_within_alternative( self, string_io, const1, capsys, mime_tree_related_to_html ): def converter( drafttext, draftpath, - cssfile, + css, related_to_html_only, extensions, tempdir, @@ -1261,13 +1509,14 @@ try: assert "send-message" in lines.pop() assert len(lines) == 0 + @pytest.mark.massage def test_mime_tree_nested_trees_does_not_break_positioning( self, string_io, const1, capsys ): def converter( drafttext, draftpath, - cssfile, + css, related_to_html_only, extensions, tempdir, @@ -1326,7 +1575,7 @@ try: captured = capsys.readouterr() lines = captured.out.splitlines() - while not "logo.png" in lines.pop(): + while "logo.png" not in lines.pop(): pass lines.pop() assert "content-id" in lines.pop() @@ -1343,5 +1592,212 @@ try: # follows next must not be another assert "Logo" in lines.pop() + @pytest.mark.sig + def test_signature_extraction_no_signature(self, const1): + assert (const1, None, None) == extract_signature(const1) + + @pytest.mark.sig + def test_signature_extraction_just_text(self, const1, const2): + origtext, textsig, htmlsig = extract_signature( + f"{const1}{EMAIL_SIG_SEP}{const2}" + ) + assert origtext == const1 + assert textsig == const2 + assert htmlsig is None + + @pytest.mark.sig + def test_signature_extraction_html(self, const1, const2): + path = pathlib.Path("somepath") + sigconst = "HTML signature from {path} but as a string" + + def filereader_fn(path): + return ( + f'
{sigconst.format(path=path)}
' + ) + + origtext, textsig, htmlsig = extract_signature( + f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {path}\n{const2}", + filereader_fn=filereader_fn, + ) + assert origtext == const1 + assert textsig == const2 + assert htmlsig == sigconst.format(path=path) + + @pytest.mark.sig + def test_signature_extraction_file_not_found(self, const1): + path = pathlib.Path("/does/not/exist") + with pytest.raises(FileNotFoundError): + origtext, textsig, htmlsig = extract_signature( + f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}" + ) + + @pytest.mark.imgproc + def test_image_registry(self, const1): + reg = ImageRegistry() + cid = reg.register(const1) + assert "@" in cid + assert not cid.startswith("<") + assert not cid.endswith(">") + assert const1 in reg + + @pytest.mark.imgproc + def test_image_registry_file_uri(self, const1): + reg = ImageRegistry() + reg.register("/some/path") + for path in reg: + assert path.startswith("file://") + break + + @pytest.mark.converter + @pytest.mark.sig + def test_converter_signature_handling( + self, const1, fake_filewriter, monkeypatch + ): + path = pathlib.Path(const1) + + mailparts = ( + "This is the mail body\n", + f"{EMAIL_SIG_SEP}", + "This is a plain-text signature only", + ) + + def filereader_fn(path): + return "" + + with monkeypatch.context() as m: + m.setattr(markdown.Markdown, "convert", lambda s, t: t) + convert_markdown_to_html( + "".join(mailparts), + path, + filewriter_fn=fake_filewriter, + filereader_fn=filereader_fn, + ) + + soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser") + body = soup.body.contents + + assert mailparts[0] in body.pop(0) + + sig = soup.select_one("#signature") + assert sig == body.pop(0) + + sep = sig.select_one("span.sig_separator") + assert sep == sig.contents[0] + assert f"\n{sep.text}\n" == EMAIL_SIG_SEP + + assert mailparts[2] in sig.contents[1] + + @pytest.mark.converter + @pytest.mark.sig + def test_converter_signature_handling_htmlsig( + self, const1, fake_filewriter, monkeypatch + ): + path = pathlib.Path(const1) + + mailparts = ( + "This is the mail body", + f"{EMAIL_SIG_SEP}", + f"{HTML_SIG_MARKER}{path}\n", + "This is the plain-text version", + ) + + htmlsig = "HTML Signature from {path}" + + def filereader_fn(path): + return f'
{htmlsig.format(path=path)}
' + + def mdwn_fn(t): + return t.upper() + + with monkeypatch.context() as m: + m.setattr( + markdown.Markdown, "convert", lambda s, t: mdwn_fn(t) + ) + convert_markdown_to_html( + "".join(mailparts), + path, + filewriter_fn=fake_filewriter, + filereader_fn=filereader_fn, + ) + + soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser") + sig = soup.select_one("#signature") + sig.span.extract() + + assert HTML_SIG_MARKER not in sig.text + assert htmlsig.format(path=path) == sig.text.strip() + + plaintext = fake_filewriter.pop()[1] + assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1]) + + @pytest.mark.converter + @pytest.mark.sig + def test_converter_signature_handling_htmlsig_with_image( + self, const1, fake_filewriter, monkeypatch, test_png + ): + path = pathlib.Path(const1) + + mailparts = ( + "This is the mail body", + f"{EMAIL_SIG_SEP}", + f"{HTML_SIG_MARKER}{path}\n", + "This is the plain-text version", + ) + + htmlsig = ( + "HTML Signature from {path} with image\n" + f'\n' + ) + + def filereader_fn(path): + return f'
{htmlsig.format(path=path)}
' + + def mdwn_fn(t): + return t.upper() + + with monkeypatch.context() as m: + m.setattr( + markdown.Markdown, "convert", lambda s, t: mdwn_fn(t) + ) + convert_markdown_to_html( + "".join(mailparts), + path, + filewriter_fn=fake_filewriter, + filereader_fn=filereader_fn, + ) + + assert fake_filewriter.pop()[0].suffix == ".png" + + soup = bs4.BeautifulSoup(fake_filewriter.pop()[1], "html.parser") + assert soup.img.attrs["src"].startswith("cid:") + + @pytest.mark.converter + @pytest.mark.sig + def test_converter_signature_handling_textsig_with_image( + self, const1, fake_filewriter, test_png + ): + mailparts = ( + "This is the mail body", + f"{EMAIL_SIG_SEP}", + "This is the plain-text version with image\n", + f"![Inline]({test_png})", + + ) + tree = convert_markdown_to_html + "".join(mailparts), + pathlib.Path(const1), + filewriter_fn=fake_filewriter, + ) + + assert tree.subtype == "relative" + assert tree.children[0].subtype == "alternative" + assert tree.children[1].subtype == "png" + written = fake_filewriter.pop() + assert tree.children[1].path == written[0] + assert written[1] == request.urlopen(test_png).read() + + def test_converter_attribution_to_admonition(self, fake_filewriter): + + except ImportError: pass diff --git a/.config/neomutt/pytest.ini b/.config/neomutt/pytest.ini new file mode 100644 index 0000000..4dedcb2 --- /dev/null +++ b/.config/neomutt/pytest.ini @@ -0,0 +1,11 @@ +[pytest] +markers = + sig: test related to signature extraction/handling + converter: test related to the text2html conversion + massage: test related to message massaging + imgproc: test related to inline image processing + imgcoll: test related to inline image collection + styling: test related to HTML styling + treewalk: test related to MIME tree generation/walking + muttctrl: test related to Mutt command interfacing + current: test currently being worked on