X-Git-Url: https://git.madduck.net/etc/neomutt.git/blobdiff_plain/8727c8d5c6ab58f4f112bac53c1252862a485e19..1f3569f9da0c229553fbf50e683fc3828f55e7e5:/.config/neomutt/buildmimetree.py?ds=inline diff --git a/.config/neomutt/buildmimetree.py b/.config/neomutt/buildmimetree.py index 32fc17c..a27f64c 100755 --- a/.config/neomutt/buildmimetree.py +++ b/.config/neomutt/buildmimetree.py @@ -11,7 +11,7 @@ # source '$my_confdir/buildmimetree.py \ # --tempdir $tempdir --extensions $my_mdwn_extensions \ # --css-file $my_confdir/htmlmail.css |'\ -# sourc e \$my_mdwn_postprocess_cmd_file\ +# source \$my_mdwn_postprocess_cmd_file\ # " "Convert message into a modern MIME tree with inline images" # # (Yes, we need to call source twice, as mutt only starts to process output @@ -28,11 +28,14 @@ # - Pynliner, provides --css-file and thus inline styling of HTML output # - Pygments, then syntax highlighting for fenced code is enabled # +# Running tests: +# pytest -x buildmimetree.py +# # Latest version: # https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py # -# Copyright © 2023 martin f. krafft -# Released under the GPL-2+ licence, just like Mutt itself. +# Copyright © 2023–24 martin f. krafft +# Released under the GPL-2+ licence, just like NeoMutt itself. # import sys @@ -46,10 +49,17 @@ import mimetypes import bs4 import xml.etree.ElementTree as etree import io +import enum +import warnings +from contextlib import contextmanager from collections import namedtuple, OrderedDict from markdown.extensions import Extension from markdown.blockprocessors import BlockProcessor -from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE +from markdown.inlinepatterns import ( + SimpleTextInlineProcessor, + ImageInlineProcessor, + IMAGE_LINK_RE, +) from email.utils import make_msgid from urllib import request @@ -62,7 +72,7 @@ def parse_cli_args(*args, **kwargs): ) ) parser.epilog = ( - "Copyright © 2023 martin f. krafft .\n" + "Copyright © 2023-24 martin f. krafft .\n" "Released under the MIT licence" ) @@ -116,6 +126,11 @@ def parse_cli_args(*args, **kwargs): help="Only build, don't send the message", ) + parser.add_argument( + "--domain", + help="Domain to use in content IDs", + ) + parser.add_argument( "--tempdir", metavar="DIR", @@ -171,6 +186,10 @@ def parse_cli_args(*args, **kwargs): class File: + class Op(enum.Enum): + R = enum.auto() + W = enum.auto() + def __init__(self, path=None, mode="r", content=None, **kwargs): if path: if content: @@ -185,8 +204,8 @@ class File: if content and not re.search(r"[r+]", mode): raise RuntimeError("Cannot specify content without read mode") - self._rcache = [content] if content else [] - self._wcache = [] + self._cache = {File.Op.R: [content] if content else [], File.Op.W: []} + self._lastop = None self._mode = mode self._kwargs = kwargs self._file = None @@ -209,48 +228,48 @@ class File: def close(self): self._file.close() self._file = None - self._rcache = self._wcache - - def _get_rcache(self): - return (b"" if "b" in self._mode else "").join(self._rcache) + self._cache[File.Op.R] = self._cache[File.Op.W] + self._lastop = None - def _get_wcache(self): - return (b"" if "b" in self._mode else "").join(self._wcache) + def _get_cache(self, op): + return (b"" if "b" in self._mode else "").join(self._cache[op]) - def _add_to_rcache(self, s): - self._rcache.append(s) - - def _add_to_wcache(self, s): - self._wcache.append(s) + def _add_to_cache(self, op, s): + self._cache[op].append(s) def read(self, *, cache=True): - if cache and self._rcache: - return self._get_rcache() + if cache and self._cache[File.Op.R]: + return self._get_cache(File.Op.R) + + if self._lastop == File.Op.W: + try: + self._file.seek(0) + except io.UnsupportedOperation: + pass - if not self._file: - with self as f: - return f.read(cache=cache) + self._lastop = File.Op.R - self._file.seek(0) if cache: - self._add_to_rcache(self._file.read()) - return self._get_rcache() + self._add_to_cache(File.Op.R, self._file.read()) + return self._get_cache(File.Op.R) else: return self._file.read() def write(self, s, *, cache=True): - if not self._file: - with self as f: - return f.write(s, cache=cache) - - self._file.seek(0) - self._rcache = self._wcache + if self._lastop == File.Op.R: + try: + self._file.seek(0) + except io.UnsupportedOperation: + pass if cache: - self._add_to_wcache(s) + self._add_to_cache(File.Op.W, s) + + self._cache[File.Op.R] = self._cache[File.Op.W] written = self._file.write(s) self._file.flush() + self._lastop = File.Op.W return written path = property(lambda s: s._path) @@ -336,12 +355,12 @@ class ImageRegistry: def __init__(self): self._images = OrderedDict() - def register(self, path, description=None): + def register(self, path, description=None, *, domain=None): # path = str(pathlib.Path(path).expanduser()) path = os.path.expanduser(path) if path.startswith("/"): path = f"file://{path}" - cid = make_msgid()[1:-1] + cid = make_msgid(domain=domain)[1:-1] self._images[path] = InlineImageInfo(cid, description) return cid @@ -433,7 +452,10 @@ def markdown_with_inline_image_support( try: - import pynliner + with warnings.catch_warnings(): + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1081037 + warnings.filterwarnings("ignore", category=SyntaxWarning) + import pynliner _PYNLINER = True @@ -462,18 +484,33 @@ def apply_styling(html, css): ) +# [ FORMAT=FLOWED HANDLING ] ################################################## + + +class FormatFlowedNewlineExtension(Extension): + FFNL_RE = r"(?!\S)(\s)\n" + + def extendMarkdown(self, md): + ffnl = SimpleTextInlineProcessor(self.FFNL_RE) + md.inlinePatterns.register(ffnl, "ffnl", 125) + + # [ QUOTE HANDLING ] ########################################################## class QuoteToAdmonitionExtension(Extension): - class EmailQuoteBlockProcessor(BlockProcessor): + class BlockProcessor(BlockProcessor): RE = re.compile(r"(?:^|\n)>\s*(.*)") def __init__(self, parser): super().__init__(parser) self._title = None + self._disable = False def test(self, parent, blocks): + if self._disable: + return False + if markdown.util.nearing_recursion_limit(): return False @@ -509,9 +546,14 @@ class QuoteToAdmonitionExtension(Extension): self.parser.parseChunk(admonition, self._title) admonition[0].set("class", "admonition-title") - self.parser.parseChunk( - admonition, "\n".join(self.clean(line) for line in quotelines) - ) + with self.disable(): + self.parser.parseChunk(admonition, "\n".join(quotelines)) + + @contextmanager + def disable(self): + self._disable = True + yield True + self._disable = False @classmethod def clean(klass, line): @@ -520,7 +562,7 @@ class QuoteToAdmonitionExtension(Extension): def extendMarkdown(self, md): md.registerExtension(self) - email_quote_proc = self.EmailQuoteBlockProcessor(md.parser) + email_quote_proc = self.BlockProcessor(md.parser) md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25) @@ -626,7 +668,9 @@ def extract_signature(text, *, filefactory=FileFactory()): path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1]) textsig = "\n".join(lines) - sig_input = filefactory(path.expanduser()).read() + with filefactory(path.expanduser()) as sig_f: + sig_input = sig_f.read() + soup = bs4.BeautifulSoup(sig_input, "html.parser") style = str(soup.style.extract()) if soup.style else "" @@ -665,6 +709,7 @@ def convert_markdown_to_html( tempdir=None, extensions=None, extension_configs=None, + domain=None, ): # TODO extension_configs need to be handled differently extension_configs = extension_configs or {} @@ -673,6 +718,7 @@ def convert_markdown_to_html( ] = _CODEHILITE_CLASS extensions = extensions or [] + extensions.append(FormatFlowedNewlineExtension()) extensions.append(QuoteToAdmonitionExtension()) draft = draft_f.read() @@ -698,7 +744,7 @@ def convert_markdown_to_html( for img in soup.find_all("img"): uri = img.attrs["src"] desc = img.attrs.get("title", img.attrs.get("alt")) - cid = image_registry.register(uri, desc) + cid = image_registry.register(uri, desc, domain=domain) img.attrs["src"] = f"cid:{cid}" htmlsig = str(soup) @@ -928,6 +974,7 @@ def do_massage( only_build=False, max_other_attachments=20, tempdir=None, + domain=None, debug_commands=False, debug_walk=False, ): @@ -940,14 +987,6 @@ def do_massage( # variable used to identify the command file we're currently writing # to. cmds = MuttCommands(cmd_f, debug=debug_commands) - cmds.cmd('set editor="$my_editor"') - cmds.cmd('set edit_headers="$my_edit_headers"') - cmds.cmd("unset my_editor") - cmds.cmd("unset my_edit_headers") - - # let's flush those commands, as there'll be a lot of pushes from now - # on, which need to be run in reverse order - cmds.flush() extensions = extensions.split(",") if extensions else [] tree = converter( @@ -957,6 +996,7 @@ def do_massage( related_to_html_only=related_to_html_only, tempdir=tempdir, extensions=extensions, + domain=domain, ) mimetree = MIMETreeDFWalker(debug=debug_walk) @@ -1093,6 +1133,10 @@ def do_massage( except AttributeError: filename = "pytest_internal_file" cmds.cmd(f"source 'rm -f {filename}|'") + cmds.cmd('set editor="$my_editor"') + cmds.cmd('set edit_headers="$my_edit_headers"') + cmds.cmd("unset my_editor") + cmds.cmd("unset my_edit_headers") cmds.cmd("unset my_mdwn_postprocess_cmd_file") cmds.flush() @@ -1125,6 +1169,7 @@ if __name__ == "__main__": max_other_attachments=args.max_number_other_attachments, only_build=args.only_build, tempdir=args.tempdir, + domain=args.domain, debug_commands=args.debug_commands, debug_walk=args.debug_walk, ) @@ -1387,14 +1432,14 @@ try: ) lines = cmd_f.read().splitlines() - assert '="$my_editor"' in lines.pop(0) - assert '="$my_edit_headers"' in lines.pop(0) - assert "unset my_editor" == lines.pop(0) - assert "unset my_edit_headers" == lines.pop(0) assert "send-message" in lines.pop(0) assert "update-encoding" in lines.pop(0) assert "first-entry" in lines.pop(0) assert "source 'rm -f " in lines.pop(0) + assert '="$my_editor"' in lines.pop(0) + assert '="$my_edit_headers"' in lines.pop(0) + assert "unset my_editor" == lines.pop(0) + assert "unset my_edit_headers" == lines.pop(0) assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0) @pytest.mark.massage @@ -1411,7 +1456,7 @@ try: max_other_attachments=max_attachments, converter=converter, ) - lines = cmd_f.read().splitlines()[4:-2] + lines = cmd_f.read().splitlines()[:-6] assert "first-entry" in lines.pop() assert "update-encoding" in lines.pop() @@ -1453,7 +1498,7 @@ try: cmd_f=cmd_f, converter=converter, ) - lines = cmd_f.read().splitlines()[4:-2] + lines = cmd_f.read().splitlines()[:-6] assert "first-entry" in lines.pop() assert "update-encoding" in lines.pop() @@ -1523,10 +1568,10 @@ try: @pytest.mark.converter def test_converter_tree_basic(self, fakepath, const1, fakefilefactory): - draft_f = fakefilefactory(fakepath, content=const1) - tree = convert_markdown_to_html( - draft_f, filefactory=fakefilefactory - ) + with fakefilefactory(fakepath, content=const1) as draft_f: + tree = convert_markdown_to_html( + draft_f, filefactory=fakefilefactory + ) assert tree.subtype == "alternative" assert len(tree.children) == 2 @@ -1540,8 +1585,8 @@ try: def test_converter_writes( self, fakepath, fakefilefactory, const1, monkeypatch ): - draft_f = fakefilefactory(fakepath, content=const1) - convert_markdown_to_html(draft_f, filefactory=fakefilefactory) + with fakefilefactory(fakepath, content=const1) as draft_f: + convert_markdown_to_html(draft_f, filefactory=fakefilefactory) html = fakefilefactory.pop() assert fakepath.with_suffix(".html") == html[0] @@ -1735,7 +1780,6 @@ try: @pytest.mark.styling def test_massage_styling_to_converter(self): css = "p { color:red }" - css_f = File(content=css) css_applied = [] def converter(draft_f, css_f, **kwargs): @@ -1743,12 +1787,17 @@ try: css_applied.append(css) return Part("text", "plain", draft_f.path, orig=True) - do_massage( - draft_f=File(), - cmd_f=File(), - css_f=css_f, - converter=converter, - ) + with ( + File() as draft_f, + File(mode="w") as cmd_f, + File(content=css) as css_f, + ): + do_massage( + draft_f=draft_f, + cmd_f=cmd_f, + css_f=css_f, + converter=converter, + ) assert css_applied[0] == css @pytest.mark.converter @@ -1817,11 +1866,10 @@ try: assert htmlsig == sigconst.format(path=fakepath) @pytest.mark.sig - def test_signature_extraction_file_not_found(self, const1): - path = pathlib.Path("/does/not/exist") + def test_signature_extraction_file_not_found(self, fakepath, const1): with pytest.raises(FileNotFoundError): origtext, textsig, htmlsig = extract_signature( - f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}" + f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}" ) @pytest.mark.imgproc @@ -1833,6 +1881,15 @@ try: assert not cid.endswith(">") assert const1 in reg + @pytest.mark.imgproc + def test_image_registry_domain(self, const1, const2): + reg = ImageRegistry() + cid = reg.register(const1, domain=const2) + assert f"@{const2}" in cid + assert not cid.startswith("<") + assert not cid.endswith(">") + assert const1 in reg + @pytest.mark.imgproc def test_image_registry_file_uri(self, const1): reg = ImageRegistry() @@ -1890,9 +1947,7 @@ try: "This is the plain-text version", ) htmlsig = "HTML Signature from {path} but as a string" - html = ( - f'

{htmlsig.format(path=fakepath2)}

' - ) + html = f'

{htmlsig.format(path=fakepath2)}

' sig_f = fakefilefactory(fakepath2, content=html) @@ -2026,6 +2081,29 @@ try: p = quote.p.extract() assert p.contents[1].name == "strong" + @pytest.mark.converter + def test_converter_attribution_to_admonition_with_blockquote( + self, fakepath, fakefilefactory + ): + mailparts = ( + "Regarding whatever", + "> blockquote line1", + "> blockquote line2", + "> ", + "> new para with **bold** text", + ) + with fakefilefactory( + fakepath, content="\n".join(mailparts) + ) as draft_f: + convert_markdown_to_html(draft_f, filefactory=fakefilefactory) + + soup = bs4.BeautifulSoup( + fakefilefactory[fakepath.with_suffix(".html")].read(), + "html.parser", + ) + quote = soup.select_one("div.admonition.quote") + assert quote.blockquote + @pytest.mark.converter def test_converter_attribution_to_admonition_multiple( self, fakepath, fakefilefactory @@ -2068,6 +2146,46 @@ try: == mailparts[-2] ) + @pytest.mark.converter + def test_converter_format_flowed_with_nl2br( + self, fakepath, fakefilefactory + ): + mailparts = ( + "This is format=flowed text ", + "with spaces at the end ", + "and there ought be no newlines.", + "", + "[link](https://example.org) ", + "and text.", + "", + "[link text ", + "broken up](https://example.org).", + "", + "This is on a new line with a hard break ", + "due to the double space", + ) + with fakefilefactory( + fakepath, content="\n".join(mailparts) + ) as draft_f: + convert_markdown_to_html( + draft_f, extensions=["nl2br"], filefactory=fakefilefactory + ) + + soup = bs4.BeautifulSoup( + fakefilefactory[fakepath.with_suffix(".html")].read(), + "html.parser", + ) + import ipdb + + p = soup.p.extract().text + assert "".join(mailparts[0:3]) == p + p = ''.join(map(str, soup.p.extract().contents)) + assert p == 'link and text.' + p = ''.join(map(str, soup.p.extract().contents)) + assert ( + p == 'link text broken up.' + ) + @pytest.mark.fileio def test_file_class_contextmanager(self, const1, monkeypatch): state = dict(o=False, c=False) @@ -2095,6 +2213,11 @@ try: f.write(const1, cache=False) assert f.read(cache=False) == const1 + @pytest.mark.fileio + def test_file_class_path_no_exists(self, fakepath): + with pytest.raises(FileNotFoundError): + File(fakepath, mode="r").open() + @pytest.mark.fileio def test_file_class_cache(self, tmp_path, const1, const2): path = tmp_path / "file"