# <enter-command> source '$my_confdir/buildmimetree.py \
# --tempdir $tempdir --extensions $my_mdwn_extensions \
# --css-file $my_confdir/htmlmail.css |'<enter>\
-# <enter-command> sourc e \$my_mdwn_postprocess_cmd_file<enter>\
+# <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
# " "Convert message into a modern MIME tree with inline images"
#
# (Yes, we need to call source twice, as mutt only starts to process output
# - Pynliner, provides --css-file and thus inline styling of HTML output
# - Pygments, then syntax highlighting for fenced code is enabled
#
+# Running tests:
+# pytest -x buildmimetree.py
+#
# Latest version:
# https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
#
-# Copyright © 2023 martin f. krafft <madduck@madduck.net>
-# Released under the GPL-2+ licence, just like Mutt itself.
+# Copyright © 2023–24 martin f. krafft <madduck@madduck.net>
+# Released under the GPL-2+ licence, just like NeoMutt itself.
#
import sys
import bs4
import xml.etree.ElementTree as etree
import io
+import enum
+import warnings
+from contextlib import contextmanager
from collections import namedtuple, OrderedDict
from markdown.extensions import Extension
from markdown.blockprocessors import BlockProcessor
-from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
+from markdown.inlinepatterns import (
+ SimpleTextInlineProcessor,
+ ImageInlineProcessor,
+ IMAGE_LINK_RE,
+)
from email.utils import make_msgid
from urllib import request
)
)
parser.epilog = (
- "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
+ "Copyright © 2023-24 martin f. krafft <madduck@madduck.net>.\n"
"Released under the MIT licence"  # NOTE(review): the file header says GPL-2+ — confirm which licence applies
)
help="Only build, don't send the message",
)
+ parser.add_argument(
+ "--domain",
+ help="Domain to use in content IDs",
+ )
+
parser.add_argument(
"--tempdir",
metavar="DIR",
class File:
+ class Op(enum.Enum):  # file operation kinds: keys of self._cache and values of self._lastop
+ R = enum.auto()  # read
+ W = enum.auto()  # write
+
def __init__(self, path=None, mode="r", content=None, **kwargs):
if path:
if content:
if content and not re.search(r"[r+]", mode):
raise RuntimeError("Cannot specify content without read mode")
- self._rcache = [content] if content else []
- self._wcache = []
+ self._cache = {File.Op.R: [content] if content else [], File.Op.W: []}
+ self._lastop = None
self._mode = mode
self._kwargs = kwargs
self._file = None
def close(self):  # close the underlying file object and reset the per-open state
self._file.close()
self._file = None
- self._rcache = self._wcache
-
- def _get_rcache(self):
- return (b"" if "b" in self._mode else "").join(self._rcache)
+ self._cache[File.Op.R] = self._cache[File.Op.W]  # read cache becomes the same list object as the write cache
+ self._lastop = None  # no operation recorded until the next read()/write()
- def _get_wcache(self):
- return (b"" if "b" in self._mode else "").join(self._wcache)
+ def _get_cache(self, op):  # join the cached chunks for `op` (a File.Op member) into one value
+ return (b"" if "b" in self._mode else "").join(self._cache[op])  # bytes separator when the mode contains "b", str otherwise
- def _add_to_rcache(self, s):
- self._rcache.append(s)
-
- def _add_to_wcache(self, s):
- self._wcache.append(s)
+ def _add_to_cache(self, op, s):  # append chunk `s` to the cache list selected by `op`
+ self._cache[op].append(s)
def read(self, *, cache=True):  # NOTE(review): the open-on-demand fallback is removed below, so an uncached read presumably needs the file opened first — confirm callers
- if cache and self._rcache:
- return self._get_rcache()
+ if cache and self._cache[File.Op.R]:  # serve from the read cache when it is non-empty
+ return self._get_cache(File.Op.R)
+
+ if self._lastop == File.Op.W:  # previous operation was a write: rewind before reading
+ try:
+ self._file.seek(0)
+ except io.UnsupportedOperation:  # stream cannot seek; read from the current position instead
+ pass
- if not self._file:
- with self as f:
- return f.read(cache=cache)
+ self._lastop = File.Op.R
- self._file.seek(0)
if cache:
- self._add_to_rcache(self._file.read())
- return self._get_rcache()
+ self._add_to_cache(File.Op.R, self._file.read())  # remember what was read, then return the whole cache
+ return self._get_cache(File.Op.R)
else:
return self._file.read()
def write(self, s, *, cache=True):  # returns whatever the underlying file's write() returns
- if not self._file:
- with self as f:
- return f.write(s, cache=cache)
-
- self._file.seek(0)
- self._rcache = self._wcache
+ if self._lastop == File.Op.R:  # previous operation was a read: rewind before writing
+ try:
+ self._file.seek(0)
+ except io.UnsupportedOperation:  # stream cannot seek; write at the current position instead
+ pass
if cache:
- self._add_to_wcache(s)
+ self._add_to_cache(File.Op.W, s)
+
+ self._cache[File.Op.R] = self._cache[File.Op.W]  # read cache now aliases the write cache (same list object)
written = self._file.write(s)
self._file.flush()  # flush after every write
+ self._lastop = File.Op.W
return written
path = property(lambda s: s._path)
def __init__(self):
self._images = OrderedDict()  # filled by register(): path/URI -> InlineImageInfo(cid, description)
- def register(self, path, description=None):
+ def register(self, path, description=None, *, domain=None):  # returns the new content ID; `domain` is passed through to make_msgid()
# path = str(pathlib.Path(path).expanduser())
path = os.path.expanduser(path)
if path.startswith("/"):  # absolute filesystem paths are stored as file:// URIs
path = f"file://{path}"
- cid = make_msgid()[1:-1]
+ cid = make_msgid(domain=domain)[1:-1]  # [1:-1] drops the first and last character of the generated ID (the angle brackets)
self._images[path] = InlineImageInfo(cid, description)  # registering the same path again overwrites its entry
return cid
try:
- import pynliner
+ with warnings.catch_warnings():
+ # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1081037
+ warnings.filterwarnings("ignore", category=SyntaxWarning)
+ import pynliner
_PYNLINER = True
)
+# [ FORMAT=FLOWED HANDLING ] ##################################################
+
+
+class FormatFlowedNewlineExtension(Extension):  # registers an inline pattern matching one whitespace character followed by a newline
+ FFNL_RE = r"(?!\S)(\s)\n"  # NOTE(review): (?!\S) is redundant directly before (\s); was a lookbehind such as (?<!\s) intended so double-space hard breaks survive? — confirm
+
+ def extendMarkdown(self, md):
+ ffnl = SimpleTextInlineProcessor(self.FFNL_RE)  # presumably replaces the match with group 1, i.e. keeps the whitespace and drops the newline — verify against Python-Markdown
+ md.inlinePatterns.register(ffnl, "ffnl", 125)  # registered under the name "ffnl" with priority 125
+
+
# [ QUOTE HANDLING ] ##########################################################
class QuoteToAdmonitionExtension(Extension):
- class EmailQuoteBlockProcessor(BlockProcessor):
+ class BlockProcessor(BlockProcessor):
RE = re.compile(r"(?:^|\n)>\s*(.*)")
def __init__(self, parser):
super().__init__(parser)
self._title = None
+ self._disable = False
def test(self, parent, blocks):
+ if self._disable:
+ return False
+
if markdown.util.nearing_recursion_limit():
return False
self.parser.parseChunk(admonition, self._title)
admonition[0].set("class", "admonition-title")
- self.parser.parseChunk(
- admonition, "\n".join(self.clean(line) for line in quotelines)
- )
+ with self.disable():
+ self.parser.parseChunk(admonition, "\n".join(quotelines))
+
+ @contextmanager
+ def disable(self):
+     """Temporarily switch this block processor off.
+
+     While the ``with`` body runs, ``test()`` returns False, so the
+     nested ``parseChunk()`` call does not hand the quoted lines back
+     to this processor. The flag is reset in a ``finally`` clause so
+     that an exception raised inside the body cannot leave the
+     processor disabled for the rest of the run.
+     """
+     self._disable = True
+     try:
+         yield True
+     finally:
+         self._disable = False
@classmethod
def clean(klass, line):
def extendMarkdown(self, md):
md.registerExtension(self)
- email_quote_proc = self.EmailQuoteBlockProcessor(md.parser)
+ email_quote_proc = self.BlockProcessor(md.parser)
md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
textsig = "\n".join(lines)
- sig_input = filefactory(path.expanduser()).read()
+ with filefactory(path.expanduser()) as sig_f:
+ sig_input = sig_f.read()
+
soup = bs4.BeautifulSoup(sig_input, "html.parser")
style = str(soup.style.extract()) if soup.style else ""
tempdir=None,
extensions=None,
extension_configs=None,
+ domain=None,
):
# TODO extension_configs need to be handled differently
extension_configs = extension_configs or {}
] = _CODEHILITE_CLASS
extensions = extensions or []
+ extensions.append(FormatFlowedNewlineExtension())
extensions.append(QuoteToAdmonitionExtension())
draft = draft_f.read()
for img in soup.find_all("img"):
uri = img.attrs["src"]
desc = img.attrs.get("title", img.attrs.get("alt"))
- cid = image_registry.register(uri, desc)
+ cid = image_registry.register(uri, desc, domain=domain)
img.attrs["src"] = f"cid:{cid}"
htmlsig = str(soup)
only_build=False,
max_other_attachments=20,
tempdir=None,
+ domain=None,
debug_commands=False,
debug_walk=False,
):
# variable used to identify the command file we're currently writing
# to.
cmds = MuttCommands(cmd_f, debug=debug_commands)
- cmds.cmd('set editor="$my_editor"')
- cmds.cmd('set edit_headers="$my_edit_headers"')
- cmds.cmd("unset my_editor")
- cmds.cmd("unset my_edit_headers")
-
- # let's flush those commands, as there'll be a lot of pushes from now
- # on, which need to be run in reverse order
- cmds.flush()
extensions = extensions.split(",") if extensions else []
tree = converter(
related_to_html_only=related_to_html_only,
tempdir=tempdir,
extensions=extensions,
+ domain=domain,
)
mimetree = MIMETreeDFWalker(debug=debug_walk)
except AttributeError:
filename = "pytest_internal_file"
cmds.cmd(f"source 'rm -f {filename}|'")
+ cmds.cmd('set editor="$my_editor"')
+ cmds.cmd('set edit_headers="$my_edit_headers"')
+ cmds.cmd("unset my_editor")
+ cmds.cmd("unset my_edit_headers")
cmds.cmd("unset my_mdwn_postprocess_cmd_file")
cmds.flush()
max_other_attachments=args.max_number_other_attachments,
only_build=args.only_build,
tempdir=args.tempdir,
+ domain=args.domain,
debug_commands=args.debug_commands,
debug_walk=args.debug_walk,
)
)
lines = cmd_f.read().splitlines()
- assert '="$my_editor"' in lines.pop(0)
- assert '="$my_edit_headers"' in lines.pop(0)
- assert "unset my_editor" == lines.pop(0)
- assert "unset my_edit_headers" == lines.pop(0)
assert "send-message" in lines.pop(0)
assert "update-encoding" in lines.pop(0)
assert "first-entry" in lines.pop(0)
assert "source 'rm -f " in lines.pop(0)
+ assert '="$my_editor"' in lines.pop(0)
+ assert '="$my_edit_headers"' in lines.pop(0)
+ assert "unset my_editor" == lines.pop(0)
+ assert "unset my_edit_headers" == lines.pop(0)
assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
@pytest.mark.massage
max_other_attachments=max_attachments,
converter=converter,
)
- lines = cmd_f.read().splitlines()[4:-2]
+ lines = cmd_f.read().splitlines()[:-6]
assert "first-entry" in lines.pop()
assert "update-encoding" in lines.pop()
cmd_f=cmd_f,
converter=converter,
)
- lines = cmd_f.read().splitlines()[4:-2]
+ lines = cmd_f.read().splitlines()[:-6]
assert "first-entry" in lines.pop()
assert "update-encoding" in lines.pop()
@pytest.mark.converter
def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
- draft_f = fakefilefactory(fakepath, content=const1)
- tree = convert_markdown_to_html(
- draft_f, filefactory=fakefilefactory
- )
+ with fakefilefactory(fakepath, content=const1) as draft_f:
+ tree = convert_markdown_to_html(
+ draft_f, filefactory=fakefilefactory
+ )
assert tree.subtype == "alternative"
assert len(tree.children) == 2
def test_converter_writes(
self, fakepath, fakefilefactory, const1, monkeypatch
):
- draft_f = fakefilefactory(fakepath, content=const1)
- convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+ with fakefilefactory(fakepath, content=const1) as draft_f:
+ convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
html = fakefilefactory.pop()
assert fakepath.with_suffix(".html") == html[0]
@pytest.mark.styling
def test_massage_styling_to_converter(self):
css = "p { color:red }"
- css_f = File(content=css)
css_applied = []
def converter(draft_f, css_f, **kwargs):
css_applied.append(css)  # NOTE(review): records the enclosing `css`, not anything read from `css_f`, so the final assert only proves the converter was called — confirm intent
return Part("text", "plain", draft_f.path, orig=True)
- do_massage(
- draft_f=File(),
- cmd_f=File(),
- css_f=css_f,
- converter=converter,
- )
+ with (  # parenthesized multi-item `with` is officially supported from Python 3.10
+ File() as draft_f,
+ File(mode="w") as cmd_f,
+ File(content=css) as css_f,
+ ):
+ do_massage(
+ draft_f=draft_f,
+ cmd_f=cmd_f,
+ css_f=css_f,
+ converter=converter,
+ )
assert css_applied[0] == css
@pytest.mark.converter
assert htmlsig == sigconst.format(path=fakepath)
@pytest.mark.sig
- def test_signature_extraction_file_not_found(self, const1):
- path = pathlib.Path("/does/not/exist")
+ def test_signature_extraction_file_not_found(self, fakepath, const1):
with pytest.raises(FileNotFoundError):
origtext, textsig, htmlsig = extract_signature(
- f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{path}\n{const1}"
+ f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}"
)
@pytest.mark.imgproc
assert not cid.endswith(">")
assert const1 in reg
+ @pytest.mark.imgproc
+ def test_image_registry_domain(self, const1, const2):  # register(..., domain=X) must yield a content ID containing "@X"
+ reg = ImageRegistry()
+ cid = reg.register(const1, domain=const2)
+ assert f"@{const2}" in cid
+ assert not cid.startswith("<")  # the returned content ID carries no angle brackets
+ assert not cid.endswith(">")
+ assert const1 in reg
+
@pytest.mark.imgproc
def test_image_registry_file_uri(self, const1):
reg = ImageRegistry()
"This is the plain-text version",
)
htmlsig = "HTML Signature from {path} but as a string"
- html = (
- f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
- )
+ html = f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
sig_f = fakefilefactory(fakepath2, content=html)
p = quote.p.extract()
assert p.contents[1].name == "strong"
+ @pytest.mark.converter
+ def test_converter_attribution_to_admonition_with_blockquote(
+ self, fakepath, fakefilefactory
+ ):
+ mailparts = (  # one attribution line followed by a quoted block containing two paragraphs
+ "Regarding whatever",
+ "> blockquote line1",
+ "> blockquote line2",
+ "> ",
+ "> new para with **bold** text",
+ )
+ with fakefilefactory(
+ fakepath, content="\n".join(mailparts)
+ ) as draft_f:
+ convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
+
+ soup = bs4.BeautifulSoup(  # parse the HTML that the converter wrote next to the draft
+ fakefilefactory[fakepath.with_suffix(".html")].read(),
+ "html.parser",
+ )
+ quote = soup.select_one("div.admonition.quote")
+ assert quote.blockquote  # the quoted lines must end up in a <blockquote> inside the admonition
+
@pytest.mark.converter
def test_converter_attribution_to_admonition_multiple(
self, fakepath, fakefilefactory
== mailparts[-2]
)
+ @pytest.mark.converter
+ def test_converter_format_flowed_with_nl2br(
+     self, fakepath, fakefilefactory
+ ):
+     """Check that format=flowed soft breaks are joined even with nl2br.
+
+     Lines ending in a single space are expected to be merged into
+     one paragraph, both in plain text and around/inside links.
+     """
+     mailparts = (
+         "This is format=flowed text ",
+         "with spaces at the end ",
+         "and there ought be no newlines.",
+         "",
+         "[link](https://example.org) ",
+         "and text.",
+         "",
+         "[link text ",
+         "broken up](https://example.org).",
+         "",
+         # NOTE(review): this last paragraph (the hard-break case) is
+         # not covered by any assertion below.
+         "This is on a new line with a hard break ",
+         "due to the double space",
+     )
+     with fakefilefactory(
+         fakepath, content="\n".join(mailparts)
+     ) as draft_f:
+         convert_markdown_to_html(
+             draft_f, extensions=["nl2br"], filefactory=fakefilefactory
+         )
+
+     soup = bs4.BeautifulSoup(
+         fakefilefactory[fakepath.with_suffix(".html")].read(),
+         "html.parser",
+     )
+
+     # Each extract() removes the first remaining <p>, so the
+     # paragraphs are checked in document order.
+     p = soup.p.extract().text
+     assert "".join(mailparts[0:3]) == p
+     p = "".join(map(str, soup.p.extract().contents))
+     assert p == '<a href="https://example.org">link</a> and text.'
+     p = "".join(map(str, soup.p.extract().contents))
+     assert (
+         p == '<a href="https://example.org">link text broken up</a>.'
+     )
+
@pytest.mark.fileio
def test_file_class_contextmanager(self, const1, monkeypatch):
state = dict(o=False, c=False)
f.write(const1, cache=False)
assert f.read(cache=False) == const1
+ @pytest.mark.fileio
+ def test_file_class_path_no_exists(self, fakepath):  # opening a File for reading at `fakepath` must raise FileNotFoundError
+ with pytest.raises(FileNotFoundError):
+ File(fakepath, mode="r").open()
+
@pytest.mark.fileio
def test_file_class_cache(self, tmp_path, const1, const2):
path = tmp_path / "file"