.config/neomutt/buildmimetree.py

   1 #!/usr/bin/python3
   2 #
   3 # NeoMutt helper script to create multipart/* emails with Markdown → HTML
   4 # alternative conversion, and handling of inline images, using NeoMutt's
   5 # ability to manually craft MIME trees, but automating this process.
   6 #
   7 # Configuration:
   8 #   neomuttrc (needs to be a single line):
   9 #     set my_mdwn_extensions="extra,admonition,codehilite,sane_lists,smarty"
  10 #     macro compose B "\
  11 #       <enter-command> source '$my_confdir/buildmimetree.py \
  12 #       --tempdir $tempdir --extensions $my_mdwn_extensions \
  13 #       --css-file $my_confdir/htmlmail.css |'<enter>\
#       <enter-command> source \$my_mdwn_postprocess_cmd_file<enter>\
  15 #     " "Convert message into a modern MIME tree with inline images"
  16 #
  17 #     (Yes, we need to call source twice, as mutt only starts to process output
  18 #     from a source command when the command exits, and since we need to react
  19 #     to the output, we need to be invoked again, using a $my_ variable to pass
  20 #     information)
  21 #
  22 # Requirements:
  23 #   - python3
  24 #   - python3-markdown
  25 #   - python3-beautifulsoup4
  26 # Optional:
  27 #   - pytest
  28 #   - Pynliner, provides --css-file and thus inline styling of HTML output
  29 #   - Pygments, then syntax highlighting for fenced code is enabled
  30 #
  31 # Running tests:
  32 #   pytest -x buildmimetree.py
  33 #
  34 # Latest version:
  35 #   https://git.madduck.net/etc/neomutt.git/blob_plain/HEAD:/.config/neomutt/buildmimetree.py
  36 #
  37 # Copyright © 2023 martin f. krafft <madduck@madduck.net>
  38 # Released under the GPL-2+ licence, just like Mutt itself.
  39 #
  40
  41 import sys
  42 import os.path
  43 import pathlib
  44 import markdown
  45 import tempfile
  46 import argparse
  47 import re
  48 import mimetypes
  49 import bs4
  50 import xml.etree.ElementTree as etree
  51 import io
  52 import enum
  53 from contextlib import contextmanager
  54 from collections import namedtuple, OrderedDict
  55 from markdown.extensions import Extension
  56 from markdown.blockprocessors import BlockProcessor
  57 from markdown.inlinepatterns import (
  58     SimpleTextInlineProcessor,
  59     ImageInlineProcessor,
  60     IMAGE_LINK_RE,
  61 )
  62 from email.utils import make_msgid
  63 from urllib import request
  64
  65
  66 def parse_cli_args(*args, **kwargs):
  67     parser = argparse.ArgumentParser(
  68         description=(
  69             "NeoMutt helper to turn text/markdown email parts "
  70             "into full-fledged MIME trees"
  71         )
  72     )
  73     parser.epilog = (
  74         "Copyright © 2023 martin f. krafft <madduck@madduck.net>.\n"
  75         "Released under the MIT licence"
  76     )
  77
  78     parser.add_argument(
  79         "--extensions",
  80         metavar="EXT[,EXT[,EXT]]",
  81         type=str,
  82         default="",
  83         help="Markdown extension to use (comma-separated list)",
  84     )
  85
  86     if _PYNLINER:
  87         parser.add_argument(
  88             "--css-file",
  89             metavar="FILE",
  90             type=pathlib.Path,
  91             default=os.devnull,
  92             help="CSS file to merge with the final HTML",
  93         )
  94     else:
  95         parser.set_defaults(css_file=None)
  96
  97     parser.add_argument(
  98         "--related-to-html-only",
  99         action="store_true",
 100         help="Make related content be sibling to HTML parts only",
 101     )
 102
 103     def positive_integer(value):
 104         try:
 105             if int(value) > 0:
 106                 return int(value)
 107
 108         except ValueError:
 109             pass
 110
 111         raise ValueError("Must be a positive integer")
 112
 113     parser.add_argument(
 114         "--max-number-other-attachments",
 115         metavar="INTEGER",
 116         type=positive_integer,
 117         default=20,
 118         help="Maximum number of other attachments to expect",
 119     )
 120
 121     parser.add_argument(
 122         "--only-build",
 123         "--just-build",
 124         action="store_true",
 125         help="Only build, don't send the message",
 126     )
 127
 128     parser.add_argument(
 129         "--tempdir",
 130         metavar="DIR",
 131         type=pathlib.Path,
 132         help="Specify temporary directory to use for attachments",
 133     )
 134
 135     parser.add_argument(
 136         "--debug-commands",
 137         action="store_true",
 138         help="Turn on debug logging of commands generated to stderr",
 139     )
 140
 141     parser.add_argument(
 142         "--debug-walk",
 143         action="store_true",
 144         help="Turn on debugging to stderr of the MIME tree walk",
 145     )
 146
 147     parser.add_argument(
 148         "--dump-html",
 149         metavar="FILE",
 150         type=pathlib.Path,
 151         help="Write the generated HTML to the file",
 152     )
 153
 154     subp = parser.add_subparsers(help="Sub-command parsers", dest="mode")
 155     massage_p = subp.add_parser(
 156         "massage", help="Massaging phase (internal use)"
 157     )
 158
 159     massage_p.add_argument(
 160         "--write-commands-to",
 161         "-o",
 162         metavar="FILE",
 163         dest="cmdpath",
 164         type=pathlib.Path,
 165         required=True,
 166         help="Temporary file path to write commands to",
 167     )
 168
 169     massage_p.add_argument(
 170         "MAILDRAFT",
 171         nargs="?",
 172         type=pathlib.Path,
 173         help="If provided, the script is invoked as editor on the mail draft",
 174     )
 175
 176     return parser.parse_args(*args, **kwargs)
 177
 178
 179 # [ FILE I/O HANDLING ] #######################################################
 180
 181
 182 class File:
 183     class Op(enum.Enum):
 184         R = enum.auto()
 185         W = enum.auto()
 186
 187     def __init__(self, path=None, mode="r", content=None, **kwargs):
 188         if path:
 189             if content:
 190                 raise RuntimeError("Cannot specify path and content for File")
 191
 192             self._path = (
 193                 path if isinstance(path, pathlib.Path) else pathlib.Path(path)
 194             )
 195         else:
 196             self._path = None
 197
 198         if content and not re.search(r"[r+]", mode):
 199             raise RuntimeError("Cannot specify content without read mode")
 200
 201         self._cache = {File.Op.R: [content] if content else [], File.Op.W: []}
 202         self._lastop = None
 203         self._mode = mode
 204         self._kwargs = kwargs
 205         self._file = None
 206
 207     def open(self):
 208         if self._path:
 209             self._file = open(self._path, self._mode, **self._kwargs)
 210         elif "b" in self._mode:
 211             self._file = io.BytesIO()
 212         else:
 213             self._file = io.StringIO()
 214
 215     def __enter__(self):
 216         self.open()
 217         return self
 218
 219     def __exit__(self, exc_type, exc_val, exc_tb):
 220         self.close()
 221
 222     def close(self):
 223         self._file.close()
 224         self._file = None
 225         self._cache[File.Op.R] = self._cache[File.Op.W]
 226         self._lastop = None
 227
 228     def _get_cache(self, op):
 229         return (b"" if "b" in self._mode else "").join(self._cache[op])
 230
 231     def _add_to_cache(self, op, s):
 232         self._cache[op].append(s)
 233
 234     def read(self, *, cache=True):
 235         if cache and self._cache[File.Op.R]:
 236             return self._get_cache(File.Op.R)
 237
 238         if self._lastop == File.Op.W:
 239             try:
 240                 self._file.seek(0)
 241             except io.UnsupportedOperation:
 242                 pass
 243
 244         self._lastop = File.Op.R
 245
 246         if cache:
 247             self._add_to_cache(File.Op.R, self._file.read())
 248             return self._get_cache(File.Op.R)
 249         else:
 250             return self._file.read()
 251
 252     def write(self, s, *, cache=True):
 253         if self._lastop == File.Op.R:
 254             try:
 255                 self._file.seek(0)
 256             except io.UnsupportedOperation:
 257                 pass
 258
 259         if cache:
 260             self._add_to_cache(File.Op.W, s)
 261
 262         self._cache[File.Op.R] = self._cache[File.Op.W]
 263
 264         written = self._file.write(s)
 265         self._file.flush()
 266         self._lastop = File.Op.W
 267         return written
 268
 269     path = property(lambda s: s._path)
 270
 271     def __repr__(self):
 272         return (
 273             f'<File path={self._path or "(buffered)"} open={bool(self._file)} '
 274             f"rcache={sum(len(c) for c in self._rcache) if self._rcache is not None else False} "
 275             f"wcache={sum(len(c) for c in self._wcache) if self._wcache is not None else False}>"
 276         )
 277
 278
 279 class FileFactory:
 280     def __init__(self):
 281         self._files = []
 282
 283     def __call__(self, path=None, mode="r", content=None, **kwargs):
 284         f = File(path, mode, content, **kwargs)
 285         self._files.append(f)
 286         return f
 287
 288     def __len__(self):
 289         return self._files.__len__()
 290
 291     def pop(self, idx=-1):
 292         return self._files.pop(idx)
 293
 294     def __getitem__(self, idx):
 295         return self._files.__getitem__(idx)
 296
 297     def __contains__(self, f):
 298         return self._files.__contains__(f)
 299
 300
 301 class FakeFileFactory(FileFactory):
 302     def __init__(self):
 303         super().__init__()
 304         self._paths2files = OrderedDict()
 305
 306     def __call__(self, path=None, mode="r", content=None, **kwargs):
 307         if path in self._paths2files:
 308             return self._paths2files[path]
 309
 310         f = super().__call__(None, mode, content, **kwargs)
 311         self._paths2files[path] = f
 312
 313         mypath = path
 314
 315         class FakeFile(File):
 316             path = mypath
 317
 318         # this is quality Python! We do this so that the fake file, which has
 319         # no path, fake-pretends to have a path for testing purposes.
 320
 321         f.__class__ = FakeFile
 322         return f
 323
 324     def __getitem__(self, path):
 325         return self._paths2files.__getitem__(path)
 326
 327     def get(self, path, default):
 328         return self._paths2files.get(path, default)
 329
 330     def pop(self, last=True):
 331         return self._paths2files.popitem(last)
 332
 333     def __repr__(self):
 334         return (
 335             f"<FakeFileFactory nfiles={len(self._files)} "
 336             f"paths={len(self._paths2files)}>"
 337         )
 338
 339
 340 # [ IMAGE HANDLING ] ##########################################################
 341
 342
 343 InlineImageInfo = namedtuple(
 344     "InlineImageInfo", ["cid", "desc"], defaults=[None]
 345 )
 346
 347
 348 class ImageRegistry:
 349     def __init__(self):
 350         self._images = OrderedDict()
 351
 352     def register(self, path, description=None):
 353         # path = str(pathlib.Path(path).expanduser())
 354         path = os.path.expanduser(path)
 355         if path.startswith("/"):
 356             path = f"file://{path}"
 357         cid = make_msgid()[1:-1]
 358         self._images[path] = InlineImageInfo(cid, description)
 359         return cid
 360
 361     def __iter__(self):
 362         return self._images.__iter__()
 363
 364     def __getitem__(self, idx):
 365         return self._images.__getitem__(idx)
 366
 367     def __len__(self):
 368         return self._images.__len__()
 369
 370     def items(self):
 371         return self._images.items()
 372
 373     def __repr__(self):
 374         return f"<ImageRegistry(items={len(self._images)})>"
 375
 376     def __str__(self):
 377         return self._images.__str__()
 378
 379
 380 class InlineImageExtension(Extension):
 381     class RelatedImageInlineProcessor(ImageInlineProcessor):
 382         def __init__(self, re, md, registry):
 383             super().__init__(re, md)
 384             self._registry = registry
 385
 386         def handleMatch(self, m, data):
 387             el, start, end = super().handleMatch(m, data)
 388             if "src" in el.attrib:
 389                 src = el.attrib["src"]
 390                 if "://" not in src or src.startswith("file://"):
 391                     # We only inline local content
 392                     cid = self._registry.register(
 393                         el.attrib["src"],
 394                         el.attrib.get("title", el.attrib.get("alt")),
 395                     )
 396                     el.attrib["src"] = f"cid:{cid}"
 397             return el, start, end
 398
 399     def __init__(self, registry):
 400         super().__init__()
 401         self._image_registry = registry
 402
 403     INLINE_PATTERN_NAME = "image_link"
 404
 405     def extendMarkdown(self, md):
 406         md.registerExtension(self)
 407         inline_image_proc = self.RelatedImageInlineProcessor(
 408             IMAGE_LINK_RE, md, self._image_registry
 409         )
 410         md.inlinePatterns.register(
 411             inline_image_proc, InlineImageExtension.INLINE_PATTERN_NAME, 150
 412         )
 413
 414
 415 def markdown_with_inline_image_support(
 416     text,
 417     *,
 418     mdwn=None,
 419     image_registry=None,
 420     extensions=None,
 421     extension_configs=None,
 422 ):
 423     registry = (
 424         image_registry if image_registry is not None else ImageRegistry()
 425     )
 426     inline_image_handler = InlineImageExtension(registry=registry)
 427     extensions = extensions or []
 428     extensions.append(inline_image_handler)
 429     mdwn = markdown.Markdown(
 430         extensions=extensions, extension_configs=extension_configs
 431     )
 432
 433     htmltext = mdwn.convert(text)
 434
 435     def replace_image_with_cid(matchobj):
 436         for m in (matchobj.group(1), f"file://{matchobj.group(1)}"):
 437             if m in registry:
 438                 return f"(cid:{registry[m].cid}"
 439         return matchobj.group(0)
 440
 441     text = re.sub(r"\(([^)\s]+)", replace_image_with_cid, text)
 442     return text, htmltext, registry, mdwn
 443
 444
 445 # [ CSS STYLING ] #############################################################
 446
 447
 448 try:
 449     import pynliner
 450
 451     _PYNLINER = True
 452
 453 except ImportError:
 454     _PYNLINER = False
 455
 456 try:
 457     from pygments.formatters import get_formatter_by_name
 458
 459     _CODEHILITE_CLASS = "codehilite"
 460
 461     _PYGMENTS_CSS = get_formatter_by_name(
 462         "html", style="default"
 463     ).get_style_defs(f".{_CODEHILITE_CLASS}")
 464
 465 except ImportError:
 466     _PYGMENTS_CSS = None
 467
 468
 469 def apply_styling(html, css):
 470     return (
 471         pynliner.Pynliner()
 472         .from_string(html)
 473         .with_cssString("\n".join(s for s in [_PYGMENTS_CSS, css] if s))
 474         .run()
 475     )
 476
 477
 478 # [ FORMAT=FLOWED HANDLING ] ##################################################
 479
 480
 481 class FormatFlowedNewlineExtension(Extension):
 482     FFNL_RE = r"(?!\S)(\s)\n"
 483
 484     def extendMarkdown(self, md):
 485         ffnl = SimpleTextInlineProcessor(self.FFNL_RE)
 486         md.inlinePatterns.register(ffnl, "ffnl", 125)
 487
 488
 489 # [ QUOTE HANDLING ] ##########################################################
 490
 491
 492 class QuoteToAdmonitionExtension(Extension):
 493     class BlockProcessor(BlockProcessor):
 494         RE = re.compile(r"(?:^|\n)>\s*(.*)")
 495
 496         def __init__(self, parser):
 497             super().__init__(parser)
 498             self._title = None
 499             self._disable = False
 500
 501         def test(self, parent, blocks):
 502             if self._disable:
 503                 return False
 504
 505             if markdown.util.nearing_recursion_limit():
 506                 return False
 507
 508             lines = blocks.splitlines()
 509             if len(lines) < 2:
 510                 if not self._title:
 511                     return False
 512
 513                 elif not self.RE.search(lines[0]):
 514                     return False
 515
 516                 return len(lines) > 0
 517
 518             elif not self.RE.search(lines[0]) and self.RE.search(lines[1]):
 519                 return True
 520
 521             elif self._title and self.RE.search(lines[1]):
 522                 return True
 523
 524             return False
 525
 526         def run(self, parent, blocks):
 527             quotelines = blocks.pop(0).splitlines()
 528
 529             cont = bool(self._title)
 530             if not self.RE.search(quotelines[0]):
 531                 self._title = quotelines.pop(0)
 532
 533             admonition = etree.SubElement(parent, "div")
 534             admonition.set(
 535                 "class", f"admonition quote{' continued' if cont else ''}"
 536             )
 537             self.parser.parseChunk(admonition, self._title)
 538
 539             admonition[0].set("class", "admonition-title")
 540             with self.disable():
 541                 self.parser.parseChunk(admonition, "\n".join(quotelines))
 542
 543         @contextmanager
 544         def disable(self):
 545             self._disable = True
 546             yield True
 547             self._disable = False
 548
 549         @classmethod
 550         def clean(klass, line):
 551             m = klass.RE.match(line)
 552             return m.group(1) if m else line
 553
 554     def extendMarkdown(self, md):
 555         md.registerExtension(self)
 556         email_quote_proc = self.BlockProcessor(md.parser)
 557         md.parser.blockprocessors.register(email_quote_proc, "emailquote", 25)
 558
 559
 560 # [ PARTS GENERATION ] ########################################################
 561
 562
 563 class Part(
 564     namedtuple(
 565         "Part",
 566         ["type", "subtype", "path", "desc", "cid", "orig"],
 567         defaults=[None, None, False],
 568     )
 569 ):
 570     def __str__(self):
 571         ret = f"<{self.type}/{self.subtype}>"
 572         if self.cid:
 573             ret = f"{ret} cid:{self.cid}"
 574         if self.orig:
 575             ret = f"{ret} ORIGINAL"
 576         return ret
 577
 578
 579 class Multipart(
 580     namedtuple("Multipart", ["subtype", "children", "desc"], defaults=[None])
 581 ):
 582     def __str__(self):
 583         return f"<multipart/{self.subtype}> children={len(self.children)}"
 584
 585     def __hash__(self):
 586         return hash(str(self.subtype) + "".join(str(self.children)))
 587
 588
 589 def collect_inline_images(
 590     image_registry, *, tempdir=None, filefactory=FileFactory()
 591 ):
 592     relparts = []
 593     for path, info in image_registry.items():
 594         if path.startswith("cid:"):
 595             continue
 596
 597         data = request.urlopen(path)
 598
 599         mimetype = data.headers["Content-Type"]
 600         ext = mimetypes.guess_extension(mimetype)
 601         tempfilename = tempfile.mkstemp(prefix="img", suffix=ext, dir=tempdir)
 602         path = pathlib.Path(tempfilename[1])
 603
 604         with filefactory(path, "w+b") as out_f:
 605             out_f.write(data.read())
 606
 607         # filewriter_fn(path, data.read(), "w+b")
 608
 609         desc = (
 610             f'Inline image: "{info.desc}"'
 611             if info.desc
 612             else f"Inline image {str(len(relparts)+1)}"
 613         )
 614         relparts.append(
 615             Part(*mimetype.split("/"), path, cid=info.cid, desc=desc)
 616         )
 617
 618     return relparts
 619
 620
 621 EMAIL_SIG_SEP = "\n-- \n"
 622 HTML_SIG_MARKER = "=htmlsig "
 623
 624
 625 def make_html_doc(body, sig=None):
 626     ret = (
 627         "<!DOCTYPE html>\n"
 628         "<html>\n"
 629         "<head>\n"
 630         '<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n'  # noqa: E501
 631         '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'  # noqa: E501
 632         "</head>\n"
 633         "<body>\n"
 634         f"{body}\n"
 635     )
 636
 637     if sig:
 638         nl = "\n"
 639         ret = (
 640             f'{ret}<div id="signature"><span class="sig_separator">{EMAIL_SIG_SEP.strip(nl)}</span>\n'  # noqa: E501
 641             f"{sig}\n"
 642             "</div>"
 643         )
 644
 645     return f"{ret}\n  </body>\n</html>"
 646
 647
 648 def make_text_mail(text, sig=None):
 649     return EMAIL_SIG_SEP.join((text, sig)) if sig else text
 650
 651
 652 def extract_signature(text, *, filefactory=FileFactory()):
 653     parts = text.split(EMAIL_SIG_SEP, 1)
 654     if len(parts) == 1:
 655         return text, None, None
 656
 657     lines = parts[1].splitlines()
 658     if lines[0].startswith(HTML_SIG_MARKER):
 659         path = pathlib.Path(re.split(r" +", lines.pop(0), maxsplit=1)[1])
 660         textsig = "\n".join(lines)
 661
 662         with filefactory(path.expanduser()) as sig_f:
 663             sig_input = sig_f.read()
 664
 665         soup = bs4.BeautifulSoup(sig_input, "html.parser")
 666
 667         style = str(soup.style.extract()) if soup.style else ""
 668         for sig_selector in (
 669             "#signature",
 670             "#signatur",
 671             "#emailsig",
 672             ".signature",
 673             ".signatur",
 674             ".emailsig",
 675             "body",
 676             "div",
 677         ):
 678             sig = soup.select_one(sig_selector)
 679             if sig:
 680                 break
 681
 682         if not sig:
 683             return parts[0], textsig, style + sig_input
 684
 685         if sig.attrs.get("id") == "signature":
 686             sig = "".join(str(c) for c in sig.children)
 687
 688         return parts[0], textsig, style + str(sig)
 689
 690     return parts[0], parts[1], None
 691
 692
 693 def convert_markdown_to_html(
 694     draft_f,
 695     *,
 696     related_to_html_only=False,
 697     css_f=None,
 698     htmldump_f=None,
 699     filefactory=FileFactory(),
 700     tempdir=None,
 701     extensions=None,
 702     extension_configs=None,
 703 ):
 704     # TODO extension_configs need to be handled differently
 705     extension_configs = extension_configs or {}
 706     extension_configs.setdefault("pymdownx.highlight", {})[
 707         "css_class"
 708     ] = _CODEHILITE_CLASS
 709
 710     extensions = extensions or []
 711     extensions.append(FormatFlowedNewlineExtension())
 712     extensions.append(QuoteToAdmonitionExtension())
 713
 714     draft = draft_f.read()
 715     origtext, textsig, htmlsig = extract_signature(
 716         draft, filefactory=filefactory
 717     )
 718
 719     (
 720         origtext,
 721         htmltext,
 722         image_registry,
 723         mdwn,
 724     ) = markdown_with_inline_image_support(
 725         origtext, extensions=extensions, extension_configs=extension_configs
 726     )
 727
 728     if htmlsig:
 729         if not textsig:
 730             # TODO: decide what to do if there is no plain-text version
 731             raise NotImplementedError("HTML signature but no text alternative")
 732
 733         soup = bs4.BeautifulSoup(htmlsig, "html.parser")
 734         for img in soup.find_all("img"):
 735             uri = img.attrs["src"]
 736             desc = img.attrs.get("title", img.attrs.get("alt"))
 737             cid = image_registry.register(uri, desc)
 738             img.attrs["src"] = f"cid:{cid}"
 739
 740         htmlsig = str(soup)
 741
 742     elif textsig:
 743         (
 744             textsig,
 745             htmlsig,
 746             image_registry,
 747             mdwn,
 748         ) = markdown_with_inline_image_support(
 749             textsig,
 750             extensions=extensions,
 751             extension_configs=extension_configs,
 752             image_registry=image_registry,
 753             mdwn=mdwn,
 754         )
 755
 756     origtext = make_text_mail(origtext, textsig)
 757     draft_f.write(origtext)
 758     textpart = Part(
 759         "text", "plain", draft_f.path, "Plain-text version", orig=True
 760     )
 761
 762     htmltext = make_html_doc(htmltext, htmlsig)
 763     htmltext = apply_styling(htmltext, css_f.read() if css_f else None)
 764
 765     if draft_f.path:
 766         htmlpath = draft_f.path.with_suffix(".html")
 767     else:
 768         htmlpath = pathlib.Path(
 769             tempfile.mkstemp(suffix=".html", dir=tempdir)[1]
 770         )
 771     with filefactory(
 772         htmlpath, "w", encoding="utf-8", errors="xmlcharrefreplace"
 773     ) as out_f:
 774         out_f.write(htmltext)
 775     htmlpart = Part("text", "html", htmlpath, "HTML version")
 776
 777     if htmldump_f:
 778         htmldump_f.write(htmltext)
 779
 780     imgparts = collect_inline_images(
 781         image_registry, tempdir=tempdir, filefactory=filefactory
 782     )
 783
 784     if related_to_html_only:
 785         # If there are inline image part, they will be contained within a
 786         # multipart/related part along with the HTML part only
 787         if imgparts:
 788             # replace htmlpart with a multipart/related container of the HTML
 789             # parts and the images
 790             htmlpart = Multipart(
 791                 "relative", [htmlpart] + imgparts, "Group of related content"
 792             )
 793
 794         return Multipart(
 795             "alternative", [textpart, htmlpart], "Group of alternative content"
 796         )
 797
 798     else:
 799         # If there are inline image part, they will be siblings to the
 800         # multipart/alternative tree within a multipart/related part
 801         altpart = Multipart(
 802             "alternative", [textpart, htmlpart], "Group of alternative content"
 803         )
 804         if imgparts:
 805             return Multipart(
 806                 "relative", [altpart] + imgparts, "Group of related content"
 807             )
 808         else:
 809             return altpart
 810
 811
 812 class MIMETreeDFWalker:
 813     def __init__(self, *, visitor_fn=None, debug=False):
 814         self._visitor_fn = visitor_fn or self._echovisit
 815         self._debug = debug
 816
 817     def _echovisit(self, node, ancestry, debugprint):
 818         debugprint(f"node={node} ancestry={ancestry}")
 819
 820     def walk(self, root, *, visitor_fn=None):
 821         """
 822         Recursive function to implement a depth-dirst walk of the MIME-tree
 823         rooted at `root`.
 824         """
 825         if isinstance(root, list):
 826             if len(root) > 1:
 827                 root = Multipart("mixed", children=root)
 828             else:
 829                 root = root[0]
 830
 831         self._walk(
 832             root,
 833             ancestry=[],
 834             descendents=[],
 835             visitor_fn=visitor_fn or self._visitor_fn,
 836         )
 837
 838     def _walk(self, node, *, ancestry, descendents, visitor_fn):
 839         # Let's start by enumerating the parts at the current level. At the
 840         # root level, ancestry will be the empty list, and we expect a
 841         # multipart/* container at this level. Later, e.g. within a
 842         # mutlipart/alternative container, the subtree will just be the
 843         # alternative parts, while the top of the ancestry will be the
 844         # multipart/alternative container, which we will process after the
 845         # following loop.
 846
 847         lead = f"{'│ '*len(ancestry)}"
 848         if isinstance(node, Multipart):
 849             self.debugprint(
 850                 f"{lead}├{node} ancestry={[s.subtype for s in ancestry]}"
 851             )
 852
 853             # Depth-first, so push the current container onto the ancestry
 854             # stack, then descend …
 855             ancestry.append(node)
 856             self.debugprint(lead + "│ " * 2)
 857             for child in node.children:
 858                 self._walk(
 859                     child,
 860                     ancestry=ancestry,
 861                     descendents=descendents,
 862                     visitor_fn=visitor_fn,
 863                 )
 864             assert ancestry.pop() == node
 865             sibling_descendents = descendents
 866             descendents.extend(node.children)
 867
 868         else:
 869             self.debugprint(f"{lead}├{node}")
 870             sibling_descendents = descendents
 871
 872         if False and ancestry:
 873             self.debugprint(lead[:-1] + " │")
 874
 875         if visitor_fn:
 876             visitor_fn(
 877                 node, ancestry, sibling_descendents, debugprint=self.debugprint
 878             )
 879
 880     def debugprint(self, s, **kwargs):
 881         if self._debug:
 882             print(s, file=sys.stderr, **kwargs)
 883
 884
 885 # [ RUN MODES ] ###############################################################
 886
 887
 888 class MuttCommands:
 889     """
 890     Stupid class to interface writing out Mutt commands. This is quite a hack
 891     to deal with the fact that Mutt runs "push" commands in reverse order, so
 892     all of a sudden, things become very complicated when mixing with "real"
 893     commands.
 894
 895     Hence we keep two sets of commands, and one set of pushes. Commands are
 896     added to the first until a push is added, after which commands are added to
 897     the second set of commands.
 898
 899     On flush(), the first set is printed, followed by the pushes in reverse,
 900     and then the second set is printed. All 3 sets are then cleared.
 901     """
 902
 903     def __init__(self, out_f=sys.stdout, *, debug=False):
 904         self._cmd1, self._push, self._cmd2 = [], [], []
 905         self._out_f = out_f
 906         self._debug = debug
 907
 908     def cmd(self, s):
 909         self.debugprint(s)
 910         if self._push:
 911             self._cmd2.append(s)
 912         else:
 913             self._cmd1.append(s)
 914
 915     def push(self, s):
 916         s = s.replace('"', r"\"")
 917         s = f'push "{s}"'
 918         self.debugprint(s)
 919         self._push.insert(0, s)
 920
 921     def flush(self):
 922         print(
 923             "\n".join(self._cmd1 + self._push + self._cmd2), file=self._out_f
 924         )
 925         self._cmd1, self._push, self._cmd2 = [], [], []
 926
 927     def debugprint(self, s, **kwargs):
 928         if self._debug:
 929             print(s, file=sys.stderr, **kwargs)
 930
 931
 932 def do_setup(
 933     *,
 934     out_f=sys.stdout,
 935     temppath=None,
 936     tempdir=None,
 937     debug_commands=False,
 938 ):
 939     temppath = temppath or pathlib.Path(
 940         tempfile.mkstemp(prefix="muttmdwn-", dir=tempdir)[1]
 941     )
 942     cmds = MuttCommands(out_f, debug=debug_commands)
 943
 944     editor = f"{' '.join(sys.argv)} massage --write-commands-to {temppath}"
 945
 946     cmds.cmd('set my_editor="$editor"')
 947     cmds.cmd('set my_edit_headers="$edit_headers"')
 948     cmds.cmd(f'set editor="{editor}"')
 949     cmds.cmd("unset edit_headers")
 950     cmds.cmd(f"set my_mdwn_postprocess_cmd_file={temppath}")
 951     cmds.push("<first-entry><edit-file>")
 952     cmds.flush()
 953
 954
def do_massage(
    draft_f,
    cmd_f,
    *,
    extensions=None,
    css_f=None,
    htmldump_f=None,
    converter=convert_markdown_to_html,
    related_to_html_only=True,
    only_build=False,
    max_other_attachments=20,
    tempdir=None,
    debug_commands=False,
    debug_walk=False,
):
    """
    Turn the draft into a MIME tree and write to cmd_f the NeoMutt commands
    that assemble that tree.

    draft_f is handed to converter, which returns the tree; css_f,
    htmldump_f, related_to_html_only and tempdir are passed through to it
    unchanged, and extensions (a comma-separated string) is split into a
    list first.

    cmd_f receives the commands; its name, if it has one, is used in the
    command that removes the file again.

    only_build suppresses the final <send-message>. max_other_attachments
    is the number of <move-up> keystrokes pushed for every part other than
    the first. debug_commands and debug_walk are the debug flags of the
    MuttCommands and MIMETreeDFWalker instances used here.

    Raises NotImplementedError for a multipart subtype other than
    alternative, relative/related or multilingual, and RuntimeError for a
    tree node that is neither a Part nor a Multipart.
    """
    # Here's the big picture: we're being invoked as the editor on the email
    # draft, and whatever commands we write to the file given as cmd_f will
    # be run by the second source command in the macro definition.

    # The commands that clean up what the setup did (see above), i.e. that
    # restore the $editor and $edit_headers variables, and also unset the
    # variable used to identify the command file we're currently writing
    # to, are queued at the very end of this function.
    cmds = MuttCommands(cmd_f, debug=debug_commands)

    extensions = extensions.split(",") if extensions else []
    tree = converter(
        draft_f,
        css_f=css_f,
        htmldump_f=htmldump_f,
        related_to_html_only=related_to_html_only,
        tempdir=tempdir,
        extensions=extensions,
    )

    mimetree = MIMETreeDFWalker(debug=debug_walk)

    # Bookkeeping shared with the visitor: pos is the assumed position of
    # the part just handled, parts is the number of parts seen so far, and
    # tags maps a parent node to the positions of its children.
    state = dict(pos=1, tags={}, parts=1)

    def visitor_fn(item, ancestry, descendents, *, debugprint=None):
        """
        Visitor function called for every node (part) of the MIME tree,
        depth-first, and responsible for telling NeoMutt how to assemble
        the tree.
        """
        # Keystrokes prepended to a new value in the edit prompts below
        KILL_LINE = r"\Ca\Ck"

        if isinstance(item, Part):
            # We've hit a leaf-node, i.e. an alternative or a related part
            # with actual content.

            # Let's add the part
            if item.orig:
                # The original source already exists in the NeoMutt tree, but
                # the underlying file may have been modified, so we need to
                # update the encoding, but that's it:
                cmds.push("<first-entry>")
                cmds.push("<update-encoding>")

                # We really just need to be able to assume that at this point,
                # NeoMutt is at position 1, and that we've processed only this
                # part so far. Never mind about actual attachments, we can
                # safely ignore those as they stay at the end.
                assert state["pos"] == 1
                assert state["parts"] == 1
            else:
                # … whereas all other parts need to be added, and they're all
                # considered to be temporary and inline:
                cmds.push(f"<attach-file>{item.path}<enter>")
                cmds.push("<toggle-unlink><toggle-disposition>")

                # This added a part at the end of the list of parts, and that's
                # just how many parts we've seen so far, so its position in
                # the NeoMutt compose list is the count of parts
                state["parts"] += 1
                state["pos"] = state["parts"]

            # If the item (including the original) comes with additional
            # information, then we might just as well update the NeoMutt
            # tree now:
            if item.cid:
                cmds.push(f"<edit-content-id>{KILL_LINE}{item.cid}<enter>")

            # Now for the biggest hack in this script, which is to handle
            # attachments, such as PDFs, that aren't related or alternatives.
            # The problem is that when we add an inline image, it always gets
            # appended to the list, i.e. inserted *after* other attachments.
            # Since we don't know the number of attachments, we also cannot
            # infer the position of the new attachment. Therefore, we bubble
            # it all the way to the top, only to then move it down again:
            if state["pos"] > 1:  # skip for the first part
                for i in range(max_other_attachments):
                    # could use any number here, but has to be larger than the
                    # number of possible attachments. The performance
                    # difference of using a high number is negligible.
                    # Bubble up the new part
                    cmds.push("<move-up>")

                # As we push the part to the right position in the list (i.e.
                # the last of the subset of attachments this script added), we
                # must handle the situation that subtrees are skipped by
                # NeoMutt. Hence, the actual number of positions to move down
                # is decremented by the number of descendents so far
                # encountered.
                for i in range(1, state["pos"] - len(descendents)):
                    cmds.push("<move-down>")

        elif isinstance(item, Multipart):
            # This node has children, but we already visited them (see
            # above). The tags dictionary of state should contain a list of
            # their positions in the NeoMutt compose window, so iterate those
            # and tag the parts there:
            n_tags = len(state["tags"][item])
            for tag in state["tags"][item]:
                cmds.push(f"<jump>{tag}<enter><tag-entry>")

            if item.subtype == "alternative":
                cmds.push("<group-alternatives>")
            elif item.subtype in ("relative", "related"):
                cmds.push("<group-related>")
            elif item.subtype == "multilingual":
                cmds.push("<group-multilingual>")
            else:
                raise NotImplementedError(
                    f"Handling of multipart/{item.subtype} is not implemented"
                )

            # Move the assumed position back by one less than the number of
            # tagged children, and count the new container as a part
            state["pos"] -= n_tags - 1
            state["parts"] += 1

        else:
            # We should never get here
            raise RuntimeError(f"Type {type(item)} is unexpected: {item}")

        # If the item has a description, we might just as well add it
        if item.desc:
            cmds.push(f"<edit-description>{KILL_LINE}{item.desc}<enter>")

        if ancestry:
            # If there's an ancestry, record the current (assumed) position in
            # the NeoMutt compose window as needed-to-tag by our direct parent
            # (i.e. the last item of the ancestry)
            state["tags"].setdefault(ancestry[-1], []).append(state["pos"])

            lead = "│ " * (len(ancestry) + 1) + "* "
            debugprint(
                f"{lead}ancestry={[a.subtype for a in ancestry]}\n"
                f"{lead}descendents={[d.subtype for d in descendents]}\n"
                f"{lead}children_positions={state['tags'][ancestry[-1]]}\n"
                f"{lead}pos={state['pos']}, parts={state['parts']}"
            )

    # -----------------
    # End of visitor_fn

    # Let's walk the tree and visit every node with our fancy visitor
    # function
    mimetree.walk(tree, visitor_fn=visitor_fn)

    if not only_build:
        cmds.push("<send-message>")

    # Finally, cleanup. Since we're responsible for removing the temporary
    # file, how's this for a little hack?
    try:
        filename = cmd_f.name
    except AttributeError:
        # cmd_f has no name attribute; going by the placeholder, this is
        # presumably the case for the in-memory files used by the tests
        filename = "pytest_internal_file"
    cmds.cmd(f"source 'rm -f {filename}|'")
    cmds.cmd('set editor="$my_editor"')
    cmds.cmd('set edit_headers="$my_edit_headers"')
    cmds.cmd("unset my_editor")
    cmds.cmd("unset my_edit_headers")
    cmds.cmd("unset my_mdwn_postprocess_cmd_file")
    cmds.flush()
1130
1131
1132 # [ CLI ENTRY ] ###############################################################
1133
if __name__ == "__main__":
    args = parse_cli_args()

    if args.mode == "massage":
        # Second stage: we are running as the editor on the draft
        with (
            File(args.MAILDRAFT, "r+") as draft_f,
            File(args.cmdpath, "w") as cmd_f,
            File(args.css_file, "r") as css_f,
            File(args.dump_html, "w") as htmldump_f,
        ):
            massage_options = dict(
                extensions=args.extensions,
                css_f=css_f,
                htmldump_f=htmldump_f,
                related_to_html_only=args.related_to_html_only,
                max_other_attachments=args.max_number_other_attachments,
                only_build=args.only_build,
                tempdir=args.tempdir,
                debug_commands=args.debug_commands,
                debug_walk=args.debug_walk,
            )
            do_massage(draft_f, cmd_f, **massage_options)

    elif args.mode is None:
        # First stage: no mode given, so print the setup commands
        do_setup(tempdir=args.tempdir, debug_commands=args.debug_commands)
1163
1164
1165 # [ TESTS ] ###################################################################
1166
1167 try:
1168     import pytest
1169
1170     class Tests:
1171         @pytest.fixture
1172         def const1(self):
1173             return "Curvature Vest Usher Dividing+T#iceps Senior"
1174
1175         @pytest.fixture
1176         def const2(self):
1177             return "Habitant Celestial 2litzy Resurf/ce Headpiece Harmonics"
1178
1179         @pytest.fixture
1180         def fakepath(self):
1181             return pathlib.Path("/does/not/exist")
1182
1183         @pytest.fixture
1184         def fakepath2(self):
1185             return pathlib.Path("/does/not/exist/either")
1186
1187         # NOTE: tests using the capsys fixture must specify sys.stdout to the
1188         # functions they call, else old stdout is used and not captured
1189
1190         @pytest.mark.muttctrl
1191         def test_MuttCommands_cmd(self, const1, const2, capsys):
1192             "Assert order of commands"
1193             cmds = MuttCommands(out_f=sys.stdout)
1194             cmds.cmd(const1)
1195             cmds.cmd(const2)
1196             cmds.flush()
1197             captured = capsys.readouterr()
1198             assert captured.out == "\n".join((const1, const2, ""))
1199
1200         @pytest.mark.muttctrl
1201         def test_MuttCommands_push(self, const1, const2, capsys):
1202             "Assert reverse order of pushes"
1203             cmds = MuttCommands(out_f=sys.stdout)
1204             cmds.push(const1)
1205             cmds.push(const2)
1206             cmds.flush()
1207             captured = capsys.readouterr()
1208             assert (
1209                 captured.out
1210                 == ('"\npush "'.join(("", const2, const1, "")))[2:-6]
1211             )
1212
1213         @pytest.mark.muttctrl
1214         def test_MuttCommands_push_escape(self, const1, const2, capsys):
1215             cmds = MuttCommands(out_f=sys.stdout)
1216             cmds.push(f'"{const1}"')
1217             cmds.flush()
1218             captured = capsys.readouterr()
1219             assert f'"\\"{const1}\\""' in captured.out
1220
1221         @pytest.mark.muttctrl
1222         def test_MuttCommands_cmd_push_mixed(self, const1, const2, capsys):
1223             "Assert reverse order of pushes"
1224             cmds = MuttCommands(out_f=sys.stdout)
1225             lines = ["000", "001", "010", "011", "100", "101", "110", "111"]
1226             for i in range(2):
1227                 cmds.cmd(lines[4 * i + 0])
1228                 cmds.cmd(lines[4 * i + 1])
1229                 cmds.push(lines[4 * i + 2])
1230                 cmds.push(lines[4 * i + 3])
1231             cmds.flush()
1232
1233             captured = capsys.readouterr()
1234             lines_out = captured.out.splitlines()
1235             assert lines[0] in lines_out[0]
1236             assert lines[1] in lines_out[1]
1237             assert lines[7] in lines_out[2]
1238             assert lines[6] in lines_out[3]
1239             assert lines[3] in lines_out[4]
1240             assert lines[2] in lines_out[5]
1241             assert lines[4] in lines_out[6]
1242             assert lines[5] in lines_out[7]
1243
1244         @pytest.fixture
1245         def mime_tree_related_to_alternative(self):
1246             return Multipart(
1247                 "relative",
1248                 children=[
1249                     Multipart(
1250                         "alternative",
1251                         children=[
1252                             Part(
1253                                 "text",
1254                                 "plain",
1255                                 "part.txt",
1256                                 desc="Plain",
1257                                 orig=True,
1258                             ),
1259                             Part("text", "html", "part.html", desc="HTML"),
1260                         ],
1261                         desc="Alternative",
1262                     ),
1263                     Part(
1264                         "text", "png", "logo.png", cid="logo.png", desc="Logo"
1265                     ),
1266                 ],
1267                 desc="Related",
1268             )
1269
1270         @pytest.fixture
1271         def mime_tree_related_to_html(self):
1272             return Multipart(
1273                 "alternative",
1274                 children=[
1275                     Part(
1276                         "text",
1277                         "plain",
1278                         "part.txt",
1279                         desc="Plain",
1280                         orig=True,
1281                     ),
1282                     Multipart(
1283                         "relative",
1284                         children=[
1285                             Part("text", "html", "part.html", desc="HTML"),
1286                             Part(
1287                                 "text",
1288                                 "png",
1289                                 "logo.png",
1290                                 cid="logo.png",
1291                                 desc="Logo",
1292                             ),
1293                         ],
1294                         desc="Related",
1295                     ),
1296                 ],
1297                 desc="Alternative",
1298             )
1299
1300         @pytest.fixture
1301         def mime_tree_nested(self):
1302             return Multipart(
1303                 "relative",
1304                 children=[
1305                     Multipart(
1306                         "alternative",
1307                         children=[
1308                             Part(
1309                                 "text",
1310                                 "plain",
1311                                 "part.txt",
1312                                 desc="Plain",
1313                                 orig=True,
1314                             ),
1315                             Multipart(
1316                                 "alternative",
1317                                 children=[
1318                                     Part(
1319                                         "text",
1320                                         "plain",
1321                                         "part.txt",
1322                                         desc="Nested plain",
1323                                     ),
1324                                     Part(
1325                                         "text",
1326                                         "html",
1327                                         "part.html",
1328                                         desc="Nested HTML",
1329                                     ),
1330                                 ],
1331                                 desc="Nested alternative",
1332                             ),
1333                         ],
1334                         desc="Alternative",
1335                     ),
1336                     Part(
1337                         "text",
1338                         "png",
1339                         "logo.png",
1340                         cid="logo.png",
1341                         desc="Logo",
1342                     ),
1343                 ],
1344                 desc="Related",
1345             )
1346
1347         @pytest.mark.treewalk
1348         def test_MIMETreeDFWalker_depth_first_walk(
1349             self, mime_tree_related_to_alternative
1350         ):
1351             mimetree = MIMETreeDFWalker()
1352
1353             items = []
1354
1355             def visitor_fn(item, ancestry, descendents, debugprint):
1356                 items.append((item, len(ancestry), len(descendents)))
1357
1358             mimetree.walk(
1359                 mime_tree_related_to_alternative, visitor_fn=visitor_fn
1360             )
1361             assert len(items) == 5
1362             assert items[0][0].subtype == "plain"
1363             assert items[0][1] == 2
1364             assert items[0][2] == 0
1365             assert items[1][0].subtype == "html"
1366             assert items[1][1] == 2
1367             assert items[1][2] == 0
1368             assert items[2][0].subtype == "alternative"
1369             assert items[2][1] == 1
1370             assert items[2][2] == 2
1371             assert items[3][0].subtype == "png"
1372             assert items[3][1] == 1
1373             assert items[3][2] == 2
1374             assert items[4][0].subtype == "relative"
1375             assert items[4][1] == 0
1376             assert items[4][2] == 4
1377
1378         @pytest.mark.treewalk
1379         def test_MIMETreeDFWalker_list_to_mixed(self, const1):
1380             mimetree = MIMETreeDFWalker()
1381             items = []
1382
1383             def visitor_fn(item, ancestry, descendents, debugprint):
1384                 items.append(item)
1385
1386             p = Part("text", "plain", const1)
1387             mimetree.walk([p], visitor_fn=visitor_fn)
1388             assert items[-1].subtype == "plain"
1389             mimetree.walk([p, p], visitor_fn=visitor_fn)
1390             assert items[-1].subtype == "mixed"
1391
1392         @pytest.mark.treewalk
1393         def test_MIMETreeDFWalker_visitor_in_constructor(
1394             self, mime_tree_related_to_alternative
1395         ):
1396             items = []
1397
1398             def visitor_fn(item, ancestry, descendents, debugprint):
1399                 items.append(item)
1400
1401             mimetree = MIMETreeDFWalker(visitor_fn=visitor_fn)
1402             mimetree.walk(mime_tree_related_to_alternative)
1403             assert len(items) == 5
1404
1405         @pytest.fixture
1406         def string_io(self, const1, text=None):
1407             return StringIO(text or const1)
1408
1409         @pytest.mark.massage
1410         def test_do_massage_basic(self):
1411             def converter(draft_f, **kwargs):
1412                 return Part("text", "plain", draft_f.path, orig=True)
1413
1414             with File() as draft_f, File() as cmd_f:
1415                 do_massage(
1416                     draft_f=draft_f,
1417                     cmd_f=cmd_f,
1418                     converter=converter,
1419                 )
1420                 lines = cmd_f.read().splitlines()
1421
1422             assert "send-message" in lines.pop(0)
1423             assert "update-encoding" in lines.pop(0)
1424             assert "first-entry" in lines.pop(0)
1425             assert "source 'rm -f " in lines.pop(0)
1426             assert '="$my_editor"' in lines.pop(0)
1427             assert '="$my_edit_headers"' in lines.pop(0)
1428             assert "unset my_editor" == lines.pop(0)
1429             assert "unset my_edit_headers" == lines.pop(0)
1430             assert "unset my_mdwn_postprocess_cmd_file" == lines.pop(0)
1431
1432         @pytest.mark.massage
1433         def test_do_massage_fulltree(self, mime_tree_related_to_alternative):
1434             def converter(draft_f, **kwargs):
1435                 return mime_tree_related_to_alternative
1436
1437             max_attachments = 5
1438
1439             with File() as draft_f, File() as cmd_f:
1440                 do_massage(
1441                     draft_f=draft_f,
1442                     cmd_f=cmd_f,
1443                     max_other_attachments=max_attachments,
1444                     converter=converter,
1445                 )
1446                 lines = cmd_f.read().splitlines()[:-6]
1447
1448             assert "first-entry" in lines.pop()
1449             assert "update-encoding" in lines.pop()
1450             assert "Plain" in lines.pop()
1451             assert "part.html" in lines.pop()
1452             assert "toggle-unlink" in lines.pop()
1453             for i in range(max_attachments):
1454                 assert "move-up" in lines.pop()
1455             assert "move-down" in lines.pop()
1456             assert "HTML" in lines.pop()
1457             assert "jump>1" in lines.pop()
1458             assert "jump>2" in lines.pop()
1459             assert "group-alternatives" in lines.pop()
1460             assert "Alternative" in lines.pop()
1461             assert "logo.png" in lines.pop()
1462             assert "toggle-unlink" in lines.pop()
1463             assert "content-id" in lines.pop()
1464             for i in range(max_attachments):
1465                 assert "move-up" in lines.pop()
1466             assert "move-down" in lines.pop()
1467             assert "Logo" in lines.pop()
1468             assert "jump>1" in lines.pop()
1469             assert "jump>4" in lines.pop()
1470             assert "group-related" in lines.pop()
1471             assert "Related" in lines.pop()
1472             assert "send-message" in lines.pop()
1473             assert len(lines) == 0
1474
1475         @pytest.mark.massage
1476         def test_mime_tree_relative_within_alternative(
1477             self, mime_tree_related_to_html
1478         ):
1479             def converter(draft_f, **kwargs):
1480                 return mime_tree_related_to_html
1481
1482             with File() as draft_f, File() as cmd_f:
1483                 do_massage(
1484                     draft_f=draft_f,
1485                     cmd_f=cmd_f,
1486                     converter=converter,
1487                 )
1488                 lines = cmd_f.read().splitlines()[:-6]
1489
1490             assert "first-entry" in lines.pop()
1491             assert "update-encoding" in lines.pop()
1492             assert "Plain" in lines.pop()
1493             assert "part.html" in lines.pop()
1494             assert "toggle-unlink" in lines.pop()
1495             assert "move-up" in lines.pop()
1496             while True:
1497                 top = lines.pop()
1498                 if "move-up" not in top:
1499                     break
1500             assert "move-down" in top
1501             assert "HTML" in lines.pop()
1502             assert "logo.png" in lines.pop()
1503             assert "toggle-unlink" in lines.pop()
1504             assert "content-id" in lines.pop()
1505             assert "move-up" in lines.pop()
1506             while True:
1507                 top = lines.pop()
1508                 if "move-up" not in top:
1509                     break
1510             assert "move-down" in top
1511             assert "move-down" in lines.pop()
1512             assert "Logo" in lines.pop()
1513             assert "jump>2" in lines.pop()
1514             assert "jump>3" in lines.pop()
1515             assert "group-related" in lines.pop()
1516             assert "Related" in lines.pop()
1517             assert "jump>1" in lines.pop()
1518             assert "jump>2" in lines.pop()
1519             assert "group-alternative" in lines.pop()
1520             assert "Alternative" in lines.pop()
1521             assert "send-message" in lines.pop()
1522             assert len(lines) == 0
1523
1524         @pytest.mark.massage
1525         def test_mime_tree_nested_trees_does_not_break_positioning(
1526             self, mime_tree_nested
1527         ):
1528             def converter(draft_f, **kwargs):
1529                 return mime_tree_nested
1530
1531             with File() as draft_f, File() as cmd_f:
1532                 do_massage(
1533                     draft_f=draft_f,
1534                     cmd_f=cmd_f,
1535                     converter=converter,
1536                 )
1537                 lines = cmd_f.read().splitlines()
1538
1539             while "logo.png" not in lines.pop():
1540                 pass
1541             lines.pop()
1542             assert "content-id" in lines.pop()
1543             assert "move-up" in lines.pop()
1544             while True:
1545                 top = lines.pop()
1546                 if "move-up" not in top:
1547                     break
1548             assert "move-down" in top
1549             # Due to the nested trees, the number of descendents of the sibling
1550             # actually needs to be considered, not just the nieces. So to move
1551             # from position 1 to position 6, it only needs one <move-down>
1552             # because that jumps over the entire sibling tree. Thus what
1553             # follows next must not be another <move-down>
1554             assert "Logo" in lines.pop()
1555
1556         @pytest.mark.converter
1557         def test_converter_tree_basic(self, fakepath, const1, fakefilefactory):
1558             with fakefilefactory(fakepath, content=const1) as draft_f:
1559                 tree = convert_markdown_to_html(
1560                     draft_f, filefactory=fakefilefactory
1561                 )
1562
1563             assert tree.subtype == "alternative"
1564             assert len(tree.children) == 2
1565             assert tree.children[0].subtype == "plain"
1566             assert tree.children[0].path == draft_f.path
1567             assert tree.children[0].orig
1568             assert tree.children[1].subtype == "html"
1569             assert tree.children[1].path == fakepath.with_suffix(".html")
1570
1571         @pytest.mark.converter
1572         def test_converter_writes(
1573             self, fakepath, fakefilefactory, const1, monkeypatch
1574         ):
1575             with fakefilefactory(fakepath, content=const1) as draft_f:
1576                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1577
1578             html = fakefilefactory.pop()
1579             assert fakepath.with_suffix(".html") == html[0]
1580             assert const1 in html[1].read()
1581             text = fakefilefactory.pop()
1582             assert fakepath == text[0]
1583             assert const1 == text[1].read()
1584
1585         @pytest.mark.imgproc
1586         def test_markdown_inline_image_processor(self):
1587             imgpath1 = "file:/path/to/image.png"
1588             imgpath2 = "file:///path/to/image.png?url=params"
1589             imgpath3 = "/path/to/image.png"
1590             text = f"""![inline local image]({imgpath1})
1591                        ![image inlined
1592                          with newline]({imgpath2})
1593                        ![image local path]({imgpath3})"""
1594             text, html, images, mdwn = markdown_with_inline_image_support(text)
1595
1596             # local paths have been normalised to URLs:
1597             imgpath3 = f"file://{imgpath3}"
1598
1599             assert 'src="cid:' in html
1600             assert "](cid:" in text
1601             assert len(images) == 3
1602             assert imgpath1 in images
1603             assert imgpath2 in images
1604             assert imgpath3 in images
1605             assert images[imgpath1].cid != images[imgpath2].cid
1606             assert images[imgpath1].cid != images[imgpath3].cid
1607             assert images[imgpath2].cid != images[imgpath3].cid
1608
1609         @pytest.mark.imgproc
1610         def test_markdown_inline_image_processor_title_to_desc(self, const1):
1611             imgpath = "file:///path/to/image.png"
1612             text = f'![inline local image]({imgpath} "{const1}")'
1613             text, html, images, mdwn = markdown_with_inline_image_support(text)
1614             assert images[imgpath].desc == const1
1615
1616         @pytest.mark.imgproc
1617         def test_markdown_inline_image_processor_alt_to_desc(self, const1):
1618             imgpath = "file:///path/to/image.png"
1619             text = f"![{const1}]({imgpath})"
1620             text, html, images, mdwn = markdown_with_inline_image_support(text)
1621             assert images[imgpath].desc == const1
1622
1623         @pytest.mark.imgproc
1624         def test_markdown_inline_image_processor_title_over_alt_desc(
1625             self, const1, const2
1626         ):
1627             imgpath = "file:///path/to/image.png"
1628             text = f'![{const1}]({imgpath} "{const2}")'
1629             text, html, images, mdwn = markdown_with_inline_image_support(text)
1630             assert images[imgpath].desc == const2
1631
1632         @pytest.mark.imgproc
1633         def test_markdown_inline_image_not_external(self):
1634             imgpath = "https://path/to/image.png"
1635             text = f"![inline image]({imgpath})"
1636             text, html, images, mdwn = markdown_with_inline_image_support(text)
1637
1638             assert 'src="cid:' not in html
1639             assert "](cid:" not in text
1640             assert len(images) == 0
1641
1642         @pytest.mark.imgproc
1643         def test_markdown_inline_image_local_file(self):
1644             imgpath = "/path/to/image.png"
1645             text = f"![inline image]({imgpath})"
1646             text, html, images, mdwn = markdown_with_inline_image_support(text)
1647
1648             for k, v in images.items():
1649                 assert k == f"file://{imgpath}"
1650                 break
1651
1652         @pytest.mark.imgproc
1653         def test_markdown_inline_image_expanduser(self):
1654             imgpath = pathlib.Path("~/image.png")
1655             text = f"![inline image]({imgpath})"
1656             text, html, images, mdwn = markdown_with_inline_image_support(text)
1657
1658             for k, v in images.items():
1659                 assert k == f"file://{imgpath.expanduser()}"
1660                 break
1661
1662         @pytest.fixture
1663         def test_png(self):
1664             return (
1665                 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAE"
1666                 "AAAABCAAAAAA6fptVAAAACklEQVQI12P4DwABAQEAG7buVgAA"
1667             )
1668
1669         @pytest.mark.imgproc
1670         def test_markdown_inline_image_processor_base64(self, test_png):
1671             text = f"![1px white inlined]({test_png})"
1672             text, html, images, mdwn = markdown_with_inline_image_support(text)
1673
1674             assert 'src="cid:' in html
1675             assert "](cid:" in text
1676             assert len(images) == 1
1677             assert test_png in images
1678
1679         @pytest.mark.converter
1680         def test_converter_tree_inline_image_base64(
1681             self, test_png, fakefilefactory
1682         ):
1683             text = f"![inline base64 image]({test_png})"
1684             with fakefilefactory(content=text) as draft_f:
1685                 tree = convert_markdown_to_html(
1686                     draft_f,
1687                     filefactory=fakefilefactory,
1688                     related_to_html_only=False,
1689                 )
1690             assert tree.subtype == "relative"
1691             assert tree.children[0].subtype == "alternative"
1692             assert tree.children[1].subtype == "png"
1693             written = fakefilefactory.pop()
1694             assert tree.children[1].path == written[0]
1695             assert b"PNG" in written[1].read()
1696
1697         @pytest.mark.converter
1698         def test_converter_tree_inline_image_base64_related_to_html(
1699             self, test_png, fakefilefactory
1700         ):
1701             text = f"![inline base64 image]({test_png})"
1702             with fakefilefactory(content=text) as draft_f:
1703                 tree = convert_markdown_to_html(
1704                     draft_f,
1705                     filefactory=fakefilefactory,
1706                     related_to_html_only=True,
1707                 )
1708             assert tree.subtype == "alternative"
1709             assert tree.children[1].subtype == "relative"
1710             assert tree.children[1].children[1].subtype == "png"
1711             written = fakefilefactory.pop()
1712             assert tree.children[1].children[1].path == written[0]
1713             assert b"PNG" in written[1].read()
1714
1715         @pytest.mark.converter
1716         def test_converter_tree_inline_image_cid(
1717             self, const1, fakefilefactory
1718         ):
1719             text = f"![inline base64 image](cid:{const1})"
1720             with fakefilefactory(content=text) as draft_f:
1721                 tree = convert_markdown_to_html(
1722                     draft_f,
1723                     filefactory=fakefilefactory,
1724                     related_to_html_only=False,
1725                 )
1726             assert len(tree.children) == 2
1727             assert tree.children[0].cid != const1
1728             assert tree.children[0].type != "image"
1729             assert tree.children[1].cid != const1
1730             assert tree.children[1].type != "image"
1731
1732         @pytest.fixture
1733         def fakefilefactory(self):
1734             return FakeFileFactory()
1735
1736         @pytest.mark.imgcoll
1737         def test_inline_image_collection(
1738             self, test_png, const1, const2, fakefilefactory
1739         ):
1740             test_images = {test_png: InlineImageInfo(cid=const1, desc=const2)}
1741             relparts = collect_inline_images(
1742                 test_images, filefactory=fakefilefactory
1743             )
1744
1745             written = fakefilefactory.pop()
1746             assert b"PNG" in written[1].read()
1747
1748             assert relparts[0].subtype == "png"
1749             assert relparts[0].path == written[0]
1750             assert relparts[0].cid == const1
1751             assert const2 in relparts[0].desc
1752
1753         if _PYNLINER:
1754
1755             @pytest.mark.styling
1756             def test_apply_stylesheet(self):
1757                 html = "<p>Hello, world!</p>"
1758                 css = "p { color:red }"
1759                 out = apply_styling(html, css)
1760                 assert 'p style="color' in out
1761
1762             @pytest.mark.styling
1763             def test_apply_no_stylesheet(self, const1):
1764                 out = apply_styling(const1, None)
1765
1766             @pytest.mark.massage
1767             @pytest.mark.styling
1768             def test_massage_styling_to_converter(self):
1769                 css = "p { color:red }"
1770                 css_applied = []
1771
1772                 def converter(draft_f, css_f, **kwargs):
1773                     css = css_f.read()
1774                     css_applied.append(css)
1775                     return Part("text", "plain", draft_f.path, orig=True)
1776
1777                 with (
1778                     File() as draft_f,
1779                     File(mode="w") as cmd_f,
1780                     File(content=css) as css_f,
1781                 ):
1782                     do_massage(
1783                         draft_f=draft_f,
1784                         cmd_f=cmd_f,
1785                         css_f=css_f,
1786                         converter=converter,
1787                     )
1788                 assert css_applied[0] == css
1789
1790             @pytest.mark.converter
1791             @pytest.mark.styling
1792             def test_converter_apply_styles(
1793                 self, const1, monkeypatch, fakepath, fakefilefactory
1794             ):
1795                 css = "p { color:red }"
1796                 with (
1797                     monkeypatch.context() as m,
1798                     fakefilefactory(fakepath, content=const1) as draft_f,
1799                     fakefilefactory(content=css) as css_f,
1800                 ):
1801                     m.setattr(
1802                         markdown.Markdown,
1803                         "convert",
1804                         lambda s, t: f"<p>{t}</p>",
1805                     )
1806                     convert_markdown_to_html(
1807                         draft_f, css_f=css_f, filefactory=fakefilefactory
1808                     )
1809                 assert re.search(
1810                     r"color:.*red",
1811                     fakefilefactory[fakepath.with_suffix(".html")].read(),
1812                 )
1813
1814         if _PYGMENTS_CSS:
1815
1816             @pytest.mark.styling
1817             def test_apply_stylesheet_pygments(self):
1818                 html = (
1819                     f'<div class="{_CODEHILITE_CLASS}">'
1820                     "<pre>def foo():\n    return</pre></div>"
1821                 )
1822                 out = apply_styling(html, _PYGMENTS_CSS)
1823                 assert f'{_CODEHILITE_CLASS}" style="' in out
1824
1825         @pytest.mark.sig
1826         def test_signature_extraction_no_signature(self, const1):
1827             assert (const1, None, None) == extract_signature(const1)
1828
1829         @pytest.mark.sig
1830         def test_signature_extraction_just_text(self, const1, const2):
1831             origtext, textsig, htmlsig = extract_signature(
1832                 f"{const1}{EMAIL_SIG_SEP}{const2}"
1833             )
1834             assert origtext == const1
1835             assert textsig == const2
1836             assert htmlsig is None
1837
1838         @pytest.mark.sig
1839         def test_signature_extraction_html(
1840             self, fakepath, fakefilefactory, const1, const2
1841         ):
1842             sigconst = "HTML signature from {path} but as a string"
1843             sig = f'<div id="signature">{sigconst.format(path=fakepath)}</div>'
1844
1845             sig_f = fakefilefactory(fakepath, content=sig)
1846
1847             origtext, textsig, htmlsig = extract_signature(
1848                 f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER} {fakepath}\n{const2}",
1849                 filefactory=fakefilefactory,
1850             )
1851             assert origtext == const1
1852             assert textsig == const2
1853             assert htmlsig == sigconst.format(path=fakepath)
1854
1855         @pytest.mark.sig
1856         def test_signature_extraction_file_not_found(self, fakepath, const1):
1857             with pytest.raises(FileNotFoundError):
1858                 origtext, textsig, htmlsig = extract_signature(
1859                     f"{const1}{EMAIL_SIG_SEP}{HTML_SIG_MARKER}{fakepath}\n{const1}"
1860                 )
1861
1862         @pytest.mark.imgproc
1863         def test_image_registry(self, const1):
1864             reg = ImageRegistry()
1865             cid = reg.register(const1)
1866             assert "@" in cid
1867             assert not cid.startswith("<")
1868             assert not cid.endswith(">")
1869             assert const1 in reg
1870
1871         @pytest.mark.imgproc
1872         def test_image_registry_file_uri(self, const1):
1873             reg = ImageRegistry()
1874             reg.register("/some/path")
1875             for path in reg:
1876                 assert path.startswith("file://")
1877                 break
1878
1879         @pytest.mark.converter
1880         @pytest.mark.sig
1881         def test_converter_signature_handling(
1882             self, fakepath, fakefilefactory, monkeypatch
1883         ):
1884             mailparts = (
1885                 "This is the mail body\n",
1886                 f"{EMAIL_SIG_SEP}",
1887                 "This is a plain-text signature only",
1888             )
1889
1890             with (
1891                 fakefilefactory(
1892                     fakepath, content="".join(mailparts)
1893                 ) as draft_f,
1894                 monkeypatch.context() as m,
1895             ):
1896                 m.setattr(markdown.Markdown, "convert", lambda s, t: t)
1897                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1898
1899             soup = bs4.BeautifulSoup(
1900                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1901                 "html.parser",
1902             )
1903             body = soup.body.contents
1904
1905             assert mailparts[0] in body.pop(0)
1906
1907             sig = soup.select_one("#signature")
1908             assert sig == body.pop(0)
1909
1910             sep = sig.select_one("span.sig_separator")
1911             assert sep == sig.contents[0]
1912             assert f"\n{sep.text}\n" == EMAIL_SIG_SEP
1913
1914             assert mailparts[2] in sig.contents[1]
1915
1916         @pytest.mark.converter
1917         @pytest.mark.sig
1918         def test_converter_signature_handling_htmlsig(
1919             self, fakepath, fakepath2, fakefilefactory, monkeypatch
1920         ):
1921             mailparts = (
1922                 "This is the mail body",
1923                 f"{EMAIL_SIG_SEP}",
1924                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1925                 "This is the plain-text version",
1926             )
1927             htmlsig = "HTML Signature from {path} but as a string"
1928             html = f'<div id="signature"><p>{htmlsig.format(path=fakepath2)}</p></div>'
1929
1930             sig_f = fakefilefactory(fakepath2, content=html)
1931
1932             def mdwn_fn(t):
1933                 return t.upper()
1934
1935             with (
1936                 fakefilefactory(
1937                     fakepath, content="".join(mailparts)
1938                 ) as draft_f,
1939                 monkeypatch.context() as m,
1940             ):
1941                 m.setattr(
1942                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1943                 )
1944                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1945
1946             soup = bs4.BeautifulSoup(
1947                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1948                 "html.parser",
1949             )
1950             sig = soup.select_one("#signature")
1951             sig.span.extract()
1952
1953             assert HTML_SIG_MARKER not in sig.text
1954             assert htmlsig.format(path=fakepath2) == sig.text.strip()
1955
1956             plaintext = fakefilefactory[fakepath].read()
1957             assert plaintext.endswith(EMAIL_SIG_SEP + mailparts[-1])
1958
1959         @pytest.mark.converter
1960         @pytest.mark.sig
1961         def test_converter_signature_handling_htmlsig_with_image(
1962             self, fakepath, fakepath2, fakefilefactory, monkeypatch, test_png
1963         ):
1964             mailparts = (
1965                 "This is the mail body",
1966                 f"{EMAIL_SIG_SEP}",
1967                 f"{HTML_SIG_MARKER}{fakepath2}\n",
1968                 "This is the plain-text version",
1969             )
1970             htmlsig = (
1971                 "HTML Signature from {path} with image\n"
1972                 f'<img src="{test_png}">\n'
1973             )
1974             html = (
1975                 f'<div id="signature">{htmlsig.format(path=fakepath2)}</div>'
1976             )
1977
1978             sig_f = fakefilefactory(fakepath2, content=html)
1979
1980             def mdwn_fn(t):
1981                 return t.upper()
1982
1983             with (
1984                 fakefilefactory(
1985                     fakepath, content="".join(mailparts)
1986                 ) as draft_f,
1987                 monkeypatch.context() as m,
1988             ):
1989                 m.setattr(
1990                     markdown.Markdown, "convert", lambda s, t: mdwn_fn(t)
1991                 )
1992                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
1993
1994             assert fakefilefactory.pop()[0].suffix == ".png"
1995
1996             soup = bs4.BeautifulSoup(
1997                 fakefilefactory[fakepath.with_suffix(".html")].read(),
1998                 "html.parser",
1999             )
2000             assert soup.img.attrs["src"].startswith("cid:")
2001
2002         @pytest.mark.converter
2003         @pytest.mark.sig
2004         def test_converter_signature_handling_textsig_with_image(
2005             self, fakepath, fakefilefactory, test_png
2006         ):
2007             mailparts = (
2008                 "This is the mail body",
2009                 f"{EMAIL_SIG_SEP}",
2010                 "This is the plain-text version with image\n",
2011                 f"![Inline]({test_png})",
2012             )
2013             with (
2014                 fakefilefactory(
2015                     fakepath, content="".join(mailparts)
2016                 ) as draft_f,
2017             ):
2018                 tree = convert_markdown_to_html(
2019                     draft_f, filefactory=fakefilefactory
2020                 )
2021
2022             assert tree.subtype == "relative"
2023             assert tree.children[0].subtype == "alternative"
2024             assert tree.children[1].subtype == "png"
2025             written = fakefilefactory.pop()
2026             assert tree.children[1].path == written[0]
2027             assert written[1].read() == request.urlopen(test_png).read()
2028
2029         @pytest.mark.converter
2030         def test_converter_attribution_to_admonition(
2031             self, fakepath, fakefilefactory
2032         ):
2033             mailparts = (
2034                 "Regarding whatever",
2035                 "> blockquote line1",
2036                 "> blockquote line2",
2037                 "> ",
2038                 "> new para with **bold** text",
2039             )
2040             with fakefilefactory(
2041                 fakepath, content="\n".join(mailparts)
2042             ) as draft_f:
2043                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2044
2045             soup = bs4.BeautifulSoup(
2046                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2047                 "html.parser",
2048             )
2049             quote = soup.select_one("div.admonition.quote")
2050             assert quote
2051             assert (
2052                 soup.select_one("p.admonition-title").extract().text.strip()
2053                 == mailparts[0]
2054             )
2055
2056             p = quote.p.extract()
2057             assert p.text.strip() == "\n".join(p[2:] for p in mailparts[1:3])
2058
2059             p = quote.p.extract()
2060             assert p.contents[1].name == "strong"
2061
2062         @pytest.mark.converter
2063         def test_converter_attribution_to_admonition_with_blockquote(
2064             self, fakepath, fakefilefactory
2065         ):
2066             mailparts = (
2067                 "Regarding whatever",
2068                 "> blockquote line1",
2069                 "> blockquote line2",
2070                 "> ",
2071                 "> new para with **bold** text",
2072             )
2073             with fakefilefactory(
2074                 fakepath, content="\n".join(mailparts)
2075             ) as draft_f:
2076                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2077
2078             soup = bs4.BeautifulSoup(
2079                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2080                 "html.parser",
2081             )
2082             quote = soup.select_one("div.admonition.quote")
2083             assert quote.blockquote
2084
2085         @pytest.mark.converter
2086         def test_converter_attribution_to_admonition_multiple(
2087             self, fakepath, fakefilefactory
2088         ):
2089             mailparts = (
2090                 "Regarding whatever",
2091                 "> blockquote line1",
2092                 "> blockquote line2",
2093                 "",
2094                 "Normal text",
2095                 "",
2096                 "> continued emailquote",
2097                 "",
2098                 "Another email-quote",
2099                 "> something",
2100             )
2101             with fakefilefactory(
2102                 fakepath, content="\n".join(mailparts)
2103             ) as draft_f:
2104                 convert_markdown_to_html(draft_f, filefactory=fakefilefactory)
2105
2106             soup = bs4.BeautifulSoup(
2107                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2108                 "html.parser",
2109             )
2110             quote = soup.select_one("div.admonition.quote.continued").extract()
2111             assert quote
2112             assert (
2113                 quote.select_one("p.admonition-title").extract().text.strip()
2114                 == mailparts[0]
2115             )
2116
2117             p = quote.p.extract()
2118             assert p
2119
2120             quote = soup.select_one("div.admonition.quote.continued").extract()
2121             assert quote
2122             assert (
2123                 quote.select_one("p.admonition-title").extract().text.strip()
2124                 == mailparts[-2]
2125             )
2126
2127         @pytest.mark.converter
2128         def test_converter_format_flowed_with_nl2br(
2129             self, fakepath, fakefilefactory
2130         ):
2131             mailparts = (
2132                 "This is format=flowed text ",
2133                 "with spaces at the end ",
2134                 "and there ought be no newlines.",
2135                 "",
2136                 "[link](https://example.org) ",
2137                 "and text.",
2138                 "",
2139                 "[link text ",
2140                 "broken up](https://example.org).",
2141                 "",
2142                 "This is on a new line with a hard break  ",
2143                 "due to the double space",
2144             )
2145             with fakefilefactory(
2146                 fakepath, content="\n".join(mailparts)
2147             ) as draft_f:
2148                 convert_markdown_to_html(
2149                     draft_f, extensions=["nl2br"], filefactory=fakefilefactory
2150                 )
2151
2152             soup = bs4.BeautifulSoup(
2153                 fakefilefactory[fakepath.with_suffix(".html")].read(),
2154                 "html.parser",
2155             )
2156             import ipdb
2157
2158             p = soup.p.extract().text
2159             assert "".join(mailparts[0:3]) == p
2160             p = ''.join(map(str, soup.p.extract().contents))
2161             assert p == '<a href="https://example.org">link</a> and text.'
2162             p = ''.join(map(str, soup.p.extract().contents))
2163             assert (
2164                 p == '<a href="https://example.org">link text broken up</a>.'
2165             )
2166
2167         @pytest.mark.fileio
2168         def test_file_class_contextmanager(self, const1, monkeypatch):
2169             state = dict(o=False, c=False)
2170
2171             def fn(t):
2172                 state[t] = True
2173
2174             with monkeypatch.context() as m:
2175                 m.setattr(File, "open", lambda s: fn("o"))
2176                 m.setattr(File, "close", lambda s: fn("c"))
2177                 with File() as f:
2178                     assert state["o"]
2179                     assert not state["c"]
2180             assert state["c"]
2181
2182         @pytest.mark.fileio
2183         def test_file_class_no_path(self, const1):
2184             with File(mode="w+") as f:
2185                 f.write(const1, cache=False)
2186                 assert f.read(cache=False) == const1
2187
2188         @pytest.mark.fileio
2189         def test_file_class_path(self, const1, tmp_path):
2190             with File(tmp_path / "file", mode="w+") as f:
2191                 f.write(const1, cache=False)
2192                 assert f.read(cache=False) == const1
2193
2194         @pytest.mark.fileio
2195         def test_file_class_path_no_exists(self, fakepath):
2196             with pytest.raises(FileNotFoundError):
2197                 File(fakepath, mode="r").open()
2198
2199         @pytest.mark.fileio
2200         def test_file_class_cache(self, tmp_path, const1, const2):
2201             path = tmp_path / "file"
2202             file = File(path, mode="w+")
2203             with file as f:
2204                 f.write(const1, cache=True)
2205             with open(path, mode="w") as f:
2206                 f.write(const2)
2207             with file as f:
2208                 assert f.read(cache=True) == const1
2209
2210         @pytest.mark.fileio
2211         def test_file_class_cache_init(self, const1):
2212             file = File(path=None, mode="r", content=const1)
2213             with file as f:
2214                 assert f.read() == const1
2215
2216         @pytest.mark.fileio
2217         def test_file_class_content_or_path(self, fakepath, const1):
2218             with pytest.raises(RuntimeError):
2219                 file = File(path=fakepath, content=const1)
2220
2221         @pytest.mark.fileio
2222         def test_file_class_content_needs_read(self, const1):
2223             with pytest.raises(RuntimeError):
2224                 file = File(mode="w", content=const1)
2225
2226         @pytest.mark.fileio
2227         def test_file_class_write_persists_close(self, const1):
2228             f = File(mode="w+")
2229             with f:
2230                 f.write(const1)
2231             with f:
2232                 assert f.read() == const1
2233
2234         @pytest.mark.fileio
2235         def test_file_class_write_resets_read_cache(self, const1, const2):
2236             with File(mode="w+", content=const1) as f:
2237                 assert f.read() == const1
2238                 f.write(const2)
2239                 assert f.read() == const2
2240
2241         @pytest.mark.fileio
2242         def test_file_factory(self):
2243             fact = FileFactory()
2244             f = fact()
2245             assert isinstance(f, File)
2246             assert len(fact) == 1
2247             assert f in fact
2248             assert f == fact[0]
2249
2250         @pytest.mark.fileio
2251         def test_fake_file_factory(self, fakepath, fakefilefactory):
2252             fact = FakeFileFactory()
2253             f = fakefilefactory(fakepath)
2254             assert f.path == fakepath
2255             assert f == fakefilefactory[fakepath]
2256
2257         @pytest.mark.fileio
2258         def test_fake_file_factory_path_persistence(
2259             self, fakepath, fakefilefactory
2260         ):
2261             f1 = fakefilefactory(fakepath)
2262             assert f1 == fakefilefactory(fakepath)
2263
2264 except ImportError:
2265     pass