X-Git-Url: https://git.madduck.net/etc/mutt.git/blobdiff_plain/a83d50d5a64488deb3796bbd0ca0a03b0516cc77..7013dfe93a05082153f79eca6c9e5c05b54d9d50:/.config/mutt/markdown2html diff --git a/.config/mutt/markdown2html b/.config/mutt/markdown2html new file mode 100755 index 0000000..309c22e --- /dev/null +++ b/.config/mutt/markdown2html @@ -0,0 +1,338 @@ +#!/usr/bin/python3 +# +# markdown2html.py — simple Markdown-to-HTML converter for use with Mutt +# +# Mutt recently learnt [how to compose `multipart/alternative` +# emails][1]. This script assumes a message has been composed using Markdown +# (with a lot of pandoc extensions enabled), and translates it to `text/html` +# for Mutt to tie into such a `multipart/alternative` message. +# +# [1]: https://gitlab.com/muttmua/mutt/commit/0e566a03725b4ad789aa6ac1d17cdf7bf4e7e354 +# +# Configuration: +# muttrc: +# set send_multipart_alternative=yes +# set send_multipart_alternative_filter=/path/to/markdown2html.py +# +# Optionally, custom CSS styles will be read from `~/.config/mutt/markdown2html.css`, +# if present. +# +# Requirements: +# - python3 +# - PyPandoc (and pandoc installed, or downloaded) +# - Pynliner +# +# Optional: +# - Pygments; if installed, syntax highlighting is enabled +# +# Latest version: +# https://git.madduck.net/etc/mutt.git/blob_plain/HEAD:/.mutt/markdown2html +# +# Copyright © 2019 martin f. krafft +# Released under the GPL-2+ licence, just like Mutt itself.
#

import os
import re
import sys

import pynliner
import pypandoc

try:
    from pygments.formatters import get_formatter_by_name
    formatter = get_formatter_by_name('html', style='default')
    DEFAULT_CSS = formatter.get_style_defs('.sourceCode')

except ImportError:
    # Pygments is optional: without it, no highlighting styles are emitted.
    DEFAULT_CSS = ""


DEFAULT_CSS += '''
.block {
    padding: 0 0.5em;
    margin: 0;
    border-left: 2px solid #eee;
}
.quote, blockquote {
    padding: 0 0.5em;
    margin: 0;
    font-style: italic;
    border-left: 2px solid #666;
    color: #666;
    font-size: 80%;
}
.quotelead {
    margin-bottom: -1em;
    font-size: 80%;
}
.quotechar { display: none; }
.footnote-ref, .footnote-back { text-decoration: none;}
.signature {
    color: #999;
    font-family: monospace;
    white-space: pre;
    margin: 1em 0 0 0;
    font-size: 80%;
}
table, th, td {
    border-collapse: collapse;
    border: 1px solid #999;
}
th, td { padding: 0.5em; }
.header {
    background: #eee;
}
.even { background: #eee; }
h1, h2, h3, h4, h5, h6 {
    color: #666;
    background-color: #eee;
    padding-left: 0.5em
}
h1 { font-size: 130%; }
h2 { font-size: 120%; }
h3 { font-size: 110%; }
h4 { font-size: 107%; }
h5 { font-size: 103%; }
h6 { font-size: 100%; }
p { padding: 0 0.5em; }
pre { padding: 0 1em; }
'''

# Optional user stylesheet, appended after (and thus able to override) the
# built-in rules above.
STYLESHEET = os.path.join(os.path.expanduser('~/.config/mutt'),
                          'markdown2html.css')
if os.path.exists(STYLESHEET):
    # Use a context manager so the file handle is closed deterministically.
    with open(STYLESHEET) as stylesheet:
        DEFAULT_CSS += stylesheet.read()

# NOTE(review): the markup inside this literal was stripped from the copy of
# the file this was reconstructed from; only "-- {sig}" survived. The
# `signature` class is taken from DEFAULT_CSS above. Any further inline
# markup (e.g. a wrapper around the "-- " leader) could not be recovered;
# confirm against the upstream file.
SIGNATURE_HTML = \
        '<div class="signature">-- {sig}</div>'

# Marker placed between an e-mail address and its closing bracket by
# _preprocess_markdown(), and removed again by _postprocess_html().
_PANDOC_BUG_7398_SENTINEL = ' -PANDOC_BUG_7398-'

# A line that does not start with '>' and ends with ':' (plus optional
# trailing whitespace): the lead-in to a quote.
_QUOTE_LEAD_RE = re.compile(r'^[^>]+.*:\s*$')

# Splits a quoted line into *all* of its leading '>' markers (group 1) and
# the quoted text (group 2).
_QUOTE_MARKS_RE = re.compile(r'^((?:\s*>\s*)+)(.+)')


def _preprocess_signature(sig):
    '''
    Preprocess the signature before markdown processing.

    Currently a no-op hook: the signature is returned unchanged.
    '''
    return sig


def _preprocess_markdown(mdwn):
    '''
    Preprocess Markdown for handling by the converter.

    Returns the input text with hard line breaks made explicit and with
    angle brackets around e-mail addresses protected from pandoc.
    '''
    # Convert hard line breaks within paragraphs to 2 trailing spaces, which
    # is the markdown way of representing hard line breaks. The following
    # line is only looked at (lookahead), not consumed, so that consecutive
    # one-character lines each get their own break. The pattern can also
    # match across a blank line; presumably harmless, as the added spaces
    # then merely precede a paragraph break.
    ret = re.sub(r'(\S)\n(?=\s*\S)', r'\g<1>  \n', mdwn)

    # Clients like Thunderbird need the leading '>' to be able to properly
    # create nested quotes, so we duplicate the symbol, the first instance
    # will tell pandoc to create a blockquote, while the second instance will
    # be a <span> containing the character, along with a class that causes CSS
    # to actually hide it from display. However, this does not work with the
    # text-mode HTML2text converters, and so it's left commented for now.
    #ret = re.sub(r'\n>', r'  \n>[&gt;]{.quotechar}', ret, flags=re.MULTILINE)

    # With the autolink_bare_uris extension, we do not need to put links into
    # angle brackets to have them converted, so let's conserve the brackets
    # when used around email addresses. Note that this needs a postprocessing
    # hack because the pandoc autolink converter includes the ampersand
    # (https://github.com/jgm/pandoc/issues/7398).
    # NOTE(review): the copy this was reconstructed from showed the
    # replacement with literal '<' / '>' (entities decoded by extraction).
    # The entity form is assumed here because _postprocess_html() has to
    # find the sentinel in escaped HTML output; confirm against upstream.
    ret = re.sub(r'<([^@]+@.+\.[^>]+)>',
                 r'&lt;\g<1>' + _PANDOC_BUG_7398_SENTINEL + '&gt;', ret)

    return ret


def _mark_quote(line, marker):
    '''
    Insert `{.marker}` between the leading '>' markers of `line` and the
    quoted text. Lines without any text after the markers are returned
    unchanged.
    '''
    return _QUOTE_MARKS_RE.sub(rf'\g<1>{{.{marker}}}\g<2>', line, count=1)


def _identify_quotes_for_later(mdwn):
    '''
    Email quoting such as:

    ```
    On 1970-01-01, you said:
    > The Flat Earth Society has members all around the globe.
    ```

    isn't really properly handled by Markdown, so let's do our best to
    identify the individual elements, and mark them, using a syntax similar to
    what pandoc uses already in some cases. As pandoc won't actually use these
    data (yet?), we call `_reformat_quotes` later to use these markers
    to slap the appropriate classes on the HTML tags.

    Returns the marked-up text, joined with newlines; empty input yields an
    empty string.
    '''
    lines = mdwn.splitlines()
    if not lines:
        return ''

    # Pair every line with its predecessor and successor; None marks the
    # missing neighbour of the first and last line respectively.
    previous_lines = [None] + lines[:-1]
    next_lines = lines[1:] + [None]

    ret = []
    for prev, cur, nxt in zip(previous_lines, lines, next_lines):

        # The lead-in to a quote is a single line immediately preceding the
        # quote, and ending with ':'. Note that there could be multiple of
        # these. A lead-in candidate on the very last line has no quote
        # following it, hence the explicit None check.
        if (_QUOTE_LEAD_RE.match(cur)
                and nxt is not None and nxt.startswith('>')):
            ret.append(f'{{.quotelead}}{cur.strip()}')
            # pandoc needs an empty line before the blockquote, so
            # we enter one for the purpose of HTML rendition:
            ret.append('')

        # The first blockquote after such a lead-in gets marked as the
        # "initial" quote:
        elif prev and _QUOTE_LEAD_RE.match(prev) and cur.startswith('>'):
            ret.append(_mark_quote(cur, 'quoteinitial'))

        # All other occurrences of blockquotes get the "subsequent" marker:
        elif (cur.startswith('>') and prev is not None
                and not prev.startswith('>')):
            ret.append(_mark_quote(cur, 'quotesubsequent'))

        else:  # pass through everything else.
            ret.append(cur)

    return '\n'.join(ret)


def _reformat_quotes(html):
    '''
    Earlier in the pipeline, we marked email quoting, using markers, which we
    now need to turn into HTML classes, so that we can use CSS to style them.
    '''
    # NOTE(review): the HTML tags in the literals below were stripped from
    # the copy this was reconstructed from. They are rebuilt from the markers
    # written by _identify_quotes_for_later() and the class names used in
    # DEFAULT_CSS; the exact attribute layout should be confirmed upstream.
    ret = html.replace('<p>{.quotelead}', '<p class="quotelead">')
    ret = re.sub(r'<blockquote>\n((?:<blockquote>\n)*)<p>(?:\{\.quote(\w+)\})',
                 r'<blockquote class="quote \g<2>">\n\g<1><p>',
                 ret, flags=re.MULTILINE)
    return ret


def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                         ext_enabled=None, ext_disabled=None,
                         standalone=True, selfcontained=True, title=None):
    '''
    Invoke pandoc to do the actual conversion of Markdown to HTML5.

    `ext_enabled` and `ext_disabled` are lists of pandoc extension names to
    switch on and off for the input format; when empty or None, the defaults
    below are used. `standalone`, `selfcontained` and `title` map to the
    corresponding pandoc command-line options.
    '''
    if not ext_enabled:
        ext_enabled = ['backtick_code_blocks',
                       'line_blocks',
                       'fancy_lists',
                       'startnum',
                       'definition_lists',
                       'example_lists',
                       'table_captions',
                       'simple_tables',
                       'multiline_tables',
                       'grid_tables',
                       'pipe_tables',
                       'all_symbols_escapable',
                       'intraword_underscores',
                       'strikeout',
                       'superscript',
                       'subscript',
                       'fenced_divs',
                       'bracketed_spans',
                       'footnotes',
                       'inline_notes',
                       'emoji',
                       'tex_math_double_backslash',
                       'autolink_bare_uris',
                       ]
    if not ext_disabled:
        ext_disabled = ['tex_math_single_backslash',
                        'tex_math_dollars',
                        'smart',
                        'raw_html',
                        ]

    # Build a format specifier of the form "markdown+ext1+ext2-ext3-ext4".
    enabled = '+'.join(ext_enabled)
    disabled = '-'.join(ext_disabled)
    inputfmt = f'{inputfmt}+{enabled}-{disabled}'

    args = []
    if standalone:
        args.append('--standalone')
    if selfcontained:
        args.append('--self-contained')
    if title:
        # NOTE(review): presumably no shell is involved in the pandoc
        # invocation, in which case the double quotes end up as part of the
        # title; left unchanged, confirm against pypandoc.
        args.append(f'--metadata=pagetitle:"{title}"')

    return pypandoc.convert_text(mdwn, format=inputfmt, to=outputfmt,
                                 extra_args=args)


def _apply_styling(html):
    '''
    Inline all styles defined and used into the individual HTML tags.
    '''
    return pynliner.Pynliner().from_string(html).with_cssString(DEFAULT_CSS).run()


def _postprocess_html(html):
    '''
    Postprocess the generated and styled HTML.
    '''

    # Preprocessing leaves a sentinel to work around
    # https://github.com/jgm/pandoc/issues/7398, and so we need to remove it:
    html = html.replace(_PANDOC_BUG_7398_SENTINEL + '&gt;', '&gt;')
    return html


def convert_markdown_to_html(mdwn):
    '''
    Converts the input Markdown to HTML, handling separately the body, as well
    as an optional signature.

    The signature is everything after the first line consisting of exactly
    "-- "; it is rendered separately and inserted at the end of the body.
    Returns the styled HTML document as a string.
    '''
    parts = re.split(r'^-- $', mdwn, maxsplit=1, flags=re.MULTILINE)
    body = parts[0]
    sig = parts[1] if len(parts) == 2 else None

    html = ''
    if body:
        body = _preprocess_markdown(body)
        body = _identify_quotes_for_later(body)
        html = _convert_with_pandoc(body, standalone=True, selfcontained=True,
                                    title=None)
        # NOTE(review): tag names reconstructed; only "Untitled\n" survived
        # in the copy this was rebuilt from.
        html = html.replace('<title>Untitled</title>\n', '')
        html = _reformat_quotes(html)

    if sig:
        sig = _preprocess_signature(sig)
        sig = _preprocess_markdown(sig)
        sig = _convert_with_pandoc(sig, standalone=False, selfcontained=False)
        # NOTE(review): the join string and the search string below were
        # stripped as well; '<br/>' and '</body>' are inferred from context.
        sig = SIGNATURE_HTML.format(sig='<br/>'.join(sig.splitlines()))
        eob = html.find('</body>')
        if eob < 0:
            # No closing body tag (e.g. empty body): append the signature
            # rather than splicing it in before the last character.
            eob = len(html)
        html = f'{html[:eob]}{sig}\n{html[eob:]}'

    html = _apply_styling(html)
    html = _postprocess_html(html)

    return html


def main():
    '''
    Convert text on stdin to HTML, and print it to stdout, like mutt would
    expect.
    '''
    html = convert_markdown_to_html(sys.stdin.read())
    if html:
        # mutt expects the content type in the first line, so:
        print(f'text/html\n\n{html}')


if __name__ == '__main__':
    main()