+++ /dev/null
-#!/usr/bin/python3
-#
-# markdown2html.py — simple Markdown-to-HTML converter for use with Mutt
-#
-# Mutt recently learnt [how to compose `multipart/alternative`
-# emails][1]. This script assumes a message has been composed using Markdown
-# (with a lot of pandoc extensions enabled), and translates it to `text/html`
-# for Mutt to tie into such a `multipart/alternative` message.
-#
-# [1]: https://gitlab.com/muttmua/mutt/commit/0e566a03725b4ad789aa6ac1d17cdf7bf4e7e354
-#
-# Configuration:
-# muttrc:
-# set send_multipart_alternative=yes
-# set send_multipart_alternative_filter=/path/to/markdown2html.py
-#
-# Optionally, Custom CSS styles will be read from `~/.mutt/markdown2html.css`,
-# if present.
-#
-# Requirements:
-# - python3
-# - PyPandoc (and pandoc installed, or downloaded)
-# - Pynliner
-#
-# Optional:
-# - Pygments, if installed, then syntax highlighting is enabled
-#
-# Latest version:
-# https://git.madduck.net/etc/mutt.git/blob_plain/HEAD:/.mutt/markdown2html
-#
-# Copyright © 2019 martin f. krafft <madduck@madduck.net>
-# Released under the GPL-2+ licence, just like Mutt itself.
-#
-
-import pypandoc
-import pynliner
-import re
-import os
-import sys
-
# Syntax highlighting is optional: when Pygments is importable, its default
# HTML style sheet (scoped to the `.sourceCode` selector) seeds the CSS;
# otherwise we start from an empty style sheet. Only the import itself is
# guarded, so an unrelated failure while building the formatter is not
# silently swallowed.
try:
    from pygments.formatters import get_formatter_by_name
except ImportError:
    DEFAULT_CSS = ""
else:
    formatter = get_formatter_by_name('html', style='default')
    DEFAULT_CSS = formatter.get_style_defs('.sourceCode')


# Built-in styles for the classes that the rest of this script puts on the
# generated HTML (quotes, quote lead-ins, signature, tables, headings, ...).
DEFAULT_CSS += '''
.block {
 padding: 0 0.5em;
 margin: 0;
 border-left: 2px solid #eee;
}
.quote, blockquote {
 padding: 0 0.5em;
 margin: 0;
 font-style: italic;
 border-left: 2px solid #666;
 color: #666;
 font-size: 80%;
}
.quotelead {
 margin-bottom: -1em;
 font-size: 80%;
}
.quotechar { display: none; }
.footnote-ref, .footnote-back { text-decoration: none;}
.signature {
 color: #999;
 font-family: monospace;
 white-space: pre;
 margin: 1em 0 0 0;
 font-size: 80%;
}
table, th, td {
 border-collapse: collapse;
 border: 1px solid #999;
}
th, td { padding: 0.5em; }
.header {
 background: #eee;
}
.even { background: #eee; }
h1, h2, h3, h4, h5, h6 {
 color: #666;
 background-color: #eee;
 padding-left: 0.5em
}
h1 { font-size: 130%; }
h2 { font-size: 120%; }
h3 { font-size: 110%; }
h4 { font-size: 107%; }
h5 { font-size: 103%; }
h6 { font-size: 100%; }
p { padding: 0 0.5em; }
pre { padding: 0 1em; }
'''

# User-supplied styles are appended last, so they can override the defaults
# above. The file is read inside a `with` block so that the handle is closed
# deterministically, and decoded as UTF-8 regardless of the locale that Mutt
# happens to run the filter in.
STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
                          'markdown2html.css')
if os.path.exists(STYLESHEET):
    with open(STYLESHEET, encoding='utf-8') as stylesheet:
        DEFAULT_CSS += stylesheet.read()

# Wrapper for the converted signature; `{sig}` receives the signature HTML.
SIGNATURE_HTML = \
    '<div class="signature"><span class="leader">-- </span>{sig}</div>'
-
-
def _preprocess_signature(sig):
    '''
    Hook for adjusting the signature text ahead of Markdown processing.

    At present nothing is changed: the signature is returned exactly as it
    was passed in.
    '''
    untouched = sig
    return untouched
-
def _preprocess_markdown(mdwn):
    '''
    Preprocess Markdown for handling by the converter.

    Takes the Markdown text `mdwn` and returns a new string in which

    * every line break inside a paragraph has been turned into a Markdown
      hard line break (two trailing spaces before the newline), and
    * every email address in angle brackets, e.g. `<user@example.org>`, has
      a sentinel inserted before the closing bracket, which
      `_postprocess_html` removes again after conversion.
    '''
    # Convert hard line breaks within paragraphs to 2 trailing spaces, which
    # is the markdown way of representing hard line breaks. The spaces are
    # spelled out as a product so that the count cannot silently be lost to
    # whitespace normalisation of this source file.
    #
    # The lookahead permits only horizontal whitespace before the next
    # non-blank character, so the regexp will not match between paragraphs
    # (i.e. across an empty line). As the lookahead consumes nothing,
    # consecutive one-character lines are all converted, too.
    hard_break = ' ' * 2
    ret = re.sub(r'(\S)\n(?=[^\S\n]*\S)', rf'\g<1>{hard_break}\n', mdwn)

    # Clients like Thunderbird need the leading '>' to be able to properly
    # create nested quotes, so we duplicate the symbol, the first instance
    # will tell pandoc to create a blockquote, while the second instance will
    # be a <span> containing the character, along with a class that causes CSS
    # to actually hide it from display. However, this does not work with the
    # text-mode HTML2text converters, and so it's left commented for now.
    #ret = re.sub(r'\n>', r' \n>[>]{.quotechar}', ret, flags=re.MULTILINE)

    # With the autolink_bare_uris extension, we do not need to put links into
    # angle brackets to have them converted, so let's conserve the brackets
    # when used around email addresses. Note that this needs a postprocessing
    # hack because the pandoc autolink converter includes the ampersand
    # (https://github.com/jgm/pandoc/issues/7398).
    #
    # Whitespace and further brackets are excluded from the address, so that
    # each bracketed address is matched on its own, even when several appear
    # on one line or a stray '<' precedes them.
    ret = re.sub(r'<([^\s<>@]+@[^\s<>@]+\.[^\s<>]+)>',
                 r'<\g<1> -PANDOC_BUG_7398->', ret)

    return ret
-
-
def _identify_quotes_for_later(mdwn):
    '''
    Email quoting such as:

    ```
    On 1970-01-01, you said:
    > The Flat Earth Society has members all around the globe.
    ```

    isn't really properly handled by Markdown, so let's do our best to
    identify the individual elements, and mark them, using a syntax similar to
    what pandoc uses already in some cases. As pandoc won't actually use these
    data (yet?), `_reformat_quotes` is called later to use these markers to
    slap the appropriate classes on the HTML tags.

    Takes the Markdown text `mdwn` and returns it with the following markers
    inserted (lines are re-joined with a newline, so a trailing newline of the
    input is not preserved):

    * `{.quotelead}` in front of a line that ends in ':' and is immediately
      followed by a quoted line; an empty line is added after it.
    * `{.quoteinitial}` in front of the text of the quoted line that directly
      follows such a lead-in.
    * `{.quotesubsequent}` in front of the text of any other quoted line
      whose preceding line is not quoted.

    Empty input yields an empty string.
    '''
    # A lead-in is a line that does not start with '>' and ends with ':'
    # (optionally followed by whitespace).
    lead_in = re.compile(r'^[^>]+.*:\s*$')
    # Group 1 captures *all* leading quote characters, so that nested quotes
    # keep their depth when the marker is inserted in front of the text.
    quoted_text = re.compile(r'^((?:\s*>\s*)+)(.+)')

    def mark(line, kind):
        '''Insert the `{.quote<kind>}` marker before the quoted text.'''
        return quoted_text.sub(rf'\g<1>{{.quote{kind}}}\g<2>', line)

    lines = mdwn.splitlines()
    last = len(lines) - 1
    ret = []
    for idx, cur in enumerate(lines):
        # prev/nxt are None on the first/last line respectively.
        prev = lines[idx - 1] if idx > 0 else None
        nxt = lines[idx + 1] if idx < last else None
        is_quote = cur.startswith('>')

        # The lead-in to a quote is a single line immediately preceding the
        # quote, and ending with ':'. Note that there could be multiple of
        # these. A candidate on the very last line has nothing following it
        # and is therefore passed through unchanged.
        if lead_in.match(cur) and nxt is not None and nxt.startswith('>'):
            ret.append(f'{{.quotelead}}{cur.strip()}')
            # pandoc needs an empty line before the blockquote, so
            # we enter one for the purpose of HTML rendition:
            ret.append('')

        # The first blockquote after such a lead-in gets marked as the
        # "initial" quote:
        elif is_quote and prev is not None and lead_in.match(prev):
            ret.append(mark(cur, 'initial'))

        # All other occurrences of blockquotes get the "subsequent" marker:
        elif is_quote and prev is not None and not prev.startswith('>'):
            ret.append(mark(cur, 'subsequent'))

        else:  # pass through everything else.
            ret.append(cur)

    return '\n'.join(ret)
-
-
def _reformat_quotes(html):
    '''
    Turn the quote markers left in the text earlier in the pipeline into HTML
    classes, so that CSS can be used to style the quotes.

    A paragraph starting with `{.quotelead}` becomes `<p class="quotelead">`.
    A (possibly nested) blockquote whose first paragraph starts with a
    `{.quote<kind>}` marker has the marker removed, and its outermost
    `<blockquote>` receives the classes `quote <kind>`.
    '''
    marked_quote = re.compile(
        r'<blockquote>\n((?:<blockquote>\n)*)<p>(?:\{\.quote(\w+)\})',
        flags=re.MULTILINE)
    classed_quote = r'<blockquote class="quote \g<2>">\n\g<1><p>'

    with_leads = html.replace('<p>{.quotelead}', '<p class="quotelead">')
    return marked_quote.sub(classed_quote, with_leads)
-
-
-
def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                         ext_enabled=None, ext_disabled=None,
                         standalone=True, selfcontained=True, title=None):
    '''
    Run pandoc (via pypandoc) to convert the Markdown in `mdwn` to HTML5.

    The pandoc input format is `inputfmt` with every extension named in
    `ext_enabled` switched on and every extension named in `ext_disabled`
    switched off; when either list is missing or empty, the defaults listed
    below are used. `standalone`, `selfcontained` and `title` add the
    corresponding pandoc command-line options. Returns whatever
    `pypandoc.convert_text` returns for the output format `outputfmt`.
    '''
    ext_enabled = ext_enabled or [
        'backtick_code_blocks',
        'line_blocks',
        'fancy_lists',
        'startnum',
        'definition_lists',
        'example_lists',
        'table_captions',
        'simple_tables',
        'multiline_tables',
        'grid_tables',
        'pipe_tables',
        'all_symbols_escapable',
        'intraword_underscores',
        'strikeout',
        'superscript',
        'subscript',
        'fenced_divs',
        'bracketed_spans',
        'footnotes',
        'inline_notes',
        'emoji',
        'tex_math_double_backslash',
        'autolink_bare_uris',
    ]
    ext_disabled = ext_disabled or [
        'tex_math_single_backslash',
        'tex_math_dollars',
        'smart',
        'raw_html',
    ]

    # pandoc format specification: <format>+<ext>+<ext>...-<ext>-<ext>...
    source_format = '{}+{}-{}'.format(inputfmt,
                                      '+'.join(ext_enabled),
                                      '-'.join(ext_disabled))

    # Each option is passed on only when its controlling parameter is truthy.
    switches = (
        (standalone, '--standalone'),
        (selfcontained, '--self-contained'),
        (title, f'--metadata=pagetitle:"{title}"'),
    )
    extra_args = [option for wanted, option in switches if wanted]

    return pypandoc.convert_text(mdwn, format=source_format, to=outputfmt,
                                 extra_args=extra_args)
-
-
def _apply_styling(html):
    '''
    Feed `html` and the module's `DEFAULT_CSS` to Pynliner and return the
    result of its run, i.e. the HTML with the styles inlined into the tags.
    '''
    inliner = pynliner.Pynliner().from_string(html)
    styled = inliner.with_cssString(DEFAULT_CSS)
    return styled.run()
-
-
def _postprocess_html(html):
    '''
    Clean up the generated and styled HTML, and return the result.
    '''
    # The Markdown preprocessing step puts a sentinel before the closing
    # angle bracket of email addresses to work around
    # https://github.com/jgm/pandoc/issues/7398. Replace the sentinel that
    # follows a link with the plain closing bracket again.
    sentinel = '</a> -PANDOC_BUG_7398->'
    closing_bracket = '</a>>'
    return html.replace(sentinel, closing_bracket)
-
-
def convert_markdown_to_html(mdwn):
    '''
    Converts the input Markdown to HTML, handling separately the body, as well
    as an optional signature.

    The input `mdwn` is split at the first line consisting of exactly `-- `
    (the signature separator). Everything before it is the body, which is
    preprocessed, converted to a standalone HTML document by pandoc, and has
    its quote markers turned into classes. Everything after it is the
    signature, which is converted as a fragment, wrapped using
    `SIGNATURE_HTML`, and inserted just before `</body>`. Finally, the styles
    are inlined and the HTML is postprocessed. Returns the resulting HTML.
    '''
    # maxsplit is passed by keyword: passing it positionally is deprecated
    # as of Python 3.13.
    parts = re.split(r'^-- $', mdwn, maxsplit=1, flags=re.MULTILINE)
    body = parts[0]
    sig = parts[1] if len(parts) == 2 else None

    html = ''
    if body:
        body = _preprocess_markdown(body)
        body = _identify_quotes_for_later(body)
        html = _convert_with_pandoc(body, standalone=True, selfcontained=True,
                                    title=None)
        html = html.replace('<title>Untitled</title>\n', '')
        html = _reformat_quotes(html)

    if sig:
        sig = _preprocess_signature(sig)
        sig = _preprocess_markdown(sig)
        sig = _convert_with_pandoc(sig, standalone=False, selfcontained=False)
        sig = SIGNATURE_HTML.format(sig='<br/>'.join(sig.splitlines()))
        eob = html.find('</body>')
        if eob == -1:
            # No closing body tag (e.g. the message has no body at all):
            # append the signature rather than slicing with the -1 returned
            # by find(), which would splice it in before the last character.
            html = f'{html}{sig}\n'
        else:
            html = f'{html[:eob]}{sig}\n{html[eob:]}'

    html = _apply_styling(html)
    html = _postprocess_html(html)

    return html
-
-
def main():
    '''
    Read Markdown from stdin, convert it to HTML, and write the result to
    stdout in the form mutt expects. Nothing is printed when the conversion
    yields no HTML.
    '''
    markdown = sys.stdin.read()
    html = convert_markdown_to_html(markdown)
    if not html:
        return

    # mutt expects the content type in the first line, followed by an empty
    # line and then the content itself:
    print('text/html', '', html, sep='\n')


if __name__ == '__main__':
    main()