Work around Pandoc bug #7398

[etc/mutt.git] / .mutt / markdown2html
diff --git a/.mutt/markdown2html b/.mutt/markdown2html

index 540428f1d80f6485eb39357f2a92a976d179bf7a..60e6bea16e75c20e977d2b6eba02bc47d2e615e1 100755 (executable)
--- a/.mutt/markdown2html
+++ b/.mutt/markdown2html
@@ -48,20 +48,24 @@ except ImportError:
  
  
  DEFAULT_CSS += '''
-.quote {
+.block {
+    padding: 0 0.5em;
+    margin: 0;
+    border-left: 2px solid #eee;
+}
+.quote, blockquote {
      padding: 0 0.5em;
      margin: 0;
      font-style: italic;
-    border-left: 2px solid #ccc;
-    color: #999;
+    border-left: 2px solid #666;
+    color: #666;
      font-size: 80%;
  }
  .quotelead {
-    font-style: italic;
      margin-bottom: -1em;
-    color: #999;
      font-size: 80%;
  }
+.quotechar { display: none; }
  .footnote-ref, .footnote-back { text-decoration: none;}
  .signature {
      color: #999;
@@ -69,7 +73,29 @@ DEFAULT_CSS += '''
      white-space: pre;
      margin: 1em 0 0 0;
      font-size: 80%;
-}'''
+}
+table, th, td {
+    border-collapse: collapse;
+    border: 1px solid #999;
+}
+th, td { padding: 0.5em; }
+.header {
+    background: #eee;
+}
+.even { background: #eee; }
+h1, h2, h3, h4, h5, h6 {
+    color: #666;
+    background-color: #eee;
+    padding-left: 0.5em
+}
+h1 { font-size: 130%; }
+h2 { font-size: 120%; }
+h3 { font-size: 110%; }
+h4 { font-size: 107%; }
+h5 { font-size: 103%; }
+h6 { font-size: 100%; }
+p { padding: 0 0.5em; }
+'''
  
  STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
                            'markdown2html.css')
@@ -81,7 +107,6 @@ HTML_DOCUMENT = '''<!DOCTYPE html>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"/>
-<title>HTML E-Mail</title>
  </head><body class="email">
  {htmlbody}
  </body></html>'''
@@ -98,7 +123,22 @@ def _preprocess_markdown(mdwn):
      # convert hard line breaks within paragraphs to 2 trailing spaces, which
      # is the markdown way of representing hard line breaks. Note how the
      # regexp will not match between paragraphs.
-    ret = re.sub(r'(\S)\n(\s*\S)', r'\g<1>  \n\g<2>', mdwn, re.MULTILINE)
+    ret = re.sub(r'(\S)\n(\s*\S)', r'\g<1>  \n\g<2>', mdwn, flags=re.MULTILINE)
+
+    # Clients like Thunderbird need the leading '>' to be able to properly
+    # create nested quotes, so we duplicate the symbol: the first instance
+    # will tell pandoc to create a blockquote, while the second instance will
+    # be a <span> containing the character, along with a class that causes CSS
+    # to actually hide it from display. However, this does not work with the
+    # text-mode HTML2text converters, and so it's left commented out for now.
+    #ret = re.sub(r'\n>', r'  \n>[>]{.quotechar}', ret, flags=re.MULTILINE)
+
+    # With the autolink_bare_uris extension, we do not need to put links into
+    # angle brackets to have them converted, so let's conserve the brackets
+    # when used around email addresses. Note that this needs a postprocessing
+    # hack because the pandoc autolink converter includes the ampersand
+    # (https://github.com/jgm/pandoc/issues/7398).
+    ret = re.sub(r'<([^@]+@.+\.[^>]+)>', r'&lt;\g<1> -PANDOC_BUG_7398-&gt;', ret)
  
      return ret
  
@@ -153,13 +193,13 @@ def _identify_quotes_for_later(mdwn):
          elif prev and re.match(r'^.+:\s*$', prev) and cur.startswith('>'):
              ret.append(re.sub(r'^(\s*>\s*)+(.+)',
                                r'\g<1>{.quoteinitial}\g<2>',
-                              cur, re.MULTILINE))
+                              cur, flags=re.MULTILINE))
  
          # All other occurrences of blockquotes get the "subsequent" marker:
          elif cur.startswith('>') and prev and not prev.startswith('>'):
              ret.append(re.sub(r'^((?:\s*>\s*)+)(.+)',
                                r'\g<1>{.quotesubsequent}\g<2>',
-                              cur, re.MULTILINE))
+                              cur, flags=re.MULTILINE))
  
          else: # pass through everything else.
              ret.append(cur)
@@ -174,14 +214,14 @@ def _reformat_quotes(html):
      '''
      ret = html.replace('<p>{.quotelead}', '<p class="quotelead">')
      ret = re.sub(r'<blockquote>\n((?:<blockquote>\n)*)<p>(?:\{\.quote(\w+)\})',
-                 r'<blockquote class="quote \g<2>">\n\g<1><p>', ret, re.MULTILINE)
+                 r'<blockquote class="quote \g<2>">\n\g<1><p>', ret, flags=re.MULTILINE)
      return ret
  
  
  
  def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                           ext_enabled=None, ext_disabled=None,
-                         standalone=True, title="HTML E-Mail"):
+                         standalone=True, selfcontained=True, title=None):
      '''
      Invoke pandoc to do the actual conversion of Markdown to HTML5.
      '''
@@ -208,10 +248,12 @@ def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                         'inline_notes',
                         'emoji',
                         'tex_math_double_backslash',
+                       'autolink_bare_uris'
                        ]
      if not ext_disabled:
          ext_disabled = [ 'tex_math_single_backslash',
                           'tex_math_dollars',
+                         'smart',
                           'raw_html'
                         ]
  
@@ -222,6 +264,8 @@ def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
      args = []
      if standalone:
          args.append('--standalone')
+    if selfcontained:
+        args.append('--self-contained')
      if title:
          args.append(f'--metadata=pagetitle:"{title}"')
  
@@ -240,6 +284,10 @@ def _postprocess_html(html):
      '''
      Postprocess the generated and styled HTML.
      '''
+
+    # Preprocessing leaves a sentinel to work around
+    # https://github.com/jgm/pandoc/issues/7398, and so we need to remove it:
+    html = html.replace('</a> -PANDOC_BUG_7398-&gt;', '</a>&gt;')
      return html
  
  
@@ -259,7 +307,7 @@ def convert_markdown_to_html(mdwn):
      if body:
          body = _preprocess_markdown(body)
          body = _identify_quotes_for_later(body)
-        html = _convert_with_pandoc(body, standalone=False)
+        html = _convert_with_pandoc(body, standalone=True, selfcontained=True)
          html = _reformat_quotes(html)
  
      if sig: