Fix matching of blockquotes

[etc/mutt.git] / .mutt / markdown2html
diff --git a/.mutt/markdown2html b/.mutt/markdown2html

index 35b4ea2900f9a5764a3d0429b9679ffd65ba13a1..487d3db98ab227baca19a888d6cd925b7b412d8f 100755 (executable)
--- a/.mutt/markdown2html
+++ b/.mutt/markdown2html
@@ -95,6 +95,7 @@ h4 { font-size: 107%; }
  h5 { font-size: 103%; }
  h6 { font-size: 100%; }
  p { padding: 0 0.5em; }
  h5 { font-size: 103%; }
  h6 { font-size: 100%; }
  p { padding: 0 0.5em; }
+pre { padding: 0 1em; }
  '''
  
  STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
  '''
  
  STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
@@ -102,21 +103,16 @@ STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
  if os.path.exists(STYLESHEET):
      DEFAULT_CSS += open(STYLESHEET).read()
  
  if os.path.exists(STYLESHEET):
      DEFAULT_CSS += open(STYLESHEET).read()
  
-HTML_DOCUMENT = '''<!DOCTYPE html>
-<html><head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
-<meta charset="utf-8"/>
-<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"/>
-<title>HTML E-Mail</title>
-</head><body class="email">
-{htmlbody}
-</body></html>'''
-
-
  SIGNATURE_HTML = \
          '<div class="signature"><span class="leader">-- </span>{sig}</div>'
  
  
  SIGNATURE_HTML = \
          '<div class="signature"><span class="leader">-- </span>{sig}</div>'
  
  
+def _preprocess_signature(sig):
+    '''
+    Preprocess the signature before markdown processing.
+    '''
+    return sig
+
  def _preprocess_markdown(mdwn):
      '''
      Preprocess Markdown for handling by the converter.
  def _preprocess_markdown(mdwn):
      '''
      Preprocess Markdown for handling by the converter.
@@ -134,6 +130,13 @@ def _preprocess_markdown(mdwn):
      # text-mode HTML2text converters, and so it's left commented for now.
      #ret = re.sub(r'\n>', r'  \n>[>]{.quotechar}', ret, flags=re.MULTILINE)
  
      # text-mode HTML2text converters, and so it's left commented for now.
      #ret = re.sub(r'\n>', r'  \n>[>]{.quotechar}', ret, flags=re.MULTILINE)
  
+    # With the autolink_bare_uris extension, we do not need to put links into
+    # angle brackets to have them converted, so let's conserve the brackets
+    # when used around email addresses. Note that this needs a postprocessing
+    # hack because the pandoc autolink converter includes the ampersand
+    # (https://github.com/jgm/pandoc/issues/7398).
+    ret = re.sub(r'<([^@]+@.+\.[^>]+)>', r'&lt;\g<1> -PANDOC_BUG_7398-&gt;', ret)
+
      return ret
  
  
      return ret
  
  
@@ -175,7 +178,7 @@ def _identify_quotes_for_later(mdwn):
          # The lead-in to a quote is a single line immediately preceding the
          # quote, and ending with ':'. Note that there could be multiple of
          # these:
          # The lead-in to a quote is a single line immediately preceding the
          # quote, and ending with ':'. Note that there could be multiple of
          # these:
-        if re.match(r'^.+:\s*$', cur) and nxt.startswith('>'):
+        if re.match(r'^[^>]+.*:\s*$', cur) and nxt.startswith('>'):
              ret.append(f'{{.quotelead}}{cur.strip()}')
              # pandoc needs an empty line before the blockquote, so
              # we enter one for the purpose of HTML rendition:
              ret.append(f'{{.quotelead}}{cur.strip()}')
              # pandoc needs an empty line before the blockquote, so
              # we enter one for the purpose of HTML rendition:
@@ -184,13 +187,13 @@ def _identify_quotes_for_later(mdwn):
  
          # The first blockquote after such a lead-in gets marked as the
          # "initial" quote:
  
          # The first blockquote after such a lead-in gets marked as the
          # "initial" quote:
-        elif prev and re.match(r'^.+:\s*$', prev) and cur.startswith('>'):
+        elif prev and re.match(r'^[^>]+.*:\s*$', prev) and cur.startswith('>'):
              ret.append(re.sub(r'^(\s*>\s*)+(.+)',
                                r'\g<1>{.quoteinitial}\g<2>',
                                cur, flags=re.MULTILINE))
  
          # All other occurrences of blockquotes get the "subsequent" marker:
              ret.append(re.sub(r'^(\s*>\s*)+(.+)',
                                r'\g<1>{.quoteinitial}\g<2>',
                                cur, flags=re.MULTILINE))
  
          # All other occurrences of blockquotes get the "subsequent" marker:
-        elif cur.startswith('>') and prev and not prev.startswith('>'):
+        elif cur.startswith('>') and prev is not None and not prev.startswith('>'):
              ret.append(re.sub(r'^((?:\s*>\s*)+)(.+)',
                                r'\g<1>{.quotesubsequent}\g<2>',
                                cur, flags=re.MULTILINE))
              ret.append(re.sub(r'^((?:\s*>\s*)+)(.+)',
                                r'\g<1>{.quotesubsequent}\g<2>',
                                cur, flags=re.MULTILINE))
@@ -215,7 +218,7 @@ def _reformat_quotes(html):
  
  def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                           ext_enabled=None, ext_disabled=None,
  
  def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                           ext_enabled=None, ext_disabled=None,
-                         standalone=True, title="HTML E-Mail"):
+                         standalone=True, selfcontained=True, title=None):
      '''
      Invoke pandoc to do the actual conversion of Markdown to HTML5.
      '''
      '''
      Invoke pandoc to do the actual conversion of Markdown to HTML5.
      '''
@@ -258,6 +261,8 @@ def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
      args = []
      if standalone:
          args.append('--standalone')
      args = []
      if standalone:
          args.append('--standalone')
+    if selfcontained:
+        args.append('--self-contained')
      if title:
          args.append(f'--metadata=pagetitle:"{title}"')
  
      if title:
          args.append(f'--metadata=pagetitle:"{title}"')
  
@@ -276,6 +281,10 @@ def _postprocess_html(html):
      '''
      Postprocess the generated and styled HTML.
      '''
      '''
      Postprocess the generated and styled HTML.
      '''
+
+    # Preprocessing leaves a sentinel to work around
+    # https://github.com/jgm/pandoc/issues/7398, and so we need to remove it:
+    html = html.replace('</a> -PANDOC_BUG_7398-&gt;', '</a>&gt;')
      return html
  
  
      return html
  
  
@@ -295,14 +304,19 @@ def convert_markdown_to_html(mdwn):
      if body:
          body = _preprocess_markdown(body)
          body = _identify_quotes_for_later(body)
      if body:
          body = _preprocess_markdown(body)
          body = _identify_quotes_for_later(body)
-        html = _convert_with_pandoc(body, standalone=False)
+        html = _convert_with_pandoc(body, standalone=True, selfcontained=True,
+                                    title=None)
+        html = html.replace('<title>Untitled</title>\n','')
          html = _reformat_quotes(html)
  
      if sig:
          html = _reformat_quotes(html)
  
      if sig:
+        sig = _preprocess_signature(sig)
          sig = _preprocess_markdown(sig)
          sig = _preprocess_markdown(sig)
-        html += SIGNATURE_HTML.format(sig='<br/>'.join(sig.splitlines()))
+        sig = _convert_with_pandoc(sig, standalone=False, selfcontained=False)
+        sig = SIGNATURE_HTML.format(sig='<br/>'.join(sig.splitlines()))
+        eob = html.find('</body>')
+        html = f'{html[:eob]}{sig}\n{html[eob:]}'
  
  
-    html = HTML_DOCUMENT.format(htmlbody=html)
      html = _apply_styling(html)
      html = _postprocess_html(html)
  
      html = _apply_styling(html)
      html = _postprocess_html(html)