import viewmailattachments script from akkana

author martin f. krafft <madduck@madduck.net>

Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)

committer martin f. krafft <madduck@madduck.net>

Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
author martin f. krafft <madduck@madduck.net>
Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
committer martin f. krafft <madduck@madduck.net>
Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
diff --git a/.mutt/keybindings b/.mutt/keybindings

index a23c46f756aea9cd18b7745f48b5a17d19f18865..3a76beee37b02255bf03dcf44a7a59fe5327bc51 100644 (file)
--- a/.mutt/keybindings
+++ b/.mutt/keybindings
@@ -87,4 +87,6 @@ macro attach  e       '<enter-command>set my_mailcap_path="$mailcap_path"<enter><enter-
  
  macro index,pager      M       '<enter-command>set my_weed="$weed"<enter><enter-command>set noweed<enter><pipe-message>formail -czx X-RSS-URL: | xargs sensible-browser<enter><enter-command>set weed="$my_weed"<enter>'
  
  
  macro index,pager      M       '<enter-command>set my_weed="$weed"<enter><enter-command>set noweed<enter><pipe-message>formail -czx X-RSS-URL: | xargs sensible-browser<enter><enter-command>set weed="$my_weed"<enter>'
  
+macro index,pager      <F10>   '<enter-command>set my_pipe_decode="$pipe_decode"<enter><enter-command>unset pipe_decode<enter><pipe-message>~/.mutt/viewmailattachments<enter><enter-command>set pipe_decode="$my_pipe_decode"<enter>' "view attachments in browser"
+
  # vim:sw=12:noet:sts=12:ts=12:ft=muttrc
  # vim:sw=12:noet:sts=12:ts=12:ft=muttrc
diff --git a/.mutt/viewmailattachments b/.mutt/viewmailattachments

new file mode 100755 (executable)

index 0000000..9ab3650
--- /dev/null
+++ b/.mutt/viewmailattachments
@@ -0,0 +1,381 @@
+#! /usr/bin/env python
+
+# Source: https://raw.githubusercontent.com/akkana/scripts/master/viewmailattachments
+
+# Take an mbox HTML message (e.g. from mutt), split it
+# and rewrite it so all of its attachments can be viewed in a browser
+# (perhaps after being converted to HTML from DOC or whatever first).
+#
+# Can be run from within a mailer like mutt, or independently
+# on a single message file.
+#
+# Grew out of a simpler script called viewhtmlmail.
+#
+# Copyright 2015 by Akkana Peck. Share and enjoy under the GPL v2 or later.
+# Changes:
+#   Holger Klawitter 2014: create a secure temp file and avoid temp mbox
+
+# To use it from mutt, put the following lines in your .muttrc:
+# macro  index  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
+# macro  pager  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
+
+import os, sys
+import re
+import time
+import shutil
+import email, email.header, mimetypes
+import tempfile
+import subprocess
+from bs4 import BeautifulSoup
+
+################################################
+# Some prefs:
+USE_WVHTML_FOR_DOC = False
+BROWSER_ARGS = []
+TMPDIR=os.path.join(os.getenv('TMPDIR'), 'volatile')
+
+# How many seconds do we need to wait for unoconv?
+# It defaults to 6, but on a 64-bit machine that's not enough.
+UNOCONV_STARTUP_TIME = "10"
+
+# Does the browser need a one-time argument for bringing up an initial window,
+# like Firefox's -private-window -new-instance ?
+BROWSER_FIRST_ARG = []
+
+# What browser to use:
+USE_QUICKBROWSE = False
+
+if USE_QUICKBROWSE:
+    BROWSER = "quickbrowse"
+
+    # Browser argument to precede new tabs:
+    BROWSER_FIRST_ARGS = []
+    BROWSER_ARGS = [ "--new-tab" ]
+
+    # Will the browser block when first run until its window is closed?
+    # If so, we have to run it in the background.
+    BROWSER_BACKGROUND = False
+
+    # Should we convert PDF to HTML? Depends on BROWSER:
+    # Firefox has a built-in PDF viewer, but quickbrowse doesn't.
+    CONVERT_PDF_TO_HTML = False
+
+else:    # Firefox in private browsing mode
+    BROWSER = "firefox"
+
+    # Not clear what to do here: Firefox has a built-in PDF viewer,
+    # but for some mime types it can't figure out that it should use it.
+    BROWSER_FIRST_ARGS = [ "-private-window" ]
+    BROWSER_ARGS = [ "-new-tab", "-private-window" ]
+    # Firefox doesn't run in the background.
+    BROWSER_BACKGROUND = True
+
+    CONVERT_PDF_TO_HTML = False
+
+# End global prefs
+################################################
+
+# Temporary for debugging:
+class mysubprocess:
+    @staticmethod
+    def call(arr):
+        print("\n\n================\n=== Calling: %s" % str(arr))
+        subprocess.call(arr)
+
+    @staticmethod
+    def call_bg(arr):
+        print("\n\n================\n=== Calling in background: %s" % str(arr))
+        subprocess.Popen(arr, shell=False,
+                         stdin=None, stdout=None, stderr=None)
+
+def view_message_attachments(fp, tmpdir):
+    '''View message attachments coming from the file-like object fp.
+    '''
+
+    msg = email.message_from_string(fp.read())
+
+    html_part = None
+    counter = 1
+    subfiles = []
+    subparts = []
+    htmlfiles = []
+    htmlparts = []
+
+    def tmp_file_name(part):
+        partfile=part.get_filename()
+        if partfile:
+            n, enc = email.header.decode_header(partfile)[0]
+            if n:
+                partfile = n.decode(enc) if enc else n
+
+        # Applications should really sanitize the given filename so that an
+        # email message can't be used to overwrite important files.
+        # As a first step, warn about ../
+        if partfile and '../' in partfile:
+            print("Eek! Possible security problem in filename %s" % partfile)
+            return None
+
+        # Make a filename in the tmp dir:
+        if not partfile:
+            ext = mimetypes.guess_extension(part.get_content_type())
+            if not ext:
+                # Use a generic bag-of-bits extension
+                ext = '.bin'
+            return tempfile.mkstemp(dir=tmpdir, suffix=ext, prefix='part-')[1]
+        else:
+            return os.path.join(tmpdir, partfile)
+
+    def save_tmp_file(part):
+        '''Saves this part's payload to a tmp file, returning the new filename.
+        '''
+        partfile = tmp_file_name(part)
+
+        tmpfile = open(partfile, "wb")
+        tmpfile.write(part.get_payload(decode=True))
+        tmpfile.close()
+        return partfile
+
+    # Walk through the message a first, preliminary time
+    # to separate out any images that might be referred to by
+    # an HTML part.
+    for part in msg.walk():
+        # walk() includes the top-level message
+        if part == msg:
+            # print "  Skipping the top-level message"
+            continue
+
+        if part.get_content_type() != "multipart/related":
+            continue
+
+        # It's multipart. Walk the subtree looking for image children.
+        for child in part.walk():
+            # print " ", child.get_content_type()
+
+            # At least for now, only save images as parts of multipart.
+            if child.get_content_maintype() != "image":
+                continue
+
+            filename = save_tmp_file(child)
+            # print "    Saved to", filename
+
+            # Rewrite image and other inline URLs in terms of content-id.
+            # Mailers may use Content-Id or Content-ID (or, presumably,
+            # other capitalizations). So we can't just look it up simply.
+            content_id = None
+            for k in list(child.keys()):
+                if k.lower() == 'content-id':
+                    # Remove angle brackets, if present.
+                    # child['Content-Id'] is unmutable: attempts to change it
+                    # are just ignored. Copy it to a local mutable string.
+                    content_id = child[k]
+                    if content_id.startswith('<') and \
+                       content_id.endswith('>'):
+                        content_id = content_id[1:-1]
+
+                    subfiles.append({ 'filename': filename,
+                                      'Content-Id': content_id })
+                    subparts.append(child)
+                    counter += 1
+                    fp = open(filename, 'wb')
+                    fp.write(child.get_payload(decode=True))
+                    fp.close()
+                    break     # no need to look at other keys
+
+            # if not content_id:
+            #     print filename, "doesn't have a Content-Id, not saving"
+            #     # print "keys:", child.keys()
+
+    # print "Subfiles:"
+    # for sf in subfiles:
+    #     print sf
+
+    # Call up the browser window right away,
+    # so the user can see something is happening.
+    # Firefox, alas, has no way from the commandline of calling up
+    # a new private window with content, then replacing that content.
+    # So we'll create a file that refreshes, so that when content is ready,
+    # it can redirect to the first content page.
+    def write_to_index(outfile, msg, timeout_secs, redirect_url):
+        if not redirect_url:
+            redirect_url = "file://" + outfile
+        ofp = open(outfile, "w")
+        ofp.write('''<html><head>
+<meta content="utf-8" http-equiv="encoding">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<meta http-equiv="refresh" content="%d;URL=%s">
+</head><body>
+<br><br><br><br><br><br><big><big>%s</big></big>
+</body></html>
+''' % (timeout_secs, redirect_url, msg))
+        ofp.close()
+
+    redirect_timeout = 3
+    pleasewait_file = tmpdir + "/index.html"
+    write_to_index(pleasewait_file, "Please wait ...", redirect_timeout, None)
+
+    cmd = [ BROWSER ]
+    if BROWSER_FIRST_ARGS:
+        cmd += BROWSER_FIRST_ARGS
+
+    cmd.append("file://" + pleasewait_file)
+    print("Calling: %s" % ' '.join(cmd))
+    if BROWSER_BACKGROUND:
+        mysubprocess.call_bg(cmd)
+    else:
+        mysubprocess.call(cmd)
+
+                       # "data:text/html,<br><br><br><br><br><h1>Translating documents, please wait ..."
+                       # Use JS if we can figure out how to close or replace
+                       # the "please wait" tab once we have content to show.
+                       # But for now, setTimeout() doesn't work at all
+                       # in newly popped up private windows.
+                       # "javascript:document.writeln('<br><br><br><br><br><h1>Translating documents, please wait ...');setTimeout(function(){alert('hi');}, 500);"
+                     # ])
+
+    # Now walk through looking for the real parts:
+    # HTML, doc and docx.
+    for part in msg.walk():
+
+        # part has, for example:
+        # items: [('Content-Type', 'image/jpeg'),
+        #         ('Content-Transfer-Encoding', 'base64'),
+        #         ('Content-ID', '<14.3631871432@web82503.mail.mud.yahoo.com>'),
+        #         ('Content-Disposition',
+        #          'attachment; filename="ATT0001414.jpg"')]
+        # keys: ['Content-Type', 'Content-Transfer-Encoding',
+        #        'Content-ID', 'Content-Disposition']
+        # values: ['image/jpeg', 'base64',
+        #          '<14.3631871432@web82503.mail.mud.yahoo.com>',
+        # 'attachment; filename="ATT0001414.jpg"']
+
+        # multipart/* are just containers
+        #if part.get_content_maintype() == 'multipart':
+        if part.is_multipart() or part.get_content_type == 'message/rfc822':
+            continue
+
+        if part.get_content_maintype() == "application":
+            partfile = save_tmp_file(part)
+            fileparts = os.path.splitext(partfile)
+            htmlfilename = fileparts[0] + ".html"
+
+            if part.get_content_subtype() == "msword" and USE_WVHTML_FOR_DOC:
+                mysubprocess.call(["wvHtml", partfile, htmlfilename])
+                htmlfiles.append(htmlfilename)
+
+            elif part.get_content_subtype() == \
+                 "vnd.openxmlformats-officedocument.wordprocessingml.document" \
+                 or part.get_content_subtype() == "msword":
+                mysubprocess.call(["unoconv", "-f", "html",
+                                   "-T", UNOCONV_STARTUP_TIME,
+                                   "-o", htmlfilename, partfile])
+
+                htmlfilename = os.path.join(fileparts[0] + ".html")
+                htmlfiles.append(htmlfilename)
+
+            # unoconv conversions from powerpoint to HTML drop all images.
+            # Try converting to PDF instead:
+            elif part.get_content_subtype() == "vnd.ms-powerpoint" \
+                 or part.get_content_subtype() == \
+                    "vnd.openxmlformats-officedocument.presentationml.presentation" :
+                pdffile = fileparts[0] + ".pdf"
+                mysubprocess.call(["unoconv", "-f", "pdf",
+                                   "-o", pdffile, partfile])
+                htmlfiles.append(pdffile)
+
+            elif part.get_content_subtype() == "pdf":
+                if CONVERT_PDF_TO_HTML:
+                    mysubprocess.call(["pdftohtml", "-s", partfile])
+
+                    # But pdftohtml is idiotic about output filename
+                    # and won't let you override it:
+                    htmlfiles.append(fileparts[0] + "-html.html")
+                else:
+                    htmlfiles.append(partfile)
+
+        elif part.get_content_maintype() == "text" and \
+             part.get_content_subtype() == 'html':
+
+            htmlfile = tmp_file_name(part)
+
+            fp = open(htmlfile, 'wb')
+            htmlsrc = part.get_payload(decode=True)
+
+            soup = BeautifulSoup(htmlsrc, "lxml")
+
+            # Substitute filenames for CIDs:
+            for tag in soup.body.find_all("img", src=True):
+                if tag['src'].lower().startswith("cid:"):
+                    for sf in subfiles:
+                        if tag['src'][4:] == sf['Content-Id']:
+                            tag['src'] = "file://" + sf['filename']
+            # for sf in subfiles:
+            #     htmlsrc = re.sub('cid: ?' + sf['Content-Id'],
+            #                      'file://' + sf['filename'],
+            #                      htmlsrc, flags=re.IGNORECASE)
+
+            # If it's HTML, we may need to add a meta charset tag. Sigh.
+            # If it's text/plain, there's nothing we can do to fix charset.
+            charset = part.get_charset()
+            if not charset:
+                charset = "UTF-8"
+            head = soup.find("head")
+            if not head:
+                head = soup.new_tag("head")
+                html = soup.find("html")
+                if html:
+                    html.insert(0, head)
+                else:
+                    soup.insert(0, head)
+
+            if not head.findAll("meta", attrs={"http-equiv": "encoding"}) and \
+               not head.findAll("meta", attrs={"http-equiv": "content-type"}):
+                meta = soup.new_tag("meta")
+                meta["content"] = charset
+                meta["http-equiv"] = "encoding"
+                head.insert(0, meta)
+                meta = soup.new_tag("meta")
+                meta["http-equiv"] = "content-type"
+                meta["content"] = "text/html; charset=%s" % charset
+                head.insert(0, meta)
+
+            fp.write(soup.prettify().encode("utf-8", "xmlcharrefreplace"))
+            fp.close()
+
+            htmlfiles.append(htmlfile)
+        elif part.get_content_maintype() == "image" and part not in subparts:
+            partfile = save_tmp_file(part)
+            htmlfiles.append(partfile)
+
+    # Done processing attachments. Call the browser for everything.
+    if htmlfiles:
+        # For the first URL, just put a redirect in
+        write_to_index(pleasewait_file,
+                       "Redirecting to file://" + htmlfiles[0],
+                       0, "file://" + htmlfiles[0])
+
+        for f in htmlfiles[1:]:
+            # If we don't wait for the new window to pop up before
+            # calling new-tab, bad things will happen: the document
+            # may load in a new tab in the old window and THEN pop up
+            # an unwanted third window. Go firefox.
+            # Not clear whether this is true for all browsers.
+            time.sleep(1)
+            if BROWSER_ARGS:
+                mysubprocess.call([BROWSER] + BROWSER_ARGS + ["file://" + f])
+            else:
+                mysubprocess.call([BROWSER, "file://" + f])
+
+    # Wait a while to make sure the browser has loads the imgaes, then clean up.
+    time.sleep(6)
+    shutil.rmtree(tmpdir)
+
+if __name__ == '__main__':
+    tmpdir = tempfile.mkdtemp(dir=TMPDIR)
+
+    if len(sys.argv) > 1:
+        for f in sys.argv[1:]:
+            fp = open(f)
+            view_message_attachments(fp, tmpdir)
+            fp.close()
+    else:
+        view_message_attachments(sys.stdin, tmpdir)
author	martin f. krafft <madduck@madduck.net>
	Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
committer	martin f. krafft <madduck@madduck.net>
	Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
.mutt/keybindings		patch \| blob \| history
.mutt/viewmailattachments	[new file with mode: 0755]	patch \| blob