git.madduck.net Git - etc/mutt.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

import viewmailattachments script from alana
author martin f. krafft <madduck@madduck.net>
Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
committer martin f. krafft <madduck@madduck.net>
Sun, 12 Aug 2018 01:17:17 +0000 (13:17 +1200)
.mutt/keybindings
.mutt/viewmailattachments [new file with mode: 0755]

index a23c46f756aea9cd18b7745f48b5a17d19f18865..3a76beee37b02255bf03dcf44a7a59fe5327bc51 100644 (file)
@@ -87,4 +87,6 @@ macro attach  e       '<enter-command>set my_mailcap_path="$mailcap_path"<enter><enter-
 
 macro index,pager      M       '<enter-command>set my_weed="$weed"<enter><enter-command>set noweed<enter><pipe-message>formail -czx X-RSS-URL: | xargs sensible-browser<enter><enter-command>set weed="$my_weed"<enter>'
 
 
 macro index,pager      M       '<enter-command>set my_weed="$weed"<enter><enter-command>set noweed<enter><pipe-message>formail -czx X-RSS-URL: | xargs sensible-browser<enter><enter-command>set weed="$my_weed"<enter>'
 
+macro index,pager      <F10>   '<enter-command>set my_pipe_decode="$pipe_decode"<enter><enter-command>unset pipe_decode<enter><pipe-message>~/.mutt/viewmailattachments<enter><enter-command>set pipe_decode="$my_pipe_decode"<enter>' "view attachments in browser"
+
 # vim:sw=12:noet:sts=12:ts=12:ft=muttrc
 # vim:sw=12:noet:sts=12:ts=12:ft=muttrc
diff --git a/.mutt/viewmailattachments b/.mutt/viewmailattachments
new file mode 100755 (executable)
index 0000000..9ab3650
--- /dev/null
@@ -0,0 +1,381 @@
+#! /usr/bin/env python
+
+# Source: https://raw.githubusercontent.com/akkana/scripts/master/viewmailattachments
+
+# Take an mbox HTML message (e.g. from mutt), split it
+# and rewrite it so all of its attachments can be viewed in a browser
+# (perhaps after being converted to HTML from DOC or whatever first).
+#
+# Can be run from within a mailer like mutt, or independently
+# on a single message file.
+#
+# Grew out of a simpler script called viewhtmlmail.
+#
+# Copyright 2015 by Akkana Peck. Share and enjoy under the GPL v2 or later.
+# Changes:
+#   Holger Klawitter 2014: create a secure temp file and avoid temp mbox
+
+# To use it from mutt, put the following lines in your .muttrc:
+# macro  index  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
+# macro  pager  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
+
+import os, sys
+import re
+import time
+import shutil
+import email, email.header, mimetypes
+import tempfile
+import subprocess
+from bs4 import BeautifulSoup
+
+################################################
+# Some prefs:
+
+# Convert .doc attachments with wvHtml rather than unoconv?
+USE_WVHTML_FOR_DOC = False
+
+# Directory under which the per-run scratch directory is created.
+# Honour $TMPDIR when it is set; otherwise fall back to the platform
+# default instead of failing in os.path.join(None, ...) at start-up.
+# The 'volatile' subdirectory itself is not created here.
+TMPDIR = os.path.join(os.getenv('TMPDIR') or tempfile.gettempdir(), 'volatile')
+
+# How many seconds do we need to wait for unoconv?
+# It defaults to 6, but on a 64-bit machine that's not enough.
+# Kept as a string because it is passed as a command-line argument.
+UNOCONV_STARTUP_TIME = "10"
+
+# Defaults; both are overridden per browser below.
+# BROWSER_FIRST_ARGS: one-time arguments for bringing up the initial
+# window, like Firefox's -private-window -new-instance.
+# BROWSER_ARGS: arguments that precede every further URL.
+BROWSER_FIRST_ARGS = []
+BROWSER_ARGS = []
+
+# What browser to use:
+USE_QUICKBROWSE = False
+
+if USE_QUICKBROWSE:
+    BROWSER = "quickbrowse"
+
+    # Browser argument to precede new tabs:
+    BROWSER_FIRST_ARGS = []
+    BROWSER_ARGS = [ "--new-tab" ]
+
+    # Will the browser block when first run until its window is closed?
+    # If so, we have to run it in the background.
+    BROWSER_BACKGROUND = False
+
+    # Should we convert PDF to HTML? Depends on BROWSER:
+    # Firefox has a built-in PDF viewer, but quickbrowse doesn't.
+    CONVERT_PDF_TO_HTML = False
+
+else:    # Firefox in private browsing mode
+    BROWSER = "firefox"
+
+    BROWSER_FIRST_ARGS = [ "-private-window" ]
+    BROWSER_ARGS = [ "-new-tab", "-private-window" ]
+
+    # Firefox doesn't put itself in the background, so we have to.
+    BROWSER_BACKGROUND = True
+
+    # Not clear what to do here: Firefox has a built-in PDF viewer,
+    # but for some mime types it can't figure out that it should use it.
+    CONVERT_PDF_TO_HTML = False
+
+# End global prefs
+################################################
+
+# Temporary for debugging:
+class mysubprocess:
+    '''Wrappers around subprocess that announce each command on stdout
+    before running it.
+    '''
+
+    @staticmethod
+    def call(arr):
+        '''Print the argument list arr, then run it and wait for it.'''
+        banner = "\n\n================\n=== Calling: %s"
+        print(banner % str(arr))
+        subprocess.call(arr)
+
+    @staticmethod
+    def call_bg(arr):
+        '''Print the argument list arr, then start it without waiting.'''
+        banner = "\n\n================\n=== Calling in background: %s"
+        print(banner % str(arr))
+        # No shell, and no redirection of the child's standard streams.
+        popen_options = dict(shell=False, stdin=None, stdout=None, stderr=None)
+        subprocess.Popen(arr, **popen_options)
+
+def view_message_attachments(fp, tmpdir):
+    '''View message attachments coming from the file-like object fp.
+
+    The message is parsed, its parts are written below tmpdir (office
+    documents are first converted with external tools), and the results
+    are opened in BROWSER: the first one through a redirecting index
+    page, the rest in additional tabs.
+
+    tmpdir must be an existing, writable directory. It is removed
+    before this function returns, so it cannot be reused for another
+    message.
+    '''
+
+    msg = email.message_from_string(fp.read())
+
+    subfiles = []     # inline images: dicts with 'filename', 'Content-Id'
+    subparts = []     # the message parts those inline images came from
+    htmlfiles = []    # everything to be shown in the browser, in order
+
+    def tmp_file_name(part):
+        '''Return a path below tmpdir for this part, or None if the
+        filename given in the message looks unsafe.
+        '''
+        partfile = part.get_filename()
+        if partfile:
+            n, enc = email.header.decode_header(partfile)[0]
+            if isinstance(n, bytes):
+                # Encoded words come back as bytes; treat an undeclared
+                # charset as ASCII so we always end up with text.
+                n = n.decode(enc or 'ascii', 'replace')
+            if n:
+                partfile = n
+
+        # Applications should really sanitize the given filename so that an
+        # email message can't be used to overwrite important files.
+        # As a first step, refuse ../ and absolute paths: for the latter,
+        # os.path.join() below would silently discard tmpdir.
+        if partfile and ('../' in partfile or os.path.isabs(partfile)):
+            print("Eek! Possible security problem in filename %s" % partfile)
+            return None
+
+        # Make a filename in the tmp dir:
+        if not partfile:
+            ext = mimetypes.guess_extension(part.get_content_type())
+            if not ext:
+                # Use a generic bag-of-bits extension
+                ext = '.bin'
+            fd, name = tempfile.mkstemp(dir=tmpdir, suffix=ext,
+                                        prefix='part-')
+            # Only the name is needed; don't leak the open descriptor.
+            os.close(fd)
+            return name
+
+        return os.path.join(tmpdir, partfile)
+
+    def save_tmp_file(part):
+        '''Saves this part's payload to a tmp file, returning the new
+        filename, or None if the part's filename was rejected.
+        '''
+        partfile = tmp_file_name(part)
+        if not partfile:
+            return None
+
+        with open(partfile, "wb") as tmpfile:
+            tmpfile.write(part.get_payload(decode=True))
+        return partfile
+
+    def write_to_index(outfile, text, timeout_secs, redirect_url):
+        '''Write a page to outfile that shows text and, after
+        timeout_secs seconds, loads redirect_url (or reloads itself
+        if redirect_url is empty).
+        '''
+        if not redirect_url:
+            redirect_url = "file://" + outfile
+        with open(outfile, "w") as ofp:
+            ofp.write('''<html><head>
+<meta content="utf-8" http-equiv="encoding">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
+<meta http-equiv="refresh" content="%d;URL=%s">
+</head><body>
+<br><br><br><br><br><br><big><big>%s</big></big>
+</body></html>
+''' % (timeout_secs, redirect_url, text))
+
+    # Walk through the message a first, preliminary time
+    # to separate out any images that might be referred to by
+    # an HTML part.
+    for part in msg.walk():
+        # walk() includes the top-level message
+        if part == msg:
+            continue
+
+        if part.get_content_type() != "multipart/related":
+            continue
+
+        # It's multipart/related. Walk the subtree looking for image children.
+        for child in part.walk():
+            # At least for now, only save images as parts of multipart.
+            if child.get_content_maintype() != "image":
+                continue
+
+            filename = save_tmp_file(child)
+            if not filename:
+                continue
+
+            # Rewrite image and other inline URLs in terms of content-id.
+            # Mailers may use Content-Id or Content-ID (or, presumably,
+            # other capitalizations); header lookup on a message is
+            # case-insensitive, so one lookup covers them all.
+            content_id = child.get('Content-ID')
+            if content_id is None:
+                # Without a Content-Id no HTML part can refer to this
+                # image; the second pass will show it on its own.
+                continue
+
+            # Remove angle brackets, if present.
+            # child['Content-Id'] is immutable: attempts to change it
+            # are just ignored. Work on a local copy instead.
+            content_id = content_id.strip()
+            if content_id.startswith('<') and content_id.endswith('>'):
+                content_id = content_id[1:-1]
+
+            subfiles.append({ 'filename': filename,
+                              'Content-Id': content_id })
+            subparts.append(child)
+
+    # Call up the browser window right away,
+    # so the user can see something is happening.
+    # Firefox, alas, has no way from the commandline of calling up
+    # a new private window with content, then replacing that content.
+    # So we'll create a file that refreshes, so that when content is ready,
+    # it can redirect to the first content page.
+    # (A data: or javascript: URL would be nicer, but setTimeout() doesn't
+    # work at all in newly popped up private windows.)
+    redirect_timeout = 3
+    pleasewait_file = os.path.join(tmpdir, "index.html")
+    write_to_index(pleasewait_file, "Please wait ...", redirect_timeout, None)
+
+    cmd = [ BROWSER ]
+    if BROWSER_FIRST_ARGS:
+        cmd += BROWSER_FIRST_ARGS
+
+    cmd.append("file://" + pleasewait_file)
+    print("Calling: %s" % ' '.join(cmd))
+    if BROWSER_BACKGROUND:
+        mysubprocess.call_bg(cmd)
+    else:
+        mysubprocess.call(cmd)
+
+    # Now walk through looking for the real parts:
+    # HTML, doc and docx.
+    for part in msg.walk():
+
+        # part has, for example:
+        # items: [('Content-Type', 'image/jpeg'),
+        #         ('Content-Transfer-Encoding', 'base64'),
+        #         ('Content-ID', '<14.3631871432@web82503.mail.mud.yahoo.com>'),
+        #         ('Content-Disposition',
+        #          'attachment; filename="ATT0001414.jpg"')]
+
+        # multipart/* are just containers
+        if part.is_multipart() or part.get_content_type() == 'message/rfc822':
+            continue
+
+        maintype = part.get_content_maintype()
+        subtype = part.get_content_subtype()
+
+        if maintype == "application":
+            partfile = save_tmp_file(part)
+            if not partfile:
+                continue
+            fileparts = os.path.splitext(partfile)
+            htmlfilename = fileparts[0] + ".html"
+
+            if subtype == "msword" and USE_WVHTML_FOR_DOC:
+                mysubprocess.call(["wvHtml", partfile, htmlfilename])
+                htmlfiles.append(htmlfilename)
+
+            elif subtype in (
+                    "vnd.openxmlformats-officedocument.wordprocessingml.document",
+                    "msword"):
+                mysubprocess.call(["unoconv", "-f", "html",
+                                   "-T", UNOCONV_STARTUP_TIME,
+                                   "-o", htmlfilename, partfile])
+                htmlfiles.append(htmlfilename)
+
+            # unoconv conversions from powerpoint to HTML drop all images.
+            # Try converting to PDF instead:
+            elif subtype in (
+                    "vnd.ms-powerpoint",
+                    "vnd.openxmlformats-officedocument.presentationml.presentation"):
+                pdffile = fileparts[0] + ".pdf"
+                mysubprocess.call(["unoconv", "-f", "pdf",
+                                   "-o", pdffile, partfile])
+                htmlfiles.append(pdffile)
+
+            elif subtype == "pdf":
+                if CONVERT_PDF_TO_HTML:
+                    mysubprocess.call(["pdftohtml", "-s", partfile])
+
+                    # But pdftohtml is idiotic about output filename
+                    # and won't let you override it:
+                    htmlfiles.append(fileparts[0] + "-html.html")
+                else:
+                    htmlfiles.append(partfile)
+
+            # Other application/* parts are saved but not displayed.
+
+        elif maintype == "text" and subtype == 'html':
+
+            htmlfile = tmp_file_name(part)
+            if not htmlfile:
+                continue
+
+            htmlsrc = part.get_payload(decode=True)
+            soup = BeautifulSoup(htmlsrc, "lxml")
+
+            # Substitute filenames for CIDs. Search the whole document:
+            # soup.body is None when the part has no usable markup.
+            for tag in soup.find_all("img", src=True):
+                if tag['src'].lower().startswith("cid:"):
+                    for sf in subfiles:
+                        if tag['src'][4:] == sf['Content-Id']:
+                            tag['src'] = "file://" + sf['filename']
+
+            # If it's HTML, we may need to add a meta charset tag. Sigh.
+            # If it's text/plain, there's nothing we can do to fix charset.
+            charset = part.get_charset()
+            if not charset:
+                charset = "UTF-8"
+            head = soup.find("head")
+            if not head:
+                head = soup.new_tag("head")
+                html = soup.find("html")
+                if html:
+                    html.insert(0, head)
+                else:
+                    soup.insert(0, head)
+
+            if not head.find_all("meta", attrs={"http-equiv": "encoding"}) and \
+               not head.find_all("meta", attrs={"http-equiv": "content-type"}):
+                meta = soup.new_tag("meta")
+                meta["content"] = charset
+                meta["http-equiv"] = "encoding"
+                head.insert(0, meta)
+                meta = soup.new_tag("meta")
+                meta["http-equiv"] = "content-type"
+                meta["content"] = "text/html; charset=%s" % charset
+                head.insert(0, meta)
+
+            with open(htmlfile, 'wb') as htmlfp:
+                htmlfp.write(soup.prettify().encode("utf-8",
+                                                    "xmlcharrefreplace"))
+
+            htmlfiles.append(htmlfile)
+
+        elif maintype == "image" and part not in subparts:
+            # A free-standing image, not one embedded in an HTML part.
+            partfile = save_tmp_file(part)
+            if partfile:
+                htmlfiles.append(partfile)
+
+    # Done processing attachments. Call the browser for everything.
+    if htmlfiles:
+        # For the first URL, just put a redirect in
+        write_to_index(pleasewait_file,
+                       "Redirecting to file://" + htmlfiles[0],
+                       0, "file://" + htmlfiles[0])
+
+        for f in htmlfiles[1:]:
+            # If we don't wait for the new window to pop up before
+            # calling new-tab, bad things will happen: the document
+            # may load in a new tab in the old window and THEN pop up
+            # an unwanted third window. Go firefox.
+            # Not clear whether this is true for all browsers.
+            time.sleep(1)
+            mysubprocess.call([BROWSER] + list(BROWSER_ARGS) + ["file://" + f])
+
+    # Wait a while to make sure the browser has loaded the images,
+    # then clean up.
+    time.sleep(6)
+    shutil.rmtree(tmpdir)
+
+if __name__ == '__main__':
+    # Messages come from the files named on the command line or, with no
+    # arguments, from standard input (e.g. when piped from mutt).
+    # view_message_attachments() removes the directory it is given, so
+    # every message gets a fresh scratch directory below TMPDIR.
+    if len(sys.argv) > 1:
+        for f in sys.argv[1:]:
+            with open(f) as fp:
+                view_message_attachments(fp, tempfile.mkdtemp(dir=TMPDIR))
+    else:
+        view_message_attachments(sys.stdin, tempfile.mkdtemp(dir=TMPDIR))