#! /usr/bin/env python

# Retrieved from:
# https://git.madduck.net/etc/mutt.git/blobdiff_plain/ab4a7bac0c452d69330b94acb6871f37a952217f..71e4ef2b822967ea63b9885edb35e39c4cf18a06:/.mutt/viewmailattachments
# Source: https://raw.githubusercontent.com/akkana/scripts/master/viewmailattachments

# Take an mbox HTML message (e.g. from mutt), split it
# and rewrite it so all of its attachments can be viewed in a browser
# (perhaps after being converted to HTML from DOC or whatever first).
#
# Can be run from within a mailer like mutt, or independently
# on a single message file.
#
# Grew out of a simpler script called viewhtmlmail.
#
# Copyright 2015 by Akkana Peck. Share and enjoy under the GPL v2 or later.
# Changes:
#   Holger Klawitter 2014: create a secure temp file and avoid temp mbox

# To use it from mutt, pipe the message to this script from a macro in
# your .muttrc, for example (the key binding is only an example):
# macro index <F10> "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
# macro pager <F10> "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"

import os
import sys
import re
import time
import shutil
import email
import email.header
import mimetypes
import tempfile
import subprocess

# BeautifulSoup (bs4) is imported lazily in write_html_part(): it is only
# needed when a message actually contains a text/html part.

################################################
# Some prefs:
USE_WVHTML_FOR_DOC = False
BROWSER_ARGS = []

# Parent directory for the per-message temp dirs.
# Fall back to the system temp dir when $TMPDIR is not set, instead of
# crashing at import time on os.path.join(None, ...).
TMPDIR = os.path.join(os.getenv('TMPDIR') or tempfile.gettempdir(), 'volatile')

# How many seconds do we need to wait for unoconv?
# It defaults to 6, but on a 64-bit machine that's not enough.
UNOCONV_STARTUP_TIME = "10"

# Does the browser need a one-time argument for bringing up an initial window,
# like Firefox's -private-window -new-instance ?
BROWSER_FIRST_ARGS = []

# What browser to use:
USE_QUICKBROWSE = False

if USE_QUICKBROWSE:
    BROWSER = "quickbrowse"

    # Browser argument to precede new tabs:
    BROWSER_FIRST_ARGS = []
    BROWSER_ARGS = ["--new-tab"]

    # Will the browser block when first run until its window is closed?
    # If so, we have to run it in the background.
    BROWSER_BACKGROUND = False

    # Should we convert PDF to HTML? Depends on BROWSER:
    # Firefox has a built-in PDF viewer, but quickbrowse doesn't.
    CONVERT_PDF_TO_HTML = False

else:    # Firefox in private browsing mode
    BROWSER = "firefox"

    # Not clear what to do here: Firefox has a built-in PDF viewer,
    # but for some mime types it can't figure out that it should use it.
    BROWSER_FIRST_ARGS = ["-private-window"]
    BROWSER_ARGS = ["-new-tab", "-private-window"]
    # Firefox doesn't run in the background.
    BROWSER_BACKGROUND = True

    CONVERT_PDF_TO_HTML = False

# Content subtypes (of application/*) that get converted before viewing.
WORD_SUBTYPES = (
    "msword",
    "vnd.openxmlformats-officedocument.wordprocessingml.document",
)
PRESENTATION_SUBTYPES = (
    "vnd.ms-powerpoint",
    "vnd.openxmlformats-officedocument.presentationml.presentation",
)

# End global prefs
################################################


# Temporary for debugging:
class mysubprocess:
    '''Thin wrappers around subprocess that print each command first.'''

    @staticmethod
    def call(arr):
        '''Print the argument list arr, then run it and wait for it.'''
        print("\n\n================\n=== Calling: %s" % str(arr))
        subprocess.call(arr)

    @staticmethod
    def call_bg(arr):
        '''Print the argument list arr, then start it without waiting.'''
        print("\n\n================\n=== Calling in background: %s" % str(arr))
        subprocess.Popen(arr, shell=False,
                         stdin=None, stdout=None, stderr=None)


def _decoded_filename(part):
    '''Return the filename the sender gave this part, with any RFC 2047
       encoded words decoded, or None if the part has no filename.
    '''
    partfile = part.get_filename()
    if not partfile:
        return None

    pieces = []
    for chunk, enc in email.header.decode_header(partfile):
        # decode_header() hands back bytes for encoded words;
        # plain unencoded names come back as str and need no decoding.
        if enc or not isinstance(chunk, str):
            try:
                chunk = chunk.decode(enc or "ascii", "replace")
            except LookupError:
                # Charset name unknown to Python: best effort.
                chunk = chunk.decode("utf-8", "replace")
        pieces.append(chunk)

    return "".join(pieces) or partfile


def _safe_basename(partfile):
    '''Reduce a sender-supplied filename to something that is safe to
       create inside the temp dir and to embed in a file:// URL.
       May return an empty string if nothing usable is left.
    '''
    # Drop any directory components, whichever separator the sender used,
    # so that neither "../" nor an absolute path can escape the temp dir.
    name = os.path.basename(partfile.replace('\\', '/'))

    # Keep word characters, dots and dashes; anything else (spaces, quotes,
    # angle brackets, '#', '?', ...) becomes an underscore.
    name = re.sub(r'[^\w.\-]+', '_', name, flags=re.UNICODE)

    # No hidden files, and no "." or "..".
    return name.lstrip('.')


def _new_tmp_path(tmpdir, prefix, suffix):
    '''Create a new empty file in tmpdir and return its path.'''
    fd, path = tempfile.mkstemp(dir=tmpdir, suffix=suffix, prefix=prefix)
    # mkstemp() returns an open descriptor; close it so it isn't leaked.
    os.close(fd)
    return path


def tmp_file_name(part, tmpdir):
    '''Choose a path inside tmpdir under which to store this message part.

       The part's own filename is used when it has one, after being
       sanitized: an email message must not be able to overwrite files
       outside tmpdir. Parts without a usable filename, and names that
       already exist in tmpdir, get a unique generated name instead.
    '''
    partfile = _decoded_filename(part)
    safename = _safe_basename(partfile) if partfile else ''

    if partfile and safename != partfile:
        print("Eek! Possible security problem in filename %s" % partfile)

    if not safename:
        ext = mimetypes.guess_extension(part.get_content_type())
        if not ext:
            # Use a generic bag-of-bits extension
            ext = '.bin'
        return _new_tmp_path(tmpdir, 'part-', ext)

    path = os.path.join(tmpdir, safename)
    if os.path.exists(path):
        # Don't clobber an earlier attachment that had the same name.
        base, ext = os.path.splitext(safename)
        return _new_tmp_path(tmpdir, base + '-', ext)
    return path


def save_tmp_file(part, tmpdir):
    '''Saves this part's payload to a tmp file in tmpdir,
       returning the new filename.
    '''
    partfile = tmp_file_name(part, tmpdir)

    with open(partfile, "wb") as tmpfile:
        # get_payload(decode=True) gives None for multipart containers.
        tmpfile.write(part.get_payload(decode=True) or b"")
    return partfile


def write_to_index(outfile, msg, timeout_secs, redirect_url):
    '''Write a small HTML page to outfile that shows msg and, after
       timeout_secs seconds, redirects to redirect_url.
       If redirect_url is empty, the page just reloads itself.
    '''
    if not redirect_url:
        redirect_url = "file://" + outfile
    with open(outfile, "w") as ofp:
        ofp.write('''<html><head>
<meta content="utf-8" http-equiv="encoding">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<meta http-equiv="refresh" content="%d;URL=%s">
</head><body>
<br><br><br><br><br><br><big><big>%s</big></big>
</body></html>
''' % (timeout_secs, redirect_url, msg))


def get_content_id(part):
    '''Return this part's Content-Id without surrounding angle brackets,
       or None if it has none.
    '''
    # Message header lookup is case-insensitive, so this also finds
    # Content-ID and any other capitalization.
    content_id = part.get('Content-Id')
    if not content_id:
        return None

    content_id = str(content_id).strip()
    if content_id.startswith('<') and content_id.endswith('>'):
        content_id = content_id[1:-1]
    return content_id or None


def save_inline_images(msg, tmpdir):
    '''Save the images that an HTML part might refer to by content-id.

       Returns (subfiles, subparts): subfiles is a list of dicts with
       the keys 'filename' and 'Content-Id', subparts is the list of
       the message parts that were saved.
    '''
    subfiles = []
    subparts = []

    for part in msg.walk():
        # walk() includes the top-level message
        # NOTE(review): this means images are not extracted when the
        # top-level message itself is multipart/related -- confirm
        # whether that is intended.
        if part is msg:
            continue

        if part.get_content_type() != "multipart/related":
            continue

        # Walk the subtree looking for image children.
        for child in part.walk():
            # At least for now, only save images as parts of multipart.
            if child.get_content_maintype() != "image":
                continue

            # Without a content-id the HTML can't refer to the image;
            # leave it to be shown as an ordinary attachment.
            content_id = get_content_id(child)
            if not content_id:
                continue

            # Nested multipart/related containers are walked twice.
            if any(child is seen for seen in subparts):
                continue

            subfiles.append({'filename': save_tmp_file(child, tmpdir),
                             'Content-Id': content_id})
            subparts.append(child)

    return subfiles, subparts


def convert_application_part(part, tmpdir):
    '''Save an application/* part to tmpdir and, where we know how,
       convert it to something a browser can show.

       Returns the path of the file to show, or None if we don't know
       how to show this content type.
    '''
    partfile = save_tmp_file(part, tmpdir)
    basename = os.path.splitext(partfile)[0]
    subtype = part.get_content_subtype()

    if subtype == "msword" and USE_WVHTML_FOR_DOC:
        htmlfilename = basename + ".html"
        mysubprocess.call(["wvHtml", partfile, htmlfilename])
        return htmlfilename

    if subtype in WORD_SUBTYPES:
        htmlfilename = basename + ".html"
        mysubprocess.call(["unoconv", "-f", "html",
                           "-T", UNOCONV_STARTUP_TIME,
                           "-o", htmlfilename, partfile])
        return htmlfilename

    # unoconv conversions from powerpoint to HTML drop all images.
    # Try converting to PDF instead:
    if subtype in PRESENTATION_SUBTYPES:
        pdffile = basename + ".pdf"
        mysubprocess.call(["unoconv", "-f", "pdf",
                           "-o", pdffile, partfile])
        return pdffile

    if subtype == "pdf":
        if CONVERT_PDF_TO_HTML:
            mysubprocess.call(["pdftohtml", "-s", partfile])

            # But pdftohtml is idiotic about output filename
            # and won't let you override it:
            return basename + "-html.html"
        return partfile

    return None


def write_html_part(part, tmpdir, subfiles):
    '''Write a text/html part to a file in tmpdir, with cid: image
       references replaced by the saved files listed in subfiles and
       with charset meta tags added if the document has none.

       Returns the path of the file written.
    '''
    # Imported here rather than at the top of the file so that
    # messages without an HTML part don't need bs4 at all.
    from bs4 import BeautifulSoup

    htmlfile = tmp_file_name(part, tmpdir)
    soup = BeautifulSoup(part.get_payload(decode=True), "lxml")

    # Substitute filenames for CIDs:
    cid_to_file = dict((sf['Content-Id'], sf['filename']) for sf in subfiles)
    if soup.body is not None:
        for tag in soup.body.find_all("img", src=True):
            if tag['src'].lower().startswith("cid:"):
                filename = cid_to_file.get(tag['src'][4:])
                if filename:
                    tag['src'] = "file://" + filename

    # If it's HTML, we may need to add a meta charset tag. Sigh.
    # If it's text/plain, there's nothing we can do to fix charset.
    charset = str(part.get_charset() or "UTF-8")
    head = soup.find("head")
    if not head:
        head = soup.new_tag("head")
        html = soup.find("html")
        if html:
            html.insert(0, head)
        else:
            soup.insert(0, head)

    if not head.find_all("meta", attrs={"http-equiv": "encoding"}) and \
       not head.find_all("meta", attrs={"http-equiv": "content-type"}):
        meta = soup.new_tag("meta")
        meta["content"] = charset
        meta["http-equiv"] = "encoding"
        head.insert(0, meta)
        meta = soup.new_tag("meta")
        meta["http-equiv"] = "content-type"
        meta["content"] = "text/html; charset=%s" % charset
        head.insert(0, meta)

    with open(htmlfile, 'wb') as htmlfp:
        htmlfp.write(soup.prettify().encode("utf-8", "xmlcharrefreplace"))

    return htmlfile


def view_message_attachments(fp, tmpdir):
    '''View message attachments coming from the file-like object fp.

       Attachments are saved (and converted where needed) in tmpdir and
       shown in BROWSER. tmpdir is removed before returning, so every
       message needs a temp dir of its own.
    '''
    try:
        msg = email.message_from_string(fp.read())

        # Walk through the message a first, preliminary time
        # to separate out any images that might be referred to by
        # an HTML part.
        subfiles, subparts = save_inline_images(msg, tmpdir)

        # Call up the browser window right away,
        # so the user can see something is happening.
        # Firefox, alas, has no way from the commandline of calling up
        # a new private window with content, then replacing that content.
        # So we'll create a file that refreshes, so that when content is
        # ready, it can redirect to the first content page.
        # (JS, e.g. "javascript:document.writeln('<br><br>Translating
        # documents, please wait ...');setTimeout(...)", would be nicer,
        # but setTimeout() doesn't work at all in newly popped up
        # private windows.)
        redirect_timeout = 3
        pleasewait_file = os.path.join(tmpdir, "index.html")
        write_to_index(pleasewait_file, "Please wait ...",
                       redirect_timeout, None)

        cmd = [BROWSER] + BROWSER_FIRST_ARGS + ["file://" + pleasewait_file]
        print("Calling: %s" % ' '.join(cmd))
        if BROWSER_BACKGROUND:
            mysubprocess.call_bg(cmd)
        else:
            mysubprocess.call(cmd)

        # Now walk through looking for the real parts:
        # HTML, doc and docx.
        htmlfiles = []
        for part in msg.walk():
            # multipart/* are just containers
            if part.is_multipart() or \
               part.get_content_type() == 'message/rfc822':
                continue

            maintype = part.get_content_maintype()

            if maintype == "application":
                viewable = convert_application_part(part, tmpdir)
                if viewable:
                    htmlfiles.append(viewable)

            elif maintype == "text" and part.get_content_subtype() == 'html':
                htmlfiles.append(write_html_part(part, tmpdir, subfiles))

            elif maintype == "image" and part not in subparts:
                htmlfiles.append(save_tmp_file(part, tmpdir))

        # Done processing attachments. Call the browser for everything.
        if htmlfiles:
            # For the first URL, just put a redirect in
            write_to_index(pleasewait_file,
                           "Redirecting to file://" + htmlfiles[0],
                           0, "file://" + htmlfiles[0])

            for f in htmlfiles[1:]:
                # If we don't wait for the new window to pop up before
                # calling new-tab, bad things will happen: the document
                # may load in a new tab in the old window and THEN pop up
                # an unwanted third window. Go firefox.
                # Not clear whether this is true for all browsers.
                time.sleep(1)
                mysubprocess.call([BROWSER] + BROWSER_ARGS + ["file://" + f])

        # Wait a while to make sure the browser has loaded the images,
        # then clean up.
        time.sleep(6)
    finally:
        # Don't leave copies of the attachments behind, even on errors.
        shutil.rmtree(tmpdir)


def main():
    '''View each message file named on the command line,
       or the message on stdin if there are no arguments.
    '''
    if not os.path.isdir(TMPDIR):
        os.makedirs(TMPDIR, 0o700)

    # view_message_attachments() removes its temp dir when it is done,
    # so make a fresh one for every message.
    if len(sys.argv) > 1:
        for f in sys.argv[1:]:
            with open(f) as fp:
                view_message_attachments(fp, tempfile.mkdtemp(dir=TMPDIR))
    else:
        view_message_attachments(sys.stdin, tempfile.mkdtemp(dir=TMPDIR))


if __name__ == '__main__':
    main()