#! /usr/bin/python3

# Recovered from the diff that deleted .mutt/viewmailattachments
# (blob 1a119ca) in https://git.madduck.net/etc/mutt.git
# Source: https://raw.githubusercontent.com/akkana/scripts/master/viewmailattachments

# Take an mbox HTML message (e.g. from mutt), split it
# and rewrite it so all of its attachments can be viewed in a browser
# (perhaps after being converted to HTML from DOC or whatever first).
#
# Can be run from within a mailer like mutt, or independently
# on a single message file.
#
# Grew out of a simpler script called viewhtmlmail.
#
# Copyright 2015 by Akkana Peck. Share and enjoy under the GPL v2 or later.
# Changes:
#   Holger Klawitter 2014: create a secure temp file and avoid temp mbox

# To use it from mutt, put the following lines in your .muttrc:
# macro index <key> "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
# macro pager <key> "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
# NOTE(review): the key name and the <pipe-message> function were missing
# from the recovered text (angle-bracket tokens were stripped); confirm them
# against your own .muttrc.

import email
import email.header
import html
import mimetypes
import os
import pathlib
import re
import shutil
import subprocess
import sys
import tempfile
import time
import urllib.parse

################################################
# Some prefs:
USE_WVHTML_FOR_DOC = False
BROWSER_ARGS = []
# os.getenv() returns None when TMPDIR is unset, which os.path.join() rejects,
# so fall back to the platform's default temp directory.
TMPDIR = os.path.join(os.getenv('TMPDIR') or tempfile.gettempdir(), 'volatile')

# How many seconds do we need to wait for unoconv?
# It defaults to 6, but on a 64-bit machine that's not enough.
UNOCONV_STARTUP_TIME = "10"

# Does the browser need a one-time argument for bringing up an initial window,
# like Firefox's -private-window -new-instance ?
BROWSER_FIRST_ARGS = []

# What browser to use:
USE_QUICKBROWSE = False

if USE_QUICKBROWSE:
    BROWSER = "quickbrowse"

    # Browser argument to precede new tabs:
    BROWSER_FIRST_ARGS = []
    BROWSER_ARGS = ["--new-tab"]

    # Will the browser block when first run until its window is closed?
    # If so, we have to run it in the background.
    BROWSER_BACKGROUND = False

    # Should we convert PDF to HTML? Depends on BROWSER:
    # Firefox has a built-in PDF viewer, but quickbrowse doesn't.
    CONVERT_PDF_TO_HTML = False

else:    # Firefox in private browsing mode
    BROWSER = "firefox"

    # Not clear what to do here: Firefox has a built-in PDF viewer,
    # but for some mime types it can't figure out that it should use it.
    BROWSER_FIRST_ARGS = ["-private-window"]
    BROWSER_ARGS = ["-new-tab", "-private-window"]
    # Firefox doesn't run in the background.
    BROWSER_BACKGROUND = True

    CONVERT_PDF_TO_HTML = False

# End global prefs
################################################

# application/* subtypes that get converted or shown; anything else under
# application/ is ignored.
DOCX_SUBTYPE = "vnd.openxmlformats-officedocument.wordprocessingml.document"
PPTX_SUBTYPE = "vnd.openxmlformats-officedocument.presentationml.presentation"
WORD_SUBTYPES = ("msword", DOCX_SUBTYPE)
SLIDE_SUBTYPES = ("vnd.ms-powerpoint", PPTX_SUBTYPE)

# Matches the http-equiv values of <meta> tags that already declare a charset.
# Mailers vary the capitalization, so the match is case-insensitive.
CHARSET_META_RE = re.compile(r"^(encoding|content-type)$", re.IGNORECASE)


# Temporary for debugging:
class mysubprocess:
    """Wrapper around subprocess that echoes each command before running it."""

    @staticmethod
    def call(arr):
        """Print the argument list arr, then run it and wait for it to exit."""
        print("\n\n================\n=== Calling: %s" % str(arr))
        subprocess.call(arr)

    @staticmethod
    def call_bg(arr):
        """Print the argument list arr, then start it without waiting."""
        print("\n\n================\n=== Calling in background: %s" % str(arr))
        subprocess.Popen(arr, shell=False,
                         stdin=None, stdout=None, stderr=None)


def file_url(path):
    """Return a file:// URL for path, percent-encoding unsafe characters.

    Attachment names routinely contain spaces, '#' or '?', which would
    truncate or break a URL built by plain string concatenation.
    """
    return pathlib.Path(os.path.abspath(path)).as_uri()


def content_id_of(part):
    """Return part's Content-ID without angle brackets, or None if absent.

    Header lookup on email messages is case-insensitive, so this also finds
    Content-Id and other capitalizations.
    """
    content_id = part.get('Content-ID')
    if content_id is None:
        return None
    content_id = str(content_id).strip()
    if content_id.startswith('<') and content_id.endswith('>'):
        content_id = content_id[1:-1]
    return content_id or None


def tmp_file_name(part, tmpdir):
    """Choose a path inside tmpdir in which to store the MIME part.

    The part's own filename is used when it has one, but only its final
    path component, so a message cannot write outside tmpdir. Parts
    without a usable name, and names already taken, get a unique
    generated name. A generated name is created on disk as an empty
    file; a name taken from the part is not.
    """
    partfile = part.get_filename()
    if partfile:
        # The name may consist of several RFC 2047 encoded words.
        chunks = []
        for text, charset in email.header.decode_header(partfile):
            if isinstance(text, bytes):
                try:
                    text = text.decode(charset or 'ascii', 'replace')
                except LookupError:
                    # Unknown charset label: latin-1 can decode any byte.
                    text = text.decode('latin-1')
            chunks.append(text)
        decoded = ''.join(chunks)

        # Applications should really sanitize the given filename so that an
        # email message can't be used to overwrite important files.
        # Backslashes are normalized so Windows-style paths are caught too.
        partfile = os.path.basename(
            decoded.replace('\\', '/').replace('\0', ''))
        if partfile != decoded:
            print("Eek! Possible security problem in filename %s" % decoded)
        if partfile in ('', '.', '..'):
            partfile = None

    if not partfile:
        # Make a filename in the tmp dir.
        # Use a generic bag-of-bits extension if the type is unknown.
        ext = mimetypes.guess_extension(part.get_content_type()) or '.bin'
        handle, path = tempfile.mkstemp(dir=tmpdir, suffix=ext,
                                        prefix='part-')
        os.close(handle)    # only the name is needed; don't leak the fd
        return path

    path = os.path.join(tmpdir, partfile)
    if os.path.exists(path):
        # Two parts share a name (or one is called index.html):
        # keep both rather than overwriting the earlier file.
        stem, ext = os.path.splitext(partfile)
        handle, path = tempfile.mkstemp(dir=tmpdir, suffix=ext,
                                        prefix=stem + '-')
        os.close(handle)
    return path


def save_tmp_file(part, tmpdir):
    """Save part's decoded payload under tmpdir, returning the new filename."""
    partfile = tmp_file_name(part, tmpdir)
    payload = part.get_payload(decode=True)
    with open(partfile, "wb") as tmpfile:
        tmpfile.write(payload or b"")
    return partfile


def write_to_index(outfile, msg, timeout_secs, redirect_url):
    """Write a small HTML page that shows msg and then redirects.

    outfile is the page to (over)write. After timeout_secs seconds the
    page loads redirect_url; when redirect_url is empty the page reloads
    itself, so a later rewrite of outfile is picked up by the browser.

    NOTE(review): the markup of the original template was stripped from
    the recovered text. This page is rebuilt from the three format
    arguments and the surrounding comments.
    """
    if not redirect_url:
        redirect_url = file_url(outfile)
    with open(outfile, "w", encoding="utf-8") as ofp:
        ofp.write('''<html><head>
<meta content="utf-8" http-equiv="encoding">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<meta http-equiv="refresh" content="%d;URL=%s">
</head><body>
<br><br><br><br><br><br><big><big>%s</big></big>
</body></html>
''' % (timeout_secs, html.escape(redirect_url, quote=True),
       html.escape(msg)))


def rewrite_html_part(part, htmlfile, subfiles):
    """Write the text/html part to htmlfile with cid: images made local.

    subfiles is a list of {'filename': ..., 'Content-Id': ...} dicts
    describing inline images that were already saved to disk. A charset
    declaration is added to the document head if it has none.
    """
    # Imported here so that messages without an HTML part don't need bs4.
    from bs4 import BeautifulSoup

    htmlsrc = part.get_payload(decode=True) or b""
    # The MIME charset is the best hint for decoding the payload bytes.
    soup = BeautifulSoup(htmlsrc, "lxml",
                         from_encoding=part.get_content_charset())

    # Substitute filenames for CIDs:
    cid_files = {sf['Content-Id']: sf['filename'] for sf in subfiles}
    for tag in soup.find_all("img", src=True):
        src = tag['src']
        if not src.lower().startswith("cid:"):
            continue
        cid = src[4:].strip()
        # cid: URLs may percent-encode characters of the Content-ID.
        target = cid_files.get(cid) or cid_files.get(urllib.parse.unquote(cid))
        if target:
            tag['src'] = file_url(target)

    # If it's HTML, we may need to add a meta charset tag. Sigh.
    # The file is written as UTF-8 below, so that is what gets declared.
    charset = "UTF-8"
    head = soup.find("head")
    if not head:
        head = soup.new_tag("head")
        root = soup.find("html")
        if root:
            root.insert(0, head)
        else:
            soup.insert(0, head)

    if not head.find_all("meta", attrs={"http-equiv": CHARSET_META_RE}):
        meta = soup.new_tag("meta")
        meta["content"] = charset
        meta["http-equiv"] = "encoding"
        head.insert(0, meta)
        meta = soup.new_tag("meta")
        meta["http-equiv"] = "content-type"
        meta["content"] = "text/html; charset=%s" % charset
        head.insert(0, meta)

    with open(htmlfile, 'wb') as out:
        out.write(soup.prettify().encode("utf-8", "xmlcharrefreplace"))


def _display_message(msg, tmpdir):
    """Save and convert the parts of msg under tmpdir and open them all."""
    subfiles = []     # inline images: {'filename': ..., 'Content-Id': ...}
    subparts = []     # the message parts behind subfiles
    htmlfiles = []    # everything to be shown in the browser, in order

    # Walk through the message a first, preliminary time
    # to separate out any images that might be referred to by
    # an HTML part. The top-level message is included: a mail without
    # further attachments is often multipart/related at its root.
    for part in msg.walk():
        if part.get_content_type() != "multipart/related":
            continue

        # It's multipart. Walk the subtree looking for image children.
        for child in part.walk():
            # At least for now, only save images as parts of multipart.
            if child.get_content_maintype() != "image":
                continue
            if child in subparts:
                # Already handled through an enclosing multipart/related.
                continue

            # Only images with a Content-Id can be referenced from HTML.
            # The others are shown as ordinary attachments further down.
            content_id = content_id_of(child)
            if not content_id:
                continue

            subfiles.append({'filename': save_tmp_file(child, tmpdir),
                             'Content-Id': content_id})
            subparts.append(child)

    # Call up the browser window right away,
    # so the user can see something is happening.
    # Firefox, alas, has no way from the commandline of calling up
    # a new private window with content, then replacing that content.
    # So we'll create a file that refreshes, so that when content is ready,
    # it can redirect to the first content page.
    # (JavaScript is no alternative: setTimeout() doesn't work at all
    # in newly popped up private windows.)
    redirect_timeout = 3
    pleasewait_file = os.path.join(tmpdir, "index.html")
    write_to_index(pleasewait_file, "Please wait ...", redirect_timeout, None)

    cmd = [BROWSER] + list(BROWSER_FIRST_ARGS) + [file_url(pleasewait_file)]
    print("Calling: %s" % ' '.join(cmd))
    if BROWSER_BACKGROUND:
        mysubprocess.call_bg(cmd)
    else:
        mysubprocess.call(cmd)

    # Now walk through looking for the real parts:
    # HTML, doc and docx.
    for part in msg.walk():
        # multipart/* are just containers
        if part.is_multipart() or part.get_content_type() == 'message/rfc822':
            continue

        maintype = part.get_content_maintype()
        subtype = part.get_content_subtype()

        if maintype == "application":
            if subtype not in WORD_SUBTYPES + SLIDE_SUBTYPES + ("pdf",):
                continue

            partfile = save_tmp_file(part, tmpdir)
            stem = os.path.splitext(partfile)[0]

            if subtype == "msword" and USE_WVHTML_FOR_DOC:
                htmlfilename = stem + ".html"
                mysubprocess.call(["wvHtml", partfile, htmlfilename])
                htmlfiles.append(htmlfilename)

            elif subtype in WORD_SUBTYPES:
                htmlfilename = stem + ".html"
                mysubprocess.call(["unoconv", "-f", "html",
                                   "-T", UNOCONV_STARTUP_TIME,
                                   "-o", htmlfilename, partfile])
                htmlfiles.append(htmlfilename)

            # unoconv conversions from powerpoint to HTML drop all images.
            # Try converting to PDF instead:
            elif subtype in SLIDE_SUBTYPES:
                pdffile = stem + ".pdf"
                mysubprocess.call(["unoconv", "-f", "pdf",
                                   "-o", pdffile, partfile])
                htmlfiles.append(pdffile)

            elif CONVERT_PDF_TO_HTML:
                mysubprocess.call(["pdftohtml", "-s", partfile])
                # But pdftohtml is idiotic about output filename
                # and won't let you override it:
                htmlfiles.append(stem + "-html.html")

            else:
                htmlfiles.append(partfile)

        elif maintype == "text" and subtype == 'html':
            htmlfile = tmp_file_name(part, tmpdir)
            rewrite_html_part(part, htmlfile, subfiles)
            htmlfiles.append(htmlfile)

        elif maintype == "image" and part not in subparts:
            htmlfiles.append(save_tmp_file(part, tmpdir))

    # Done processing attachments. Call the browser for everything.
    if htmlfiles:
        # For the first URL, just put a redirect in
        first_url = file_url(htmlfiles[0])
        write_to_index(pleasewait_file, "Redirecting to " + first_url,
                       0, first_url)

        for f in htmlfiles[1:]:
            # If we don't wait for the new window to pop up before
            # calling new-tab, bad things will happen: the document
            # may load in a new tab in the old window and THEN pop up
            # an unwanted third window. Go firefox.
            # Not clear whether this is true for all browsers.
            time.sleep(1)
            mysubprocess.call([BROWSER] + list(BROWSER_ARGS) + [file_url(f)])


def view_message_attachments(fp, tmpdir):
    """View message attachments coming from the file-like object fp.

    fp may yield bytes (preferred, since mail is not reliably valid text)
    or str. Every file created along the way goes into tmpdir, which is
    removed before returning, even on error, so each call needs a
    directory of its own.
    """
    try:
        data = fp.read()
        if isinstance(data, bytes):
            msg = email.message_from_bytes(data)
        else:
            msg = email.message_from_string(data)

        _display_message(msg, tmpdir)

        # Wait a while to make sure the browser has loaded the images,
        # then clean up.
        time.sleep(6)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)


def main(argv=None):
    """View each message file named in argv, or the message on stdin.

    argv defaults to the command-line arguments after the program name.
    """
    paths = sys.argv[1:] if argv is None else list(argv)
    os.makedirs(TMPDIR, mode=0o700, exist_ok=True)

    # view_message_attachments() removes the directory it is given,
    # so every message gets a fresh one.
    if paths:
        for f in paths:
            with open(f, "rb") as fp:
                view_message_attachments(fp, tempfile.mkdtemp(dir=TMPDIR))
    else:
        view_message_attachments(sys.stdin.buffer,
                                 tempfile.mkdtemp(dir=TMPDIR))


if __name__ == '__main__':
    main()