+++ /dev/null
-#! /usr/bin/python3
-
-# Source: https://raw.githubusercontent.com/akkana/scripts/master/viewmailattachments
-
-# Take an mbox HTML message (e.g. from mutt), split it
-# and rewrite it so all of its attachments can be viewed in a browser
-# (perhaps after being converted to HTML from DOC or whatever first).
-#
-# Can be run from within a mailer like mutt, or independently
-# on a single message file.
-#
-# Grew out of a simpler script called viewhtmlmail.
-#
-# Copyright 2015 by Akkana Peck. Share and enjoy under the GPL v2 or later.
-# Changes:
-#   Holger Klawitter 2014: create a secure temp file and avoid temp mbox
-
-# To use it from mutt, put the following lines in your .muttrc:
-# macro  index  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
-# macro  pager  <F10>  "<pipe-message>~/bin/viewmailattachments\n" "View attachments in browser"
-
-import os, sys
-import re
-import time
-import shutil
-import email, email.header, mimetypes
-import tempfile
-import subprocess
-from bs4 import BeautifulSoup
-
-################################################
-# Some prefs:
-USE_WVHTML_FOR_DOC = False
-BROWSER_ARGS = []
-TMPDIR=os.path.join(os.getenv('TMPDIR'), 'volatile')
-
-# How many seconds do we need to wait for unoconv?
-# It defaults to 6, but on a 64-bit machine that's not enough.
-UNOCONV_STARTUP_TIME = "10"
-
-# Does the browser need a one-time argument for bringing up an initial window,
-# like Firefox's -private-window -new-instance ?
-BROWSER_FIRST_ARG = []
-
-# What browser to use:
-USE_QUICKBROWSE = False
-
-if USE_QUICKBROWSE:
-    BROWSER = "quickbrowse"
-
-    # Browser argument to precede new tabs:
-    BROWSER_FIRST_ARGS = []
-    BROWSER_ARGS = [ "--new-tab" ]
-
-    # Will the browser block when first run until its window is closed?
-    # If so, we have to run it in the background.
-    BROWSER_BACKGROUND = False
-
-    # Should we convert PDF to HTML? Depends on BROWSER:
-    # Firefox has a built-in PDF viewer, but quickbrowse doesn't.
-    CONVERT_PDF_TO_HTML = False
-
-else:    # Firefox in private browsing mode
-    BROWSER = "firefox"
-
-    # Not clear what to do here: Firefox has a built-in PDF viewer,
-    # but for some mime types it can't figure out that it should use it.
-    BROWSER_FIRST_ARGS = [ "-private-window" ]
-    BROWSER_ARGS = [ "-new-tab", "-private-window" ]
-    # Firefox doesn't run in the background.
-    BROWSER_BACKGROUND = True
-
-    CONVERT_PDF_TO_HTML = False
-
-# End global prefs
-################################################
-
-# Temporary for debugging:
-class mysubprocess:
-    @staticmethod
-    def call(arr):
-        print("\n\n================\n=== Calling: %s" % str(arr))
-        subprocess.call(arr)
-
-    @staticmethod
-    def call_bg(arr):
-        print("\n\n================\n=== Calling in background: %s" % str(arr))
-        subprocess.Popen(arr, shell=False,
-                         stdin=None, stdout=None, stderr=None)
-
-def view_message_attachments(fp, tmpdir):
-    '''View message attachments coming from the file-like object fp.
-    '''
-
-    msg = email.message_from_string(fp.read())
-
-    html_part = None
-    counter = 1
-    subfiles = []
-    subparts = []
-    htmlfiles = []
-    htmlparts = []
-
-    def tmp_file_name(part):
-        partfile=part.get_filename()
-        if partfile:
-            n, enc = email.header.decode_header(partfile)[0]
-            if n:
-                partfile = n.decode(enc) if enc else n
-
-        # Applications should really sanitize the given filename so that an
-        # email message can't be used to overwrite important files.
-        # As a first step, warn about ../
-        if partfile and '../' in partfile:
-            print("Eek! Possible security problem in filename %s" % partfile)
-            return None
-
-        # Make a filename in the tmp dir:
-        if not partfile:
-            ext = mimetypes.guess_extension(part.get_content_type())
-            if not ext:
-                # Use a generic bag-of-bits extension
-                ext = '.bin'
-            return tempfile.mkstemp(dir=tmpdir, suffix=ext, prefix='part-')[1]
-        else:
-            return os.path.join(tmpdir, partfile)
-
-    def save_tmp_file(part):
-        '''Saves this part's payload to a tmp file, returning the new filename.
-        '''
-        partfile = tmp_file_name(part)
-
-        tmpfile = open(partfile, "wb")
-        tmpfile.write(part.get_payload(decode=True))
-        tmpfile.close()
-        return partfile
-
-    # Walk through the message a first, preliminary time
-    # to separate out any images that might be referred to by
-    # an HTML part.
-    for part in msg.walk():
-        # walk() includes the top-level message
-        if part == msg:
-            # print "  Skipping the top-level message"
-            continue
-
-        if part.get_content_type() != "multipart/related":
-            continue
-
-        # It's multipart. Walk the subtree looking for image children.
-        for child in part.walk():
-            # print " ", child.get_content_type()
-
-            # At least for now, only save images as parts of multipart.
-            if child.get_content_maintype() != "image":
-                continue
-
-            filename = save_tmp_file(child)
-            # print "    Saved to", filename
-
-            # Rewrite image and other inline URLs in terms of content-id.
-            # Mailers may use Content-Id or Content-ID (or, presumably,
-            # other capitalizations). So we can't just look it up simply.
-            content_id = None
-            for k in list(child.keys()):
-                if k.lower() == 'content-id':
-                    # Remove angle brackets, if present.
-                    # child['Content-Id'] is unmutable: attempts to change it
-                    # are just ignored. Copy it to a local mutable string.
-                    content_id = child[k]
-                    if content_id.startswith('<') and \
-                       content_id.endswith('>'):
-                        content_id = content_id[1:-1]
-
-                    subfiles.append({ 'filename': filename,
-                                      'Content-Id': content_id })
-                    subparts.append(child)
-                    counter += 1
-                    fp = open(filename, 'wb')
-                    fp.write(child.get_payload(decode=True))
-                    fp.close()
-                    break     # no need to look at other keys
-
-            # if not content_id:
-            #     print filename, "doesn't have a Content-Id, not saving"
-            #     # print "keys:", child.keys()
-
-    # print "Subfiles:"
-    # for sf in subfiles:
-    #     print sf
-
-    # Call up the browser window right away,
-    # so the user can see something is happening.
-    # Firefox, alas, has no way from the commandline of calling up
-    # a new private window with content, then replacing that content.
-    # So we'll create a file that refreshes, so that when content is ready,
-    # it can redirect to the first content page.
-    def write_to_index(outfile, msg, timeout_secs, redirect_url):
-        if not redirect_url:
-            redirect_url = "file://" + outfile
-        ofp = open(outfile, "w")
-        ofp.write('''<html><head>
-<meta content="utf-8" http-equiv="encoding">
-<meta http-equiv="content-type" content="text/html; charset=UTF-8">
-<meta http-equiv="refresh" content="%d;URL=%s">
-</head><body>
-<br><br><br><br><br><br><big><big>%s</big></big>
-</body></html>
-''' % (timeout_secs, redirect_url, msg))
-        ofp.close()
-
-    redirect_timeout = 3
-    pleasewait_file = tmpdir + "/index.html"
-    write_to_index(pleasewait_file, "Please wait ...", redirect_timeout, None)
-
-    cmd = [ BROWSER ]
-    if BROWSER_FIRST_ARGS:
-        cmd += BROWSER_FIRST_ARGS
-
-    cmd.append("file://" + pleasewait_file)
-    print("Calling: %s" % ' '.join(cmd))
-    if BROWSER_BACKGROUND:
-        mysubprocess.call_bg(cmd)
-    else:
-        mysubprocess.call(cmd)
-
-                       # "data:text/html,<br><br><br><br><br><h1>Translating documents, please wait ..."
-                       # Use JS if we can figure out how to close or replace
-                       # the "please wait" tab once we have content to show.
-                       # But for now, setTimeout() doesn't work at all
-                       # in newly popped up private windows.
-                       # "javascript:document.writeln('<br><br><br><br><br><h1>Translating documents, please wait ...');setTimeout(function(){alert('hi');}, 500);"
-                     # ])
-
-    # Now walk through looking for the real parts:
-    # HTML, doc and docx.
-    for part in msg.walk():
-
-        # part has, for example:
-        # items: [('Content-Type', 'image/jpeg'),
-        #         ('Content-Transfer-Encoding', 'base64'),
-        #         ('Content-ID', '<14.3631871432@web82503.mail.mud.yahoo.com>'),
-        #         ('Content-Disposition',
-        #          'attachment; filename="ATT0001414.jpg"')]
-        # keys: ['Content-Type', 'Content-Transfer-Encoding',
-        #        'Content-ID', 'Content-Disposition']
-        # values: ['image/jpeg', 'base64',
-        #          '<14.3631871432@web82503.mail.mud.yahoo.com>',
-        # 'attachment; filename="ATT0001414.jpg"']
-
-        # multipart/* are just containers
-        #if part.get_content_maintype() == 'multipart':
-        if part.is_multipart() or part.get_content_type == 'message/rfc822':
-            continue
-
-        if part.get_content_maintype() == "application":
-            partfile = save_tmp_file(part)
-            fileparts = os.path.splitext(partfile)
-            htmlfilename = fileparts[0] + ".html"
-
-            if part.get_content_subtype() == "msword" and USE_WVHTML_FOR_DOC:
-                mysubprocess.call(["wvHtml", partfile, htmlfilename])
-                htmlfiles.append(htmlfilename)
-
-            elif part.get_content_subtype() == \
-                 "vnd.openxmlformats-officedocument.wordprocessingml.document" \
-                 or part.get_content_subtype() == "msword":
-                mysubprocess.call(["unoconv", "-f", "html",
-                                   "-T", UNOCONV_STARTUP_TIME,
-                                   "-o", htmlfilename, partfile])
-
-                htmlfilename = os.path.join(fileparts[0] + ".html")
-                htmlfiles.append(htmlfilename)
-
-            # unoconv conversions from powerpoint to HTML drop all images.
-            # Try converting to PDF instead:
-            elif part.get_content_subtype() == "vnd.ms-powerpoint" \
-                 or part.get_content_subtype() == \
-                    "vnd.openxmlformats-officedocument.presentationml.presentation" :
-                pdffile = fileparts[0] + ".pdf"
-                mysubprocess.call(["unoconv", "-f", "pdf",
-                                   "-o", pdffile, partfile])
-                htmlfiles.append(pdffile)
-
-            elif part.get_content_subtype() == "pdf":
-                if CONVERT_PDF_TO_HTML:
-                    mysubprocess.call(["pdftohtml", "-s", partfile])
-
-                    # But pdftohtml is idiotic about output filename
-                    # and won't let you override it:
-                    htmlfiles.append(fileparts[0] + "-html.html")
-                else:
-                    htmlfiles.append(partfile)
-
-        elif part.get_content_maintype() == "text" and \
-             part.get_content_subtype() == 'html':
-
-            htmlfile = tmp_file_name(part)
-
-            fp = open(htmlfile, 'wb')
-            htmlsrc = part.get_payload(decode=True)
-
-            soup = BeautifulSoup(htmlsrc, "lxml")
-
-            # Substitute filenames for CIDs:
-            for tag in soup.body.find_all("img", src=True):
-                if tag['src'].lower().startswith("cid:"):
-                    for sf in subfiles:
-                        if tag['src'][4:] == sf['Content-Id']:
-                            tag['src'] = "file://" + sf['filename']
-            # for sf in subfiles:
-            #     htmlsrc = re.sub('cid: ?' + sf['Content-Id'],
-            #                      'file://' + sf['filename'],
-            #                      htmlsrc, flags=re.IGNORECASE)
-
-            # If it's HTML, we may need to add a meta charset tag. Sigh.
-            # If it's text/plain, there's nothing we can do to fix charset.
-            charset = part.get_charset()
-            if not charset:
-                charset = "UTF-8"
-            head = soup.find("head")
-            if not head:
-                head = soup.new_tag("head")
-                html = soup.find("html")
-                if html:
-                    html.insert(0, head)
-                else:
-                    soup.insert(0, head)
-
-            if not head.findAll("meta", attrs={"http-equiv": "encoding"}) and \
-               not head.findAll("meta", attrs={"http-equiv": "content-type"}):
-                meta = soup.new_tag("meta")
-                meta["content"] = charset
-                meta["http-equiv"] = "encoding"
-                head.insert(0, meta)
-                meta = soup.new_tag("meta")
-                meta["http-equiv"] = "content-type"
-                meta["content"] = "text/html; charset=%s" % charset
-                head.insert(0, meta)
-
-            fp.write(soup.prettify().encode("utf-8", "xmlcharrefreplace"))
-            fp.close()
-
-            htmlfiles.append(htmlfile)
-        elif part.get_content_maintype() == "image" and part not in subparts:
-            partfile = save_tmp_file(part)
-            htmlfiles.append(partfile)
-
-    # Done processing attachments. Call the browser for everything.
-    if htmlfiles:
-        # For the first URL, just put a redirect in
-        write_to_index(pleasewait_file,
-                       "Redirecting to file://" + htmlfiles[0],
-                       0, "file://" + htmlfiles[0])
-
-        for f in htmlfiles[1:]:
-            # If we don't wait for the new window to pop up before
-            # calling new-tab, bad things will happen: the document
-            # may load in a new tab in the old window and THEN pop up
-            # an unwanted third window. Go firefox.
-            # Not clear whether this is true for all browsers.
-            time.sleep(1)
-            if BROWSER_ARGS:
-                mysubprocess.call([BROWSER] + BROWSER_ARGS + ["file://" + f])
-            else:
-                mysubprocess.call([BROWSER, "file://" + f])
-
-    # Wait a while to make sure the browser has loads the imgaes, then clean up.
-    time.sleep(6)
-    shutil.rmtree(tmpdir)
-
-if __name__ == '__main__':
-    tmpdir = tempfile.mkdtemp(dir=TMPDIR)
-
-    if len(sys.argv) > 1:
-        for f in sys.argv[1:]:
-            fp = open(f)
-            view_message_attachments(fp, tmpdir)
-            fp.close()
-    else:
-        view_message_attachments(sys.stdin, tmpdir)