"Fossies" - the Fresh Open Source Software Archive

Member "zim-0.70/zim/export/exporters/mhtml.py" (14 Mar 2019, 3915 Bytes) of package /linux/privat/zim-0.70.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "mhtml.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.69.1_vs_0.70.

    1 
    2 # Copyright 2008-2014 Jaap Karssenberg <jaap.karssenberg@gmail.com>
    3 
    4 import email.mime.multipart
    5 import email.mime.text
    6 import email.mime.nonmultipart
    7 
    8 import base64
    9 
   10 from zim.fs import get_tmpdir
   11 from zim.newfs import LocalFile, LocalFolder, File, Folder
   12 
   13 from zim.notebook import encode_filename
   14 
   15 from zim.export.exporters import Exporter
   16 from zim.export.exporters.files import SingleFileExporter
   17 from zim.export.layouts import SingleFileLayout
   18 from zim.export.linker import ExportLinker
   19 
   20 
   21 class MHTMLExporter(Exporter):
   22     '''Exporter that exports pages and attachments to a single MHTML
   23     file.
   24     '''
   25 
   26     # Output is Multipart-Mime message containing single HTML file
   27     # and all attachments and resources as mime parts
   28     # So first export as a single file, then wrap in mime
   29 
   30     # Keeps large data in memory - would need more low level
   31     # implementation with incremental writes to optimize it...
   32 
   33     # Also note that due to all the base64 encoding, size is going
   34     # to blow up even more ...
   35 
   36     def __init__(self, file, template, document_root_url=None):
   37         self.file = file
   38         self.template = template
   39         self.document_root_url = document_root_url
   40 
   41     def export_iter(self, pages):
   42         basename = encode_filename(pages.name)
   43         folder = LocalFolder(get_tmpdir().subdir('mhtml_export_tmp_dir').path) # XXX
   44         if folder.exists():
   45             folder.remove_children()
   46         else:
   47             folder.touch()
   48         file = folder.file(basename + '.html')
   49         layout = SingleFileLayout(file, pages.prefix)
   50         exporter = SingleFileExporter(layout, self.template, 'html', document_root_url=self.document_root_url)
   51 
   52         for p in exporter.export_iter(pages):
   53             yield p
   54 
   55         encoder = MHTMLEncoder()
   56         linker = ExportLinker(pages.notebook, layout, output=file, usebase=True)
   57         self.file.write(encoder(layout, linker))
   58 
   59 
   60 class MHTMLEncoder(object):
   61 
   62     # Create message of file + attachments + resources
   63 
   64     # We use a linker for relative names to make absolutely sure
   65     # we give same relative paths as mentioned in links
   66 
   67     def __call__(self, layout, linker):
   68         msg = email.mime.multipart.MIMEMultipart('related')
   69             # MIME-Version 1.0
   70             # Content-Type: multipart/related; boundry=...
   71         msg.preamble = '' \
   72         'This document is a Single File Web Page, also known as a Web Archive file\n' \
   73         'or MHTML. If you are seeing this message, your browser or editor doesn\'t\n' \
   74         'support MHTML. Please look for a plugin or extension that adds MHTML support\n' \
   75         'or download a browser that supports it.'
   76 
   77         # Add html file
   78         msg.attach(self.encode_text_file(layout.file, None, 'text/html'))
   79 
   80         # Add attachments and resource
   81         for file in self._walk(layout.dir):
   82             mt = file.get_mimetype()
   83             filename = linker.link(file.uri)
   84             if mt.startswith('text/'):
   85                 part = self.encode_text_file(file, filename, mt)
   86             else:
   87                 part = self.encode_data_file(file, filename, mt)
   88             msg.attach(part)
   89 
   90         # Write message to file
   91         return str(msg)
   92 
   93     def _walk(self, dir):
   94         if dir.exists():
   95             for child in dir:
   96                 if isinstance(child, File):
   97                     yield child
   98                 elif child.exists():
   99                     for child in self._walk(child): # recurs
  100                         yield child
  101 
  102     def encode_text_file(self, file, filename, mimetype):
  103         type, subtype = mimetype.split('/', 1)
  104         assert type == 'text'
  105 
  106         # Not using MIMEText here, since it uses base64 and inflates
  107         # all ascii text unnecessary
  108         charset = email.charset.Charset('utf-8')
  109         charset.body_encoding = email.charset.QP
  110         msg = email.mime.nonmultipart.MIMENonMultipart('text', subtype, charset='utf-8')
  111         if filename: # top level does not have filename
  112             msg['Content-Location'] = filename
  113         msg.set_payload(file.read(), charset=charset)
  114         return msg
  115 
  116     def encode_data_file(self, file, filename, mimetype):
  117         type, subtype = mimetype.split('/', 1)
  118         msg = email.mime.nonmultipart.MIMENonMultipart(type, subtype)
  119         msg['Content-Location'] = filename
  120         msg['Content-Type'] = mimetype
  121         msg.set_payload(file.raw())
  122         email.encoders.encode_base64(msg)
  123         return msg