"Fossies" - the Fresh Open Source Software Archive

Member "xhtml2pdf-0.2.5/xhtml2pdf/pisa.py" (25 Sep 2020, 12006 Bytes) of package /linux/www/xhtml2pdf-0.2.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pisa.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.2.4_vs_0.2.5.

    1 # -*- coding: utf-8 -*-
    2 """
    3 Copyright 2010 Dirk Holtwick, holtwick.it
    4 
    5 Licensed under the Apache License, Version 2.0 (the "License");
    6 you may not use this file except in compliance with the License.
    7 You may obtain a copy of the License at
    8 
    9     http://www.apache.org/licenses/LICENSE-2.0
   10 
   11 Unless required by applicable law or agreed to in writing, software
   12 distributed under the License is distributed on an "AS IS" BASIS,
   13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14 See the License for the specific language governing permissions and
   15 limitations under the License.
   16 """
   17 
   18 from xhtml2pdf.default import DEFAULT_CSS
   19 from xhtml2pdf.document import pisaDocument
   20 from xhtml2pdf.util import getFile
   21 from xhtml2pdf import __version__
   22 from xhtml2pdf.config.httpconfig import httpConfig
   23 
   24 import getopt
   25 import glob
   26 import logging
   27 import os
   28 import six
   29 import sys
   30 import tempfile
   31 try:
   32     import urllib2
   33 except ImportError:
   34     import urllib.request as urllib2
   35 try:
   36     import urlparse
   37 except ImportError:
   38     import urllib.parse as urlparse
   39 
# Logger registered under the name "xhtml2pdf"
log = logging.getLogger("xhtml2pdf")

# Backward compatibility: CreatePDF is kept as an alias of pisaDocument
CreatePDF = pisaDocument
   44 
# Help text printed by usage(); leading and trailing whitespace is stripped.
USAGE = ("""

USAGE: pisa [options] SRC [DEST]

SRC
  Name of a HTML file or a file pattern using * placeholder.
  If you want to read from stdin use "-" as file name.
  You may also load an URL over HTTP. Take care of putting
  the <src> in quotes if it contains characters like "?".

DEST
  Name of the generated PDF file or "-" if you like
  to send the result to stdout. Take care that the
  destination file is not already opened by an other
  application like the Adobe Reader. If the destination is
  not writeable a similar name will be calculated automatically.

[options]
  --base, -b:
    Specify a base path if input come via STDIN
  --css, -c:
    Path to default CSS file
  --css-dump:
    Dumps the default CSS definitions to STDOUT
  --debug, -d:
    Show debugging informations
  --encoding:
    the character encoding of SRC. If left empty (default) this
    information will be extracted from the HTML header data
  --help, -h:
    Show this help text
  --quiet, -q:
    Show no messages
  --start-viewer, -s:
    Start PDF default viewer on Windows and MacOSX
    (e.g. AcrobatReader)
  --version:
    Show version information
  --warn, -w:
    Show warnings
  --xml, --xhtml, -x:
    Force parsing in XML Mode
    (automatically used if file ends with ".xml")
  --html:
    Force parsing in HTML Mode (default)
    
[HTTP Connection options]

  --http_nosslcheck:
    No check ssl certificate.
    
See http.client.HTTPSConnection documentation for this parameters 

  --http_key_file
  --http_cert_file
  --http_source_address
  --http_timeout
""").strip()
  103 
# The copyright notice is the module docstring (printed by --copyright
# and --system).
COPYRIGHT = __doc__

# Logging formats; the debug variant adds the source path and line number.
LOG_FORMAT = "%(levelname)s [%(name)s] %(message)s"
LOG_FORMAT_DEBUG = "%(levelname)s [%(name)s] %(pathname)s line %(lineno)d: %(message)s"
  108 
  109 
  110 def usage():
  111     print (USAGE)
  112 
  113 
  114 class pisaLinkLoader:
  115     """
  116     Helper to load page from an URL and load corresponding
  117     files to temporary files. If getFileName is called it
  118     returns the temporary filename and takes care to delete
  119     it when pisaLinkLoader is unloaded.
  120     """
  121 
  122     def __init__(self, src, quiet=True):
  123         self.quiet = quiet
  124         self.src = src
  125         self.tfileList = []
  126 
  127     def __del__(self):
  128         for path in self.tfileList:
  129             os.remove(path)
  130 
  131     def getFileName(self, name, relative=None):
  132         url = urlparse.urljoin(relative or self.src, name)
  133         path = urlparse.urlsplit(url)[2]
  134         suffix = ""
  135         if "." in path:
  136             new_suffix = "." + path.split(".")[-1].lower()
  137             if new_suffix in (".css", ".gif", ".jpg", ".png"):
  138                 suffix = new_suffix
  139         path = tempfile.mktemp(prefix="pisa-", suffix=suffix)
  140         ufile = urllib2.urlopen(url)
  141         tfile = open(path, "wb")
  142         while True:
  143             data = ufile.read(1024)
  144             if not data:
  145                 break
  146             tfile.write(data)
  147         ufile.close()
  148         tfile.close()
  149         self.tfileList.append(path)
  150 
  151         if not self.quiet:
  152             print ("  Loading %s to %s" % (url, path))
  153 
  154         return path
  155 
  156 
  157 def command():
  158     if "--profile" in sys.argv:
  159         print ("*** PROFILING ENABLED")
  160         import cProfile as profile
  161         import pstats
  162 
  163         prof = profile.Profile()
  164         prof.runcall(execute)
  165         pstats.Stats(prof).strip_dirs().sort_stats('cumulative').print_stats()
  166     else:
  167         execute()
  168 
  169 
  170 def execute():
  171 
  172     try:
  173         opts, args = getopt.getopt(sys.argv[1:], "dhqstwcxb", [
  174             "quiet",
  175             "help",
  176             "start-viewer",
  177             "start",
  178             "debug=",
  179             "copyright",
  180             "version",
  181             "warn",
  182             "tempdir=",
  183             "format=",
  184             "css=",
  185             "base=",
  186             "css-dump",
  187             "xml-dump",
  188             "xhtml",
  189             "xml",
  190             "html",
  191             "encoding=",
  192             "system",
  193             "profile",
  194             "http_nosslcheck",
  195             "http_key_file",
  196             "http_cert_file",
  197             "http_source_address",
  198             "http_timeout"
  199         ])
  200     except getopt.GetoptError:
  201         usage()
  202         sys.exit(2)
  203 
  204     errors = 0
  205     startviewer = 0
  206     quiet = 0
  207     debug = 0
  208     tempdir = None
  209     format = "pdf"
  210     css = None
  211     xhtml = None
  212     encoding = None
  213     xml_output = None
  214     base_dir = None
  215 
  216     log_level = logging.ERROR
  217     log_format = LOG_FORMAT
  218 
  219     for o, a in opts:
  220         if o in ("-h", "--help"):
  221             # Hilfe anzeigen
  222             usage()
  223             sys.exit()
  224 
  225         elif o in("--version",):
  226             print(__version__)
  227             sys.exit(0)
  228 
  229         elif o in ("--copyright"):
  230             print (COPYRIGHT)
  231             sys.exit(0)
  232 
  233         elif o in ("--system",):
  234             print (COPYRIGHT)
  235             print ()
  236             print ("SYSTEM INFORMATIONS")
  237             print ("--------------------------------------------")
  238             print ("OS:                %s" % sys.platform)
  239             print ("Python:            %s" % sys.version)
  240             print ("html5lib:          ?")
  241             import reportlab
  242 
  243             print ("Reportlab:         %s" % reportlab.Version)
  244             sys.exit(0)
  245 
  246         elif o in ("-s", "--start-viewer", "--start"):
  247             # Anzeigeprogramm starten
  248             startviewer = 1
  249 
  250         elif o in ("-q", "--quiet"):
  251             # Output unterdr´┐Żcken
  252             quiet = 1
  253 
  254         elif o in ("-w", "--warn"):
  255             # Warnings
  256             log_level = min(log_level, logging.WARN)  # If also -d ignore -w
  257 
  258         elif o in ("-d", "--debug"):
  259             # Debug
  260             log_level = logging.DEBUG
  261             log_format = LOG_FORMAT_DEBUG
  262 
  263             if a:
  264                 log_level = int(a)
  265 
  266         elif o in ("-t", "--format"):
  267             # Format XXX ???
  268             format = a
  269 
  270         elif o in ("-b", "--base"):
  271             base_dir = a
  272 
  273         elif o in ("--encoding",) and a:
  274             # Encoding
  275             encoding = a
  276 
  277         elif o in ("-c", "--css"):
  278             # CSS
  279             css = open(a, "r").read()
  280 
  281         elif o in ("--css-dump",):
  282             # CSS dump
  283             print (DEFAULT_CSS)
  284             return
  285 
  286         elif o in ("--xml-dump",):
  287             xml_output = sys.stdout
  288 
  289         elif o in ("-x", "--xml", "--xhtml"):
  290             xhtml = True
  291         
  292         elif o in ("--html",):
  293             xhtml = False
  294 
  295         elif httpConfig.is_http_config(o, a):
  296             continue
  297 
  298     if not quiet:
  299         logging.basicConfig(
  300             level=log_level,
  301             format=log_format)
  302 
  303     if len(args) not in (1, 2):
  304         usage()
  305         sys.exit(2)
  306 
  307     if len(args) == 2:
  308         a_src, a_dest = args
  309     else:
  310         a_src = args[0]
  311         a_dest = None
  312 
  313     if "*" in a_src:
  314         a_src = glob.glob(a_src)
  315         # print a_src
  316     else:
  317         a_src = [a_src]
  318 
  319     for src in a_src:
  320 
  321         # If not forced to parse in a special way have a look
  322         # at the filename suffix
  323         if xhtml is None:
  324             xhtml = src.lower().endswith(".xml")
  325 
  326         lc = None
  327 
  328         if src == "-" or base_dir is not None:
  329             # Output to console
  330             fsrc = sys.stdin
  331             wpath = os.getcwd()
  332             if base_dir:
  333                 wpath = base_dir
  334         else:
  335             if src.startswith("http:") or src.startswith("https:"):
  336                 wpath = src
  337                 fsrc = getFile(src).getFile()
  338                 src = "".join(urlparse.urlsplit(src)[1:3]).replace("/", "-")
  339             else:
  340                 fsrc = wpath = os.path.abspath(src)
  341                 fsrc = open(fsrc, "rb")
  342 
  343         if a_dest is None:
  344             dest_part = src
  345             if dest_part.lower().endswith(".html") or dest_part.lower().endswith(".htm"):
  346                 dest_part = ".".join(src.split(".")[:-1])
  347             dest = dest_part + "." + format.lower()
  348             for i in six.moves.range(10):
  349                 try:
  350                     open(dest, "wb").close()
  351                     break
  352                 except:
  353                     pass
  354                 dest = dest_part + "-%d.%s" % (i, format.lower())
  355         else:
  356             dest = a_dest
  357 
  358         fdestclose = 0
  359 
  360         if dest == "-" or base_dir:
  361             if sys.platform == "win32":
  362                 import msvcrt
  363                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
  364 
  365             fdest = sys.stdout
  366             startviewer = 0
  367         else:
  368             dest = os.path.abspath(dest)
  369             try:
  370                 open(dest, "wb").close()
  371             except:
  372                 print ("File '%s' seems to be in use of another application." % dest)
  373                 sys.exit(2)
  374             fdest = open(dest, "wb")
  375             fdestclose = 1
  376 
  377         if not quiet:
  378             print ("Converting {} to {}...".format(src, dest))
  379 
  380         pisaDocument(
  381             fsrc,
  382             fdest,
  383             debug=debug,
  384             path=wpath,
  385             errout=sys.stdout,
  386             tempdir=tempdir,
  387             format=format,
  388             link_callback=lc,
  389             default_css=css,
  390             xhtml=xhtml,
  391             encoding=encoding,
  392             xml_output=xml_output
  393         )
  394 
  395         if xml_output:
  396             xml_output.getvalue()
  397 
  398         if fdestclose:
  399             fdest.close()
  400 
  401         if (not errors) and startviewer:
  402             if not quiet:
  403                 print ("Open viewer for file %s" % dest)
  404             startViewer(dest)
  405 
  406 
  407 def startViewer(filename):
  408     """
  409     Helper for opening a PDF file
  410     """
  411 
  412     if filename:
  413         try:
  414             os.startfile(filename)
  415         except:
  416             # try to opan a la apple
  417             os.system('open "%s"' % filename)
  418 
  419 
  420 def showLogging(debug=False):
  421     """
  422     Shortcut for enabling log dump
  423     """
  424 
  425     try:
  426         log_level = logging.WARN
  427         log_format = LOG_FORMAT_DEBUG
  428         if debug:
  429             log_level = logging.DEBUG
  430         logging.basicConfig(
  431             level=log_level,
  432             format=log_format)
  433     except:
  434         logging.basicConfig()
  435 
  436 
  437 # Background informations in data URI here:
  438 # http://en.wikipedia.org/wiki/Data_URI_scheme
  439 
  440 def makeDataURI(data=None, mimetype=None, filename=None):
  441     import base64
  442 
  443     if not mimetype:
  444         if filename:
  445             import mimetypes
  446 
  447 
  448             mimetype = mimetypes.guess_type(filename)[0].split(";")[0]
  449         else:
  450             raise Exception("You need to provide a mimetype or a filename for makeDataURI")
  451     # encodestring was deprecated in Python 3 and removed in Python 3.9.
  452     if six.PY3:
  453         encoded_data = base64.encodebytes(data).split()
  454     else:
  455         encoded_data = base64.encodestring(data).split()
  456     return "data:" + mimetype + ";base64," + "".join(encoded_data)
  457 
  458 
  459 def makeDataURIFromFile(filename):
  460     data = open(filename, "rb").read()
  461     return makeDataURI(data, filename=filename)
  462 
  463 
# Run the command line interface when the module is executed as a script.
if __name__ == "__main__":
    command()