"Fossies" - the Fresh Open Source Software Archive

Member "OCRmyPDF-8.3.0/.docker/webservice.py" (13 May 2019, 4576 Bytes) of package /linux/privat/OCRmyPDF-8.3.0.tar.gz:


The requested HTML page contains a <FORM> tag that is unusable on "Fossies" in "automatic" (rendered) mode, so the page is shown as syntax-highlighted HTML source code (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "webservice.py": 8.1.0_vs_8.2.0.

    1 # webservice.py wrapper for OCRmyPDF
    2 # Copyright (C) 2019 James R. Barlow: github.com/jbarlow83
    3 #
    4 # This program is free software: you can redistribute it and/or modify
    5 # it under the terms of the GNU Affero General Public License as published by
    6 # the Free Software Foundation, either version 3 of the License, or
    7 # (at your option) any later version.
    8 #
    9 # This program is distributed in the hope that it will be useful,
   10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
   11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12 # GNU Affero General Public License for more details.
   13 #
   14 # You should have received a copy of the GNU Affero General Public License
   15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
   16 
   17 """This is a simple web service/HTTP wrapper for OCRmyPDF
   18 
   19 This may be more convenient than the command line tool for some Docker users.
   20 Note that OCRmyPDF uses Ghostscript, which is licensed under AGPLv3+. While
   21 OCRmyPDF is under GPLv3, this file is distributed under the Affero GPLv3+ license,
   22 to emphasize that SaaS deployments should make sure they comply with
   23 Ghostscript's license as well as OCRmyPDF's.
   24 """
   25 
   26 from flask import (
   27     Flask,
   28     Response,
   29     flash,
   30     request,
   31     redirect,
   32     url_for,
   33     abort,
   34     send_from_directory,
   35 )
   36 from subprocess import run, PIPE
   37 from tempfile import TemporaryDirectory
   38 from werkzeug.utils import secure_filename
   39 import os
   40 import shlex
   41 
   42 app = Flask(__name__)
   43 app.secret_key = "secret"
   44 app.config['MAX_CONTENT_LENGTH'] = 50_000_000
   45 app.config.from_envvar("OCRMYPDF_WEBSERVICE_SETTINGS", silent=True)
   46 
   47 ALLOWED_EXTENSIONS = set(["pdf"])
   48 
   49 
   50 def allowed_file(filename):
   51     return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
   52 
   53 
   54 def do_ocrmypdf(file):
   55     uploaddir = TemporaryDirectory(prefix="ocrmypdf-upload")
   56     downloaddir = TemporaryDirectory(prefix="ocrmypdf-download")
   57 
   58     filename = secure_filename(file.filename)
   59     up_file = os.path.join(uploaddir.name, filename)
   60     file.save(up_file)
   61 
   62     down_file = os.path.join(downloaddir.name, filename)
   63 
   64     cmd_args = [arg for arg in shlex.split(request.form["params"])]
   65     if "--sidecar" in cmd_args:
   66         return Response("--sidecar not supported", 501, mimetype='text/plain')
   67 
   68     ocrmypdf_args = ["ocrmypdf", *cmd_args, up_file, down_file]
   69     proc = run(ocrmypdf_args, stdout=PIPE, stderr=PIPE, encoding="utf-8")
   70     if proc.returncode != 0:
   71         stderr = proc.stderr
   72         return Response(stderr, 400, mimetype='text/plain')
   73 
   74     return send_from_directory(downloaddir.name, filename)
   75 
   76 
   77 @app.route("/", methods=["GET", "POST"])
   78 def upload_file():
   79     if request.method == "POST":
   80         if "file" not in request.files:
   81             return Response("No file in POST", 400, mimetype='text/plain')
   82         file = request.files["file"]
   83         if file.filename == "":
   84             return Response("Empty filename", 400, mimetype='text/plain')
   85         if not allowed_file(file.filename):
   86             return Response("Invalid filename", 400, mimetype='text/plain')
   87         if file and allowed_file(file.filename):
   88             return do_ocrmypdf(file)
   89         return Response("Some other problem", 400, mimetype='text/plain')
   90 
   91     return """
   92     <!doctype html>
   93     <title>OCRmyPDF webservice</title>
   94     <h1>Upload a PDF (debug UI)</h1>
   95     <form method=post enctype=multipart/form-data>
   96       <label for="args">Command line parameters</label>
   97       <input type=textbox name=params>
   98       <label for="file">File to upload</label>
   99       <input type=file name=file>
  100       <input type=submit value=Upload>
  101     </form>
  102     <h4>Notice</h2>
  103     <div style="font-size: 70%; max-width: 34em;">
  104     <p>This is a webservice wrapper for OCRmyPDF.</p>
  105     <p>Copyright 2019 James R. Barlow</p>
  106     <p>This program is free software: you can redistribute it and/or modify
  107     it under the terms of the GNU Affero General Public License as published by
  108     the Free Software Foundation, either version 3 of the License, or
  109     (at your option) any later version.
  110     </p>
  111     <p>This program is distributed in the hope that it will be useful,
  112     but WITHOUT ANY WARRANTY; without even the implied warranty of
  113     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  114     GNU General Public License for more details.
  115     </p>
  116     <p>
  117     You should have received a copy of the GNU Affero General Public License
  118     along with this program.  If not, see &lt;http://www.gnu.org/licenses/&gt;.
  119     </p>
  120     </div>
  121     """
  122 
  123 
  124 if __name__ == "__main__":
  125     app.run(host='0.0.0.0', port=5000)