"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/ocrmypdf/_pipeline.py" between
OCRmyPDF-8.0.1.tar.gz and OCRmyPDF-8.1.0.tar.gz

About: OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.

_pipeline.py (OCRmyPDF-8.0.1) : _pipeline.py (OCRmyPDF-8.1.0)
skipping to change at line 552 skipping to change at line 552
options = context.get_options() options = context.get_options()
if not options.clean: if not options.clean:
re_symlink(input_file, output_file, log) re_symlink(input_file, output_file, log)
return return
from .exec import unpaper from .exec import unpaper
pageinfo = get_pageinfo(input_file, context) pageinfo = get_pageinfo(input_file, context)
dpi = get_page_square_dpi(pageinfo, options) dpi = get_page_square_dpi(pageinfo, options)
unpaper.clean(input_file, output_file, dpi, log) unpaper.clean(input_file, output_file, dpi, log, options.unpaper_args)
def select_ocr_image(infiles, output_file, log, context): def select_ocr_image(infiles, output_file, log, context):
"""Select the image we send for OCR. May not be the same as the display """Select the image we send for OCR. May not be the same as the display
image depending on preprocessing. This image will never be shown to the image depending on preprocessing. This image will never be shown to the
user.""" user."""
image = infiles[0] image = infiles[0]
options = context.get_options() options = context.get_options()
pageinfo = get_pageinfo(image, context) pageinfo = get_pageinfo(image, context)
skipping to change at line 631 skipping to change at line 631
engine_mode=options.tesseract_oem, engine_mode=options.tesseract_oem,
tessconfig=options.tesseract_config, tessconfig=options.tesseract_config,
timeout=options.tesseract_timeout, timeout=options.tesseract_timeout,
pagesegmode=options.tesseract_pagesegmode, pagesegmode=options.tesseract_pagesegmode,
user_words=options.user_words, user_words=options.user_words,
user_patterns=options.user_patterns, user_patterns=options.user_patterns,
log=log, log=log,
) )
def select_visible_page_image(infiles, output_file, log, context): def select_visible_page_image(infiles, output_file, log, context):
"Selects a whole page image that we can show the user (if necessary)" """Selects a whole page image that we can show the user (if necessary)"""
options = context.get_options() options = context.get_options()
if options.clean_final: if options.clean_final:
image_suffix = '.pp-clean.png' image_suffix = '.pp-clean.png'
elif options.deskew: elif options.deskew:
image_suffix = '.pp-deskew.png' image_suffix = '.pp-deskew.png'
elif options.remove_background: elif options.remove_background:
image_suffix = '.pp-background.png' image_suffix = '.pp-background.png'
else: else:
image_suffix = '.page.png' image_suffix = '.page.png'
End of changes. 2 change blocks.
2 lines changed or deleted, 2 lines changed or added.

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)