"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/ocrmypdf/_pipeline.py" between
OCRmyPDF-9.5.0.tar.gz and OCRmyPDF-9.6.0.tar.gz

About: OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.

_pipeline.py  (OCRmyPDF-9.5.0):_pipeline.py  (OCRmyPDF-9.6.0)
skipping to change at line 321 skipping to change at line 321
output_file, output_file,
xres=canvas_dpi, xres=canvas_dpi,
yres=canvas_dpi, yres=canvas_dpi,
raster_device='jpeggray', raster_device='jpeggray',
log=page_context.log, log=page_context.log,
page_dpi=(page_dpi, page_dpi), page_dpi=(page_dpi, page_dpi),
pageno=page_context.pageinfo.pageno + 1, pageno=page_context.pageinfo.pageno + 1,
) )
return output_file return output_file
def describe_rotation(page_context, orient_conf, correction) -> str:
    """Describe the page rotation we are going to perform (or not perform).

    Builds a one-line, human-readable summary intended for the log.

    Args:
        page_context: Object exposing ``pageinfo.rotation`` (the rotation
            already recorded for the page) and
            ``options.rotate_pages_threshold`` (minimum confidence needed
            before a rotation is applied).
        orient_conf: Orientation result exposing ``angle`` and
            ``confidence``.
        correction: The rotation, in degrees, that would be applied to the
            page. ``0`` means no rotation is needed.

    Returns:
        A string of the form
        ``"[with existing rotation X, ]page is facing Y, confidence N.NN - action"``.
    """
    direction = {0: '⇧', 90: '⇨', 180: '⇩', 270: '⇦'}
    turns = {0: ' ', 90: '⬏', 180: '↻', 270: '⬑'}

    existing_rotation = page_context.pageinfo.rotation
    confident = (
        orient_conf.confidence >= page_context.options.rotate_pages_threshold
    )

    if confident:
        if correction != 0:
            # Use .get() like the direction lookups below: this function only
            # produces a log message, so an unexpected angle should show up
            # as '?' instead of raising KeyError.
            action = 'will rotate ' + turns.get(correction, '?')
        else:
            action = 'rotation appears correct'
    else:
        if correction != 0:
            action = 'confidence too low to rotate'
        else:
            action = 'no change'

    facing = ''
    if existing_rotation != 0:
        existing_arrow = direction.get(existing_rotation, '?')
        facing = f"with existing rotation {existing_arrow}, "
    facing += f"page is facing {direction.get(orient_conf.angle, '?')}"

    return f"{facing}, confidence {orient_conf.confidence:.2f} - {action}"
def get_orientation_correction(preview, page_context): def get_orientation_correction(preview, page_context):
""" """
Work out orientation correct for each page. Work out orientation correct for each page.
We ask Ghostscript to draw a preview page, which will rasterize with the We ask Ghostscript to draw a preview page, which will rasterize with the
current /Rotate applied, and then ask Tesseract which way the page is current /Rotate applied, and then ask Tesseract which way the page is
oriented. If the value of /Rotate is correct (e.g., a user already oriented. If the value of /Rotate is correct (e.g., a user already
manually fixed rotation), then Tesseract will say the page is pointing manually fixed rotation), then Tesseract will say the page is pointing
up and the correction is zero. Otherwise, the orientation found by up and the correction is zero. Otherwise, the orientation found by
Tesseract represents the clockwise rotation, or the counterclockwise Tesseract represents the clockwise rotation, or the counterclockwise
skipping to change at line 347 skipping to change at line 375
""" """
orient_conf = tesseract.get_orientation( orient_conf = tesseract.get_orientation(
preview, preview,
engine_mode=page_context.options.tesseract_oem, engine_mode=page_context.options.tesseract_oem,
timeout=page_context.options.tesseract_timeout, timeout=page_context.options.tesseract_timeout,
log=page_context.log, log=page_context.log,
tesseract_env=page_context.options.tesseract_env, tesseract_env=page_context.options.tesseract_env,
) )
direction = {0: '⇧', 90: '⇨', 180: '⇩', 270: '⇦'}
existing_rotation = page_context.pageinfo.rotation
correction = orient_conf.angle % 360 correction = orient_conf.angle % 360
page_context.log.info(describe_rotation(page_context, orient_conf, correctio
apply_correction = False n))
action = '' if (
if orient_conf.confidence >= page_context.options.rotate_pages_threshold: orient_conf.confidence >= page_context.options.rotate_pages_threshold
if correction != 0: and correction != 0
apply_correction = True ):
action = ' - will rotate'
else:
action = ' - rotation appears correct'
else:
if correction != 0:
action = ' - confidence too low to rotate'
else:
action = ' - no change'
facing = ''
if existing_rotation != 0:
facing = 'with existing rotation {}, '.format(
direction.get(existing_rotation, '?')
)
facing += 'page is facing {}'.format(direction.get(orient_conf.angle, '?'))
page_context.log.debug(
'{pagenum:4d}: {facing}, confidence {conf:.2f}{action}'.format(
pagenum=page_context.pageinfo.pageno,
facing=facing,
conf=orient_conf.confidence,
action=action,
)
)
if apply_correction:
return correction return correction
return 0 return 0
def rasterize( def rasterize(
input_file, page_context, correction=0, output_tag='', remove_vectors=None input_file, page_context, correction=0, output_tag='', remove_vectors=None
): ):
colorspaces = ['pngmono', 'pnggray', 'png256', 'png16m'] colorspaces = ['pngmono', 'pnggray', 'png256', 'png16m']
device_idx = 0 device_idx = 0
if remove_vectors is None: if remove_vectors is None:
remove_vectors = page_context.options.remove_vectors remove_vectors = page_context.options.remove_vectors
 End of changes. 4 change blocks. 
36 lines changed or deleted 36 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)