"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/ccmain/pageiterator.cpp" between
tesseract-5.0.0-rc2.tar.gz and tesseract-5.0.0-rc3.tar.gz

About: Tesseract is an Optical Character Recognition (OCR) engine. Release candidate.

pageiterator.cpp  (tesseract-5.0.0-rc2):pageiterator.cpp  (tesseract-5.0.0-rc3)
skipping to change at line 30 skipping to change at line 30
#include <allheaders.h> #include <allheaders.h>
#include <tesseract/pageiterator.h> #include <tesseract/pageiterator.h>
#include "helpers.h" #include "helpers.h"
#include "pageres.h" #include "pageres.h"
#include "tesseractclass.h" #include "tesseractclass.h"
#include <algorithm> #include <algorithm>
namespace tesseract { namespace tesseract {
PageIterator::PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, PageIterator::PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int scaled_yres, int rect_left, int rect_top,
int rect_left, int rect_top, int rect_width, int rect int rect_width, int rect_height)
_height) : page_res_(page_res),
: page_res_(page_res) tesseract_(tesseract),
, tesseract_(tesseract) word_(nullptr),
, word_(nullptr) word_length_(0),
, word_length_(0) blob_index_(0),
, blob_index_(0) cblob_it_(nullptr),
, cblob_it_(nullptr) include_upper_dots_(false),
, include_upper_dots_(false) include_lower_dots_(false),
, include_lower_dots_(false) scale_(scale),
, scale_(scale) scaled_yres_(scaled_yres),
, scaled_yres_(scaled_yres) rect_left_(rect_left),
, rect_left_(rect_left) rect_top_(rect_top),
, rect_top_(rect_top) rect_width_(rect_width),
, rect_width_(rect_width) rect_height_(rect_height) {
, rect_height_(rect_height) {
it_ = new PAGE_RES_IT(page_res); it_ = new PAGE_RES_IT(page_res);
PageIterator::Begin(); PageIterator::Begin();
} }
PageIterator::~PageIterator() { PageIterator::~PageIterator() {
delete it_; delete it_;
delete cblob_it_; delete cblob_it_;
} }
/** /**
* PageIterators may be copied! This makes it possible to iterate over * PageIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to * all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. * objects at a higher level.
*/ */
PageIterator::PageIterator(const PageIterator &src) PageIterator::PageIterator(const PageIterator &src)
: page_res_(src.page_res_) : page_res_(src.page_res_),
, tesseract_(src.tesseract_) tesseract_(src.tesseract_),
, word_(nullptr) word_(nullptr),
, word_length_(src.word_length_) word_length_(src.word_length_),
, blob_index_(src.blob_index_) blob_index_(src.blob_index_),
, cblob_it_(nullptr) cblob_it_(nullptr),
, include_upper_dots_(src.include_upper_dots_) include_upper_dots_(src.include_upper_dots_),
, include_lower_dots_(src.include_lower_dots_) include_lower_dots_(src.include_lower_dots_),
, scale_(src.scale_) scale_(src.scale_),
, scaled_yres_(src.scaled_yres_) scaled_yres_(src.scaled_yres_),
, rect_left_(src.rect_left_) rect_left_(src.rect_left_),
, rect_top_(src.rect_top_) rect_top_(src.rect_top_),
, rect_width_(src.rect_width_) rect_width_(src.rect_width_),
, rect_height_(src.rect_height_) { rect_height_(src.rect_height_) {
it_ = new PAGE_RES_IT(*src.it_); it_ = new PAGE_RES_IT(*src.it_);
BeginWord(src.blob_index_); BeginWord(src.blob_index_);
} }
const PageIterator &PageIterator::operator=(const PageIterator &src) { const PageIterator &PageIterator::operator=(const PageIterator &src) {
page_res_ = src.page_res_; page_res_ = src.page_res_;
tesseract_ = src.tesseract_; tesseract_ = src.tesseract_;
include_upper_dots_ = src.include_upper_dots_; include_upper_dots_ = src.include_upper_dots_;
include_lower_dots_ = src.include_lower_dots_; include_lower_dots_ = src.include_lower_dots_;
scale_ = src.scale_; scale_ = src.scale_;
skipping to change at line 204 skipping to change at line 205
if (it_->block() == nullptr) { if (it_->block() == nullptr) {
return false; // Already at the end! return false; // Already at the end!
} }
if (it_->word() == nullptr) { if (it_->word() == nullptr) {
return true; // In an image block. return true; // In an image block.
} }
switch (level) { switch (level) {
case RIL_BLOCK: case RIL_BLOCK:
return blob_index_ == 0 && it_->block() != it_->prev_block(); return blob_index_ == 0 && it_->block() != it_->prev_block();
case RIL_PARA: case RIL_PARA:
return blob_index_ == 0 && (it_->block() != it_->prev_block() || return blob_index_ == 0 &&
it_->row()->row->para() != it_->prev_row()->ro (it_->block() != it_->prev_block() ||
w->para()); it_->row()->row->para() != it_->prev_row()->row->para());
case RIL_TEXTLINE: case RIL_TEXTLINE:
return blob_index_ == 0 && it_->row() != it_->prev_row(); return blob_index_ == 0 && it_->row() != it_->prev_row();
case RIL_WORD: case RIL_WORD:
return blob_index_ == 0; return blob_index_ == 0;
case RIL_SYMBOL: case RIL_SYMBOL:
return true; return true;
} }
return false; return false;
} }
/** /**
* Returns whether the iterator is positioned at the last element in a * Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block) * given level. (e.g. the last word in a line, the last line in a block)
*/ */
bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel e bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
lement) const { PageIteratorLevel element) const {
if (Empty(element)) { if (Empty(element)) {
return true; // Already at the end! return true; // Already at the end!
} }
// The result is true if we step forward by element and find we are // The result is true if we step forward by element and find we are
// at the end of the page or at beginning of *all* levels in: // at the end of the page or at beginning of *all* levels in:
// [level, element). // [level, element).
// When there is more than one level difference between element and level, // When there is more than one level difference between element and level,
// we could for instance move forward one symbol and still be at the first // we could for instance move forward one symbol and still be at the first
// word on a line, so we also have to be at the first symbol in a word. // word on a line, so we also have to be at the first symbol in a word.
PageIterator next(*this); PageIterator next(*this);
skipping to change at line 283 skipping to change at line 286
// (0,0)->(1,1). // (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates // If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle. // relate to the original (full) image, rather than the rectangle.
/** /**
* Returns the bounding rectangle of the current object at the given level in * Returns the bounding rectangle of the current object at the given level in
* the coordinates of the working image that is pix_binary(). * the coordinates of the working image that is pix_binary().
* See comment on coordinate system above. * See comment on coordinate system above.
* Returns false if there is no such object at the current position. * Returns false if there is no such object at the current position.
*/ */
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int * bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left,
top, int *right, int *top, int *right,
int *bottom) const { int *bottom) const {
if (Empty(level)) { if (Empty(level)) {
return false; return false;
} }
TBOX box; TBOX box;
PARA *para = nullptr; PARA *para = nullptr;
switch (level) { switch (level) {
case RIL_BLOCK: case RIL_BLOCK:
box = it_->block()->block->restricted_bounding_box(include_upper_dots_, in box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
clude_lower_dots_); include_lower_dots_);
break; break;
case RIL_PARA: case RIL_PARA:
para = it_->row()->row->para(); para = it_->row()->row->para();
// Fall through. // Fall through.
case RIL_TEXTLINE: case RIL_TEXTLINE:
box = it_->row()->row->restricted_bounding_box(include_upper_dots_, includ box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
e_lower_dots_); include_lower_dots_);
break; break;
case RIL_WORD: case RIL_WORD:
box = it_->word()->word->restricted_bounding_box(include_upper_dots_, incl box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
ude_lower_dots_); include_lower_dots_);
break; break;
case RIL_SYMBOL: case RIL_SYMBOL:
if (cblob_it_ == nullptr) { if (cblob_it_ == nullptr) {
box = it_->word()->box_word->BlobBox(blob_index_); box = it_->word()->box_word->BlobBox(blob_index_);
} else { } else {
box = cblob_it_->data()->bounding_box(); box = cblob_it_->data()->bounding_box();
} }
} }
if (level == RIL_PARA) { if (level == RIL_PARA) {
PageIterator other = *this; PageIterator other = *this;
other.Begin(); other.Begin();
do { do {
if (other.it_->block() && other.it_->block()->block == it_->block()->block if (other.it_->block() &&
&& other.it_->block()->block == it_->block()->block &&
other.it_->row() && other.it_->row()->row && other.it_->row()->row->pa other.it_->row() && other.it_->row()->row &&
ra() == para) { other.it_->row()->row->para() == para) {
box = box.bounding_union(other.it_->row()->row->bounding_box()); box = box.bounding_union(other.it_->row()->row->bounding_box());
} }
} while (other.Next(RIL_TEXTLINE)); } while (other.Next(RIL_TEXTLINE));
} }
if (level != RIL_SYMBOL || cblob_it_ != nullptr) { if (level != RIL_SYMBOL || cblob_it_ != nullptr) {
box.rotate(it_->block()->block->re_rotation()); box.rotate(it_->block()->block->re_rotation());
} }
// Now we have a box in tesseract coordinates relative to the image rectangle, // Now we have a box in tesseract coordinates relative to the image rectangle,
// we have to convert the coords to a top-down system. // we have to convert the coords to a top-down system.
const int pix_height = pixGetHeight(tesseract_->pix_binary()); const int pix_height = pixGetHeight(tesseract_->pix_binary());
skipping to change at line 340 skipping to change at line 349
*bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
return true; return true;
} }
/** /**
* Returns the bounding rectangle of the current object at the given level in * Returns the bounding rectangle of the current object at the given level in
* coordinates of the original image. * coordinates of the original image.
* See comment on coordinate system above. * See comment on coordinate system above.
* Returns false if there is no such object at the current position. * Returns false if there is no such object at the current position.
*/ */
bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, int bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top,
*right, int *right, int *bottom) const {
int *bottom) const {
return BoundingBox(level, 0, left, top, right, bottom); return BoundingBox(level, 0, left, top, right, bottom);
} }
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int * bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
left, int *top, int *left, int *top, int *right,
int *right, int *bottom) const { int *bottom) const {
if (!BoundingBoxInternal(level, left, top, right, bottom)) { if (!BoundingBoxInternal(level, left, top, right, bottom)) {
return false; return false;
} }
// Convert to the coordinate system of the original image. // Convert to the coordinate system of the original image.
*left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, rect_le *left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_,
ft_ + rect_width_); rect_left_ + rect_width_);
*top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, rect_top_ + *top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_,
rect_height_); rect_top_ + rect_height_);
*right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, *l *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
eft, *left, rect_left_ + rect_width_);
rect_left_ + rect_width_); *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
*bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, * *top, rect_top_ + rect_height_);
top,
rect_top_ + rect_height_);
return true; return true;
} }
/** Return that there is no such object at a given level. */ /** Return that there is no such object at a given level. */
bool PageIterator::Empty(PageIteratorLevel level) const { bool PageIterator::Empty(PageIteratorLevel level) const {
if (it_->block() == nullptr) { if (it_->block() == nullptr) {
return true; // Already at the end! return true; // Already at the end!
} }
if (it_->word() == nullptr && level != RIL_BLOCK) { if (it_->word() == nullptr && level != RIL_BLOCK) {
return true; // image block return true; // image block
skipping to change at line 443 skipping to change at line 455
* if rendered after recognition, making an xor reconstruction inaccurate, but * if rendered after recognition, making an xor reconstruction inaccurate, but
* an or construction better. Before recognition, symbol-level reconstruction * an or construction better. Before recognition, symbol-level reconstruction
* should be good, even with xor, since the images come from the connected * should be good, even with xor, since the images come from the connected
* components. * components.
*/ */
Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const { Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
int left, top, right, bottom; int left, top, right, bottom;
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) { if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) {
return nullptr; return nullptr;
} }
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() ! if (level == RIL_SYMBOL && cblob_it_ != nullptr &&
= 0) { cblob_it_->data()->area() != 0) {
return cblob_it_->data()->render(); return cblob_it_->data()->render();
} }
Box *box = boxCreate(left, top, right - left, bottom - top); Box *box = boxCreate(left, top, right - left, bottom - top);
Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
boxDestroy(&box); boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) { if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well. // Clip to the block polygon as well.
TBOX mask_box; TBOX mask_box;
Image mask = it_->block()->block->render_mask(&mask_box); Image mask = it_->block()->block->render_mask(&mask_box);
int mask_x = left - mask_box.left(); int mask_x = left - mask_box.left();
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
// AND the mask and pix, putting the result in pix. // AND the mask and pix, putting the result in pix.
pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y),
), pixGetWidth(pix), pixGetHeight(pix), PIX_SRC & PIX_DST, mask,
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x), std::max(0, mask_x), std::max(0, mask_y));
std::max(0, mask_y));
mask.destroy(); mask.destroy();
} }
return pix; return pix;
} }
/** /**
* Returns an image of the current object at the given level in greyscale * Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use BinaryImage. * if available in the input. To guarantee a binary image use BinaryImage.
* NOTE that in order to give the best possible image, the bounds are * NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied * expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned * padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates * in (left,top). These will most likely not match the coordinates
* returned by BoundingBox. * returned by BoundingBox.
* If you do not supply an original image, you will get a binary one. * If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use. * Use pixDestroy to delete the image after use.
*/ */
Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding,
img, int *left, Pix *original_img, int *left, int *top) const {
int *top) const {
int right, bottom; int right, bottom;
if (!BoundingBox(level, left, top, &right, &bottom)) { if (!BoundingBox(level, left, top, &right, &bottom)) {
return nullptr; return nullptr;
} }
if (original_img == nullptr) { if (original_img == nullptr) {
return GetBinaryImage(level); return GetBinaryImage(level);
} }
// Expand the box. // Expand the box.
*left = std::max(*left - padding, 0); *left = std::max(*left - padding, 0);
skipping to change at line 503 skipping to change at line 516
if (level == RIL_BLOCK || level == RIL_PARA) { if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well. // Clip to the block polygon as well.
TBOX mask_box; TBOX mask_box;
Image mask = it_->block()->block->render_mask(&mask_box); Image mask = it_->block()->block->render_mask(&mask_box);
// Copy the mask registered correctly into an image the size of grey_pix. // Copy the mask registered correctly into an image the size of grey_pix.
int mask_x = *left - mask_box.left(); int mask_x = *left - mask_box.left();
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
int width = pixGetWidth(grey_pix); int width = pixGetWidth(grey_pix);
int height = pixGetHeight(grey_pix); int height = pixGetHeight(grey_pix);
Image resized_mask = pixCreate(width, height, 1); Image resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width,
height, PIX_SRC, height, PIX_SRC, mask, std::max(0, mask_x),
mask, std::max(0, mask_x), std::max(0, mask_y)); std::max(0, mask_y));
mask.destroy(); mask.destroy();
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1) pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
; 2 * padding + 1);
pixInvert(resized_mask, resized_mask); pixInvert(resized_mask, resized_mask);
pixSetMasked(grey_pix, resized_mask, UINT32_MAX); pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
resized_mask.destroy(); resized_mask.destroy();
} }
return grey_pix; return grey_pix;
} }
/** /**
* Returns the baseline of the current object at the given level. * Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2). * The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical! * WARNING: with vertical text, baselines may be vertical!
*/ */
bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const { int *y2) const {
if (it_->word() == nullptr) { if (it_->word() == nullptr) {
return false; // Already at the end! return false; // Already at the end!
} }
ROW *row = it_->row()->row; ROW *row = it_->row()->row;
WERD *word = it_->word()->word; WERD *word = it_->word()->word;
TBOX box = TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box()
(level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() : row->b : row->bounding_box();
ounding_box();
int left = box.left(); int left = box.left();
ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5)); ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5));
int right = box.right(); int right = box.right();
ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5)); ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5));
// Rotate to image coordinates and convert to global image coords. // Rotate to image coordinates and convert to global image coords.
startpt.rotate(it_->block()->block->re_rotation()); startpt.rotate(it_->block()->block->re_rotation());
endpt.rotate(it_->block()->block->re_rotation()); endpt.rotate(it_->block()->block->re_rotation());
*x1 = startpt.x() / scale_ + rect_left_; *x1 = startpt.x() / scale_ + rect_left_;
*y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
*x2 = endpt.x() / scale_ + rect_left_; *x2 = endpt.x() / scale_ + rect_left_;
*y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
return true; return true;
} }
void PageIterator::RowAttributes(float *row_height, float *descenders,
float *ascenders) const {
*row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
it_->row()->row->descenders();
*descenders = it_->row()->row->descenders();
*ascenders = it_->row()->row->ascenders();
}
void PageIterator::Orientation(tesseract::Orientation *orientation, void PageIterator::Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction, tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order, tesseract::TextlineOrder *textline_order,
float *deskew_angle) const { float *deskew_angle) const {
BLOCK *block = it_->block()->block; BLOCK *block = it_->block()->block;
// Orientation // Orientation
FCOORD up_in_image(0.0, 1.0); FCOORD up_in_image(0.0, 1.0);
up_in_image.unrotate(block->classify_rotation()); up_in_image.unrotate(block->classify_rotation());
up_in_image.rotate(block->re_rotation()); up_in_image.rotate(block->re_rotation());
skipping to change at line 567 skipping to change at line 591
} }
} else if (up_in_image.x() > 0.0F) { } else if (up_in_image.x() > 0.0F) {
*orientation = ORIENTATION_PAGE_RIGHT; *orientation = ORIENTATION_PAGE_RIGHT;
} else { } else {
*orientation = ORIENTATION_PAGE_LEFT; *orientation = ORIENTATION_PAGE_LEFT;
} }
// Writing direction // Writing direction
bool is_vertical_text = (block->classify_rotation().x() == 0.0); bool is_vertical_text = (block->classify_rotation().x() == 0.0);
bool right_to_left = block->right_to_left(); bool right_to_left = block->right_to_left();
*writing_direction = is_vertical_text ? WRITING_DIRECTION_TOP_TO_BOTTOM *writing_direction = is_vertical_text
: (right_to_left ? WRITING_DIRECTION_RIG ? WRITING_DIRECTION_TOP_TO_BOTTOM
HT_TO_LEFT : (right_to_left ? WRITING_DIRECTION_RIGHT_TO_LEFT
: WRITING_DIRECTION_LEF : WRITING_DIRECTION_LEFT_TO_RIGHT);
T_TO_RIGHT);
// Textline Order // Textline Order
const bool is_mongolian = false; // TODO(eger): fix me const bool is_mongolian = false; // TODO(eger): fix me
*textline_order = is_vertical_text ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RI *textline_order = is_vertical_text
GHT ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RIGHT
: TEXTLINE_ORDER_RIGHT_TO_L : TEXTLINE_ORDER_RIGHT_TO_LEFT)
EFT) : TEXTLINE_ORDER_TOP_TO_BOTTOM;
: TEXTLINE_ORDER_TOP_TO_BOTTOM;
// Deskew angle // Deskew angle
FCOORD skew = block->skew(); // true horizontal for textlines FCOORD skew = block->skew(); // true horizontal for textlines
*deskew_angle = -skew.angle(); *deskew_angle = -skew.angle();
} }
void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, bool * void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
is_list_item, bool *is_list_item, bool *is_crown,
bool *is_crown, int *first_line_indent) const { int *first_line_indent) const {
*just = tesseract::JUSTIFICATION_UNKNOWN; *just = tesseract::JUSTIFICATION_UNKNOWN;
if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
!it_->row()->row->para()->model) { !it_->row()->row->para()->model) {
return; return;
} }
PARA *para = it_->row()->row->para(); PARA *para = it_->row()->row->para();
*is_list_item = para->is_list_item; *is_list_item = para->is_list_item;
*is_crown = para->is_very_first_or_continuation; *is_crown = para->is_very_first_or_continuation;
*first_line_indent = para->model->first_indent() - para->model->body_indent(); *first_line_indent = para->model->first_indent() - para->model->body_indent();
skipping to change at line 616 skipping to change at line 643
blob_index_ = 0; blob_index_ = 0;
word_ = nullptr; word_ = nullptr;
return; return;
} }
if (word_res->best_choice != nullptr) { if (word_res->best_choice != nullptr) {
// Recognition has been done, so we are using the box_word, which // Recognition has been done, so we are using the box_word, which
// is already baseline denormalized. // is already baseline denormalized.
word_length_ = word_res->best_choice->length(); word_length_ = word_res->best_choice->length();
if (word_res->box_word != nullptr) { if (word_res->box_word != nullptr) {
if (word_res->box_word->length() != static_cast<unsigned>(word_length_)) { if (word_res->box_word->length() != static_cast<unsigned>(word_length_)) {
tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
word_length_, word_length_, word_res->best_choice->unichar_string().c_str(),
word_res->best_choice->unichar_string().c_str(), word_res->box_w word_res->box_word->length());
ord->length());
word_res->box_word->bounding_box().print(); word_res->box_word->bounding_box().print();
} }
ASSERT_HOST(word_res->box_word->length() == static_cast<unsigned>(word_len ASSERT_HOST(word_res->box_word->length() ==
gth_)); static_cast<unsigned>(word_length_));
} }
word_ = nullptr; word_ = nullptr;
// We will be iterating the box_word. // We will be iterating the box_word.
delete cblob_it_; delete cblob_it_;
cblob_it_ = nullptr; cblob_it_ = nullptr;
} else { } else {
// No recognition yet, so a "symbol" is a cblob. // No recognition yet, so a "symbol" is a cblob.
word_ = word_res->word; word_ = word_res->word;
ASSERT_HOST(word_->cblob_list() != nullptr); ASSERT_HOST(word_->cblob_list() != nullptr);
word_length_ = word_->cblob_list()->length(); word_length_ = word_->cblob_list()->length();
 End of changes. 25 change blocks. 
103 lines changed or deleted 101 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)