Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

List of all members.

Public Member Functions

 ParagraphModelSmearer (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
void Smear ()

Detailed Description

Definition at line 234 of file paragraphs_internal.h.


Constructor & Destructor Documentation

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1258 of file paragraphs.cpp.

: theory_(theory),
  rows_(rows),
  row_start_(row_start),
  row_end_(row_end) {
  // Validate the requested row range first. On failure, collapse the range
  // to [0, 0) so that a later Smear() has no rows to iterate over.
  const bool args_ok =
      AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end);
  if (!args_ok) {
    row_start_ = row_end_ = 0;
    return;
  }

  // Seed open_models_ with one empty model set for every index in
  // [row_start - 1, row_end], i.e. one slot before the first row through
  // one slot at the end bound.
  SetOfModels empty_set;
  int idx = row_start - 1;
  while (idx <= row_end) {
    open_models_.push_back(empty_set);
    ++idx;
  }
}

Member Function Documentation

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1305 of file paragraphs.cpp.

{
  // Walks rows [row_start_, row_end_) and, for each non-empty row, adds
  // start-line or body-line hypotheses drawn from the currently "open"
  // models (and, if the row is still ambiguous, from all non-centered models
  // in the theory). Open models are recomputed for the remaining rows
  // whenever a row ends up with a known line type.
  CalculateOpenModels(row_start_, row_end_);

  // For each row which we're unsure about (that is, it is LT_UNKNOWN or
  // we have multiple LT_START hypotheses), see if there's a model that
  // was recently used (an "open" model) which might model it well.
  for (int i = row_start_; i < row_end_; i++) {
    RowScratchRegisters &row = (*rows_)[i];
    // Rows without any words carry no evidence; leave them untouched.
    if (row.ri_->num_words == 0)
      continue;

    // Step One:
    //   Figure out if there are "open" models which are left-aligned or
    //   right-aligned.  This is important for determining whether the
    //   "first" word in a row would fit at the "end" of the previous row.
    bool left_align_open = false;
    bool right_align_open = false;
    for (int m = 0; m < OpenModels(i).size(); m++) {
      switch (OpenModels(i)[m]->justification()) {
        case JUSTIFICATION_LEFT: left_align_open = true; break;
        case JUSTIFICATION_RIGHT: right_align_open = true; break;
        // Any other justification counts as open on both sides.
        default: left_align_open = right_align_open = true;
      }
    }

    // Step Two:
    //   Use that knowledge to figure out if this row is likely to
    //   start a paragraph.
    bool likely_start;
    if (i == 0) {
      // There is no previous row to compare against.
      likely_start = true;
    } else {
      if ((left_align_open && right_align_open) ||
          (!left_align_open && !right_align_open)) {
        // Both sides (or neither) are open: accept a likely start under
        // either justification.
        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
                                            JUSTIFICATION_LEFT) ||
                       LikelyParagraphStart((*rows_)[i - 1], row,
                                            JUSTIFICATION_RIGHT);
      } else if (left_align_open) {
        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
                                            JUSTIFICATION_LEFT);
      } else {
        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
                                            JUSTIFICATION_RIGHT);
      }
    }

    // Step Three:
    //   If this text line seems like an obvious first line of an
    //   open model, or an obvious continuation of an existing
    //   modelled paragraph, mark it up.
    if (likely_start) {
      // Add Start Hypotheses for all Open models that fit.
      for (int m = 0; m < OpenModels(i).size(); m++) {
        if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
          row.AddStartLine(OpenModels(i)[m]);
        }
      }
    } else {
      // Add relevant body line hypotheses.
      // NOTE: likely_start is forced to true when i == 0, so this branch is
      // only reached with i > 0; the NonCenteredModels() fallback below is
      // purely defensive.
      SetOfModels last_line_models;
      if (i > 0) {
        (*rows_)[i - 1].StrongHypotheses(&last_line_models);
      } else {
        theory_->NonCenteredModels(&last_line_models);
      }
      for (int m = 0; m < last_line_models.size(); m++) {
        const ParagraphModel *model = last_line_models[m];
        if (ValidBodyLine(rows_, i, model))
          row.AddBodyLine(model);
      }
    }

    // Step Four:
    //   If we're still quite unsure about this line, go through all
    //   models in our theory and see if this row could be the start
    //   of any of our models.
    if (row.GetLineType() == LT_UNKNOWN ||
        (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
      SetOfModels all_models;
      theory_->NonCenteredModels(&all_models);
      for (int m = 0; m < all_models.size(); m++) {
        if (ValidFirstLine(rows_, i, all_models[m])) {
          row.AddStartLine(all_models[m]);
        }
      }
    }

    // Step Five:
    //   Since we may have updated the hypotheses about this row, we need
    //   to recalculate the Open models for the rest of rows[i + 1, row_end)
    if (row.GetLineType() != LT_UNKNOWN) {
      CalculateOpenModels(i + 1, row_end_);
    }
  }
}

The documentation for this class was generated from the following files: