Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
oldbasel.cpp File Reference
#include "mfcpch.h"
#include "ccstruct.h"
#include "statistc.h"
#include "quadlsq.h"
#include "detlinefit.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "textord.h"
#include "tprintf.h"

Go to the source code of this file.

Namespaces

namespace  tesseract

Macros

#define EXTERN
#define TURNLIMIT   1 /*min size for turning point */
#define X_HEIGHT_FRACTION   0.7 /*x-height/caps height */
#define DESCENDER_FRACTION   0.5 /*descender/x-height */
#define MIN_ASC_FRACTION   0.20 /*min size of ascenders */
#define MIN_DESC_FRACTION   0.25 /*min size of descenders */
#define MINASCRISE   2.0 /*min ascender/desc step */
#define MAXHEIGHTVARIANCE   0.15 /*accepted variation in x-height */
#define MAXHEIGHT   300 /*max blob height */
#define MAXOVERLAP   0.1 /*max 10% missed overlap */
#define MAXBADRUN   2 /*max non best for failed */
#define HEIGHTBUCKETS   200 /* Num of buckets */
#define DELTAHEIGHT   5.0 /* Small amount of diff */
#define GOODHEIGHT   5
#define MAXLOOPS   10
#define MODENUM   10
#define MAXPARTS   6
#define SPLINESIZE   23
#define ABS(x)   ((x)<0 ? (-(x)) : (x))

Functions

int get_blob_coords (TO_ROW *row, inT32 lineheight, TBOX *blobcoords, BOOL8 &holed_line, int &outcount)
void make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
void make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
int partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
void merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
int get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
int choose_partition (register float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
int partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
 *merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
int segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
BOOL8 split_stepped_spline (QSPLINE *baseline, float jumplimit, int xcoords[], int xstarts[], int &segments)
void insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments)
void find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
void old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
void make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
void find_top_modes (STATS *stats, int statnum, int modelist[], int modenum)
void pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)

Variables

EXTERN bool textord_really_old_xheight = FALSE
EXTERN bool textord_oldbl_debug = FALSE
EXTERN bool textord_debug_baselines = FALSE
EXTERN bool textord_oldbl_paradef = TRUE
EXTERN bool textord_oldbl_split_splines = TRUE
EXTERN bool textord_oldbl_merge_parts = TRUE
EXTERN bool oldbl_corrfix = TRUE
EXTERN bool oldbl_xhfix = FALSE
EXTERN bool textord_ocropus_mode = FALSE
EXTERN double oldbl_xhfract = 0.4
EXTERN int oldbl_holed_losscount = 10
EXTERN double oldbl_dot_error_size = 1.26
EXTERN double textord_oldbl_jumplimit = 0.15
const int kMinModeFactorOcropus = 32
const int kMinModeFactor = 12

Macro Definition Documentation

#define ABS (   x)    ((x)<0 ? (-(x)) : (x))

Definition at line 74 of file oldbasel.cpp.

#define DELTAHEIGHT   5.0 /* Small amount of diff */

Definition at line 67 of file oldbasel.cpp.

#define DESCENDER_FRACTION   0.5 /*descender/x-height */

Definition at line 58 of file oldbasel.cpp.

#define EXTERN

Definition at line 36 of file oldbasel.cpp.

#define GOODHEIGHT   5

Definition at line 68 of file oldbasel.cpp.

#define HEIGHTBUCKETS   200 /* Num of buckets */

Definition at line 66 of file oldbasel.cpp.

#define MAXBADRUN   2 /*max non best for failed */

Definition at line 65 of file oldbasel.cpp.

#define MAXHEIGHT   300 /*max blob height */

Definition at line 63 of file oldbasel.cpp.

#define MAXHEIGHTVARIANCE   0.15 /*accepted variation in x-height */

Definition at line 62 of file oldbasel.cpp.

#define MAXLOOPS   10

Definition at line 69 of file oldbasel.cpp.

#define MAXOVERLAP   0.1 /*max 10% missed overlap */

Definition at line 64 of file oldbasel.cpp.

#define MAXPARTS   6

Definition at line 71 of file oldbasel.cpp.

#define MIN_ASC_FRACTION   0.20 /*min size of ascenders */

Definition at line 59 of file oldbasel.cpp.

#define MIN_DESC_FRACTION   0.25 /*min size of descenders */

Definition at line 60 of file oldbasel.cpp.

#define MINASCRISE   2.0 /*min ascender/desc step */

Definition at line 61 of file oldbasel.cpp.

#define MODENUM   10

Definition at line 70 of file oldbasel.cpp.

#define SPLINESIZE   23

Definition at line 72 of file oldbasel.cpp.

#define TURNLIMIT   1 /*min size for turning point */

Definition at line 56 of file oldbasel.cpp.

#define X_HEIGHT_FRACTION   0.7 /*x-height/caps height */

Definition at line 57 of file oldbasel.cpp.


Function Documentation

int choose_partition ( register float  diff,
float  partdiffs[],
int  lastpart,
float  jumplimit,
float *  drift,
float *  lastdelta,
int *  partcount 
)

Definition at line 963 of file oldbasel.cpp.

{
  // Chooses which partition (an index into partdiffs[]) the offset `diff`
  // belongs to and returns that index.
  //   diff       offset of the current point
  //   partdiffs  reference offset of each partition found so far
  //   lastpart   partition chosen on the previous call; < 0 means "first
  //              point", which seeds partition 0 and resets the state
  //   jumplimit  offsets further than this from every partition start a new
  //              one, provided fewer than MAXPARTS exist
  //   drift      running drift subtracted from every offset (updated here)
  //   lastdelta  delta recorded by the previous call (updated here)
  //   partcount  number of partitions in use (incremented on a new one)
  int partition;                 /*partition no */
  int bestpart;                  /*best new partition */
  float bestdelta;               /*best gap from a part */
  float delta;                   /*diff from part */

  if (lastpart < 0) {
    partdiffs[0] = diff;
    lastpart = 0;                /*first point */
    *drift = 0.0f;
    *lastdelta = 0.0f;
  }
                                 /*adjusted diff from part */
  delta = diff - partdiffs[lastpart] - *drift;
  if (textord_oldbl_debug) {
    tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
  }
  if (ABS (delta) > jumplimit / 2) {
                                 /*delta on part 0 */
    bestdelta = diff - partdiffs[0] - *drift;
    bestpart = 0;                /*0 best so far */
    for (partition = 1; partition < *partcount; partition++) {
      delta = diff - partdiffs[partition] - *drift;
      if (ABS (delta) < ABS (bestdelta)) {
        bestdelta = delta;
        bestpart = partition;    /*part with nearest jump */
      }
    }
    delta = bestdelta;
                                 /*too far away */
    if (ABS (bestdelta) > jumplimit
        && *partcount < MAXPARTS) {  /*and spare part left */
      bestpart = (*partcount)++; /*best was new one */
                                 /*start new one */
      partdiffs[bestpart] = diff - *drift;
      delta = 0.0f;
    }
  }
  else {
    bestpart = lastpart;         /*best was last one */
  }

  // The drift is only updated while the point stays in the same partition
  // and either the delta itself or its change since the last call is small.
  if (bestpart == lastpart
      && (ABS (delta - *lastdelta) < jumplimit / 2
          || ABS (delta) < jumplimit / 2))
                                 /*smooth the drift */
    *drift = (3 * *drift + delta) / 3;
  *lastdelta = delta;

  if (textord_oldbl_debug) {
    tprintf ("P=%d\n", bestpart);
  }
  return bestpart;
}
void find_lesser_parts ( TO_ROW *  row,
TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  partcount,
int  bestpart 
)

Definition at line 1377 of file oldbasel.cpp.

{
  // Examines the blobs that are NOT in bestpart. Sets row->xheight to -1 when
  // more than MAXBADRUN of them occur consecutively (otherwise to +1), and
  // sets row->descdrop to the mean step of the most populated partition whose
  // mean (blob bottom - baseline) is <= -MINASCRISE (0 if there is none).
  float partsteps[MAXPARTS];     /*summed, then mean, step per part */
  int part;

  for (part = 0; part < partcount; part++)
    partsteps[part] = 0.0;

  int current_run = 0;           /*length of current non-best run */
  int longest_run = 0;           /*longest non-best run seen */
  for (int blob = 0; blob < blobcount; blob++) {
    int mid_x = (blobcoords[blob].left () + blobcoords[blob].right ()) >> 1;
    if (partids[blob] == bestpart) {
      current_run = 0;           /*run is broken */
      continue;
    }
    current_run++;
    if (current_run > longest_run)
      longest_run = current_run;
    partsteps[partids[blob]] += blobcoords[blob].bottom ()
      - row->baseline.y (mid_x);
  }

  /* the sign of xheight is used as a pass/fail flag here */
  row->xheight = (longest_run > MAXBADRUN) ? -1.0f : 1.0f;

  int up_votes = 0;              /*size of best upward part */
  int down_votes = 0;            /*size of best downward part */
  float best_up = 0.0;           /*mean step of best upward part */
  float best_down = 0.0;         /*mean step of best downward part */
  for (part = 0; part < partcount; part++) {
    if (part == bestpart)
      continue;
    /* an empty partition would otherwise divide by zero */
    if (partsizes[part] == 0)
      partsteps[part] = 0;
    else
      partsteps[part] /= partsizes[part];

    if (partsteps[part] >= MINASCRISE && partsizes[part] > up_votes) {
      best_up = partsteps[part];
      up_votes = partsizes[part];
    }
    if (partsteps[part] <= -MINASCRISE && partsizes[part] > down_votes) {
      best_down = partsteps[part];
      down_votes = partsizes[part];
    }
  }
  /* kept from the original; the result is not read afterwards */
  partsteps[bestpart] /= blobcount;
  row->descdrop = best_down;
}
void find_top_modes ( STATS *  stats,
int  statnum,
int  modelist[],
int  modenum 
)

Definition at line 1632 of file oldbasel.cpp.

{
  // Fills modelist[0..modenum-1] with pile indices of *stats in decreasing
  // order of pile count (ties broken by increasing index). An entry is set to
  // 0 when its count is not more than 1/mode_factor of the counts accepted so
  // far, or when no further pile qualifies.
  //   stats    histogram to search
  //   statnum  number of piles to examine (indices 0..statnum-1)
  //   modelist output array, at least modenum entries
  //   modenum  number of modes to extract
  int mode_count;
  int last_i = 0;                // index of the previously chosen mode
  int last_max = MAX_INT32;      // count of the previously chosen mode
  int i;
  int mode;
  int total_max = 0;             // sum of the counts chosen so far
  int mode_factor = textord_ocropus_mode ?
    kMinModeFactorOcropus : kMinModeFactor;

  for (mode_count = 0; mode_count < modenum; mode_count++) {
    mode = 0;
    for (i = 0; i < statnum; i++) {
      if (stats->pile_count (i) > stats->pile_count (mode)) {
        // Only piles ranked after the previous mode are eligible: a
        // strictly smaller count, or an equal count at a higher index.
        if ((stats->pile_count (i) < last_max) ||
            ((stats->pile_count (i) == last_max) && (i > last_i))) {
          mode = i;
        }
      }
    }
    last_i = mode;
    last_max = stats->pile_count (last_i);
    total_max += last_max;
    if (last_max <= total_max / mode_factor)
      mode = 0;                  // too weak to count as a mode
    modelist[mode_count] = mode;
  }
}
int get_blob_coords ( TO_ROW *  row,
inT32  lineheight,
TBOX *  blobcoords,
BOOL8 &  holed_line,
int &  outcount 
)

Definition at line 447 of file oldbasel.cpp.

{
  // Copies the bounding boxes of the row's blobs into blobcoords[], keeping
  // the first box, boxes taller than a quarter of lineheight, the box read
  // when the iterator has cycled, and roughly square boxes ("dots"). Any
  // other box is overwritten by the next one read. outcount receives the
  // number kept; holed_line is set when the longest run of consecutive
  // rejected boxes exceeds oldbl_holed_losscount. Returns 0 for an empty
  // row, otherwise a height estimate taken from the kept-height statistics.
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS heightstat (0, MAXHEIGHT);  /*heights of the tall boxes */

  if (blob_it.empty ())
    return 0;                    //nothing in the row

  int kept = 0;                  //next free slot in blobcoords
  int current_loss = 0;          //consecutive rejected boxes
  int worst_loss = 0;            //longest such run
  blob_it.mark_cycle_pt ();
  do {
    blobcoords[kept] = box_next_pre_chopped (&blob_it);
    TBOX &box = blobcoords[kept];

    bool tall_enough = box.height () > lineheight * 0.25;
    if (tall_enough)
      heightstat.add (box.height (), 1);

    bool keep = kept == 0 || tall_enough || blob_it.cycled_list ();
    if (!keep) {
      //width and height within a factor of each other: counts as dot
      keep = box.height () < box.width () * oldbl_dot_error_size
        && box.width () < box.height () * oldbl_dot_error_size;
    }

    if (keep) {
      kept++;
      current_loss = 0;
    }
    else {
      current_loss++;
      if (current_loss > worst_loss)
        worst_loss = current_loss;
    }
  }
  while (!blob_it.cycled_list ());

  holed_line = worst_loss > oldbl_holed_losscount;
  outcount = kept;               /*total blobs */

  if (heightstat.get_total () > 1)
    return (int) heightstat.ile (0.25);  /*guess x-height */
  return blobcoords[0].height ();
}
int get_ydiffs ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
float  ydiffs[] 
)

Definition at line 912 of file oldbasel.cpp.

{
  // Stores in ydiffs[i] the offset of blob i's bottom from the spline, with
  // the accumulated spline steps added back in. Returns the middle index of
  // the window of three consecutive blobs whose summed |offset| is smallest
  // (0 when there are fewer than three blobs).
  float window_sum = 0.0f;                 /*sum of last 3 |offsets| */
  float smallest_sum = (float) MAX_INT32;  /*best window so far */
  int best_middle = 0;                     /*middle of best window */
  float step_total = 0.0f;                 /*accumulated spline steps */
  int prev_x = blobcoords[0].left ();      /*previous blob centre */

  int blob = 0;
  while (blob < blobcount) {
    int mid_x = (blobcoords[blob].left () + blobcoords[blob].right ()) >> 1;
    //step functions in spline
    step_total += spline->step (prev_x, mid_x);
    prev_x = mid_x;

    float offset = blobcoords[blob].bottom ();
    offset -= spline->y (mid_x);
    offset += step_total;
    ydiffs[blob] = offset;

    /* slide the window: drop the entry three back, add the new one */
    if (blob > 2)
      window_sum -= ABS (ydiffs[blob - 3]);
    window_sum += ABS (offset);
    if (blob >= 2 && window_sum < smallest_sum) {
      smallest_sum = window_sum;
      best_middle = blob - 1;
    }
    blob++;
  }
  return best_middle;
}
void insert_spline_point ( int  xstarts[],
int  segment,
int  coord1,
int  coord2,
int &  segments 
)

Definition at line 1353 of file oldbasel.cpp.

{
  // Moves xstarts[segment+1 .. segments] up by one slot, increments
  // segments, then writes coord1 and coord2 into xstarts[segment] and
  // xstarts[segment + 1].
  int slot = segments;           //walks down from the old top entry
  while (slot > segment) {
    xstarts[slot + 1] = xstarts[slot];
    --slot;
  }
  ++segments;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
void make_first_baseline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
QSPLINE *  spline,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 517 of file oldbasel.cpp.

{
  // Produces a first baseline for the row in *baseline.
  // If `spline` is non-NULL, has at least 3 segments and covers the row to
  // within MAXOVERLAP of its width at both ends, *baseline becomes a copy of
  // it shifted vertically to pass through the first blob's bottom.
  // Otherwise, when textord_oldbl_paradef is set, *baseline is left
  // untouched; when it is not set, a linear QSPLINE is fitted to the blob
  // bottoms and, if the residuals are wavy enough, refitted with segment
  // boundaries at the significant turning points.
  // xcoords[]/ycoords[] are filled with blob centres/bottoms on the fitting
  // path. jumplimit bounds the residual change between neighbouring blobs
  // that still counts as smooth.
  int leftedge;                  /*left edge of line */
  int rightedge;                 /*right edge of line */
  int blobindex;                 /*current blob */
  int segment;                   /*current segment */
  float prevy, thisy, nexty;     /*3 y coords */
  float y1, y2, y3;              /*3 smooth blobs */
  float maxmax, minmin;          /*absolute limits */
  int x2 = 0;                    /*right edge of old y3 */
  int ycount;                    /*no of ycoords in use */
  float yturns[SPLINESIZE];      /*y coords of turn pts */
  int xturns[SPLINESIZE];        /*xcoords of turn pts */
  int xstarts[SPLINESIZE + 1];
  int segments;                  //no of segments
  ICOORD shift;                  //shift of spline

  prevy = 0;
                                 /*left edge of row */
  leftedge = blobcoords[0].left ();
                                 /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right ();
  if (spline == NULL             /*no given spline */
      || spline->segments < 3    /*or trivial */
                                 /*or too non-overlap */
      || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
      || spline->xcoords[spline->segments - 1] < rightedge
         - MAXOVERLAP * (rightedge - leftedge)) {
    if (textord_oldbl_paradef)
      return;                    //use default
    xstarts[0] = blobcoords[0].left () - 1;
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
      xcoords[blobindex] = (blobcoords[blobindex].left ()
        + blobcoords[blobindex].right ()) / 2;
      ycoords[blobindex] = blobcoords[blobindex].bottom ();
    }
    xstarts[1] = blobcoords[blobcount - 1].right () + 1;
    segments = 1;                /*no of segments */

                                 /*linear */
    *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);

    if (blobcount >= 3) {
      y1 = y2 = y3 = 0.0f;
      ycount = 0;
      segment = 0;               /*no of segments */
      maxmax = minmin = 0.0f;
      thisy = ycoords[0] - baseline->y (xcoords[0]);
      nexty = ycoords[1] - baseline->y (xcoords[1]);
      for (blobindex = 2; blobindex < blobcount; blobindex++) {
        prevy = thisy;           /*shift ycoords */
        thisy = nexty;
        nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
                                 /*middle of smooth y */
        if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
          y1 = y2;               /*shift window */
          y2 = y3;
          y3 = thisy;            /*middle point */
          ycount++;
                                 /*local max */
          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
                                 /*local min */
              || (y1 > y2 && y2 <= y3))) {
            if (segment < SPLINESIZE - 2) {
                                 /*turning pt */
              xturns[segment] = x2;
              yturns[segment] = y2;
              segment++;         /*no of spline segs */
            }
          }
          if (ycount == 1) {
            maxmax = minmin = y3;/*initialise limits */
          }
          else {
            if (y3 > maxmax)
              maxmax = y3;       /*biggest max */
            if (y3 < minmin)
              minmin = y3;       /*smallest min */
          }
                                 /*possible turning pt */
          x2 = blobcoords[blobindex - 1].right ();
        }
      }

      jumplimit *= 1.2;
                                 /*must be wavy */
      if (maxmax - minmin > jumplimit) {
        ycount = segment;        /*no of segments */
        for (blobindex = 0, segment = 1; blobindex < ycount;
             blobindex++) {
          if (yturns[blobindex] > minmin + jumplimit
              || yturns[blobindex] < maxmax - jumplimit) {
                                 /*significant peak */
            if (segment == 1
                || yturns[blobindex] > prevy + jumplimit
                || yturns[blobindex] < prevy - jumplimit) {
                                 /*different to previous */
              xstarts[segment] = xturns[blobindex];
              segment++;
              prevy = yturns[blobindex];
            }
                                 /*bigger max */
            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
                                 /*smaller min */
                     || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
              xstarts[segment - 1] = xturns[blobindex];
                                 /*improved previous */
              prevy = yturns[blobindex];
            }
          }
        }
        xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
        segments = segment;      /*no of segments */
                                 /*linear */
        *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
      }
    }
  }
  else {
    *baseline = *spline;         /*copy it */
    shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
      - spline->y (blobcoords[0].right ())));
    baseline->move (shift);
  }
}
void make_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  lineheight,
int  init_lineheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1547 of file oldbasel.cpp.

{
  // Estimates the row's x-height from a histogram of blob heights above the
  // baseline. The histogram modes are found with find_top_modes() and the
  // result is written to row->xheight / row->ascrise by pick_x_height().
  // The sign that row->xheight had on entry is re-applied on exit: if it was
  // not positive the result is negated.
  // jumplimit is not used by this function.
  STATS heightstat (0, HEIGHTBUCKETS);
  int lefts[HEIGHTBUCKETS];      // leftmost blob centre seen per height
  int rights[HEIGHTBUCKETS];     // rightmost blob centre seen per height
  int modelist[MODENUM];
  int blobindex;
  int mode_count;                //blobs to count in thr
  int sign_bit;
  int mode_threshold;
  const int kBaselineTouch = 2;  // This really should change with resolution.
  const int kGoodStrength = 8;   // Strength of baseline-touching heights.
  const float kMinHeight = 0.25; // Min fraction of lineheight to use.

  sign_bit = row->xheight > 0 ? 1 : -1;

  memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
  memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
  mode_count = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    int xcenter = (blobcoords[blobindex].left () +
                   blobcoords[blobindex].right ()) / 2;
    float base = baseline->y(xcenter);
    float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
    // In ocropus mode, blobs whose bottom is within kBaselineTouch of the
    // baseline are weighted kGoodStrength instead of 1.
    int strength = textord_ocropus_mode &&
                   bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
    int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
    if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
      if (height > lineheight * oldbl_xhfract
          && height > textord_min_xheight) {
        heightstat.add (height, strength);
        if (height < HEIGHTBUCKETS) {
          if (xcenter > rights[height])
            rights[height] = xcenter;
          // lefts[] uses 0 as "not set yet", hence the xcenter > 0 test.
          if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
            lefts[height] = xcenter;
        }
      }
      mode_count += strength;
    }
  }

  mode_threshold = (int) (blobcount * 0.1);
  // NOTE(review): the condition on this override was lost in this listing
  // (the assignment above was otherwise dead); restored from the upstream
  // source as recalled -- confirm against oldbasel.cpp.
  if (oldbl_dot_error_size > 1 || oldbl_xhfix)
    mode_threshold = (int) (mode_count * 0.1);

  if (textord_oldbl_debug) {
    tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
             blobcount, mode_count, mode_threshold);
  }
  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
  if (textord_oldbl_debug) {
    for (blobindex = 0; blobindex < MODENUM; blobindex++)
      tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
    tprintf ("\n");
  }
  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);

  if (textord_oldbl_debug)
    tprintf ("Output xheight=%g\n", row->xheight);
  if (row->xheight < 0 && textord_oldbl_debug)
    tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);

  if (sign_bit < 0)
    row->xheight = -row->xheight;
}
void make_holed_baseline ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
QSPLINE *  baseline,
float  gradient 
)

Definition at line 660 of file oldbasel.cpp.

{
  // Sets *baseline to a straight line of slope `gradient` fitted through the
  // blob bottom-centres. If `spline` is non-NULL, has at least 3 segments
  // and reaches to within MAXOVERLAP of the row width at both ends, that
  // line is replaced by a copy of the spline, shifted vertically so that it
  // meets the fitted line at the horizontal centre of the row.
  const int leftedge = blobcoords[0].left ();               /*left edge of row */
  const int rightedge = blobcoords[blobcount - 1].right (); /*right edge of row */

  tesseract::DetLineFit lms;     // straight baseline
  for (int blob = 0; blob < blobcount; ++blob) {
    const int mid_x = (blobcoords[blob].left () + blobcoords[blob].right ()) / 2;
    lms.Add(ICOORD(mid_x, blobcoords[blob].bottom()));
  }

  float c;                       //line parameter
  lms.ConstrainedFit(gradient, &c);

  inT32 xstarts[2];              //ends of the single segment
  xstarts[0] = leftedge;
  xstarts[1] = rightedge;
  double coeffs[3];              //no quadratic term, then slope, then c
  coeffs[0] = 0;
  coeffs[1] = gradient;
  coeffs[2] = c;
  *baseline = QSPLINE (1, xstarts, coeffs);

  /* keep the straight line unless the given spline is usable */
  if (spline == NULL
      || spline->segments < 3
      || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
      || spline->xcoords[spline->segments - 1] < rightedge
         - MAXOVERLAP * (rightedge - leftedge))
    return;

  *baseline = *spline;           /*copy it */
  float x = (leftedge + rightedge) / 2.0;  //centre of row
  ICOORD shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x)));
  baseline->move (shift);
}
void merge_oldbl_parts ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  biggestpart,
float  jumplimit 
)

Definition at line 799 of file oldbasel.cpp.

{
  // Looks for runs of more than MAXBADRUN consecutive blobs assigned to a
  // partition other than biggestpart. A straight line is fitted to each such
  // run; if the nearest biggestpart blob (searching outwards on both sides)
  // lies within jumplimit of that line, the run's blobs are relabelled as
  // biggestpart in partids[] and removed from the old partition's count.
  // A run is only examined when a later blob changes partition, so a run
  // that extends to the last blob is never examined.
  BOOL8 found_one;               //found a bestpart blob
  BOOL8 close_one;               //found was close enough
  int blobindex;                 /*no along text line */
  int prevpart;                  //previous iteration
  int runlength;                 //no in this part
  float diff;                    /*difference from line */
  int startx;                    /*index of start blob */
  int test_blob;                 //another index
  FCOORD coord;                  //blob coordinate
  float m, c;                    //fitted line
  QLSQ stats;                    //line stuff

  prevpart = biggestpart;
  runlength = 0;
  startx = 0;
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    if (partids[blobindex] != prevpart) {
      //                      tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
      //                              blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
      //                              prevpart,partids[blobindex],runlength);
      if (prevpart != biggestpart && runlength > MAXBADRUN) {
        stats.clear ();
        for (test_blob = startx; test_blob < blobindex; test_blob++) {
          coord = FCOORD ((blobcoords[test_blob].left ()
            + blobcoords[test_blob].right ()) / 2.0,
            blobcoords[test_blob].bottom ());
          stats.add (coord.x (), coord.y ());
        }
        stats.fit (1);
        m = stats.get_b ();
        c = stats.get_c ();
        if (textord_oldbl_debug)
          tprintf ("Fitted line y=%g x + %g\n", m, c);
        found_one = FALSE;
        close_one = FALSE;
        // Step outwards from both ends of the run, one blob at a time,
        // until a biggestpart blob is met on either side.
        for (test_blob = 1; !found_one
             && (startx - test_blob >= 0
                 || blobindex + test_blob <= blobcount); test_blob++) {
          if (startx - test_blob >= 0
              && partids[startx - test_blob] == biggestpart) {
            found_one = TRUE;
            coord = FCOORD ((blobcoords[startx - test_blob].left ()
              + blobcoords[startx - test_blob].right ()) / 2.0,
              blobcoords[startx - test_blob].bottom ());
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf
                ("Diff of common blob to suspect part=%g at (%g,%g)\n",
                diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
          if (blobindex + test_blob <= blobcount
              && partids[blobindex + test_blob - 1] == biggestpart) {
            found_one = TRUE;
            coord = FCOORD ((blobcoords[blobindex + test_blob - 1].left ()
              + blobcoords[blobindex + test_blob - 1].right ()) / 2.0,
              blobcoords[blobindex + test_blob - 1].bottom ());
            diff = m * coord.x () + c - coord.y ();
            if (textord_oldbl_debug)
              tprintf
                ("Diff of common blob to suspect part=%g at (%g,%g)\n",
                diff, coord.x (), coord.y ());
            if (diff < jumplimit && -diff < jumplimit)
              close_one = TRUE;
          }
        }
        if (close_one) {
          if (textord_oldbl_debug)
            tprintf
              ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
              runlength, biggestpart, prevpart,
              blobcoords[startx].left (),
              blobcoords[startx].bottom ());
          //switch sides
          // NOTE(review): partsizes[biggestpart] is not increased to match;
          // left as found -- confirm whether callers rely on that.
          partsizes[prevpart] -= runlength;
          for (test_blob = startx; test_blob < blobindex; test_blob++)
            partids[test_blob] = biggestpart;
        }
      }
      prevpart = partids[blobindex];
      runlength = 1;
      startx = blobindex;
    }
    else
      runlength++;
  }
}
void old_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  initialheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1462 of file oldbasel.cpp.

{
  // Derives an approximate line height from the blob tops (or uses
  // initialheight when there are too few samples), then splits the blobs
  // into ascenders (more than jumplimit above that height) and x-height
  // blobs (within jumplimit of it). row->xheight is MULTIPLIED by the mean
  // x-height, so its incoming sign and scale are kept; row->ascrise becomes
  // the mean ascender height minus the mean x-height, or 0 without
  // ascenders. A resulting xheight of exactly 0 is replaced by -1.
  STATS heightstat (0, MAXHEIGHT);  /*height statistics */
  int lineheight;                   /*approx xheight */
  int blob;

  if (blobcount <= 1) {
    /* single blob: take its own height above the baseline */
    int mid_x = (blobcoords[0].left () + blobcoords[0].right ()) / 2;
    lineheight = (int) (blobcoords[0].top () - baseline->y (mid_x) + 0.5);
  }
  else {
    for (blob = 0; blob < blobcount; blob++) {
      int mid_x = (blobcoords[blob].left () + blobcoords[blob].right ()) / 2;
      int height = (int) (blobcoords[blob].top () - baseline->y (mid_x) + 0.5);
      if (height > initialheight * oldbl_xhfract
          && height > textord_min_xheight)
        heightstat.add (height, 1);
    }
    if (heightstat.get_total () <= 3) {
      lineheight = initialheight;
    }
    else {
      lineheight = (int) heightstat.ile (0.25);
      if (lineheight <= 0)
        lineheight = (int) heightstat.ile (0.5);
    }
  }

  float asc_total = 0.0f;        /*ascender sum */
  int asc_n = 0;                 /*no of ascenders */
  float x_total = 0.0f;          /*xheight sum */
  int x_n = 0;                   /*xheight count */
  for (blob = 0; blob < blobcount; blob++) {
    int mid_x = (blobcoords[blob].left () + blobcoords[blob].right ()) / 2;
    float rise = blobcoords[blob].top () - baseline->y (mid_x);
    if (rise > lineheight + jumplimit) {
      asc_total += rise;         /*is an ascender */
      asc_n++;
    }
    else if (rise > lineheight - jumplimit) {
      x_total += rise;           /*is x-height */
      x_n++;
    }
  }

  if (x_n > 0)
    x_total /= x_n;              /*average xheight */
  else
    x_total = (float) lineheight;  /*guess it */

  row->xheight *= x_total;
  row->ascrise = (asc_n > 0) ? asc_total / asc_n - x_total : 0.0f;
  if (row->xheight == 0)
    row->xheight = -1.0f;
}
int partition_coords ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  bestpart,
int  xcoords[],
int  ycoords[] 
)

*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking

Definition at line 1090 of file oldbasel.cpp.

{
  // Collects, in blob order, the horizontal centre and bottom of every blob
  // whose partids[] entry equals bestpart into xcoords[]/ycoords[], and
  // returns how many were collected.
  int used = 0;                  /*no of points stored */
  for (int blob = 0; blob < blobcount; ++blob) {
    if (partids[blob] != bestpart)
      continue;                  /*belongs to another part */
    xcoords[used] = (blobcoords[blob].left () + blobcoords[blob].right ()) >> 1;
    ycoords[used] = blobcoords[blob].bottom ();
    ++used;
  }
  return used;
}
int partition_line ( TBOX  blobcoords[],
int  blobcount,
int *  numparts,
char  partids[],
int  partsizes[],
QSPLINE *  spline,
float  jumplimit,
float  ydiffs[] 
)

Definition at line 718 of file oldbasel.cpp.

{
  // Assigns every blob to a partition according to its offset from `spline`.
  // get_ydiffs() fills ydiffs[] and picks a starting blob; partitions are
  // then chosen with choose_partition(), first from the start blob to the
  // end of the row and then from the start blob back to blob 0.
  //   numparts   receives the number of partitions created
  //   partids    receives the partition index of each blob
  //   partsizes  receives the blob count of each partition (MAXPARTS slots)
  // Returns the index of the partition with the most blobs; ties go to the
  // higher index.
  int blobindex;                 /*no along text line */
  int bestpart;                  /*best new partition */
  int biggestpart;               /*part with most members */
  float diff;                    /*difference from line */
  int startx;                    /*index of start blob */
  float partdiffs[MAXPARTS];     /*step between parts */

  for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
    partsizes[bestpart] = 0;     /*zero them all */

  startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
  *numparts = 1;                 /*1 partition */
  bestpart = -1;                 /*first point */
  float drift = 0.0f;
  float last_delta = 0.0f;
  for (blobindex = startx; blobindex < blobcount; blobindex++) {
                                 /*do each blob in row */
    diff = ydiffs[blobindex];    /*diff from line */
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", blobindex,
               blobcoords[blobindex].left (),
               blobcoords[blobindex].bottom ());
    }
    bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
                                &drift, &last_delta, numparts);
                                 /*record partition */
    partids[blobindex] = bestpart;
    partsizes[bestpart]++;       /*another in it */
  }

  bestpart = -1;                 /*first point */
  drift = 0.0f;
  last_delta = 0.0f;
  // The start blob is visited by both passes, so take one off partition 0
  // before the backward pass counts it again.
  partsizes[0]--;                /*doing 1st pt again */
                                 /*do each blob in row */
  for (blobindex = startx; blobindex >= 0; blobindex--) {
    diff = ydiffs[blobindex];    /*diff from line */
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", blobindex,
               blobcoords[blobindex].left (),
               blobcoords[blobindex].bottom ());
    }
    bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
                                &drift, &last_delta, numparts);
                                 /*record partition */
    partids[blobindex] = bestpart;
    partsizes[bestpart]++;       /*another in it */
  }

  for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
    if (partsizes[bestpart] >= partsizes[biggestpart])
      biggestpart = bestpart;    /*new biggest */
  // NOTE(review): the textord_oldbl_merge_parts guard was missing from this
  // listing; restored from the upstream source as recalled. The variable
  // list shows it as TRUE, in which case the merge is still called.
  if (textord_oldbl_merge_parts)
    merge_oldbl_parts(blobcoords,
                      blobcount,
                      partids,
                      partsizes,
                      biggestpart,
                      jumplimit);
  return biggestpart;            /*biggest partition */
}
void pick_x_height ( TO_ROW *  row,
int  modelist[],
int  lefts[],
int  rights[],
STATS *  heightstat,
int  mode_threshold 
)

Definition at line 1672 of file oldbasel.cpp.

{
  // Chooses row->xheight and row->ascrise from the height modes in
  // modelist[] (MODENUM entries; 0 means "no mode").
  // Two-mode case: looks for modes x and y with 1.2 < y/x < 1.8 where x has
  // more than mode_threshold samples; x becomes the x-height and y the
  // ascender height. The x-height is then nudged up by one while the next
  // height is also a mode with more than half the samples, and the ascender
  // is raised to any taller mode meeting the same ratio and sample tests.
  // In ocropus mode the two modes must also overlap horizontally, judged
  // from lefts[]/rights[].
  // Single-mode case: modelist[0] is the x-height (nudged up as above),
  // ascrise is 0, and an x-height of 0 is reported as -1.
  int x;
  int y;
  int z;
  float ratio;
  int found_one_bigger = FALSE;
  int best_x_height = 0;
  int best_asc = 0;
  int num_in_best;

  for (x = 0; x < MODENUM; x++) {
    for (y = 0; y < MODENUM; y++) {
      /* Check for two modes */
      if (modelist[x] && modelist[y] &&
          heightstat->pile_count (modelist[x]) > mode_threshold &&
          (!textord_ocropus_mode ||
           MIN(rights[modelist[x]], rights[modelist[y]]) >
           MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
        ratio = (float) modelist[y] / (float) modelist[x];
        if (1.2 < ratio && ratio < 1.8) {
          /* Two modes found */
          best_x_height = modelist[x];
          num_in_best = heightstat->pile_count (modelist[x]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] == best_x_height + 1 &&
                  (!textord_ocropus_mode ||
                   MIN(rights[modelist[x]], rights[modelist[y]]) >
                   MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[y] / (float) modelist[z];
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count (modelist[z]) >
                    num_in_best * 0.5) {
                  best_x_height++;
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          /* try to get a higher ascender */
          best_asc = modelist[y];
          num_in_best = heightstat->pile_count (modelist[y]);

          /* Try to get one higher */
          do {
            found_one_bigger = FALSE;
            for (z = 0; z < MODENUM; z++) {
              if (modelist[z] > best_asc &&
                  (!textord_ocropus_mode ||
                   MIN(rights[modelist[x]], rights[modelist[y]]) >
                   MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
                ratio = (float) modelist[z] / (float) best_x_height;
                if ((1.2 < ratio && ratio < 1.8) &&
                    /* Should be half of best */
                    heightstat->pile_count (modelist[z]) >
                    num_in_best * 0.5) {
                  best_asc = modelist[z];
                  found_one_bigger = TRUE;
                  break;
                }
              }
            }
          }
          while (found_one_bigger);

          row->xheight = (float) best_x_height;
          row->ascrise = (float) best_asc - best_x_height;
          return;
        }
      }
    }
  }

  best_x_height = modelist[0];   /* Single Mode found */
  num_in_best = heightstat->pile_count (best_x_height);
  do {
    /* Try to get one higher */
    found_one_bigger = FALSE;
    for (z = 1; z < MODENUM; z++) {
      /* Should be half of best */
      if ((modelist[z] == best_x_height + 1) &&
          (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
        best_x_height++;
        found_one_bigger = TRUE;
        break;
      }
    }
  }
  while (found_one_bigger);

  row->ascrise = 0.0f;
  row->xheight = (float) best_x_height;
  if (row->xheight == 0)
    row->xheight = -1.0f;
}
int segment_spline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
int  degree,
int  pointcount,
int  xstarts[] 
)

Definition at line 1121 of file oldbasel.cpp.

{
  // Splits the point sequence (xcoords[], ycoords[], pointcount entries)
  // into spline segments at the midpoints between significant turning
  // points of ycoords[]. A turn is significant when it differs from the
  // last opposite extreme by more than TURNLIMIT.
  // Writes the segment boundaries to xstarts[0 .. return value] and returns
  // the number of segments (at least 1). With degree < 2 or at most 3
  // points no turning points are looked for and a single segment spanning
  // all the points results.
  // blobcoords and blobcount are not used by this function.
  int ptindex;                   /*no along text line */
  int segment;                   /*partition no */
  int lastmin, lastmax;          /*possible turn points */
  int turnpoints[SPLINESIZE];    /*good turning points */
  int turncount;                 /*no of turning points */
  int max_x;                     //max specified coord

  xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
  max_x = xcoords[pointcount - 1] + 1;
  if (degree < 2)
    pointcount = 0;
  turncount = 0;                 /*no turning points yet */
  if (pointcount > 3) {
    ptindex = 1;
    lastmax = lastmin = 0;       /*start with first one */
    while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
                                 /*minimum */
      if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
                                 /*new max point */
            turnpoints[turncount++] = lastmax;
          lastmin = ptindex;     /*latest minimum */
        }
        else if (ycoords[ptindex] < ycoords[lastmin]) {
          lastmin = ptindex;     /*lower minimum */
        }
      }

                                 /*maximum */
      if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
        if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
          if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
                                 /*new min point */
            turnpoints[turncount++] = lastmin;
          lastmax = ptindex;     /*latest maximum */
        }
        else if (ycoords[ptindex] > ycoords[lastmax]) {
          lastmax = ptindex;     /*higher maximum */
        }
      }
      ptindex++;
    }
                                 /*possible global min */
    if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
        && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmax;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
                                 /*possible global max */
             && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
      if (turncount < SPLINESIZE - 1)
                                 /*2 more turns */
        turnpoints[turncount++] = lastmin;
      if (turncount < SPLINESIZE - 1)
        turnpoints[turncount++] = ptindex;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
             && turncount < SPLINESIZE - 1) {
      if (ycoords[ptindex] > ycoords[lastmax])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmax;
    }
    else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
             && turncount < SPLINESIZE - 1) {
      if (ycoords[ptindex] < ycoords[lastmin])
        turnpoints[turncount++] = ptindex;
      else
        turnpoints[turncount++] = lastmin;
    }
  }

  if (textord_oldbl_debug && turncount > 0)
    tprintf ("First turn is %d at (%d,%d)\n",
             turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
  for (segment = 1; segment < turncount; segment++) {
                                 /*centre y coord */
    lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;

    /* fix alg so that it works with both rising and falling sections */
    if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
                                 /*find rising y centre */
      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
    else
                                 /*find falling y centre */
      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);

                                 /*centre x */
    xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
      + xcoords[turnpoints[segment - 1]]
      + xcoords[turnpoints[segment]] + 2) / 4;
                                 /*halfway between turns */
    if (textord_oldbl_debug)
      tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
               segment, turnpoints[segment],
               xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
               ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
  }

  xstarts[segment] = max_x;
  return segment;                /*no of splines */
}
BOOL8 split_stepped_spline ( QSPLINE *  baseline,
float  jumplimit,
int  xcoords[],
int  xstarts[],
int &  segments 
)

Definition at line 1246 of file oldbasel.cpp.

{
BOOL8 doneany; //return value
register int segment; /*partition no */
int startindex, centreindex, endindex;
float leftcoord, rightcoord;
int leftindex, rightindex;
float step; //spline step
doneany = FALSE;
startindex = 0;
for (segment = 1; segment < segments - 1; segment++) {
step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
(xstarts[segment] + xstarts[segment + 1]) / 2.0);
if (step < 0)
step = -step;
if (step > jumplimit) {
while (xcoords[startindex] < xstarts[segment - 1])
startindex++;
centreindex = startindex;
while (xcoords[centreindex] < xstarts[segment])
centreindex++;
endindex = centreindex;
while (xcoords[endindex] < xstarts[segment + 1])
endindex++;
if (segments >= SPLINESIZE) {
tprintf ("Too many segments to resegment spline!!\n");
}
else if (endindex - startindex >= textord_spline_medianwin * 3) {
while (centreindex - startindex <
centreindex++;
while (endindex - centreindex <
centreindex--;
leftindex = (startindex + startindex + centreindex) / 3;
rightindex = (centreindex + endindex + endindex) / 3;
leftcoord =
(xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
rightcoord =
(xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
while (xcoords[leftindex] > leftcoord
&& leftindex - startindex > textord_spline_medianwin)
leftindex--;
while (xcoords[leftindex] < leftcoord
&& centreindex - leftindex >
leftindex++;
if (xcoords[leftindex] - leftcoord >
leftcoord - xcoords[leftindex - 1])
leftindex--;
while (xcoords[rightindex] > rightcoord
&& rightindex - centreindex >
rightindex--;
while (xcoords[rightindex] < rightcoord
&& endindex - rightindex > textord_spline_medianwin)
rightindex++;
if (xcoords[rightindex] - rightcoord >
rightcoord - xcoords[rightindex - 1])
rightindex--;
tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
xstarts[segment],
baseline->
step ((xstarts[segment - 1] +
xstarts[segment]) / 2.0,
(xstarts[segment] +
xstarts[segment + 1]) / 2.0),
(xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
(xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
insert_spline_point (xstarts, segment,
(xcoords[leftindex - 1] +
xcoords[leftindex]) / 2,
(xcoords[rightindex - 1] +
xcoords[rightindex]) / 2, segments);
doneany = TRUE;
}
("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
startindex, centreindex, endindex,
}
}
// else tprintf("Spline step at %d is %g\n",
// xstarts[segment],
// baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
// (xstarts[segment]+xstarts[segment+1])/2.0));
}
return doneany;
}

Variable Documentation

const int kMinModeFactor = 12

Definition at line 1629 of file oldbasel.cpp.

const int kMinModeFactorOcropus = 32

Definition at line 1628 of file oldbasel.cpp.

EXTERN bool oldbl_corrfix = TRUE

"Improve correlation of heights"

Definition at line 45 of file oldbasel.cpp.

EXTERN double oldbl_dot_error_size = 1.26

"Max aspect ratio of a dot"

Definition at line 52 of file oldbasel.cpp.

EXTERN int oldbl_holed_losscount = 10

"Max lost before fallback line used"

Definition at line 51 of file oldbasel.cpp.

EXTERN bool oldbl_xhfix = FALSE

"Fix bug in modes threshold for xheights"

Definition at line 47 of file oldbasel.cpp.

EXTERN double oldbl_xhfract = 0.4

"Fraction of est allowed in calc"

Definition at line 49 of file oldbasel.cpp.

EXTERN bool textord_debug_baselines = FALSE

"Debug baseline generation"

Definition at line 41 of file oldbasel.cpp.

EXTERN bool textord_ocropus_mode = FALSE

"Make baselines for ocropus"

Definition at line 48 of file oldbasel.cpp.

EXTERN bool textord_oldbl_debug = FALSE

"Debug old baseline generation"

Definition at line 40 of file oldbasel.cpp.

EXTERN double textord_oldbl_jumplimit = 0.15

"X fraction for new partition"

Definition at line 54 of file oldbasel.cpp.

EXTERN bool textord_oldbl_merge_parts = TRUE

"Merge suspect partitions"

Definition at line 44 of file oldbasel.cpp.

EXTERN bool textord_oldbl_paradef = TRUE

"Use para default mechanism"

Definition at line 42 of file oldbasel.cpp.

EXTERN bool textord_oldbl_split_splines = TRUE

"Split stepped splines"

Definition at line 43 of file oldbasel.cpp.

EXTERN bool textord_really_old_xheight = FALSE

"Use original wiseowl xheight"

Definition at line 39 of file oldbasel.cpp.