"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "xpdf/TextOutputDev.cc" between
xpdf-4.01.01.tar.gz and xpdf-4.02.tar.gz

About: Xpdf is a PDF viewer for X.

TextOutputDev.cc  (xpdf-4.01.01):TextOutputDev.cc  (xpdf-4.02)
skipping to change at line 19 skipping to change at line 19
#include <aconf.h> #include <aconf.h>
#ifdef USE_GCC_PRAGMAS #ifdef USE_GCC_PRAGMAS
#pragma implementation #pragma implementation
#endif #endif
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stddef.h> #include <stddef.h>
#include <math.h> #include <math.h>
#include <limits.h>
#include <ctype.h> #include <ctype.h>
#ifdef _WIN32 #ifdef _WIN32
#include <fcntl.h> // for O_BINARY #include <fcntl.h> // for O_BINARY
#include <io.h> // for setmode #include <io.h> // for setmode
#endif #endif
#include "gmem.h" #include "gmem.h"
#include "gmempp.h" #include "gmempp.h"
#include "GString.h" #include "GString.h"
#include "GList.h" #include "GList.h"
#include "config.h" #include "config.h"
skipping to change at line 114 skipping to change at line 115
// If font size changes by at least this much (measured in points) // If font size changes by at least this much (measured in points)
// between lines, start a new paragraph. // between lines, start a new paragraph.
#define paragraphFontSizeDelta 1 #define paragraphFontSizeDelta 1
// Spaces at the start of a line in physical layout mode are this wide // Spaces at the start of a line in physical layout mode are this wide
// (as a multiple of font size). // (as a multiple of font size).
#define physLayoutSpaceWidth 0.33 #define physLayoutSpaceWidth 0.33
// In simple layout mode, lines are broken at gaps larger than this // In simple layout mode, lines are broken at gaps larger than this
// value multiplied by font size. // value multiplied by font size.
#define simpleLayoutGapThreshold 0.4 #define simpleLayoutGapThreshold 0.7
// Table cells (TextColumns) are allowed to overlap by this much // Table cells (TextColumns) are allowed to overlap by this much
// in table layout mode (as a fraction of cell width or height). // in table layout mode (as a fraction of cell width or height).
#define tableCellOverlapSlack 0.05 #define tableCellOverlapSlack 0.05
// Primary axis delta which will cause a line break in raw mode // Primary axis delta which will cause a line break in raw mode
// (as a fraction of font size). // (as a fraction of font size).
#define rawModeLineDelta 0.5 #define rawModeLineDelta 0.5
// Secondary axis delta which will cause a word break in raw mode // Secondary axis delta which will cause a word break in raw mode
skipping to change at line 410 skipping to change at line 411
} }
if (child->xMax > xMax) { if (child->xMax > xMax) {
xMax = child->xMax; xMax = child->xMax;
} }
if (child->yMax > yMax) { if (child->yMax > yMax) {
yMax = child->yMax; yMax = child->yMax;
} }
} }
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// TextGap // TextGaps
//------------------------------------------------------------------------ //------------------------------------------------------------------------
class TextGap { struct TextGap {
public: double x; // center of gap: x for vertical gaps,
// y for horizontal gaps
double w; // width/height of gap
};
TextGap(double aXY, double aW): xy(aXY), w(aW) {} class TextGaps {
public:
double xy; // center of gap: x for vertical gaps, TextGaps();
// y for horizontal gaps ~TextGaps();
double w; // width of gap void addGap(double x, double w);
int getLength() { return length; }
double getX(int idx) { return gaps[idx].x; }
double getW(int idx) { return gaps[idx].w; }
private:
int length;
int size;
TextGap *gaps;
}; };
// Create an empty gap list backed by an initial array of 16 entries;
// addGap() enlarges the array when it fills up.
TextGaps::TextGaps(): length(0), size(16) {
  gaps = (TextGap *)gmallocn(size, sizeof(TextGap));
}
// Release the gap array allocated by the constructor (and possibly
// reallocated by addGap()).
TextGaps::~TextGaps() {
  gfree(gaps);
}
void TextGaps::addGap(double x, double w) {
if (length == size) {
size *= 2;
gaps = (TextGap *)greallocn(gaps, size, sizeof(TextGap));
}
gaps[length].x = x;
gaps[length].w = w;
++length;
}
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// TextSuperLine // TextSuperLine
//------------------------------------------------------------------------ //------------------------------------------------------------------------
class TextSuperLine { class TextSuperLine {
public: public:
TextSuperLine(GList *linesA); TextSuperLine(GList *linesA);
~TextSuperLine(); ~TextSuperLine();
skipping to change at line 511 skipping to change at line 545
TextOutputControl::TextOutputControl() { TextOutputControl::TextOutputControl() {
mode = textOutReadingOrder; mode = textOutReadingOrder;
fixedPitch = 0; fixedPitch = 0;
fixedLineSpacing = 0; fixedLineSpacing = 0;
html = gFalse; html = gFalse;
clipText = gFalse; clipText = gFalse;
discardDiagonalText = gFalse; discardDiagonalText = gFalse;
discardInvisibleText = gFalse; discardInvisibleText = gFalse;
discardClippedText = gFalse; discardClippedText = gFalse;
insertBOM = gFalse; insertBOM = gFalse;
marginLeft = 0;
marginRight = 0;
marginTop = 0;
marginBottom = 0;
} }
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// TextFontInfo // TextFontInfo
//------------------------------------------------------------------------ //------------------------------------------------------------------------
TextFontInfo::TextFontInfo(GfxState *state) { TextFontInfo::TextFontInfo(GfxState *state) {
GfxFont *gfxFont; GfxFont *gfxFont;
gfxFont = state->getFont(); gfxFont = state->getFont();
skipping to change at line 1138 skipping to change at line 1176
// base coordinate system used in the font is without actually // base coordinate system used in the font is without actually
// rendering the font. This code tries to guess by looking at the // rendering the font. This code tries to guess by looking at the
// width of the character 'm' (which breaks if the font is a // width of the character 'm' (which breaks if the font is a
// subset that doesn't contain 'm'). // subset that doesn't contain 'm').
mCode = letterCode = anyCode = -1; mCode = letterCode = anyCode = -1;
for (code = 0; code < 256; ++code) { for (code = 0; code < 256; ++code) {
name = ((Gfx8BitFont *)gfxFont)->getCharName(code); name = ((Gfx8BitFont *)gfxFont)->getCharName(code);
if (name && name[0] == 'm' && name[1] == '\0') { if (name && name[0] == 'm' && name[1] == '\0') {
mCode = code; mCode = code;
} }
if (letterCode < 0 && name && name[1] == '\0' && if (letterCode < 0 &&
name &&
((name[0] >= 'A' && name[0] <= 'Z') || ((name[0] >= 'A' && name[0] <= 'Z') ||
(name[0] >= 'a' && name[0] <= 'z'))) { (name[0] >= 'a' && name[0] <= 'z')) &&
name[1] == '\0') {
letterCode = code; letterCode = code;
} }
if (anyCode < 0 && name && if (anyCode < 0 && name &&
((Gfx8BitFont *)gfxFont)->getWidth((Guchar)code) > 0) { ((Gfx8BitFont *)gfxFont)->getWidth((Guchar)code) > 0) {
anyCode = code; anyCode = code;
} }
} }
if (mCode >= 0 && if (mCode >= 0 &&
(w = ((Gfx8BitFont *)gfxFont)->getWidth((Guchar)mCode)) > 0) { (w = ((Gfx8BitFont *)gfxFont)->getWidth((Guchar)mCode)) > 0) {
// 0.6 is a generic average 'm' width -- yes, this is a hack // 0.6 is a generic average 'm' width -- yes, this is a hack
skipping to change at line 1241 skipping to change at line 1281
sp += state->getWordSpace(); sp += state->getWordSpace();
} }
state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2); state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
dx -= dx2; dx -= dx2;
dy -= dy2; dy -= dy2;
state->transformDelta(dx, dy, &w1, &h1); state->transformDelta(dx, dy, &w1, &h1);
// throw away chars that aren't inside the page bounds // throw away chars that aren't inside the page bounds
// (and also do a sanity check on the character size) // (and also do a sanity check on the character size)
state->transform(x, y, &x1, &y1); state->transform(x, y, &x1, &y1);
if (x1 + w1 < 0 || x1 > pageWidth || if (x1 + w1 < control.marginLeft ||
y1 + h1 < 0 || y1 > pageHeight || x1 > pageWidth - control.marginRight ||
w1 > pageWidth || h1 > pageHeight) { y1 + h1 < control.marginTop ||
y1 > pageHeight - control.marginBottom ||
w1 > pageWidth ||
h1 > pageHeight) {
charPos += nBytes; charPos += nBytes;
return; return;
} }
// check the tiny chars limit // check the tiny chars limit
if (!globalParams->getTextKeepTinyChars() && if (!globalParams->getTextKeepTinyChars() &&
fabs(w1) < 3 && fabs(h1) < 3) { fabs(w1) < 3 && fabs(h1) < 3) {
if (++nTinyChars > 50000) { if (++nTinyChars > 50000) {
charPos += nBytes; charPos += nBytes;
return; return;
skipping to change at line 1878 skipping to change at line 1921
ch2 = (TextChar *)chars->get(i+1); ch2 = (TextChar *)chars->get(i+1);
if (ch2->rot != ch->rot) { if (ch2->rot != ch->rot) {
s->append(eol, eolLen); s->append(eol, eolLen);
} else { } else {
switch (ch->rot) { switch (ch->rot) {
case 0: case 0:
default: default:
if (fabs(ch2->yMin - ch->yMin) > rawModeLineDelta * ch->fontSize || if (fabs(ch2->yMin - ch->yMin) > rawModeLineDelta * ch->fontSize ||
ch2->xMin - ch->xMax < -rawModeCharOverlap * ch->fontSize) { ch2->xMin - ch->xMax < -rawModeCharOverlap * ch->fontSize) {
s->append(eol, eolLen); s->append(eol, eolLen);
} else if (ch2->xMin - ch->xMax > } else if (ch->spaceAfter ||
rawModeWordSpacing * ch->fontSize) { ch2->xMin - ch->xMax >
rawModeWordSpacing * ch->fontSize) {
s->append(space, spaceLen); s->append(space, spaceLen);
} }
break; break;
case 1: case 1:
if (fabs(ch->xMax - ch2->xMax) > rawModeLineDelta * ch->fontSize || if (fabs(ch->xMax - ch2->xMax) > rawModeLineDelta * ch->fontSize ||
ch2->yMin - ch->yMax < -rawModeCharOverlap * ch->fontSize) { ch2->yMin - ch->yMax < -rawModeCharOverlap * ch->fontSize) {
s->append(eol, eolLen); s->append(eol, eolLen);
} else if (ch2->yMin - ch->yMax > } else if (ch->spaceAfter ||
rawModeWordSpacing * ch->fontSize) { ch2->yMin - ch->yMax >
rawModeWordSpacing * ch->fontSize) {
s->append(space, spaceLen); s->append(space, spaceLen);
} }
break; break;
case 2: case 2:
if (fabs(ch->yMax - ch2->yMax) > rawModeLineDelta * ch->fontSize || if (fabs(ch->yMax - ch2->yMax) > rawModeLineDelta * ch->fontSize ||
ch->xMin - ch2->xMax < -rawModeCharOverlap * ch->fontSize) { ch->xMin - ch2->xMax < -rawModeCharOverlap * ch->fontSize) {
s->append(eol, eolLen); s->append(eol, eolLen);
} else if (ch->xMin - ch2->xMax > } else if (ch->spaceAfter ||
rawModeWordSpacing * ch->fontSize) { ch->xMin - ch2->xMax >
rawModeWordSpacing * ch->fontSize) {
s->append(space, spaceLen); s->append(space, spaceLen);
} }
break; break;
case 3: case 3:
if (fabs(ch2->xMin - ch->xMin) > rawModeLineDelta * ch->fontSize || if (fabs(ch2->xMin - ch->xMin) > rawModeLineDelta * ch->fontSize ||
ch->yMin - ch2->yMax < -rawModeCharOverlap * ch->fontSize) { ch->yMin - ch2->yMax < -rawModeCharOverlap * ch->fontSize) {
s->append(eol, eolLen); s->append(eol, eolLen);
} else if (ch->yMin - ch2->yMax > } else if (ch->spaceAfter ||
rawModeWordSpacing * ch->fontSize) { ch->yMin - ch2->yMax >
rawModeWordSpacing * ch->fontSize) {
s->append(space, spaceLen); s->append(space, spaceLen);
} }
break; break;
} }
} }
} else { } else {
s->append(eol, eolLen); s->append(eol, eolLen);
} }
if (s->getLength() > 1000) { if (s->getLength() > 1000) {
skipping to change at line 2695 skipping to change at line 2742
dumpTree(tree[0]); dumpTree(tree[0]);
#endif #endif
return tree[0]; return tree[0];
} }
// Generate a tree of TextBlocks, marked as columns, lines, and words. // Generate a tree of TextBlocks, marked as columns, lines, and words.
TextBlock *TextPage::split(GList *charsA, int rot) { TextBlock *TextPage::split(GList *charsA, int rot) {
TextBlock *blk; TextBlock *blk;
GList *chars2, *chars3; GList *chars2, *chars3;
GList *horizGaps, *vertGaps; TextGaps *horizGaps, *vertGaps;
TextGap *gap;
TextChar *ch; TextChar *ch;
double xMin, yMin, xMax, yMax, avgFontSize; double xMin, yMin, xMax, yMax, avgFontSize;
double horizGapSize, vertGapSize, minHorizChunkWidth, minVertChunkWidth; double horizGapSize, vertGapSize, minHorizChunkWidth, minVertChunkWidth;
double nLines, vertGapThreshold, minChunk; double gap, nLines, vertGapThreshold, minChunk;
double largeCharSize; double largeCharSize;
double x0, x1, y0, y1; double x0, x1, y0, y1;
int nHorizGaps, nVertGaps, nLargeChars; int nHorizGaps, nVertGaps, nLargeChars;
int i; int i;
GBool doHorizSplit, doVertSplit, smallSplit; GBool doHorizSplit, doVertSplit, smallSplit;
//----- find all horizontal and vertical gaps //----- find all horizontal and vertical gaps
horizGaps = new GList(); horizGaps = new TextGaps();
vertGaps = new GList(); vertGaps = new TextGaps();
findGaps(charsA, rot, &xMin, &yMin, &xMax, &yMax, &avgFontSize, findGaps(charsA, rot, &xMin, &yMin, &xMax, &yMax, &avgFontSize,
horizGaps, vertGaps); horizGaps, vertGaps);
//----- find the largest horizontal and vertical gaps //----- find the largest horizontal and vertical gaps
horizGapSize = 0; horizGapSize = 0;
for (i = 0; i < horizGaps->getLength(); ++i) { for (i = 0; i < horizGaps->getLength(); ++i) {
gap = (TextGap *)horizGaps->get(i); gap = horizGaps->getW(i);
if (gap->w > horizGapSize) { if (gap > horizGapSize) {
horizGapSize = gap->w; horizGapSize = gap;
} }
} }
vertGapSize = 0; vertGapSize = 0;
for (i = 0; i < vertGaps->getLength(); ++i) { for (i = 0; i < vertGaps->getLength(); ++i) {
gap = (TextGap *)vertGaps->get(i); gap = vertGaps->getW(i);
if (gap->w > vertGapSize) { if (gap > vertGapSize) {
vertGapSize = gap->w; vertGapSize = gap;
} }
} }
//----- count horiz/vert gaps equivalent to largest gaps //----- count horiz/vert gaps equivalent to largest gaps
minHorizChunkWidth = yMax - yMin; minHorizChunkWidth = yMax - yMin;
nHorizGaps = 0; nHorizGaps = 0;
if (horizGaps->getLength() > 0) { if (horizGaps->getLength() > 0) {
y0 = yMin; y0 = yMin;
for (i = 0; i < horizGaps->getLength(); ++i) { for (i = 0; i < horizGaps->getLength(); ++i) {
gap = (TextGap *)horizGaps->get(i); gap = horizGaps->getW(i);
if (gap->w > horizGapSize - splitGapSlack * avgFontSize) { if (gap > horizGapSize - splitGapSlack * avgFontSize) {
++nHorizGaps; ++nHorizGaps;
y1 = gap->xy - 0.5 * gap->w; y1 = horizGaps->getX(i) - 0.5 * gap;
if (y1 - y0 < minHorizChunkWidth) { if (y1 - y0 < minHorizChunkWidth) {
minHorizChunkWidth = y1 - y0; minHorizChunkWidth = y1 - y0;
} }
y0 = y1 + gap->w; y0 = y1 + gap;
} }
} }
y1 = yMax; y1 = yMax;
if (y1 - y0 < minHorizChunkWidth) { if (y1 - y0 < minHorizChunkWidth) {
minHorizChunkWidth = y1 - y0; minHorizChunkWidth = y1 - y0;
} }
} }
minVertChunkWidth = xMax - xMin; minVertChunkWidth = xMax - xMin;
nVertGaps = 0; nVertGaps = 0;
if (vertGaps->getLength() > 0) { if (vertGaps->getLength() > 0) {
x0 = xMin; x0 = xMin;
for (i = 0; i < vertGaps->getLength(); ++i) { for (i = 0; i < vertGaps->getLength(); ++i) {
gap = (TextGap *)vertGaps->get(i); gap = vertGaps->getW(i);
if (gap->w > vertGapSize - splitGapSlack * avgFontSize) { if (gap > vertGapSize - splitGapSlack * avgFontSize) {
++nVertGaps; ++nVertGaps;
x1 = gap->xy - 0.5 * gap->w; x1 = vertGaps->getX(i) - 0.5 * gap;
if (x1 - x0 < minVertChunkWidth) { if (x1 - x0 < minVertChunkWidth) {
minVertChunkWidth = x1 - x0; minVertChunkWidth = x1 - x0;
} }
x0 = x1 + gap->w; x0 = x1 + gap;
} }
} }
x1 = xMax; x1 = xMax;
if (x1 - x0 < minVertChunkWidth) { if (x1 - x0 < minVertChunkWidth) {
minVertChunkWidth = x1 - x0; minVertChunkWidth = x1 - x0;
} }
} }
//----- compute splitting parameters //----- compute splitting parameters
skipping to change at line 2883 skipping to change at line 2929
//~ this could use "other content" (vector graphics, rotated text) -- //~ this could use "other content" (vector graphics, rotated text) --
//~ presence of other content in a gap means we should definitely split //~ presence of other content in a gap means we should definitely split
// split vertically // split vertically
if (doVertSplit) { if (doVertSplit) {
#if 0 //~debug #if 0 //~debug
printf("vert split xMin=%g yMin=%g xMax=%g yMax=%g small=%d\n", printf("vert split xMin=%g yMin=%g xMax=%g yMax=%g small=%d\n",
xMin, pageHeight - yMax, xMax, pageHeight - yMin, smallSplit); xMin, pageHeight - yMax, xMax, pageHeight - yMin, smallSplit);
for (i = 0; i < vertGaps->getLength(); ++i) { for (i = 0; i < vertGaps->getLength(); ++i) {
gap = (TextGap *)vertGaps->get(i); if (vertGaps->getW(i) > vertGapSize - splitGapSlack * avgFontSize) {
if (gap->w > vertGapSize - splitGapSlack * avgFontSize) { printf(" x=%g\n", vertGaps->getX(i));
printf(" x=%g\n", gap->xy);
} }
} }
#endif #endif
blk = new TextBlock(blkVertSplit, rot); blk = new TextBlock(blkVertSplit, rot);
blk->smallSplit = smallSplit; blk->smallSplit = smallSplit;
x0 = xMin - 1; x0 = xMin - 1;
for (i = 0; i < vertGaps->getLength(); ++i) { for (i = 0; i < vertGaps->getLength(); ++i) {
gap = (TextGap *)vertGaps->get(i); if (vertGaps->getW(i) > vertGapSize - splitGapSlack * avgFontSize) {
if (gap->w > vertGapSize - splitGapSlack * avgFontSize) { x1 = vertGaps->getX(i);
x1 = gap->xy;
chars2 = getChars(charsA, x0, yMin - 1, x1, yMax + 1); chars2 = getChars(charsA, x0, yMin - 1, x1, yMax + 1);
blk->addChild(split(chars2, rot)); blk->addChild(split(chars2, rot));
delete chars2; delete chars2;
x0 = x1; x0 = x1;
} }
} }
chars2 = getChars(charsA, x0, yMin - 1, xMax + 1, yMax + 1); chars2 = getChars(charsA, x0, yMin - 1, xMax + 1, yMax + 1);
blk->addChild(split(chars2, rot)); blk->addChild(split(chars2, rot));
delete chars2; delete chars2;
// split horizontally // split horizontally
} else if (doHorizSplit) { } else if (doHorizSplit) {
#if 0 //~debug #if 0 //~debug
printf("horiz split xMin=%g yMin=%g xMax=%g yMax=%g small=%d\n", printf("horiz split xMin=%g yMin=%g xMax=%g yMax=%g small=%d\n",
xMin, pageHeight - yMax, xMax, pageHeight - yMin, smallSplit); xMin, pageHeight - yMax, xMax, pageHeight - yMin, smallSplit);
for (i = 0; i < horizGaps->getLength(); ++i) { for (i = 0; i < horizGaps->getLength(); ++i) {
gap = (TextGap *)horizGaps->get(i); if (horizGaps->getW(i) > horizGapSize - splitGapSlack * avgFontSize) {
if (gap->w > horizGapSize - splitGapSlack * avgFontSize) { printf(" y=%g\n", pageHeight - horizGaps->getX(i));
printf(" y=%g\n", pageHeight - gap->xy);
} }
} }
#endif #endif
blk = new TextBlock(blkHorizSplit, rot); blk = new TextBlock(blkHorizSplit, rot);
blk->smallSplit = smallSplit; blk->smallSplit = smallSplit;
y0 = yMin - 1; y0 = yMin - 1;
for (i = 0; i < horizGaps->getLength(); ++i) { for (i = 0; i < horizGaps->getLength(); ++i) {
gap = (TextGap *)horizGaps->get(i); if (horizGaps->getW(i) > horizGapSize - splitGapSlack * avgFontSize) {
if (gap->w > horizGapSize - splitGapSlack * avgFontSize) { y1 = horizGaps->getX(i);
y1 = gap->xy;
chars2 = getChars(charsA, xMin - 1, y0, xMax + 1, y1); chars2 = getChars(charsA, xMin - 1, y0, xMax + 1, y1);
blk->addChild(split(chars2, rot)); blk->addChild(split(chars2, rot));
delete chars2; delete chars2;
y0 = y1; y0 = y1;
} }
} }
chars2 = getChars(charsA, xMin - 1, y0, xMax + 1, yMax + 1); chars2 = getChars(charsA, xMin - 1, y0, xMax + 1, yMax + 1);
blk->addChild(split(chars2, rot)); blk->addChild(split(chars2, rot));
delete chars2; delete chars2;
skipping to change at line 2968 skipping to change at line 3010
#if 0 //~debug #if 0 //~debug
printf("leaf xMin=%g yMin=%g xMax=%g yMax=%g\n", printf("leaf xMin=%g yMin=%g xMax=%g yMax=%g\n",
xMin, pageHeight - yMax, xMax, pageHeight - yMin); xMin, pageHeight - yMax, xMax, pageHeight - yMin);
#endif #endif
blk = new TextBlock(blkLeaf, rot); blk = new TextBlock(blkLeaf, rot);
for (i = 0; i < charsA->getLength(); ++i) { for (i = 0; i < charsA->getLength(); ++i) {
blk->addChild((TextChar *)charsA->get(i), gTrue); blk->addChild((TextChar *)charsA->get(i), gTrue);
} }
} }
deleteGList(horizGaps, TextGap); delete horizGaps;
deleteGList(vertGaps, TextGap); delete vertGaps;
tagBlock(blk); tagBlock(blk);
return blk; return blk;
} }
// Return the subset of chars inside a rectangle. // Return the subset of chars inside a rectangle.
GList *TextPage::getChars(GList *charsA, double xMin, double yMin, GList *TextPage::getChars(GList *charsA, double xMin, double yMin,
double xMax, double yMax) { double xMax, double yMax) {
GList *ret; GList *ret;
skipping to change at line 3003 skipping to change at line 3045
ret->append(ch); ret->append(ch);
} }
} }
return ret; return ret;
} }
void TextPage::findGaps(GList *charsA, int rot, void TextPage::findGaps(GList *charsA, int rot,
double *xMinOut, double *yMinOut, double *xMinOut, double *yMinOut,
double *xMaxOut, double *yMaxOut, double *xMaxOut, double *yMaxOut,
double *avgFontSizeOut, double *avgFontSizeOut,
GList *horizGaps, GList *vertGaps) { TextGaps *horizGaps, TextGaps *vertGaps) {
TextChar *ch; TextChar *ch;
int *horizProfile, *vertProfile; char *horizProfile, *vertProfile;
double xMin, yMin, xMax, yMax, w; double xMin, yMin, xMax, yMax, w;
double minFontSize, avgFontSize, splitPrecision, ascentAdjust, descentAdjust; double minFontSize, avgFontSize, splitPrecision, invSplitPrecision;
double ascentAdjust, descentAdjust;
int xMinI, yMinI, xMaxI, yMaxI, xMinI2, yMinI2, xMaxI2, yMaxI2; int xMinI, yMinI, xMaxI, yMaxI, xMinI2, yMinI2, xMaxI2, yMaxI2;
int start, x, y, i; int start, x, y, i;
//----- compute bbox, min font size, average font size, and split precision //----- compute bbox, min font size, average font size, and split precision
xMin = yMin = xMax = yMax = 0; // make gcc happy xMin = yMin = xMax = yMax = 0; // make gcc happy
minFontSize = avgFontSize = 0; minFontSize = avgFontSize = 0;
for (i = 0; i < charsA->getLength(); ++i) { for (i = 0; i < charsA->getLength(); ++i) {
ch = (TextChar *)charsA->get(i); ch = (TextChar *)charsA->get(i);
if (i == 0 || ch->xMin < xMin) { if (i == 0 || ch->xMin < xMin) {
skipping to change at line 3039 skipping to change at line 3082
avgFontSize += ch->fontSize; avgFontSize += ch->fontSize;
if (i == 0 || ch->fontSize < minFontSize) { if (i == 0 || ch->fontSize < minFontSize) {
minFontSize = ch->fontSize; minFontSize = ch->fontSize;
} }
} }
avgFontSize /= charsA->getLength(); avgFontSize /= charsA->getLength();
splitPrecision = splitPrecisionMul * minFontSize; splitPrecision = splitPrecisionMul * minFontSize;
if (splitPrecision < minSplitPrecision) { if (splitPrecision < minSplitPrecision) {
splitPrecision = minSplitPrecision; splitPrecision = minSplitPrecision;
} }
invSplitPrecision = 1 / splitPrecision;
*xMinOut = xMin; *xMinOut = xMin;
*yMinOut = yMin; *yMinOut = yMin;
*xMaxOut = xMax; *xMaxOut = xMax;
*yMaxOut = yMax; *yMaxOut = yMax;
*avgFontSizeOut = avgFontSize; *avgFontSizeOut = avgFontSize;
//----- compute the horizontal and vertical profiles //----- compute the horizontal and vertical profiles
// Give up (without adding any gaps) if a scaled bounding-box coordinate
// is too large in magnitude: the (int)floor() conversions and the
// profile array sizes computed below would otherwise overflow an int.
// All four bbox edges must be tested -- the last test checks yMax
// (it previously repeated the xMax test, leaving yMax unchecked).
if (xMin * invSplitPrecision < 0.5 * INT_MIN ||
    xMax * invSplitPrecision > 0.5 * INT_MAX ||
    yMin * invSplitPrecision < 0.5 * INT_MIN ||
    yMax * invSplitPrecision > 0.5 * INT_MAX) {
  return;
}
// add some slack to the array bounds to avoid floating point // add some slack to the array bounds to avoid floating point
// precision problems // precision problems
xMinI = (int)floor(xMin / splitPrecision) - 1; xMinI = (int)floor(xMin * invSplitPrecision) - 1;
yMinI = (int)floor(yMin / splitPrecision) - 1; yMinI = (int)floor(yMin * invSplitPrecision) - 1;
xMaxI = (int)floor(xMax / splitPrecision) + 1; xMaxI = (int)floor(xMax * invSplitPrecision) + 1;
yMaxI = (int)floor(yMax / splitPrecision) + 1; yMaxI = (int)floor(yMax * invSplitPrecision) + 1;
horizProfile = (int *)gmallocn(yMaxI - yMinI + 1, sizeof(int)); horizProfile = (char *)gmalloc(yMaxI - yMinI + 1);
vertProfile = (int *)gmallocn(xMaxI - xMinI + 1, sizeof(int)); vertProfile = (char *)gmalloc(xMaxI - xMinI + 1);
memset(horizProfile, 0, (yMaxI - yMinI + 1) * sizeof(int)); memset(horizProfile, 0, yMaxI - yMinI + 1);
memset(vertProfile, 0, (xMaxI - xMinI + 1) * sizeof(int)); memset(vertProfile, 0, xMaxI - xMinI + 1);
for (i = 0; i < charsA->getLength(); ++i) { for (i = 0; i < charsA->getLength(); ++i) {
ch = (TextChar *)charsA->get(i); ch = (TextChar *)charsA->get(i);
// yMinI2 and yMaxI2 are adjusted to allow for slightly overlapping lines // yMinI2 and yMaxI2 are adjusted to allow for slightly overlapping lines
switch (rot) { switch (rot) {
case 0: case 0:
default: default:
xMinI2 = (int)floor(ch->xMin / splitPrecision); xMinI2 = (int)floor(ch->xMin * invSplitPrecision);
xMaxI2 = (int)floor(ch->xMax / splitPrecision); xMaxI2 = (int)floor(ch->xMax * invSplitPrecision);
ascentAdjust = ascentAdjustFactor * (ch->yMax - ch->yMin); ascentAdjust = ascentAdjustFactor * (ch->yMax - ch->yMin);
yMinI2 = (int)floor((ch->yMin + ascentAdjust) / splitPrecision); yMinI2 = (int)floor((ch->yMin + ascentAdjust) * invSplitPrecision);
descentAdjust = descentAdjustFactor * (ch->yMax - ch->yMin); descentAdjust = descentAdjustFactor * (ch->yMax - ch->yMin);
yMaxI2 = (int)floor((ch->yMax - descentAdjust) / splitPrecision); yMaxI2 = (int)floor((ch->yMax - descentAdjust) * invSplitPrecision);
break; break;
case 1: case 1:
descentAdjust = descentAdjustFactor * (ch->xMax - ch->xMin); descentAdjust = descentAdjustFactor * (ch->xMax - ch->xMin);
xMinI2 = (int)floor((ch->xMin + descentAdjust) / splitPrecision); xMinI2 = (int)floor((ch->xMin + descentAdjust) * invSplitPrecision);
ascentAdjust = ascentAdjustFactor * (ch->xMax - ch->xMin); ascentAdjust = ascentAdjustFactor * (ch->xMax - ch->xMin);
xMaxI2 = (int)floor((ch->xMax - ascentAdjust) / splitPrecision); xMaxI2 = (int)floor((ch->xMax - ascentAdjust) * invSplitPrecision);
yMinI2 = (int)floor(ch->yMin / splitPrecision); yMinI2 = (int)floor(ch->yMin * invSplitPrecision);
yMaxI2 = (int)floor(ch->yMax / splitPrecision); yMaxI2 = (int)floor(ch->yMax * invSplitPrecision);
break; break;
case 2: case 2:
xMinI2 = (int)floor(ch->xMin / splitPrecision); xMinI2 = (int)floor(ch->xMin * invSplitPrecision);
xMaxI2 = (int)floor(ch->xMax / splitPrecision); xMaxI2 = (int)floor(ch->xMax * invSplitPrecision);
descentAdjust = descentAdjustFactor * (ch->yMax - ch->yMin); descentAdjust = descentAdjustFactor * (ch->yMax - ch->yMin);
yMinI2 = (int)floor((ch->yMin + descentAdjust) / splitPrecision); yMinI2 = (int)floor((ch->yMin + descentAdjust) * invSplitPrecision);
ascentAdjust = ascentAdjustFactor * (ch->yMax - ch->yMin); ascentAdjust = ascentAdjustFactor * (ch->yMax - ch->yMin);
yMaxI2 = (int)floor((ch->yMax - ascentAdjust) / splitPrecision); yMaxI2 = (int)floor((ch->yMax - ascentAdjust) * invSplitPrecision);
break; break;
case 3: case 3:
ascentAdjust = ascentAdjustFactor * (ch->xMax - ch->xMin); ascentAdjust = ascentAdjustFactor * (ch->xMax - ch->xMin);
xMinI2 = (int)floor((ch->xMin + ascentAdjust) / splitPrecision); xMinI2 = (int)floor((ch->xMin + ascentAdjust) * invSplitPrecision);
descentAdjust = descentAdjustFactor * (ch->xMax - ch->xMin); descentAdjust = descentAdjustFactor * (ch->xMax - ch->xMin);
xMaxI2 = (int)floor((ch->xMax - descentAdjust) / splitPrecision); xMaxI2 = (int)floor((ch->xMax - descentAdjust) * invSplitPrecision);
yMinI2 = (int)floor(ch->yMin / splitPrecision); yMinI2 = (int)floor(ch->yMin * invSplitPrecision);
yMaxI2 = (int)floor(ch->yMax / splitPrecision); yMaxI2 = (int)floor(ch->yMax * invSplitPrecision);
break; break;
} }
for (y = yMinI2; y <= yMaxI2; ++y) { for (y = yMinI2; y <= yMaxI2; ++y) {
++horizProfile[y - yMinI]; horizProfile[y - yMinI] = 1;
} }
for (x = xMinI2; x <= xMaxI2; ++x) { for (x = xMinI2; x <= xMaxI2; ++x) {
++vertProfile[x - xMinI]; vertProfile[x - xMinI] = 1;
} }
} }
//----- build the list of horizontal gaps //----- build the list of horizontal gaps
for (start = yMinI; start < yMaxI && !horizProfile[start - yMinI]; ++start) ; for (start = yMinI; start < yMaxI && !horizProfile[start - yMinI]; ++start) ;
for (y = start; y < yMaxI; ++y) { for (y = start; y < yMaxI; ++y) {
if (horizProfile[y - yMinI]) { if (horizProfile[y - yMinI]) {
if (!horizProfile[y + 1 - yMinI]) { if (!horizProfile[y + 1 - yMinI]) {
start = y; start = y;
} }
} else { } else {
if (horizProfile[y + 1 - yMinI]) { if (horizProfile[y + 1 - yMinI]) {
w = (y - start) * splitPrecision; w = (y - start) * splitPrecision;
horizGaps->append(new TextGap((start + 1) * splitPrecision + 0.5 * w, horizGaps->addGap((start + 1) * splitPrecision + 0.5 * w, w);
w));
} }
} }
} }
//----- build the list of vertical gaps //----- build the list of vertical gaps
for (start = xMinI; start < xMaxI && !vertProfile[start - xMinI]; ++start) ; for (start = xMinI; start < xMaxI && !vertProfile[start - xMinI]; ++start) ;
for (x = start; x < xMaxI; ++x) { for (x = start; x < xMaxI; ++x) {
if (vertProfile[x - xMinI]) { if (vertProfile[x - xMinI]) {
if (!vertProfile[x + 1 - xMinI]) { if (!vertProfile[x + 1 - xMinI]) {
start = x; start = x;
} }
} else { } else {
if (vertProfile[x + 1 - xMinI]) { if (vertProfile[x + 1 - xMinI]) {
w = (x - start) * splitPrecision; w = (x - start) * splitPrecision;
vertGaps->append(new TextGap((start + 1) * splitPrecision + 0.5 * w, vertGaps->addGap((start + 1) * splitPrecision + 0.5 * w, w);
w));
} }
} }
} }
gfree(horizProfile); gfree(horizProfile);
gfree(vertProfile); gfree(vertProfile);
} }
// Decide whether this block is a line, column, or multiple columns: // Decide whether this block is a line, column, or multiple columns:
// - all leaf nodes are lines // - all leaf nodes are lines
skipping to change at line 3820 skipping to change at line 3868
if (gap > maxGap) { if (gap > maxGap) {
maxGap = gap; maxGap = gap;
} }
} }
} }
avgFontSize /= charsA->getLength(); avgFontSize /= charsA->getLength();
if (minGap < 0) { if (minGap < 0) {
minGap = 0; minGap = 0;
} }
// if spacing is nearly uniform (minGap is close to maxGap), use the // if spacing is nearly uniform (minGap is close to maxGap), there
// SpGap/AdjGap values if available, otherwise assume it's a single // are three cases:
// word (technically it could be either "ABC" or "A B C", but it's // (1) if the SpGap and AdjGap values are both available and
// essentially impossible to tell) // sensible, use them
// (2) if only the SpGap values are available, meaning that every
// character in the line had a space after it, split after every
// character
// (3) otherwise assume it's a single word (technically it could be
// either "ABC" or "A B C", but it's essentially impossible to
// tell)
if (maxGap - minGap < uniformSpacing * avgFontSize) { if (maxGap - minGap < uniformSpacing * avgFontSize) {
if (minAdjGap <= maxAdjGap && if (minSpGap <= maxSpGap) {
minSpGap <= maxSpGap && if (minAdjGap <= maxAdjGap &&
minSpGap - maxAdjGap > 0.01) { minSpGap - maxAdjGap > 0.01) {
return 0.5 * (maxAdjGap + minSpGap); return 0.5 * (maxAdjGap + minSpGap);
} else { } else if (minAdjGap > maxAdjGap &&
return maxGap + 1; maxSpGap - minSpGap < uniformSpacing * avgFontSize) {
return minSpGap - 1;
}
} }
return maxGap + 1;
// if there is some variation in spacing, but it's small, assume // if there is some variation in spacing, but it's small, assume
// there are some inter-word spaces // there are some inter-word spaces
} else if (maxGap - minGap < wordSpacing * avgFontSize) { } else if (maxGap - minGap < wordSpacing * avgFontSize) {
return 0.5 * (minGap + maxGap); return 0.5 * (minGap + maxGap);
// if there is a large variation in spacing, use the SpGap/AdjGap // if there is a large variation in spacing, use the SpGap/AdjGap
// values if they look reasonable, otherwise, assume a reasonable // values if they look reasonable, otherwise, assume a reasonable
// threshold for inter-word spacing (we can't use something like // threshold for inter-word spacing (we can't use something like
// 0.5*(minGap+maxGap) here because there can be outliers at the // 0.5*(minGap+maxGap) here because there can be outliers at the
skipping to change at line 4035 skipping to change at line 4092
for (i = 0; i < blk->children->getLength(); ++i) { for (i = 0; i < blk->children->getLength(); ++i) {
buildSuperLines((TextBlock *)blk->children->get(i), superLines); buildSuperLines((TextBlock *)blk->children->get(i), superLines);
} }
} }
} }
void TextPage::assignSimpleLayoutPositions(GList *superLines, void TextPage::assignSimpleLayoutPositions(GList *superLines,
UnicodeMap *uMap) { UnicodeMap *uMap) {
GList *lines; GList *lines;
TextLine *line0, *line1; TextLine *line0, *line1;
double xMin; double xMin, xMax;
int px, sp, i, j; int px, px2, sp, i, j;
// build a list of lines and sort by x // build a list of lines and sort by x
lines = new GList(); lines = new GList();
for (i = 0; i < superLines->getLength(); ++i) { for (i = 0; i < superLines->getLength(); ++i) {
lines->append(((TextSuperLine *)superLines->get(i))->lines); lines->append(((TextSuperLine *)superLines->get(i))->lines);
} }
lines->sort(&TextLine::cmpX); lines->sort(&TextLine::cmpX);
// assign positions // assign positions
xMin = ((TextLine *)lines->get(0))->xMin; xMin = ((TextLine *)lines->get(0))->xMin;
for (i = 0; i < lines->getLength(); ++i) { for (i = 0; i < lines->getLength(); ++i) {
line0 = (TextLine *)lines->get(i); line0 = (TextLine *)lines->get(i);
computeLinePhysWidth(line0, uMap); computeLinePhysWidth(line0, uMap);
line0->px = (int)((line0->xMin - xMin) / (0.5 * line0->fontSize)); px = 0;
xMax = xMin;
for (j = 0; j < i; ++j) { for (j = 0; j < i; ++j) {
line1 = (TextLine *)lines->get(j); line1 = (TextLine *)lines->get(j);
if (line0->xMin > line1->xMax) { if (line0->xMin > line1->xMax) {
sp = (int)((line0->xMin - line1->xMax) / if (line1->xMax > xMax) {
(0.5 * line0->fontSize) + 0.5); xMax = line1->xMax;
if (sp < 1) { }
sp = 1; px2 = line1->px + line1->pw;
} if (px2 > px) {
px = line1->px + line1->pw + sp; px = px2;
if (px > line0->px) {
line0->px = px;
} }
} }
} }
sp = (int)((line0->xMin - xMax) / (0.5 * line0->fontSize) + 0.5);
if (sp < 1 && xMax > xMin) {
sp = 1;
}
line0->px = px + sp;
} }
delete lines; delete lines;
} }
void TextPage::generateUnderlinesAndLinks(GList *columns) { void TextPage::generateUnderlinesAndLinks(GList *columns) {
TextColumn *col; TextColumn *col;
TextParagraph *par; TextParagraph *par;
TextLine *line; TextLine *line;
TextWord *word; TextWord *word;
 End of changes. 58 change blocks. 
108 lines changed or deleted 169 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)