HTMLGen.cc (xpdf-4.03) | : | HTMLGen.cc (xpdf-4.04) | ||
---|---|---|---|---|
//======================================================================== | //======================================================================== | |||
// | // | |||
// HTMLGen.cc | // HTMLGen.cc | |||
// | // | |||
// Copyright 2010 Glyph & Cog, LLC | // Copyright 2010-2021 Glyph & Cog, LLC | |||
// | // | |||
//======================================================================== | //======================================================================== | |||
//~ to do: | //~ to do: | |||
//~ - fonts | //~ - fonts | |||
//~ - underlined? (underlines are present in the background image) | //~ - underlined? (underlines are present in the background image) | |||
//~ - include the original font name in the CSS entry (before the | //~ - include the original font name in the CSS entry (before the | |||
//~ generic serif/sans-serif/monospace name) | //~ generic serif/sans-serif/monospace name) | |||
//~ - check that htmlDir exists and is a directory | //~ - check that htmlDir exists and is a directory | |||
//~ - links: | //~ - links: | |||
//~ - links to pages | //~ - internal links (to pages, to named destinations) | |||
//~ - links to named destinations | //~ - links from non-text content | |||
//~ - links to URLs | ||||
//~ - rotated text should go in the background image | //~ - rotated text should go in the background image | |||
//~ - metadata | //~ - metadata | |||
//~ - PDF outline | //~ - PDF outline | |||
#include <aconf.h> | #include <aconf.h> | |||
#ifdef USE_GCC_PRAGMAS | #ifdef USE_GCC_PRAGMAS | |||
#pragma implementation | #pragma implementation | |||
#endif | #endif | |||
#include <stdlib.h> | #include <stdlib.h> | |||
#include <png.h> | #include <png.h> | |||
#include "gmem.h" | #include "gmem.h" | |||
#include "gmempp.h" | #include "gmempp.h" | |||
#include "GString.h" | #include "GString.h" | |||
#include "GList.h" | #include "GList.h" | |||
#include "SplashBitmap.h" | #include "SplashBitmap.h" | |||
#include "PDFDoc.h" | #include "PDFDoc.h" | |||
#include "GfxFont.h" | #include "GfxFont.h" | |||
#include "AcroForm.h" | ||||
#include "TextOutputDev.h" | #include "TextOutputDev.h" | |||
#include "SplashOutputDev.h" | #include "SplashOutputDev.h" | |||
#include "ErrorCodes.h" | #include "ErrorCodes.h" | |||
#include "WebFont.h" | #include "WebFont.h" | |||
#include "HTMLGen.h" | #include "HTMLGen.h" | |||
#ifdef _WIN32 | #ifdef _WIN32 | |||
# define strcasecmp stricmp | # define strcasecmp stricmp | |||
# define strncasecmp strnicmp | # define strncasecmp strnicmp | |||
#endif | #endif | |||
skipping to change at line 199 | skipping to change at line 199 | |||
Ref fontID; | Ref fontID; | |||
GString *fontFace; // NULL for substituted fonts | GString *fontFace; // NULL for substituted fonts | |||
GString *fontSpec; | GString *fontSpec; | |||
double scale; | double scale; | |||
GBool used; // set when used (per page) | GBool used; // set when used (per page) | |||
}; | }; | |||
//------------------------------------------------------------------------ | //------------------------------------------------------------------------ | |||
class HTMLGenFormFieldInfo { | ||||
public: | ||||
HTMLGenFormFieldInfo(AcroFormField *acroFormFieldA) | ||||
: acroFormField(acroFormFieldA) {} | ||||
AcroFormField *acroFormField; | ||||
}; | ||||
//------------------------------------------------------------------------ | //------------------------------------------------------------------------ | |||
HTMLGen::HTMLGen(double backgroundResolutionA) { | class Base64Encoder { | |||
public: | ||||
Base64Encoder(int (*writeFuncA)(void *stream, const char *data, int size), | ||||
void *streamA); | ||||
void encode(const unsigned char *data, size_t size); | ||||
void flush(); | ||||
private: | ||||
int (*writeFunc)(void *stream, const char *data, int size); | ||||
void *stream; | ||||
unsigned char buf[3]; | ||||
int bufLen; | ||||
}; | ||||
static char base64Chars[65] = | ||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||||
Base64Encoder::Base64Encoder(int (*writeFuncA)(void *stream, const char *data, | ||||
int size), | ||||
void *streamA) { | ||||
writeFunc = writeFuncA; | ||||
stream = streamA; | ||||
bufLen = 0; | ||||
} | ||||
void Base64Encoder::encode(const unsigned char *data, size_t size) { | ||||
size_t i = 0; | ||||
while (1) { | ||||
while (bufLen < 3) { | ||||
if (i >= size) { | ||||
return; | ||||
} | ||||
buf[bufLen++] = data[i++]; | ||||
} | ||||
char out[4]; | ||||
out[0] = base64Chars[(buf[0] >> 2) & 0x3f]; | ||||
out[1] = base64Chars[((buf[0] << 4) | (buf[1] >> 4)) & 0x3f]; | ||||
out[2] = base64Chars[((buf[1] << 2) | (buf[2] >> 6)) & 0x3f]; | ||||
out[3] = base64Chars[buf[2] & 0x3f]; | ||||
writeFunc(stream, out, 4); | ||||
bufLen = 0; | ||||
} | ||||
} | ||||
void Base64Encoder::flush() { | ||||
// if bufLen == 0, this does nothing | ||||
// bufLen should never be 3 here | ||||
char out[4]; | ||||
if (bufLen == 1) { | ||||
out[0] = base64Chars[(buf[0] >> 2) & 0x3f]; | ||||
out[1] = base64Chars[(buf[0] << 4) & 0x3f]; | ||||
out[2] = '='; | ||||
out[3] = '='; | ||||
writeFunc(stream, out, 4); | ||||
} else if (bufLen == 2) { | ||||
out[0] = base64Chars[(buf[0] >> 2) & 0x3f]; | ||||
out[1] = base64Chars[((buf[0] << 4) | (buf[1] >> 4)) & 0x3f]; | ||||
out[2] = base64Chars[(buf[1] << 2) & 0x3f]; | ||||
out[3] = '='; | ||||
writeFunc(stream, out, 4); | ||||
} | ||||
} | ||||
static int writeToString(void *stream, const char *data, int size) { | ||||
((GString *)stream)->append(data, size); | ||||
return size; | ||||
} | ||||
//------------------------------------------------------------------------ | ||||
//------------------------------------------------------------------------ | ||||
HTMLGen::HTMLGen(double backgroundResolutionA, GBool tableMode) { | ||||
TextOutputControl textOutControl; | TextOutputControl textOutControl; | |||
SplashColor paperColor; | SplashColor paperColor; | |||
ok = gTrue; | ok = gTrue; | |||
backgroundResolution = backgroundResolutionA; | backgroundResolution = backgroundResolutionA; | |||
zoom = 1.0; | zoom = 1.0; | |||
vStretch = 1.0; | ||||
drawInvisibleText = gTrue; | drawInvisibleText = gTrue; | |||
allTextInvisible = gFalse; | allTextInvisible = gFalse; | |||
extractFontFiles = gFalse; | extractFontFiles = gFalse; | |||
convertFormFields = gFalse; | ||||
embedBackgroundImage = gFalse; | ||||
embedFonts = gFalse; | ||||
// set up the TextOutputDev | // set up the TextOutputDev | |||
textOutControl.mode = textOutReadingOrder; | textOutControl.mode = tableMode ? textOutTableLayout : textOutReadingOrder; | |||
textOutControl.html = gTrue; | textOutControl.html = gTrue; | |||
textOutControl.splitRotatedWords = gTrue; | textOutControl.splitRotatedWords = gTrue; | |||
textOut = new TextOutputDev(NULL, &textOutControl, gFalse); | textOut = new TextOutputDev(NULL, &textOutControl, gFalse); | |||
if (!textOut->isOk()) { | if (!textOut->isOk()) { | |||
ok = gFalse; | ok = gFalse; | |||
} | } | |||
// set up the SplashOutputDev | // set up the SplashOutputDev | |||
paperColor[0] = paperColor[1] = paperColor[2] = 0xff; | paperColor[0] = paperColor[1] = paperColor[2] = 0xff; | |||
splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor); | splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor); | |||
skipping to change at line 268 | skipping to change at line 355 | |||
va_start(args, fmt); | va_start(args, fmt); | |||
s = GString::formatv(fmt, args); | s = GString::formatv(fmt, args); | |||
va_end(args); | va_end(args); | |||
ret = writeFunc(stream, s->getCString(), s->getLength()); | ret = writeFunc(stream, s->getCString(), s->getLength()); | |||
delete s; | delete s; | |||
return ret; | return ret; | |||
} | } | |||
struct PNGWriteInfo { | struct PNGWriteInfo { | |||
Base64Encoder *base64; | ||||
int (*writePNG)(void *stream, const char *data, int size); | int (*writePNG)(void *stream, const char *data, int size); | |||
void *pngStream; | void *pngStream; | |||
}; | }; | |||
static void pngWriteFunc(png_structp png, png_bytep data, png_size_t size) { | static void pngWriteFunc(png_structp png, png_bytep data, png_size_t size) { | |||
PNGWriteInfo *info; | PNGWriteInfo *info = (PNGWriteInfo *)png_get_progressive_ptr(png); | |||
if (info->base64) { | ||||
info = (PNGWriteInfo *)png_get_progressive_ptr(png); | info->base64->encode(data, size); | |||
info->writePNG(info->pngStream, (char *)data, (int)size); | } else { | |||
info->writePNG(info->pngStream, (char *)data, (int)size); | ||||
} | ||||
} | } | |||
int HTMLGen::convertPage( | int HTMLGen::convertPage( | |||
int pg, const char *pngURL, const char *htmlDir, | int pg, const char *pngURL, const char *htmlDir, | |||
int (*writeHTML)(void *stream, const char *data, int size), | int (*writeHTML)(void *stream, const char *data, int size), | |||
void *htmlStream, | void *htmlStream, | |||
int (*writePNG)(void *stream, const char *data, int size), | int (*writePNG)(void *stream, const char *data, int size), | |||
void *pngStream) { | void *pngStream) { | |||
png_structp png; | png_structp png; | |||
png_infop pngInfo; | png_infop pngInfo; | |||
skipping to change at line 306 | skipping to change at line 396 | |||
TextLine *line; | TextLine *line; | |||
HTMLGenFontDefn *fontDefn; | HTMLGenFontDefn *fontDefn; | |||
GString *s; | GString *s; | |||
double base; | double base; | |||
int primaryDir, spanDir; | int primaryDir, spanDir; | |||
int colIdx, parIdx, lineIdx, firstWordIdx, lastWordIdx; | int colIdx, parIdx, lineIdx, firstWordIdx, lastWordIdx; | |||
int y, i; | int y, i; | |||
// generate the background bitmap | // generate the background bitmap | |||
splashOut->setSkipText(!allTextInvisible, gFalse); | splashOut->setSkipText(!allTextInvisible, gFalse); | |||
doc->displayPage(splashOut, pg, backgroundResolution, backgroundResolution, | doc->displayPage(splashOut, pg, | |||
backgroundResolution, backgroundResolution * vStretch, | ||||
0, gFalse, gTrue, gFalse); | 0, gFalse, gTrue, gFalse); | |||
bitmap = splashOut->getBitmap(); | bitmap = splashOut->getBitmap(); | |||
if (!(png = png_create_write_struct(PNG_LIBPNG_VER_STRING, | ||||
NULL, NULL, NULL)) || | ||||
!(pngInfo = png_create_info_struct(png))) { | ||||
return errFileIO; | ||||
} | ||||
if (setjmp(png_jmpbuf(png))) { | ||||
return errFileIO; | ||||
} | ||||
writeInfo.writePNG = writePNG; | ||||
writeInfo.pngStream = pngStream; | ||||
png_set_write_fn(png, &writeInfo, pngWriteFunc, NULL); | ||||
png_set_IHDR(png, pngInfo, bitmap->getWidth(), bitmap->getHeight(), | ||||
8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, | ||||
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); | ||||
png_write_info(png, pngInfo); | ||||
p = bitmap->getDataPtr(); | ||||
for (y = 0; y < bitmap->getHeight(); ++y) { | ||||
png_write_row(png, (png_bytep)p); | ||||
p += bitmap->getRowSize(); | ||||
} | ||||
png_write_end(png, pngInfo); | ||||
png_destroy_write_struct(&png, &pngInfo); | ||||
// page size | // page size | |||
if (doc->getPageRotate(pg) == 90 || doc->getPageRotate(pg) == 270) { | if (doc->getPageRotate(pg) == 90 || doc->getPageRotate(pg) == 270) { | |||
pageW = doc->getPageCropHeight(pg); | pageW = doc->getPageCropHeight(pg); | |||
pageH = doc->getPageCropWidth(pg); | pageH = doc->getPageCropWidth(pg); | |||
} else { | } else { | |||
pageW = doc->getPageCropWidth(pg); | pageW = doc->getPageCropWidth(pg); | |||
pageH = doc->getPageCropHeight(pg); | pageH = doc->getPageCropHeight(pg); | |||
} | } | |||
// get the PDF text | // get the PDF text | |||
doc->displayPage(textOut, pg, 72, 72, 0, gFalse, gTrue, gFalse); | doc->displayPage(textOut, pg, 72, 72, 0, gFalse, gTrue, gFalse); | |||
doc->processLinks(textOut, pg); | doc->processLinks(textOut, pg); | |||
text = textOut->takeText(); | text = textOut->takeText(); | |||
primaryDir = text->primaryDirectionIsLR() ? 1 : -1; | primaryDir = text->primaryDirectionIsLR() ? 1 : -1; | |||
// insert a special character for each form field; | ||||
// remove existing characters inside field bboxes; | ||||
// erase background content inside field bboxes | ||||
formFieldFont = NULL; | ||||
formFieldInfo = NULL; | ||||
if (convertFormFields) { | ||||
AcroForm *form = doc->getCatalog()->getForm(); | ||||
if (form) { | ||||
formFieldInfo = new GList(); | ||||
formFieldFont = new TextFontInfo(); | ||||
double yTop = doc->getCatalog()->getPage(pg)->getMediaBox()->y2; | ||||
for (i = 0; i < form->getNumFields(); ++i) { | ||||
AcroFormField *field = form->getField(i); | ||||
AcroFormFieldType fieldType = field->getAcroFormFieldType(); | ||||
if (field->getPageNum() == pg && | ||||
(fieldType == acroFormFieldText || | ||||
fieldType == acroFormFieldCheckbox)) { | ||||
double llx, lly, urx, ury; | ||||
field->getBBox(&llx, &lly, &urx, &ury); | ||||
lly = yTop - lly; | ||||
ury = yTop - ury; | ||||
// add the field info | ||||
int fieldIdx = formFieldInfo->getLength(); | ||||
formFieldInfo->append(new HTMLGenFormFieldInfo(field)); | ||||
// remove existing chars | ||||
text->removeChars(llx, ury, urx, lly, 0.75, 0.5); | ||||
// erase background content | ||||
int llxI = (int)(llx * backgroundResolution / 72 + 0.5); | ||||
int llyI = (int)(lly * backgroundResolution * vStretch / 72 + 0.5); | ||||
int urxI = (int)(urx * backgroundResolution / 72 + 0.5); | ||||
int uryI = (int)(ury * backgroundResolution * vStretch / 72 + 0.5); | ||||
llyI += (int)(backgroundResolution * vStretch / 20); | ||||
if (llxI < 0) { | ||||
llxI = 0; | ||||
} | ||||
if (urxI >= bitmap->getWidth()) { | ||||
urxI = bitmap->getWidth() - 1; | ||||
} | ||||
if (uryI < 0) { | ||||
uryI = 0; | ||||
} | ||||
if (llyI > bitmap->getHeight()) { | ||||
llyI = bitmap->getHeight() - 1; | ||||
} | ||||
if (uryI <= llyI && llxI <= urxI) { | ||||
SplashColorPtr p = bitmap->getDataPtr() | ||||
+ uryI * bitmap->getRowSize() + llxI * 3; | ||||
for (int y = uryI; y <= llyI; ++y) { | ||||
memset(p, 0xff, (urxI - llxI + 1) * 3); | ||||
p += bitmap->getRowSize(); | ||||
} | ||||
} | ||||
// add a special char | ||||
// (the font size is unused -- 10 is an arbitrary value) | ||||
text->addSpecialChar(llx, ury, urx, lly, | ||||
0, formFieldFont, 10, 0x80000000 + fieldIdx); | ||||
} | ||||
} | ||||
} | ||||
} | ||||
// HTML header | // HTML header | |||
pr(writeHTML, htmlStream, "<html>\n"); | pr(writeHTML, htmlStream, "<html>\n"); | |||
pr(writeHTML, htmlStream, "<head>\n"); | pr(writeHTML, htmlStream, "<head>\n"); | |||
pr(writeHTML, htmlStream, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n"); | pr(writeHTML, htmlStream, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n"); | |||
pr(writeHTML, htmlStream, "<style type=\"text/css\">\n"); | pr(writeHTML, htmlStream, "<style type=\"text/css\">\n"); | |||
pr(writeHTML, htmlStream, ".txt { white-space:nowrap; }\n"); | pr(writeHTML, htmlStream, ".txt { white-space:nowrap; }\n"); | |||
if (convertFormFields) { | ||||
pr(writeHTML, htmlStream, ".textfield {\n"); | ||||
pr(writeHTML, htmlStream, " border: 0;\n"); | ||||
pr(writeHTML, htmlStream, " padding: 0;\n"); | ||||
pr(writeHTML, htmlStream, " background: #ccccff;\n"); | ||||
pr(writeHTML, htmlStream, "}\n"); | ||||
pr(writeHTML, htmlStream, ".checkbox {\n"); | ||||
pr(writeHTML, htmlStream, "}\n"); | ||||
} | ||||
fonts = text->getFonts(); | fonts = text->getFonts(); | |||
fontScales = (double *)gmallocn(fonts->getLength(), sizeof(double)); | fontScales = (double *)gmallocn(fonts->getLength(), sizeof(double)); | |||
for (i = 0; i < fontDefns->getLength(); ++i) { | for (i = 0; i < fontDefns->getLength(); ++i) { | |||
fontDefn = (HTMLGenFontDefn *)fontDefns->get(i); | fontDefn = (HTMLGenFontDefn *)fontDefns->get(i); | |||
fontDefn->used = gFalse; | fontDefn->used = gFalse; | |||
} | } | |||
for (i = 0; i < fonts->getLength(); ++i) { | for (i = 0; i < fonts->getLength(); ++i) { | |||
font = (TextFontInfo *)fonts->get(i); | font = (TextFontInfo *)fonts->get(i); | |||
fontDefn = getFontDefn(font, htmlDir); | fontDefn = getFontDefn(font, htmlDir); | |||
if (!fontDefn->used && fontDefn->fontFace) { | if (!fontDefn->used && fontDefn->fontFace) { | |||
pr(writeHTML, htmlStream, fontDefn->fontFace->getCString()); | pr(writeHTML, htmlStream, fontDefn->fontFace->getCString()); | |||
} | } | |||
pf(writeHTML, htmlStream, "#f{0:d} {{ {1:t} }}\n", i, fontDefn->fontSpec); | pf(writeHTML, htmlStream, ".f{0:d} {{ {1:t} }}\n", i, fontDefn->fontSpec); | |||
fontScales[i] = fontDefn->scale; | fontScales[i] = fontDefn->scale; | |||
fontDefn->used = gTrue; | fontDefn->used = gTrue; | |||
} | } | |||
pr(writeHTML, htmlStream, "</style>\n"); | pr(writeHTML, htmlStream, "</style>\n"); | |||
pr(writeHTML, htmlStream, "</head>\n"); | pr(writeHTML, htmlStream, "</head>\n"); | |||
if (primaryDir >= 0) { | if (primaryDir >= 0) { | |||
pr(writeHTML, htmlStream, "<body>\n"); | pr(writeHTML, htmlStream, "<body>\n"); | |||
} else { | } else { | |||
pr(writeHTML, htmlStream, "<body dir=\"rtl\">\n"); | pr(writeHTML, htmlStream, "<body dir=\"rtl\">\n"); | |||
} | } | |||
// background image element (part 1) | ||||
if (primaryDir >= 0) { | if (primaryDir >= 0) { | |||
pf(writeHTML, htmlStream, "<img id=\"background\" style=\"position:absolute; | pf(writeHTML, htmlStream, "<img style=\"position:absolute; left:0px; top:0px | |||
left:0px; top:0px;\" width=\"{0:d}\" height=\"{1:d}\" src=\"{2:s}\">\n", | ;\" width=\"{0:d}\" height=\"{1:d}\" ", | |||
(int)(pageW * zoom), (int)(pageH * zoom), pngURL); | (int)(pageW * zoom), (int)(pageH * zoom * vStretch)); | |||
} else { | ||||
pf(writeHTML, htmlStream, "<img style=\"position:absolute; right:0px; top:0px;\" width=\"{0:d}\" height=\"{1:d}\" ", | ||||
(int)(pageW * zoom), (int)(pageH * zoom * vStretch)); | ||||
} | ||||
if (embedBackgroundImage) { | ||||
pr(writeHTML, htmlStream, "src=\"data:image/png;base64,\n"); | ||||
writeInfo.base64 = new Base64Encoder(writeHTML, htmlStream); | ||||
writeInfo.writePNG = NULL; | ||||
writeInfo.pngStream = NULL; | ||||
} else { | } else { | |||
pf(writeHTML, htmlStream, "<img id=\"background\" style=\"position:absolute; | pf(writeHTML, htmlStream, "src=\"{0:s}\"", pngURL); | |||
right:0px; top:0px;\" width=\"{0:d}\" height=\"{1:d}\" src=\"{2:s}\">\n", | writeInfo.base64 = NULL; | |||
(int)(pageW * zoom), (int)(pageH * zoom), pngURL); | writeInfo.writePNG = writePNG; | |||
writeInfo.pngStream = pngStream; | ||||
} | } | |||
// background image data - writing to a separate file, or embedding | ||||
// with base64 encoding | ||||
if (!(png = png_create_write_struct(PNG_LIBPNG_VER_STRING, | ||||
NULL, NULL, NULL)) || | ||||
!(pngInfo = png_create_info_struct(png))) { | ||||
return errFileIO; | ||||
} | ||||
if (setjmp(png_jmpbuf(png))) { | ||||
return errFileIO; | ||||
} | ||||
png_set_write_fn(png, &writeInfo, pngWriteFunc, NULL); | ||||
png_set_IHDR(png, pngInfo, bitmap->getWidth(), bitmap->getHeight(), | ||||
8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, | ||||
PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); | ||||
png_write_info(png, pngInfo); | ||||
p = bitmap->getDataPtr(); | ||||
for (y = 0; y < bitmap->getHeight(); ++y) { | ||||
png_write_row(png, (png_bytep)p); | ||||
p += bitmap->getRowSize(); | ||||
} | ||||
png_write_end(png, pngInfo); | ||||
png_destroy_write_struct(&png, &pngInfo); | ||||
if (embedBackgroundImage) { | ||||
writeInfo.base64->flush(); | ||||
delete writeInfo.base64; | ||||
} | ||||
// background image element (part 2) | ||||
pr(writeHTML, htmlStream, "\">\n"); | ||||
// generate the HTML text | // generate the HTML text | |||
nextFieldID = 0; | ||||
cols = text->makeColumns(); | cols = text->makeColumns(); | |||
for (colIdx = 0; colIdx < cols->getLength(); ++colIdx) { | for (colIdx = 0; colIdx < cols->getLength(); ++colIdx) { | |||
col = (TextColumn *)cols->get(colIdx); | col = (TextColumn *)cols->get(colIdx); | |||
pars = col->getParagraphs(); | pars = col->getParagraphs(); | |||
for (parIdx = 0; parIdx < pars->getLength(); ++parIdx) { | for (parIdx = 0; parIdx < pars->getLength(); ++parIdx) { | |||
par = (TextParagraph *)pars->get(parIdx); | par = (TextParagraph *)pars->get(parIdx); | |||
lines = par->getLines(); | lines = par->getLines(); | |||
for (lineIdx = 0; lineIdx < lines->getLength(); ++lineIdx) { | for (lineIdx = 0; lineIdx < lines->getLength(); ++lineIdx) { | |||
line = (TextLine *)lines->get(lineIdx); | line = (TextLine *)lines->get(lineIdx); | |||
if (line->getRotation() != 0) { | if (line->getRotation() != 0) { | |||
skipping to change at line 418 | skipping to change at line 605 | |||
lastWordIdx = findDirSpan(words, firstWordIdx, | lastWordIdx = findDirSpan(words, firstWordIdx, | |||
primaryDir, &spanDir); | primaryDir, &spanDir); | |||
appendSpans(words, firstWordIdx, lastWordIdx, | appendSpans(words, firstWordIdx, lastWordIdx, | |||
primaryDir, spanDir, | primaryDir, spanDir, | |||
base, lineIdx == 0 && par->hasDropCap(), | base, lineIdx == 0 && par->hasDropCap(), | |||
s); | s); | |||
} | } | |||
if (primaryDir >= 0) { | if (primaryDir >= 0) { | |||
pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; left:{0:d}px; top:{1:d}px;\">{2:t}</div>\n", | pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; left:{0:d}px; top:{1:d}px;\">{2:t}</div>\n", | |||
(int)(line->getXMin() * zoom), | (int)(line->getXMin() * zoom), | |||
(int)(line->getYMin() * zoom), s); | (int)(line->getYMin() * zoom * vStretch), s); | |||
} else { | } else { | |||
pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; right:{0:d}px; top:{1:d}px;\">{2:t}</div>\n", | pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; right:{0:d}px; top:{1:d}px;\">{2:t}</div>\n", | |||
(int)((pageW - line->getXMax()) * zoom), | (int)((pageW - line->getXMax()) * zoom), | |||
(int)(line->getYMin() * zoom), s); | (int)(line->getYMin() * zoom * vStretch), s); | |||
} | } | |||
delete s; | delete s; | |||
} | } | |||
} | } | |||
} | } | |||
gfree(fontScales); | gfree(fontScales); | |||
delete text; | delete text; | |||
deleteGList(cols, TextColumn); | deleteGList(cols, TextColumn); | |||
if (formFieldFont) { | ||||
delete formFieldFont; | ||||
formFieldFont = NULL; | ||||
} | ||||
if (formFieldInfo) { | ||||
deleteGList(formFieldInfo, HTMLGenFormFieldInfo); | ||||
formFieldInfo = NULL; | ||||
} | ||||
// HTML trailer | // HTML trailer | |||
pr(writeHTML, htmlStream, "</body>\n"); | pr(writeHTML, htmlStream, "</body>\n"); | |||
pr(writeHTML, htmlStream, "</html>\n"); | pr(writeHTML, htmlStream, "</html>\n"); | |||
return errNone; | return errNone; | |||
} | } | |||
// Find a sequence of words, starting at <firstWordIdx>, that have the | // Find a sequence of words, starting at <firstWordIdx>, that have the | |||
// same writing direction. Returns the index of the last word, and | // same writing direction. Returns the index of the last word, and | |||
skipping to change at line 473 | skipping to change at line 668 | |||
} | } | |||
return nextWordIdx - primaryDir; | return nextWordIdx - primaryDir; | |||
} | } | |||
// Create HTML spans for words <firstWordIdx> .. <lastWordIdx>, and | // Create HTML spans for words <firstWordIdx> .. <lastWordIdx>, and | |||
// append them to <s>. | // append them to <s>. | |||
void HTMLGen::appendSpans(GList *words, int firstWordIdx, int lastWordIdx, | void HTMLGen::appendSpans(GList *words, int firstWordIdx, int lastWordIdx, | |||
int primaryDir, int spanDir, | int primaryDir, int spanDir, | |||
double base, GBool dropCapLine, GString *s) { | double base, GBool dropCapLine, GString *s) { | |||
TextWord *word0, *word1; | if (allTextInvisible && !drawInvisibleText) { | |||
VerticalAlignment vertAlign0, vertAlign1; | return; | |||
const char *dirTag; | } | |||
Unicode u; | ||||
GBool invisible, sp; | ||||
double r0, g0, b0, r1, g1, b1; | ||||
double base1; | ||||
int wordIdx, t, i; | ||||
if (spanDir != primaryDir) { | if (spanDir != primaryDir) { | |||
t = firstWordIdx; | int t = firstWordIdx; | |||
firstWordIdx = lastWordIdx; | firstWordIdx = lastWordIdx; | |||
lastWordIdx = t; | lastWordIdx = t; | |||
} | } | |||
word0 = NULL; | int wordIdx = firstWordIdx; | |||
vertAlign0 = vertAlignBaseline; // make gcc happy | while ((spanDir >= 0) ? wordIdx <= lastWordIdx | |||
r0 = g0 = b0 = 0; // make gcc happy | : wordIdx >= lastWordIdx) { | |||
for (wordIdx = firstWordIdx; | TextWord *word0 = (TextWord *)words->get(wordIdx); | |||
(spanDir >= 0) ? wordIdx <= lastWordIdx : wordIdx >= lastWordIdx; | ||||
wordIdx += spanDir) { | // form field(s): generate <input> element(s) | |||
word1 = (TextWord *)words->get(wordIdx); | if (convertFormFields && word0->getFontInfo() == formFieldFont) { | |||
invisible = allTextInvisible || word1->isInvisible() || word1->isRotated(); | for (int i = (spanDir >= 0) ? 0 : word0->getLength() - 1; | |||
if (!drawInvisibleText && invisible) { | (spanDir >= 0) ? i < word0->getLength() : i >= 0; | |||
continue; | i += spanDir) { | |||
} | int fieldIdx = word0->getChar(0) - 0x80000000; | |||
word1->getColor(&r1, &g1, &b1); | if (fieldIdx >= 0 && fieldIdx < formFieldInfo->getLength()) { | |||
base1 = word1->getBaseline(); | HTMLGenFormFieldInfo *ffi = | |||
if (dropCapLine) { | (HTMLGenFormFieldInfo *)formFieldInfo->get(fieldIdx); | |||
//~ this will fail if there are subscripts or superscripts in | AcroFormField *field = ffi->acroFormField; | |||
//~ the first line of a paragraph with a drop cap | AcroFormFieldType fieldType = field->getAcroFormFieldType(); | |||
vertAlign1 = vertAlignTop; | double llx, lly, urx, ury; | |||
} else if (base1 - base < -1) { | field->getBBox(&llx, &lly, &urx, &ury); | |||
vertAlign1 = vertAlignSuper; | int width = (int)(urx - llx); | |||
} else if (base1 - base > 1) { | Ref fontID; | |||
vertAlign1 = vertAlignSub; | double fontSize; | |||
} else { | field->getFont(&fontID, &fontSize); | |||
vertAlign1 = vertAlignBaseline; | if (fontSize == 0) { | |||
} | fontSize = 12; | |||
if (!word0 || | } | |||
word1->getFontInfo() != word0->getFontInfo() || | if (fieldType == acroFormFieldText) { | |||
word1->getFontSize() != word0->getFontSize() || | s->appendf("<input type=\"text\" class=\"textfield\" id=\"textfield{0 | |||
word1->isInvisible() != word0->isInvisible() || | :d}\" style=\"width:{1:d}px; font-size:{2:d}px;\">", nextFieldID, width, (int)(f | |||
word1->isRotated() != word0->isRotated() || | ontSize + 0.5)); | |||
vertAlign1 != vertAlign0 || | ++nextFieldID; | |||
r1 != r0 || g1 != g0 || b1 != b0) { | } else if (fieldType == acroFormFieldCheckbox) { | |||
if (word0) { | s->appendf("<input type=\"checkbox\" class=\"checkbox\" id=\"checkbox | |||
s->append("</span>"); | {0:d}\" style=\"width:{1:d}px; font-size:{2:d}px;\">", nextFieldID, width, (int) | |||
} | (fontSize + 0.5)); | |||
for (i = 0; i < fonts->getLength(); ++i) { | ++nextFieldID; | |||
if (word1->getFontInfo() == (TextFontInfo *)fonts->get(i)) { | } | |||
break; | ||||
} | ||||
} | ||||
// we force spans to be LTR or RTL; this is a kludge, but it's | ||||
// far easier than implementing the full Unicode bidi algorithm | ||||
if (spanDir == primaryDir) { | ||||
dirTag = ""; | ||||
} else if (spanDir < 0) { | ||||
dirTag = " dir=\"rtl\""; | ||||
} else { | ||||
dirTag = " dir=\"ltr\""; | ||||
} | ||||
s->appendf("<span id=\"f{0:d}\"{1:s} style=\"font-size:{2:d}px;vertical-align:{3:s};{4:s}color:rgba({5:d},{6:d},{7:d},{8:d});\">", | ||||
i, | ||||
dirTag, | ||||
(int)(fontScales[i] * word1->getFontSize() * zoom), | ||||
vertAlignNames[vertAlign1], | ||||
(dropCapLine && wordIdx == 0) ? "line-height:75%;" : "", | ||||
(int)(r1 * 255), (int)(g1 * 255), (int)(b1 * 255), | ||||
invisible ? 0 : 1); | ||||
} | ||||
// add a space before the word, if needed | ||||
// -- this only happens with the first word in a reverse section | ||||
if (spanDir != primaryDir && wordIdx == firstWordIdx) { | ||||
if (spanDir >= 0) { | ||||
if (wordIdx > 0) { | ||||
sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter(); | ||||
} else { | ||||
sp = gFalse; | ||||
} | } | |||
} else { | ||||
sp = word1->getSpaceAfter(); | ||||
} | } | |||
if (sp) { | ||||
if (word0->getSpaceAfter()) { | ||||
s->append(' '); | s->append(' '); | |||
} | } | |||
} | ||||
for (i = (spanDir >= 0) ? 0 : word1->getLength() - 1; | wordIdx += spanDir; | |||
(spanDir >= 0) ? i < word1->getLength() : i >= 0; | ||||
i += spanDir) { | // skip invisible words | |||
u = word1->getChar(i); | } else if (!drawInvisibleText && | |||
if (u >= privateUnicodeMapStart && | (word0->isInvisible() || word0->isRotated())) { | |||
u <= privateUnicodeMapEnd && | wordIdx += spanDir; | |||
privateUnicodeMap[u - privateUnicodeMapStart]) { | ||||
u = privateUnicodeMap[u - privateUnicodeMapStart]; | // generate a <span> containing one or more words | |||
} | ||||
appendUTF8(u, s); | ||||
} | ||||
// add a space after the word, if needed | ||||
// -- there is never a space after the last word in a reverse | ||||
// section (this will be handled as a space after the last word | ||||
// in the previous primary-direction section) | ||||
if (spanDir != primaryDir && wordIdx == lastWordIdx) { | ||||
sp = gFalse; | ||||
} else if (spanDir >= 0) { | ||||
sp = word1->getSpaceAfter(); | ||||
} else { | } else { | |||
if (wordIdx > 0) { | ||||
sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter(); | double r0 = 0, g0 = 0, b0 = 0; // make gcc happy | |||
} else { | VerticalAlignment vertAlign0 = vertAlignBaseline; // make gcc happy | |||
sp = gFalse; | GString *linkURI0 = NULL; | |||
GBool invisible = word0->isInvisible() || word0->isRotated(); | ||||
do { | ||||
TextWord *word1 = (TextWord *)words->get(wordIdx); | ||||
// get word parameters | ||||
double r1, g1, b1; | ||||
word0->getColor(&r1, &g1, &b1); | ||||
double base1 = word1->getBaseline(); | ||||
VerticalAlignment vertAlign1; | ||||
if (dropCapLine) { | ||||
//~ this will fail if there are subscripts or superscripts in | ||||
//~ the first line of a paragraph with a drop cap | ||||
vertAlign1 = vertAlignTop; | ||||
} else if (base1 - base < -1) { | ||||
vertAlign1 = vertAlignSuper; | ||||
} else if (base1 - base > 1) { | ||||
vertAlign1 = vertAlignSub; | ||||
} else { | ||||
vertAlign1 = vertAlignBaseline; | ||||
} | ||||
GString *linkURI1 = word1->getLinkURI(); | ||||
// start of span | ||||
if (word1 == word0) { | ||||
r0 = r1; | ||||
g0 = g1; | ||||
b0 = b1; | ||||
vertAlign0 = vertAlign1; | ||||
linkURI0 = linkURI1; | ||||
int i; | ||||
for (i = 0; i < fonts->getLength(); ++i) { | ||||
if (word1->getFontInfo() == (TextFontInfo *)fonts->get(i)) { | ||||
break; | ||||
} | ||||
} | ||||
if (linkURI1) { | ||||
s->appendf("<a href=\"{0:t}\">", linkURI0); | ||||
} | ||||
// we force spans to be LTR or RTL; this is a kludge, but it's | ||||
// far easier than implementing the full Unicode bidi algorithm | ||||
const char *dirTag; | ||||
if (spanDir == primaryDir) { | ||||
dirTag = ""; | ||||
} else if (spanDir < 0) { | ||||
dirTag = " dir=\"rtl\""; | ||||
} else { | ||||
dirTag = " dir=\"ltr\""; | ||||
} | ||||
s->appendf("<span class=\"f{0:d}\"{1:s} style=\"font-size:{2:d}px;vertical-align:{3:s};{4:s}color:rgba({5:d},{6:d},{7:d},{8:d});\">", | ||||
i, | ||||
dirTag, | ||||
(int)(fontScales[i] * word1->getFontSize() * zoom), | ||||
vertAlignNames[vertAlign1], | ||||
(dropCapLine && wordIdx == 0) ? "line-height:75%;" : "", | ||||
(int)(r0 * 255), (int)(g0 * 255), (int)(b0 * 255), | ||||
invisible ? 0 : 1); | ||||
// end of span | ||||
} else if (word1->getFontInfo() != word0->getFontInfo() || | ||||
word1->getFontSize() != word0->getFontSize() || | ||||
word1->isInvisible() != word0->isInvisible() || | ||||
word1->isRotated() != word0->isRotated() || | ||||
vertAlign1 != vertAlign0 || | ||||
r1 != r0 || g1 != g0 || b1 != b0 || | ||||
linkURI1 != linkURI0) { | ||||
break; | ||||
} | ||||
// add a space before the word, if needed | ||||
// -- this only happens with the first word in a reverse section | ||||
if (spanDir != primaryDir && wordIdx == firstWordIdx) { | ||||
GBool sp; | ||||
if (spanDir >= 0) { | ||||
if (wordIdx > 0) { | ||||
sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter(); | ||||
} else { | ||||
sp = gFalse; | ||||
} | ||||
} else { | ||||
sp = word1->getSpaceAfter(); | ||||
} | ||||
if (sp) { | ||||
s->append(' '); | ||||
} | ||||
} | ||||
// generate the word text | ||||
for (int i = (spanDir >= 0) ? 0 : word1->getLength() - 1; | ||||
(spanDir >= 0) ? i < word1->getLength() : i >= 0; | ||||
i += spanDir) { | ||||
Unicode u = word1->getChar(i); | ||||
if (u >= privateUnicodeMapStart && | ||||
u <= privateUnicodeMapEnd && | ||||
privateUnicodeMap[u - privateUnicodeMapStart]) { | ||||
u = privateUnicodeMap[u - privateUnicodeMapStart]; | ||||
} | ||||
appendUTF8(u, s); | ||||
} | ||||
// add a space after the word, if needed | ||||
// -- there is never a space after the last word in a reverse | ||||
// section (this will be handled as a space after the last | ||||
// word in the previous primary-direction section) | ||||
GBool sp; | ||||
if (spanDir != primaryDir && wordIdx == lastWordIdx) { | ||||
sp = gFalse; | ||||
} else if (spanDir >= 0) { | ||||
sp = word1->getSpaceAfter(); | ||||
} else { | ||||
if (wordIdx > 0) { | ||||
sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter(); | ||||
} else { | ||||
sp = gFalse; | ||||
} | ||||
} | ||||
if (sp) { | ||||
s->append(' '); | ||||
} | ||||
wordIdx += spanDir; | ||||
} while ((spanDir >= 0) ? wordIdx <= lastWordIdx | ||||
: wordIdx >= lastWordIdx); | ||||
s->append("</span>"); | ||||
if (linkURI0) { | ||||
s->append("</a>"); | ||||
} | } | |||
} | } | |||
if (sp) { | ||||
s->append(' '); | ||||
} | ||||
word0 = word1; | ||||
vertAlign0 = vertAlign1; | ||||
r0 = r1; | ||||
g0 = g1; | ||||
b0 = b1; | ||||
} | } | |||
s->append("</span>"); | ||||
} | } | |||
void HTMLGen::appendUTF8(Unicode u, GString *s) { | void HTMLGen::appendUTF8(Unicode u, GString *s) { | |||
if (u <= 0x7f) { | if (u <= 0x7f) { | |||
if (u == '&') { | if (u == '&') { | |||
s->append("&amp;"); | s->append("&amp;"); | |||
} else if (u == '<') { | } else if (u == '<') { | |||
s->append("&lt;"); | s->append("&lt;"); | |||
} else if (u == '>') { | } else if (u == '>') { | |||
s->append("&gt;"); | s->append("&gt;"); | |||
skipping to change at line 696 | skipping to change at line 957 | |||
doc->getXRef()->fetch(id.num, id.gen, &fontObj); | doc->getXRef()->fetch(id.num, id.gen, &fontObj); | |||
if (!fontObj.isDict()) { | if (!fontObj.isDict()) { | |||
fontObj.free(); | fontObj.free(); | |||
return NULL; | return NULL; | |||
} | } | |||
gfxFont = GfxFont::makeFont(doc->getXRef(), "F", id, fontObj.getDict()); | gfxFont = GfxFont::makeFont(doc->getXRef(), "F", id, fontObj.getDict()); | |||
webFont = new WebFont(gfxFont, doc->getXRef()); | webFont = new WebFont(gfxFont, doc->getXRef()); | |||
fontDefn = NULL; | fontDefn = NULL; | |||
fontFace = NULL; | ||||
if (webFont->canWriteTTF()) { | if (webFont->canWriteTTF()) { | |||
fontFile = GString::format("{0:d}.ttf", nextFontFaceIdx); | if (embedFonts) { | |||
fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile); | GString *ttfData = webFont->getTTFData(); | |||
if (webFont->writeTTF(fontPath->getCString())) { | if (ttfData) { | |||
fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n", | fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"data:font/ttf;base64,", | |||
nextFontFaceIdx, fontFile); | nextFontFaceIdx); | |||
Base64Encoder enc(writeToString, fontFace); | ||||
enc.encode((unsigned char *)ttfData->getCString(), | ||||
(size_t)ttfData->getLength()); | ||||
enc.flush(); | ||||
fontFace->append("\"); }\n"); | ||||
delete ttfData; | ||||
} | ||||
} else { | ||||
fontFile = GString::format("{0:d}.ttf", nextFontFaceIdx); | ||||
fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile); | ||||
if (webFont->writeTTF(fontPath->getCString())) { | ||||
fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n", | ||||
nextFontFaceIdx, fontFile); | ||||
} | ||||
delete fontPath; | ||||
delete fontFile; | ||||
} | ||||
if (fontFace) { | ||||
getFontDetails(font, &family, &weight, &style, &scale); | getFontDetails(font, &family, &weight, &style, &scale); | |||
fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};", | fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};", | |||
nextFontFaceIdx, family, weight, style); | nextFontFaceIdx, family, weight, style); | |||
++nextFontFaceIdx; | ++nextFontFaceIdx; | |||
fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0); | fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0); | |||
} | } | |||
delete fontPath; | ||||
delete fontFile; | ||||
} else if (webFont->canWriteOTF()) { | } else if (webFont->canWriteOTF()) { | |||
fontFile = GString::format("{0:d}.otf", nextFontFaceIdx); | if (embedFonts) { | |||
fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile); | GString *otfData = webFont->getOTFData(); | |||
if (webFont->writeOTF(fontPath->getCString())) { | if (otfData) { | |||
fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n", | fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"data:font/otf;base64,", | |||
nextFontFaceIdx, fontFile); | nextFontFaceIdx); | |||
Base64Encoder enc(writeToString, fontFace); | ||||
enc.encode((unsigned char *)otfData->getCString(), | ||||
(size_t)otfData->getLength()); | ||||
enc.flush(); | ||||
fontFace->append("\"); }\n"); | ||||
delete otfData; | ||||
} | ||||
} else { | ||||
fontFile = GString::format("{0:d}.otf", nextFontFaceIdx); | ||||
fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile); | ||||
if (webFont->writeOTF(fontPath->getCString())) { | ||||
fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n", | ||||
nextFontFaceIdx, fontFile); | ||||
} | ||||
delete fontPath; | ||||
delete fontFile; | ||||
} | ||||
if (fontFace) { | ||||
getFontDetails(font, &family, &weight, &style, &scale); | getFontDetails(font, &family, &weight, &style, &scale); | |||
fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};", | fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};", | |||
nextFontFaceIdx, family, weight, style); | nextFontFaceIdx, family, weight, style); | |||
++nextFontFaceIdx; | ++nextFontFaceIdx; | |||
fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0); | fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0); | |||
} | } | |||
delete fontPath; | ||||
delete fontFile; | ||||
} | } | |||
delete webFont; | delete webFont; | |||
delete gfxFont; | delete gfxFont; | |||
fontObj.free(); | fontObj.free(); | |||
return fontDefn; | return fontDefn; | |||
} | } | |||
HTMLGenFontDefn *HTMLGen::getSubstituteFont(TextFontInfo *font) { | HTMLGenFontDefn *HTMLGen::getSubstituteFont(TextFontInfo *font) { | |||
End of changes. 38 change blocks. | ||||
172 lines changed or deleted | 472 lines changed or added |