w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

HTMLGen.cc
Go to the documentation of this file.
1 //========================================================================
2 //
3 // HTMLGen.cc
4 //
5 // Copyright 2010 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //~ to do:
10 //~ - fonts
11 //~ - underlined? (underlines are present in the background image)
12 //~ - include the original font name in the CSS entry (before the
13 //~ generic serif/sans-serif/monospace name)
14 //~ - check that htmlDir exists and is a directory
15 //~ - links:
16 //~ - links to pages
17 //~ - links to named destinations
18 //~ - links to URLs
19 //~ - rotated text should go in the background image
20 //~ - metadata
21 //~ - PDF outline
22 
23 #include <aconf.h>
24 
25 #ifdef USE_GCC_PRAGMAS
26 #pragma implementation
27 #endif
28 
29 #include <stdlib.h>
30 #include <png.h>
31 #include "gmem.h"
32 #include "gmempp.h"
33 #include "GString.h"
34 #include "GList.h"
35 #include "SplashBitmap.h"
36 #include "PDFDoc.h"
37 #include "GfxFont.h"
38 #include "TextOutputDev.h"
39 #include "SplashOutputDev.h"
40 #include "ErrorCodes.h"
41 #include "WebFont.h"
42 #include "HTMLGen.h"
43 
44 #ifdef _WIN32
45 # define strcasecmp stricmp
46 # define strncasecmp strnicmp
47 #endif
48 
49 //------------------------------------------------------------------------
50 
52  const char *tag;
53  int tagLen;
56 };
57 
58 // NB: these are compared, in order, against the tail of the font
59 // name, so "BoldItalic" must come before "Italic", etc.
61  {"Roman", 5, gFalse, gFalse},
62  {"Regular", 7, gFalse, gFalse},
63  {"Condensed", 9, gFalse, gFalse},
64  {"CondensedBold", 13, gTrue, gFalse},
65  {"CondensedLight", 14, gFalse, gFalse},
66  {"SemiBold", 8, gTrue, gFalse},
67  {"BoldItalicMT", 12, gTrue, gTrue},
68  {"BoldItalic", 10, gTrue, gTrue},
69  {"Bold_Italic", 11, gTrue, gTrue},
70  {"BoldOblique", 11, gTrue, gTrue},
71  {"Bold_Oblique", 12, gTrue, gTrue},
72  {"BoldMT", 6, gTrue, gFalse},
73  {"Bold", 4, gTrue, gFalse},
74  {"ItalicMT", 8, gFalse, gTrue},
75  {"Italic", 6, gFalse, gTrue},
76  {"Oblique", 7, gFalse, gTrue},
77  {"Light", 5, gFalse, gFalse},
78  {NULL, 0, gFalse, gFalse}
79 };
80 
82  const char *name;
85 };
86 
88  {"Arial", gFalse, gFalse},
89  {"Courier", gTrue, gFalse},
90  {"Futura", gFalse, gFalse},
91  {"Helvetica", gFalse, gFalse},
92  {"Minion", gFalse, gTrue},
93  {"NewCenturySchlbk", gFalse, gTrue},
94  {"Times", gFalse, gTrue},
95  {"TimesNew", gFalse, gTrue},
96  {"Times_New", gFalse, gTrue},
97  {"Verdana", gFalse, gFalse},
98  {"LucidaSans", gFalse, gFalse},
99  {NULL, gFalse, gFalse}
100 };
101 
103  double mWidth;
104 };
105 
// Reference M widths for the generic CSS substitute fonts.  getFontDetails()
// compares the PDF font's M width against the matching entry and scales the
// text down when the PDF font is narrower.
106 // index: {fixed:8, serif:4, sans-serif:0} + bold*2 + italic
// Only entries 0..11 are listed: the index formula above never exceeds
// 8 + 2 + 1 = 11, so the last four slots of the 16-element array are unused.
107 static SubstFontInfo substFonts[16] = {
108  {0.833}, //  0: sans-serif
109  {0.833}, //  1: sans-serif, italic
110  {0.889}, //  2: sans-serif, bold
111  {0.889}, //  3: sans-serif, bold + italic
112  {0.788}, //  4: serif
113  {0.722}, //  5: serif, italic
114  {0.833}, //  6: serif, bold
115  {0.778}, //  7: serif, bold + italic
116  {0.600}, //  8: fixed-width
117  {0.600}, //  9: fixed-width, italic
118  {0.600}, // 10: fixed-width, bold
119  {0.600} // 11: fixed-width, bold + italic
120 };
121 
122 // Map Unicode indexes from the private use area, following the Adobe
123 // Glyph list.
124 #define privateUnicodeMapStart 0xf6f9
125 #define privateUnicodeMapEnd 0xf7ff
126 static int
128  0x0141, 0x0152, 0, 0, 0x0160, 0, 0x017d, // f6f9
129  0, 0, 0, 0, 0, 0, 0, 0, // f700
130  0, 0, 0, 0, 0, 0, 0, 0,
131  0, 0, 0, 0, 0, 0, 0, 0, // f710
132  0, 0, 0, 0, 0, 0, 0, 0,
133  0, 0x0021, 0, 0, 0x0024, 0, 0x0026, 0, // f720
134  0, 0, 0, 0, 0, 0, 0, 0,
135  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // f730
136  0x0038, 0x0039, 0, 0, 0, 0, 0, 0x003f,
137  0, 0, 0, 0, 0, 0, 0, 0, // f740
138  0, 0, 0, 0, 0, 0, 0, 0,
139  0, 0, 0, 0, 0, 0, 0, 0, // f750
140  0, 0, 0, 0, 0, 0, 0, 0,
141  0, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // f760
142  0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
143  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // f770
144  0x0058, 0x0059, 0x005a, 0, 0, 0, 0, 0,
145  0, 0, 0, 0, 0, 0, 0, 0, // f780
146  0, 0, 0, 0, 0, 0, 0, 0,
147  0, 0, 0, 0, 0, 0, 0, 0, // f790
148  0, 0, 0, 0, 0, 0, 0, 0,
149  0, 0x00a1, 0x00a2, 0, 0, 0, 0, 0, // f7a0
150  0, 0, 0, 0, 0, 0, 0, 0,
151  0, 0, 0, 0, 0, 0, 0, 0, // f7b0
152  0, 0, 0, 0, 0, 0, 0, 0x00bf,
153  0, 0, 0, 0, 0, 0, 0, 0, // f7c0
154  0, 0, 0, 0, 0, 0, 0, 0,
155  0, 0, 0, 0, 0, 0, 0, 0, // f7d0
156  0, 0, 0, 0, 0, 0, 0, 0,
157  0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // f7e0
158  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
159  0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0, // f7f0
160  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178
161 };
162 
168 };
169 
// CSS vertical-align keywords.  appendSpans() indexes this table with a
// VerticalAlignment value when it builds a span's style attribute.
// NOTE(review): the order presumably mirrors the VerticalAlignment enum
// (baseline, sub, super, top) -- the enum body is not visible here; confirm.
170 static const char *vertAlignNames[] = {
171  "baseline",
172  "sub",
173  "super",
174  "top"
175 };
176 
177 //------------------------------------------------------------------------
178 
180 public:
181 
182  HTMLGenFontDefn(Ref fontIDA, GString *fontFaceA, GString *fontSpecA,
183  double scaleA)
184  : fontID(fontIDA), fontFace(fontFaceA), fontSpec(fontSpecA)
185  , scale(scaleA), used(gFalse) {}
186  ~HTMLGenFontDefn() { delete fontFace; delete fontSpec; }
187  GBool match(Ref fontIDA)
188  { return fontIDA.num == fontID.num && fontIDA.gen == fontID.gen; }
189 
191  GString *fontFace; // NULL for substituted fonts
193  double scale;
194  GBool used; // set when used (per page)
195 };
196 
197 //------------------------------------------------------------------------
198 
199 
200 //------------------------------------------------------------------------
201 
// Construct an HTML generator.  <backgroundResolutionA> is stored as the
// resolution for the background image.  Creates the text-extraction and
// Splash output devices; <ok> is cleared if the text device reports failure.
202 HTMLGen::HTMLGen(double backgroundResolutionA) {
203  TextOutputControl textOutControl;
204  SplashColor paperColor;
205 
206  ok = gTrue;
207 
208  backgroundResolution = backgroundResolutionA;
209  zoom = 1.0;
// NOTE(review): listing lines 210-212 are absent from this copy.  The option
// flags read elsewhere in this file (drawInvisibleText, allTextInvisible,
// extractFontFiles) are not assigned anywhere in the visible text --
// confirm their initialization against the upstream source.
213 
214  // set up the TextOutputDev
215  textOutControl.mode = textOutReadingOrder;
216  textOutControl.html = gTrue;
217  textOutControl.splitRotatedWords = gTrue;
218  textOut = new TextOutputDev(NULL, &textOutControl, gFalse);
219  if (!textOut->isOk()) {
220  ok = gFalse;
221  }
222 
223  // set up the SplashOutputDev
// all three RGB components at 0xff: white paper
224  paperColor[0] = paperColor[1] = paperColor[2] = 0xff;
225  splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor);
226 
// no font definitions exist until a GList is allocated for them
227  fontDefns = NULL;
228 }
229 
231  delete textOut;
232  delete splashOut;
233  if (fontDefns) {
235  }
236 }
237 
239  doc = docA;
241 
242  if (fontDefns) {
244  }
245  fontDefns = new GList();
246  nextFontFaceIdx = 0;
247 }
248 
// Write the NUL-terminated string <data> to <stream> through the
// caller-supplied <writeFunc>, and return whatever the callback returns.
static inline int pr(int (*writeFunc)(void *stream, const char *data, int size),
                     void *stream, const char *data) {
  const int nBytes = (int)strlen(data);

  return writeFunc(stream, data, nBytes);
}
253 
254 static int pf(int (*writeFunc)(void *stream, const char *data, int size),
255  void *stream, const char *fmt, ...) {
256  va_list args;
257  GString *s;
258  int ret;
259 
260  va_start(args, fmt);
262  va_end(args);
263  ret = writeFunc(stream, s->getCString(), s->getLength());
264  delete s;
265  return ret;
266 }
267 
// Pairs the caller-supplied PNG output callback with its opaque stream
// handle, so that both can be given to libpng as a single I/O pointer
// (convertPage passes &writeInfo to png_set_write_fn).
268 struct PNGWriteInfo {
269  int (*writePNG)(void *stream, const char *data, int size); // output callback
270  void *pngStream; // handed back to writePNG as its <stream> argument
271 };
272 
275 
277  info->writePNG(info->pngStream, (char *)data, (int)size);
278 }
279 
281  int pg, const char *pngURL, const char *htmlDir,
282  int (*writeHTML)(void *stream, const char *data, int size),
283  void *htmlStream,
284  int (*writePNG)(void *stream, const char *data, int size),
285  void *pngStream) {
287  png_infop pngInfo;
288  PNGWriteInfo writeInfo;
290  Guchar *p;
291  double pageW, pageH;
292  TextPage *text;
293  GList *cols, *pars, *lines, *words;
295  TextColumn *col;
296  TextParagraph *par;
297  TextLine *line;
298  HTMLGenFontDefn *fontDefn;
299  GString *s;
300  double base;
301  int primaryDir, spanDir;
302  int colIdx, parIdx, lineIdx, firstWordIdx, lastWordIdx;
303  int y, i;
304 
305  // generate the background bitmap
308  0, gFalse, gTrue, gFalse);
311  NULL, NULL, NULL)) ||
312  !(pngInfo = png_create_info_struct(png))) {
313  return errFileIO;
314  }
315  if (setjmp(png_jmpbuf(png))) {
316  return errFileIO;
317  }
318  writeInfo.writePNG = writePNG;
319  writeInfo.pngStream = pngStream;
320  png_set_write_fn(png, &writeInfo, pngWriteFunc, NULL);
321  png_set_IHDR(png, pngInfo, bitmap->getWidth(), bitmap->getHeight(),
324  png_write_info(png, pngInfo);
325  p = bitmap->getDataPtr();
326  for (y = 0; y < bitmap->getHeight(); ++y) {
328  p += bitmap->getRowSize();
329  }
330  png_write_end(png, pngInfo);
331  png_destroy_write_struct(&png, &pngInfo);
332 
333  // page size
334  if (doc->getPageRotate(pg) == 90 || doc->getPageRotate(pg) == 270) {
335  pageW = doc->getPageCropHeight(pg);
336  pageH = doc->getPageCropWidth(pg);
337  } else {
338  pageW = doc->getPageCropWidth(pg);
339  pageH = doc->getPageCropHeight(pg);
340  }
341 
342  // get the PDF text
343  doc->displayPage(textOut, pg, 72, 72, 0, gFalse, gTrue, gFalse);
344  doc->processLinks(textOut, pg);
345  text = textOut->takeText();
346  primaryDir = text->primaryDirectionIsLR() ? 1 : -1;
347 
348  // HTML header
349  pr(writeHTML, htmlStream, "<html>\n");
350  pr(writeHTML, htmlStream, "<head>\n");
351  pr(writeHTML, htmlStream, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
352  pr(writeHTML, htmlStream, "<style type=\"text/css\">\n");
353  pr(writeHTML, htmlStream, ".txt { white-space:nowrap; }\n");
354  fonts = text->getFonts();
355  fontScales = (double *)gmallocn(fonts->getLength(), sizeof(double));
356  for (i = 0; i < fontDefns->getLength(); ++i) {
357  fontDefn = (HTMLGenFontDefn *)fontDefns->get(i);
358  fontDefn->used = gFalse;
359  }
360  for (i = 0; i < fonts->getLength(); ++i) {
361  font = (TextFontInfo *)fonts->get(i);
362  fontDefn = getFontDefn(font, htmlDir);
363  if (!fontDefn->used && fontDefn->fontFace) {
364  pr(writeHTML, htmlStream, fontDefn->fontFace->getCString());
365  }
366  pf(writeHTML, htmlStream, "#f{0:d} {{ {1:t} }}\n", i, fontDefn->fontSpec);
367  fontScales[i] = fontDefn->scale;
368  fontDefn->used = gTrue;
369  }
370  pr(writeHTML, htmlStream, "</style>\n");
371  pr(writeHTML, htmlStream, "</head>\n");
372  if (primaryDir >= 0) {
373  pr(writeHTML, htmlStream, "<body>\n");
374  } else {
375  pr(writeHTML, htmlStream, "<body dir=\"rtl\">\n");
376  }
377  if (primaryDir >= 0) {
378  pf(writeHTML, htmlStream, "<img id=\"background\" style=\"position:absolute; left:0px; top:0px;\" width=\"{0:d}\" height=\"{1:d}\" src=\"{2:s}\">\n",
379  (int)(pageW * zoom), (int)(pageH * zoom), pngURL);
380  } else {
381  pf(writeHTML, htmlStream, "<img id=\"background\" style=\"position:absolute; right:0px; top:0px;\" width=\"{0:d}\" height=\"{1:d}\" src=\"{2:s}\">\n",
382  (int)(pageW * zoom), (int)(pageH * zoom), pngURL);
383  }
384 
385  // generate the HTML text
386  cols = text->makeColumns();
387  for (colIdx = 0; colIdx < cols->getLength(); ++colIdx) {
388  col = (TextColumn *)cols->get(colIdx);
389  pars = col->getParagraphs();
390  for (parIdx = 0; parIdx < pars->getLength(); ++parIdx) {
391  par = (TextParagraph *)pars->get(parIdx);
392  lines = par->getLines();
393  for (lineIdx = 0; lineIdx < lines->getLength(); ++lineIdx) {
394  line = (TextLine *)lines->get(lineIdx);
395  if (line->getRotation() != 0) {
396  continue;
397  }
398  words = line->getWords();
399  if (lineIdx == 0 && par->hasDropCap() && words->getLength() >= 2) {
400  base = ((TextWord *)words->get(1))->getBaseline();
401  } else {
402  base = line->getBaseline();
403  }
404  s = new GString();
405  for (firstWordIdx = (primaryDir >= 0) ? 0 : words->getLength() - 1;
406  (primaryDir >= 0) ? firstWordIdx < words->getLength()
407  : firstWordIdx >= 0;
408  firstWordIdx = lastWordIdx + primaryDir) {
409  lastWordIdx = findDirSpan(words, firstWordIdx,
410  primaryDir, &spanDir);
411  appendSpans(words, firstWordIdx, lastWordIdx,
412  primaryDir, spanDir,
413  base, lineIdx == 0 && par->hasDropCap(),
414  s);
415  }
416  if (primaryDir >= 0) {
417  pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; left:{0:d}px; top:{1:d}px;\">{2:t}</div>\n",
418  (int)(line->getXMin() * zoom),
419  (int)(line->getYMin() * zoom), s);
420  } else {
421  pf(writeHTML, htmlStream, "<div class=\"txt\" style=\"position:absolute; right:{0:d}px; top:{1:d}px;\">{2:t}</div>\n",
422  (int)((pageW - line->getXMax()) * zoom),
423  (int)(line->getYMin() * zoom), s);
424  }
425  delete s;
426  }
427  }
428  }
429  gfree(fontScales);
430  delete text;
432 
433  // HTML trailer
434  pr(writeHTML, htmlStream, "</body>\n");
435  pr(writeHTML, htmlStream, "</html>\n");
436 
437  return errNone;
438 }
439 
440 // Find a sequence of words, starting at <firstWordIdx>, that have the
441 // same writing direction. Returns the index of the last word, and
442 // sets *<spanDir> to the span direction.
443 int HTMLGen::findDirSpan(GList *words, int firstWordIdx, int primaryDir,
444  int *spanDir) {
445  int dir0, dir1, nextWordIdx;
446 
447  dir0 = ((TextWord *)words->get(firstWordIdx))->getDirection();
448  for (nextWordIdx = firstWordIdx + primaryDir;
449  (primaryDir >= 0) ? nextWordIdx < words->getLength()
450  : nextWordIdx >= 0;
451  nextWordIdx += primaryDir) {
452  dir1 = ((TextWord *)words->get(nextWordIdx))->getDirection();
453  if (dir0 == 0) {
454  dir0 = dir1;
455  } else if (dir1 != 0 && dir1 != dir0) {
456  break;
457  }
458  }
459 
460  if (dir0 == 0) {
461  *spanDir = primaryDir;
462  } else {
463  *spanDir = dir0;
464  }
465 
466  return nextWordIdx - primaryDir;
467 }
468 
469 // Create HTML spans for words <firstWordIdx> .. <lastWordIdx>, and
470 // append them to <s>.
471 void HTMLGen::appendSpans(GList *words, int firstWordIdx, int lastWordIdx,
472  int primaryDir, int spanDir,
473  double base, GBool dropCapLine, GString *s) {
474  TextWord *word0, *word1;
475  VerticalAlignment vertAlign0, vertAlign1;
476  const char *dirTag;
477  Unicode u;
478  GBool invisible, sp;
479  double r0, g0, b0, r1, g1, b1;
480  double base1;
481  int wordIdx, t, i;
482 
483  if (spanDir != primaryDir) {
484  t = firstWordIdx;
485  firstWordIdx = lastWordIdx;
486  lastWordIdx = t;
487  }
488 
489  word0 = NULL;
490  vertAlign0 = vertAlignBaseline; // make gcc happy
491  r0 = g0 = b0 = 0; // make gcc happy
492  for (wordIdx = firstWordIdx;
493  (spanDir >= 0) ? wordIdx <= lastWordIdx : wordIdx >= lastWordIdx;
494  wordIdx += spanDir) {
495  word1 = (TextWord *)words->get(wordIdx);
496  invisible = allTextInvisible || word1->isInvisible() || word1->isRotated();
497  if (!drawInvisibleText && invisible) {
498  continue;
499  }
500  word1->getColor(&r1, &g1, &b1);
501  base1 = word1->getBaseline();
502  if (dropCapLine) {
503  //~ this will fail if there are subscripts or superscripts in
504  //~ the first line of a paragraph with a drop cap
505  vertAlign1 = vertAlignTop;
506  } else if (base1 - base < -1) {
507  vertAlign1 = vertAlignSuper;
508  } else if (base1 - base > 1) {
509  vertAlign1 = vertAlignSub;
510  } else {
511  vertAlign1 = vertAlignBaseline;
512  }
513  if (!word0 ||
514  word1->getFontInfo() != word0->getFontInfo() ||
515  word1->getFontSize() != word0->getFontSize() ||
516  word1->isInvisible() != word0->isInvisible() ||
517  word1->isRotated() != word0->isRotated() ||
518  vertAlign1 != vertAlign0 ||
519  r1 != r0 || g1 != g0 || b1 != b0) {
520  if (word0) {
521  s->append("</span>");
522  }
523  for (i = 0; i < fonts->getLength(); ++i) {
524  if (word1->getFontInfo() == (TextFontInfo *)fonts->get(i)) {
525  break;
526  }
527  }
528  // we force spans to be LTR or RTL; this is a kludge, but it's
529  // far easier than implementing the full Unicode bidi algorithm
530  if (spanDir == primaryDir) {
531  dirTag = "";
532  } else if (spanDir < 0) {
533  dirTag = " dir=\"rtl\"";
534  } else {
535  dirTag = " dir=\"ltr\"";
536  }
537  s->appendf("<span id=\"f{0:d}\"{1:s} style=\"font-size:{2:d}px;vertical-align:{3:s};{4:s}color:rgba({5:d},{6:d},{7:d},{8:d});\">",
538  i,
539  dirTag,
540  (int)(fontScales[i] * word1->getFontSize() * zoom),
541  vertAlignNames[vertAlign1],
542  (dropCapLine && wordIdx == 0) ? "line-height:75%;" : "",
543  (int)(r1 * 255), (int)(g1 * 255), (int)(b1 * 255),
544  invisible ? 0 : 1);
545  }
546 
547  // add a space before the word, if needed
548  // -- this only happens with the first word in a reverse section
549  if (spanDir != primaryDir && wordIdx == firstWordIdx) {
550  if (spanDir >= 0) {
551  if (wordIdx > 0) {
552  sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter();
553  } else {
554  sp = gFalse;
555  }
556  } else {
557  sp = word1->getSpaceAfter();
558  }
559  if (sp) {
560  s->append(' ');
561  }
562  }
563 
564  for (i = (spanDir >= 0) ? 0 : word1->getLength() - 1;
565  (spanDir >= 0) ? i < word1->getLength() : i >= 0;
566  i += spanDir) {
567  u = word1->getChar(i);
568  if (u >= privateUnicodeMapStart &&
569  u <= privateUnicodeMapEnd &&
572  }
573  appendUTF8(u, s);
574  }
575 
576  // add a space after the word, if needed
577  // -- there is never a space after the last word in a reverse
578  // section (this will be handled as a space after the last word
579  // in the previous primary-direction section)
580  if (spanDir != primaryDir && wordIdx == lastWordIdx) {
581  sp = gFalse;
582  } else if (spanDir >= 0) {
583  sp = word1->getSpaceAfter();
584  } else {
585  if (wordIdx > 0) {
586  sp = ((TextWord *)words->get(wordIdx - 1))->getSpaceAfter();
587  } else {
588  sp = gFalse;
589  }
590  }
591  if (sp) {
592  s->append(' ');
593  }
594 
595  word0 = word1;
596  vertAlign0 = vertAlign1;
597  r0 = r1;
598  g0 = g1;
599  b0 = b1;
600  }
601  s->append("</span>");
602 }
603 
605  if (u <= 0x7f) {
606  if (u == '&') {
607  s->append("&amp;");
608  } else if (u == '<') {
609  s->append("&lt;");
610  } else if (u == '>') {
611  s->append("&gt;");
612  } else {
613  s->append((char)u);
614  }
615  } else if (u <= 0x7ff) {
616  s->append((char)(0xc0 + (u >> 6)));
617  s->append((char)(0x80 + (u & 0x3f)));
618  } else if (u <= 0xffff) {
619  s->append((char)(0xe0 + (u >> 12)));
620  s->append((char)(0x80 + ((u >> 6) & 0x3f)));
621  s->append((char)(0x80 + (u & 0x3f)));
622  } else if (u <= 0x1fffff) {
623  s->append((char)(0xf0 + (u >> 18)));
624  s->append((char)(0x80 + ((u >> 12) & 0x3f)));
625  s->append((char)(0x80 + ((u >> 6) & 0x3f)));
626  s->append((char)(0x80 + (u & 0x3f)));
627  } else if (u <= 0x3ffffff) {
628  s->append((char)(0xf8 + (u >> 24)));
629  s->append((char)(0x80 + ((u >> 18) & 0x3f)));
630  s->append((char)(0x80 + ((u >> 12) & 0x3f)));
631  s->append((char)(0x80 + ((u >> 6) & 0x3f)));
632  s->append((char)(0x80 + (u & 0x3f)));
633  } else if (u <= 0x7fffffff) {
634  s->append((char)(0xfc + (u >> 30)));
635  s->append((char)(0x80 + ((u >> 24) & 0x3f)));
636  s->append((char)(0x80 + ((u >> 18) & 0x3f)));
637  s->append((char)(0x80 + ((u >> 12) & 0x3f)));
638  s->append((char)(0x80 + ((u >> 6) & 0x3f)));
639  s->append((char)(0x80 + (u & 0x3f)));
640  }
641 }
642 
644  const char *htmlDir) {
645  Ref id;
646  HTMLGenFontDefn *fontDefn;
647  int i;
648 
649  // check the existing font defns
650  id = font->getFontID();
651  if (id.num >= 0) {
652  for (i = 0; i < fontDefns->getLength(); ++i) {
653  fontDefn = (HTMLGenFontDefn *)fontDefns->get(i);
654  if (fontDefn->match(id)) {
655  return fontDefn;
656  }
657  }
658  }
659 
660  // try to extract a font file
661  if (!extractFontFiles ||
662  !(fontDefn = getFontFile(font, htmlDir))) {
663 
664  // get a substitute font
665  fontDefn = getSubstituteFont(font);
666  }
667 
668  fontDefns->append(fontDefn);
669  return fontDefn;
670 }
671 
673  const char *htmlDir) {
674  Ref id;
675  HTMLGenFontDefn *fontDefn;
676  Object fontObj;
677  GfxFont *gfxFont;
678  WebFont *webFont;
679  GString *fontFile, *fontPath, *fontFace, *fontSpec;
680  const char *family, *weight, *style;
681  double scale;
682 
683  id = font->getFontID();
684  if (id.num < 0) {
685  return NULL;
686  }
687 
688  doc->getXRef()->fetch(id.num, id.gen, &fontObj);
689  if (!fontObj.isDict()) {
690  fontObj.free();
691  return NULL;
692  }
693 
694  gfxFont = GfxFont::makeFont(doc->getXRef(), "F", id, fontObj.getDict());
695  webFont = new WebFont(gfxFont, doc->getXRef());
696  fontDefn = NULL;
697 
698  if (webFont->canWriteTTF()) {
699  fontFile = GString::format("{0:d}.ttf", nextFontFaceIdx);
700  fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile);
701  if (webFont->writeTTF(fontPath->getCString())) {
702  fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n",
703  nextFontFaceIdx, fontFile);
704  getFontDetails(font, &family, &weight, &style, &scale);
705  fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};",
706  nextFontFaceIdx, family, weight, style);
707  ++nextFontFaceIdx;
708  fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0);
709  }
710  delete fontPath;
711  delete fontFile;
712 
713  } else if (webFont->canWriteOTF()) {
714  fontFile = GString::format("{0:d}.otf", nextFontFaceIdx);
715  fontPath = GString::format("{0:s}/{1:t}", htmlDir, fontFile);
716  if (webFont->writeOTF(fontPath->getCString())) {
717  fontFace = GString::format("@font-face {{ font-family: ff{0:d}; src: url(\"{1:t}\"); }}\n",
718  nextFontFaceIdx, fontFile);
719  getFontDetails(font, &family, &weight, &style, &scale);
720  fontSpec = GString::format("font-family:ff{0:d},{1:s}; font-weight:{2:s}; font-style:{3:s};",
721  nextFontFaceIdx, family, weight, style);
722  ++nextFontFaceIdx;
723  fontDefn = new HTMLGenFontDefn(id, fontFace, fontSpec, 1.0);
724  }
725  delete fontPath;
726  delete fontFile;
727  }
728 
729  delete webFont;
730  delete gfxFont;
731  fontObj.free();
732 
733  return fontDefn;
734 }
735 
737  const char *family, *weight, *style;
738  double scale;
739  GString *fontSpec;
740 
741  getFontDetails(font, &family, &weight, &style, &scale);
742  fontSpec = GString::format("font-family:{0:s}; font-weight:{1:s}; font-style:{2:s};",
743  family, weight, style);
744  return new HTMLGenFontDefn(font->getFontID(), NULL, fontSpec, scale);
745 }
746 
748  const char **weight, const char **style,
749  double *scale) {
750  GString *fontName;
751  char *fontName2;
752  FontStyleTagInfo *fst;
754  GBool fixedWidth, serif, bold, italic;
755  double s;
756  int n, i;
757 
758  // get the font name, remove any subset tag
759  fontName = font->getFontName();
760  if (fontName) {
761  fontName2 = fontName->getCString();
762  n = fontName->getLength();
763  for (i = 0; i < n && i < 7; ++i) {
764  if (fontName2[i] < 'A' || fontName2[i] > 'Z') {
765  break;
766  }
767  }
768  if (i == 6 && n > 7 && fontName2[6] == '+') {
769  fontName2 += 7;
770  n -= 7;
771  }
772  } else {
773  fontName2 = NULL;
774  n = 0;
775  }
776 
777  // get the style info from the font descriptor flags
778  fixedWidth = font->isFixedWidth();
779  serif = font->isSerif();
780  bold = font->isBold();
781  italic = font->isItalic();
782 
783  if (fontName2) {
784 
785  // look for a style tag at the end of the font name -- this
786  // overrides the font descriptor bold/italic flags
787  for (fst = fontStyleTags; fst->tag; ++fst) {
788  if (n > fst->tagLen &&
789  !strcasecmp(fontName2 + n - fst->tagLen, fst->tag)) {
790  bold = fst->bold;
791  italic = fst->italic;
792  n -= fst->tagLen;
793  if (n > 1 && (fontName2[n-1] == '-' ||
794  fontName2[n-1] == ',' ||
795  fontName2[n-1] == '.' ||
796  fontName2[n-1] == '_')) {
797  --n;
798  }
799  break;
800  }
801  }
802 
803  // look for a known font name -- this overrides the font descriptor
804  // fixedWidth/serif flags
805  for (sf = standardFonts; sf->name; ++sf) {
806  if (!strncasecmp(fontName2, sf->name, n)) {
807  fixedWidth = sf->fixedWidth;
808  serif = sf->serif;
809  break;
810  }
811  }
812  }
813 
814  // compute the scaling factor
815  *scale = 1;
816  if ((s = font->getMWidth())) {
817  i = (fixedWidth ? 8 : serif ? 4 : 0) + (bold ? 2 : 0) + (italic ? 1 : 0);
818  if (s < substFonts[i].mWidth) {
819  *scale = s / substFonts[i].mWidth;
820  }
821  }
822 
823  *family = fixedWidth ? "monospace" : serif ? "serif" : "sans-serif";
824  *weight = bold ? "bold" : "normal";
825  *style = italic ? "italic" : "normal";
826 }
#define deleteGList(list, T)
Definition: GList.h:94
static const char * vertAlignNames[]
Definition: HTMLGen.cc:170
static void pngWriteFunc(png_structp png, png_bytep data, png_size_t size)
Definition: HTMLGen.cc:273
static int privateUnicodeMap[0xf7ff - 0xf6f9+1]
Definition: HTMLGen.cc:127
static FontStyleTagInfo fontStyleTags[]
Definition: HTMLGen.cc:60
static int pf(int(*writeFunc)(void *stream, const char *data, int size), void *stream, const char *fmt,...)
Definition: HTMLGen.cc:254
#define privateUnicodeMapEnd
Definition: HTMLGen.cc:125
#define privateUnicodeMapStart
Definition: HTMLGen.cc:124
static StandardFontInfo standardFonts[]
Definition: HTMLGen.cc:87
VerticalAlignment
Definition: HTMLGen.cc:163
@ vertAlignSuper
Definition: HTMLGen.cc:166
@ vertAlignBaseline
Definition: HTMLGen.cc:164
@ vertAlignSub
Definition: HTMLGen.cc:165
@ vertAlignTop
Definition: HTMLGen.cc:167
static int pr(int(*writeFunc)(void *stream, const char *data, int size), void *stream, const char *data)
Definition: HTMLGen.cc:249
static SubstFontInfo substFonts[16]
Definition: HTMLGen.cc:107
long * italic
Definition: afm2tfm.c:1034
#define font
Definition: aptex-macros.h:175
#define text(a)
Definition: aptex-macros.h:925
Definition: GList.h:24
int getLength()
Definition: GList.h:39
void append(void *p)
Definition: GList.cc:53
void * get(int i)
Definition: GList.h:48
static GString * format(const char *fmt,...)
Definition: GString.cc:189
char * getCString()
Definition: GString.h:83
int getLength()
Definition: GString.h:80
static GString * formatv(const char *fmt, va_list argList)
Definition: GString.cc:200
static GfxFont * makeFont(XRef *xref, const char *tagA, Ref idA, Dict *fontDict)
Definition: GfxFont.cc:167
HTMLGenFontDefn(Ref fontIDA, GString *fontFaceA, GString *fontSpecA, double scaleA)
Definition: HTMLGen.cc:182
double scale
Definition: HTMLGen.cc:193
GBool match(Ref fontIDA)
Definition: HTMLGen.cc:187
GString * fontSpec
Definition: HTMLGen.cc:192
GString * fontFace
Definition: HTMLGen.cc:191
GBool allTextInvisible
Definition: HTMLGen.h:78
SplashOutputDev * splashOut
Definition: HTMLGen.h:83
TextOutputDev * textOut
Definition: HTMLGen.h:82
double zoom
Definition: HTMLGen.h:76
double * fontScales
Definition: HTMLGen.h:86
void getFontDetails(TextFontInfo *font, const char **family, const char **weight, const char **style, double *scale)
Definition: HTMLGen.cc:747
int findDirSpan(GList *words, int firstWordIdx, int primaryDir, int *spanDir)
Definition: HTMLGen.cc:443
HTMLGenFontDefn * getFontFile(TextFontInfo *font, const char *htmlDir)
Definition: HTMLGen.cc:672
GBool drawInvisibleText
Definition: HTMLGen.h:77
PDFDoc * doc
Definition: HTMLGen.h:81
void appendSpans(GList *words, int firstWordIdx, int lastWordIdx, int primaryDir, int spanDir, double base, GBool dropCapLine, GString *s)
Definition: HTMLGen.cc:471
HTMLGenFontDefn * getSubstituteFont(TextFontInfo *font)
Definition: HTMLGen.cc:736
GBool ok
Definition: HTMLGen.h:91
GBool extractFontFiles
Definition: HTMLGen.h:79
HTMLGen(double backgroundResolutionA)
Definition: HTMLGen.cc:202
~HTMLGen()
Definition: HTMLGen.cc:230
GList * fontDefns
Definition: HTMLGen.h:88
HTMLGenFontDefn * getFontDefn(TextFontInfo *font, const char *htmlDir)
Definition: HTMLGen.cc:643
void appendUTF8(Unicode u, GString *s)
Definition: HTMLGen.cc:604
int nextFontFaceIdx
Definition: HTMLGen.h:89
GList * fonts
Definition: HTMLGen.h:85
int convertPage(int pg, const char *pngURL, const char *htmlDir, int(*writeHTML)(void *stream, const char *data, int size), void *htmlStream, int(*writePNG)(void *stream, const char *data, int size), void *pngStream)
Definition: HTMLGen.cc:280
void startDoc(PDFDoc *docA)
Definition: HTMLGen.cc:238
double backgroundResolution
Definition: HTMLGen.h:75
Definition: Object.h:84
GBool isDict()
Definition: Object.h:137
void free()
Definition: Object.cc:119
Dict * getDict()
Definition: Object.h:161
Definition: PDFDoc.h:38
void processLinks(OutputDev *out, int page)
Definition: PDFDoc.cc:488
double getPageCropWidth(int page)
Definition: PDFDoc.h:85
double getPageCropHeight(int page)
Definition: PDFDoc.h:87
int getPageRotate(int page)
Definition: PDFDoc.h:89
XRef * getXRef()
Definition: PDFDoc.h:72
void displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, GBool useMediaBox, GBool crop, GBool printing, GBool(*abortCheckCbk)(void *data)=NULL, void *abortCheckCbkData=NULL)
Definition: PDFDoc.cc:444
void setSkipText(GBool skipHorizTextA, GBool skipRotatedTextA)
void startDoc(XRef *xrefA)
SplashBitmap * getBitmap()
TextOutputMode mode
Definition: TextOutputDev.h:64
virtual GBool isOk()
TextPage * takeText()
GList * getLines()
GBool hasDropCap()
void getColor(double *r, double *g, double *b)
double getBaseline()
Unicode getChar(int idx)
double getFontSize()
int getLength()
GBool isInvisible()
TextFontInfo * getFontInfo()
GBool getSpaceAfter()
GBool isRotated()
GBool writeTTF(const char *fontFilePath)
Definition: WebFont.cc:74
GBool canWriteOTF()
Definition: WebFont.cc:66
GBool writeOTF(const char *fontFilePath)
Definition: WebFont.cc:114
GBool canWriteTTF()
Definition: WebFont.cc:62
Object * fetch(int num, int gen, Object *obj, int recursion=0)
Definition: XRef.cc:1155
#define n
Definition: t4ht.c:1290
#define gfree(p)
Definition: dt2dv.c:326
static int id
Definition: bifont.c:66
#define info
Definition: dviinfo.c:42
struct rect data
Definition: dvipdfm.c:64
#define s
Definition: afcover.h:80
#define t
Definition: afcover.h:96
#define r1
#define r0
void * gmallocn(int nObjs, int objSize)
Definition: gmem.cc:204
int base
Definition: gsftopk.c:1502
int col
Definition: gsftopk.c:443
#define gFalse
Definition: gtypes.h:18
int GBool
Definition: gtypes.h:16
#define gTrue
Definition: gtypes.h:17
unsigned char Guchar
Definition: gtypes.h:24
string family
#define NULL
Definition: ftobjs.h:61
small capitals from c petite p scientific f u
Definition: afcover.h:88
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
Arabic default style
Definition: afstyles.h:94
kerning y
Definition: ttdriver.c:212
png_infop png_create_info_struct(png_const_structrp png_ptr)
Definition: png.c:354
png_voidp() png_get_progressive_ptr(png_const_structrp png_ptr)
Definition: pngpread.c:1089
void() png_write_row(png_structrp png_ptr, png_const_bytep row)
Definition: pngwrite.c:698
#define PNG_COLOR_TYPE_RGB
Definition: png.h:670
#define PNG_COMPRESSION_TYPE_DEFAULT
Definition: png.h:679
void() png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
Definition: pngwrite.c:979
void() png_write_end(png_structrp png_ptr, png_inforp info_ptr)
Definition: pngwrite.c:358
png_structp() png_create_write_struct(png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warn_fn)
Definition: pngwrite.c:499
#define png_jmpbuf(png_ptr)
Definition: png.h:952
void() png_set_write_fn(png_structrp png_ptr, png_voidp io_ptr, png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)
Definition: pngwio.c:122
void() png_write_info(png_structrp png_ptr, png_const_inforp info_ptr)
Definition: pngwrite.c:192
void() png_set_IHDR(png_const_structrp png_ptr, png_inforp info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth, int color_type, int interlace_method, int compression_method, int filter_method)
Definition: pngset.c:254
#define PNG_LIBPNG_VER_STRING
Definition: png.h:281
#define PNG_FILTER_TYPE_DEFAULT
Definition: png.h:684
#define PNG_INTERLACE_NONE
Definition: png.h:687
png_byte * png_bytep
Definition: pngconf.h:579
size_t png_size_t
Definition: pngconf.h:523
@ splashModeRGB8
Definition: SplashTypes.h:40
Guchar SplashColor[3]
Definition: SplashTypes.h:63
#define errNone
Definition: ErrorCodes.h:12
#define errFileIO
Definition: ErrorCodes.h:34
@ textOutReadingOrder
Definition: TextOutputDev.h:42
int num
Definition: disdvi.c:621
#define strcasecmp
Definition: c-auto.h:150
#define strncasecmp
Definition: win32lib.h:115
static int ret
Definition: convert.c:72
int lines
Definition: var.h:5
#define b0
Definition: texmfmem.h:168
#define b1
Definition: texmfmem.h:169
char args[100]
Definition: fixwrites.c:7
int getLength(char *s)
Definition: lengths.c:99
-arabic-joining-list
static int cols
Definition: pbmmask.c:21
char * bitmap
Definition: pbmpage.c:35
static bool png
Definition: pdftocairo.cc:89
double scale
Definition: pnmhistmap.c:38
static int size
Definition: ppmlabel.c:24
char line[1024]
Definition: process_score.c:29
char const * words[127]
Definition: ps_tiny.c:68
struct stream_s stream
Definition: pts_fax.h:93
const char * tag
Definition: HTMLGen.cc:52
void * pngStream
Definition: HTMLGen.cc:270
int(* writePNG)(void *stream, const char *data, int size)
Definition: HTMLGen.cc:269
Definition: Object.h:37
int gen
Definition: Object.h:39
int num
Definition: Object.h:38
const char * name
Definition: HTMLGen.cc:82
GBool fixedWidth
Definition: HTMLGen.cc:83
double mWidth
Definition: HTMLGen.cc:103
Definition: usprintf.c:39
Definition: gf2pbm.c:137
Definition: pbmfont.h:11
Definition: bdf.c:133
Definition: tfmaux.c:31
Definition: dvips.h:235
*job_name strlen((char *) job_name) - 4)
char fmt[256]
Definition: tex4ht.c:3925
return() int(((double) *(font_tbl[cur_fnt].wtbl+(int)(*(font_tbl[cur_fnt].char_wi+(int)(ch - font_tbl[cur_fnt].char_f)% 256)))/(double)(1L<< 20)) *(double) font_tbl[cur_fnt].scale)
#define sp
Definition: stack.c:11
#define va_start(pvar)
Definition: varargs.h:30
#define va_end(pvar)
Definition: varargs.h:38
char * va_list
Definition: varargs.h:22