poppler  0.47.0
About: Poppler is a PDF rendering library (based on the xpdf-3.0 code base).
  Fossies Dox: poppler-0.47.0.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

HtmlOutputDev.cc
Go to the documentation of this file.
1 //========================================================================
2 //
3 // HtmlOutputDev.cc
4 //
5 // Copyright 1997-2002 Glyph & Cog, LLC
6 //
7 // Changed 1999-2000 by G.Ovtcharov
8 //
9 // Changed 2002 by Mikhail Kruk
10 //
11 //========================================================================
12 
13 //========================================================================
14 //
15 // Modified under the Poppler project - http://poppler.freedesktop.org
16 //
17 // All changes made under the Poppler project to this file are licensed
18 // under GPL version 2 or later
19 //
20 // Copyright (C) 2005-2013 Albert Astals Cid <aacid@kde.org>
21 // Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
22 // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
23 // Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
24 // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
25 // Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
26 // Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
27 // Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
28 // Copyright (C) 2010, 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
29 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
30 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
31 // Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
32 // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
33 // Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
34 // Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
35 // Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com>
36 // Copyright (C) 2012 Pino Toscano <pino@kde.org>
37 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
38 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
39 // Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com>
40 // Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
41 // Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com>
42 //
43 // To see a description of the changes please see the Changelog file that
44 // came with your tarball or type make ChangeLog if you are building from git
45 //
46 //========================================================================
47 
48 #ifdef __GNUC__
49 #pragma implementation
50 #endif
51 
52 #include "config.h"
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <stdarg.h>
56 #include <stddef.h>
57 #include <ctype.h>
58 #include <math.h>
59 #include <iostream>
60 #include "goo/GooString.h"
61 #include "goo/GooList.h"
62 #include "UnicodeMap.h"
63 #include "goo/gmem.h"
64 #include "Error.h"
65 #include "GfxState.h"
66 #include "Page.h"
67 #include "Annot.h"
68 #include "PNGWriter.h"
69 #include "GlobalParams.h"
70 #include "HtmlOutputDev.h"
71 #include "HtmlFonts.h"
72 #include "HtmlUtils.h"
73 #include "Outline.h"
74 #include "PDFDoc.h"
75 
76 #ifdef ENABLE_LIBPNG
77 #include <png.h>
78 #endif
79 
80 #define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
81 
82 class HtmlImage
83 {
84 public:
85  HtmlImage(GooString *_fName, GfxState *state)
86  : fName(_fName) {
87  state->transform(0, 0, &xMin, &yMax);
88  state->transform(1, 1, &xMax, &yMin);
89  }
90  ~HtmlImage() { delete fName; }
91 
92  double xMin, xMax; // image x coordinates
93  double yMin, yMax; // image y coordinates
94  GooString *fName; // image file name
95 };
96 
97 // returns true if x is closer to y than x is to z
// Returns true when the distance from x to y is strictly smaller than
// the distance from x to z (ties yield false).
static inline bool IS_CLOSER(float x, float y, float z)
{
  return fabs(x - z) > fabs(x - y);
}
99 
100 extern GBool complexMode;
101 extern GBool singleHtml;
102 extern GBool ignore;
103 extern GBool printCommands;
104 extern GBool printHtml;
105 extern GBool noframes;
106 extern GBool stout;
107 extern GBool xml;
108 extern GBool showHidden;
109 extern GBool noMerge;
110 
111 extern double wordBreakThreshold;
112 
113 static GBool debug = gFalse;
114 static GooString *gstr_buff0 = NULL; // a workspace in which I format strings
115 
116 static GooString* basename(GooString* str){
117 
118  char *p=str->getCString();
119  int len=str->getLength();
120  for (int i=len-1;i>=0;i--)
121  if (*(p+i)==SLASH)
122  return new GooString((p+i+1),len-i-1);
123  return new GooString(str);
124 }
125 
#if 0
// Returns a newly allocated string holding everything in str up to and
// including the last SLASH character, or an empty string when str
// contains no SLASH. (Currently compiled out.)
static GooString* Dirname(GooString* str){
  const char *chars = str->getCString();

  // scan backwards for the last path separator
  int idx = str->getLength() - 1;
  while (idx >= 0 && chars[idx] != SLASH) {
    --idx;
  }

  if (idx < 0) {
    return new GooString();
  }
  return new GooString(chars, idx + 1);
}
#endif
137 
138 static const char *print_matrix(const double *mat) {
139  delete gstr_buff0;
140 
141  gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]",
142  *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
143  return gstr_buff0->getCString();
144 }
145 
146 static const char *print_uni_str(const Unicode *u, const unsigned uLen) {
147  GooString *gstr_buff1 = NULL;
148 
149  delete gstr_buff0;
150 
151  if (!uLen) return "";
152  gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
153  for (unsigned i = 1; i < uLen; i++) {
154  if (u[i] < 0x7F) {
155  gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?');
156  delete gstr_buff0;
157  gstr_buff0 = gstr_buff1;
158  }
159  }
160 
161  return gstr_buff0->getCString();
162 }
163 
164 //------------------------------------------------------------------------
165 // HtmlString
166 //------------------------------------------------------------------------
167 
168 HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) {
169  GfxFont *font;
170  double x, y;
171 
172  state->transform(state->getCurX(), state->getCurY(), &x, &y);
173  if ((font = state->getFont())) {
174  double ascent = font->getAscent();
175  double descent = font->getDescent();
176  if( ascent > 1.05 ){
177  //printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
178  ascent = 1.05;
179  }
180  if( descent < -0.4 ){
181  //printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
182  descent = -0.4;
183  }
184  yMin = y - ascent * fontSize;
185  yMax = y - descent * fontSize;
186  GfxRGB rgb;
187  state->getFillRGB(&rgb);
188  HtmlFont hfont=HtmlFont(font, static_cast<int>(fontSize-1), rgb);
189  if (isMatRotOrSkew(state->getTextMat())) {
190  double normalizedMatrix[4];
191  memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
192  // browser rotates the opposite way
193  // so flip the sign of the angle -> sin() components change sign
194  if (debug)
195  std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
196  normalizedMatrix[1] *= -1;
197  normalizedMatrix[2] *= -1;
198  if (debug)
199  std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
200  normalizeRotMat(normalizedMatrix);
201  if (debug)
202  std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
203  hfont.setRotMat(normalizedMatrix);
204  }
205  fontpos = fonts->AddFont(hfont);
206  } else {
207  // this means that the PDF file draws text without a current font,
208  // which should never happen
209  yMin = y - 0.95 * fontSize;
210  yMax = y + 0.35 * fontSize;
211  fontpos=0;
212  }
213  if (yMin == yMax) {
214  // this is a sanity check for a case that shouldn't happen -- but
215  // if it does happen, we want to avoid dividing by zero later
216  yMin = y;
217  yMax = y + 1;
218  }
219  col = 0;
220  text = NULL;
221  xRight = NULL;
222  link = NULL;
223  len = size = 0;
224  yxNext = NULL;
225  xyNext = NULL;
226  htext=new GooString();
227  dir = textDirUnknown;
228 }
229 
230 
232  gfree(text);
233  delete htext;
234  gfree(xRight);
235 }
236 
237 void HtmlString::addChar(GfxState *state, double x, double y,
238  double dx, double dy, Unicode u) {
239  if (dir == textDirUnknown) {
240  //dir = UnicodeMap::getDirection(u);
241  dir = textDirLeftRight;
242  }
243 
244  if (len == size) {
245  size += 16;
246  text = (Unicode *)grealloc(text, size * sizeof(Unicode));
247  xRight = (double *)grealloc(xRight, size * sizeof(double));
248  }
249  text[len] = u;
250  if (len == 0) {
251  xMin = x;
252  }
253  xMax = xRight[len] = x + dx;
254 //printf("added char: %f %f xright = %f\n", x, dx, x+dx);
255  ++len;
256 }
257 
259 {
260  if( dir == textDirRightLeft && len > 1 )
261  {
262  //printf("will reverse!\n");
263  for (int i = 0; i < len / 2; i++)
264  {
265  Unicode ch = text[i];
266  text[i] = text[len - i - 1];
267  text[len - i - 1] = ch;
268  }
269  }
270 }
271 
272 //------------------------------------------------------------------------
273 // HtmlPage
274 //------------------------------------------------------------------------
275 
276 HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
277  this->rawOrder = rawOrder;
278  curStr = NULL;
279  yxStrings = NULL;
280  xyStrings = NULL;
281  yxCur1 = yxCur2 = NULL;
282  fonts=new HtmlFontAccu();
283  links=new HtmlLinks();
284  imgList=new GooList();
285  pageWidth=0;
286  pageHeight=0;
287  fontsPageMarker = 0;
288  DocName=NULL;
289  firstPage = -1;
290  imgExt = new GooString(imgExtVal);
291 }
292 
294  clear();
295  delete DocName;
296  delete fonts;
297  delete links;
298  delete imgExt;
299  deleteGooList(imgList, HtmlImage);
300 }
301 
303  GfxFont *font;
304  double *fm;
305  char *name;
306  int code;
307  double w;
308 
309  // adjust the font size
310  fontSize = state->getTransformedFontSize();
311  if ((font = state->getFont()) && font->getType() == fontType3) {
312  // This is a hack which makes it possible to deal with some Type 3
313  // fonts. The problem is that it's impossible to know what the
314  // base coordinate system used in the font is without actually
315  // rendering the font. This code tries to guess by looking at the
316  // width of the character 'm' (which breaks if the font is a
317  // subset that doesn't contain 'm').
318  for (code = 0; code < 256; ++code) {
319  if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
320  name[0] == 'm' && name[1] == '\0') {
321  break;
322  }
323  }
324  if (code < 256) {
325  w = ((Gfx8BitFont *)font)->getWidth(code);
326  if (w != 0) {
327  // 600 is a generic average 'm' width -- yes, this is a hack
328  fontSize *= w / 0.6;
329  }
330  }
331  fm = font->getFontMatrix();
332  if (fm[0] != 0) {
333  fontSize *= fabs(fm[3] / fm[0]);
334  }
335  }
336 }
337 
339  curStr = new HtmlString(state, fontSize, fonts);
340 }
341 
342 
344  HtmlString *tmp;
345 
346  int linkIndex = 0;
347  HtmlFont* h;
348  for(tmp=yxStrings;tmp;tmp=tmp->yxNext){
349  int pos=tmp->fontpos;
350  // printf("%d\n",pos);
351  h=fonts->Get(pos);
352 
353  if (tmp->htext) delete tmp->htext;
354  tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len);
355 
356  if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){
357  tmp->link = links->getLink(linkIndex);
358  /*GooString *t=tmp->htext;
359  tmp->htext=links->getLink(k)->Link(tmp->htext);
360  delete t;*/
361  }
362  }
363 
364 }
365 
366 
367 void HtmlPage::addChar(GfxState *state, double x, double y,
368  double dx, double dy,
369  double ox, double oy, Unicode *u, int uLen) {
370  double x1, y1, w1, h1, dx2, dy2;
371  int n, i;
372  state->transform(x, y, &x1, &y1);
373  n = curStr->len;
374 
375  // check that new character is in the same direction as current string
376  // and is not too far away from it before adding
377  //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
378  // XXX
379  if (debug) {
380  double *text_mat = state->getTextMat();
381  // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
382  // sin q is zero iff there is no rotation, or 180 deg. rotation;
383  // for 180 rotation, cos q will be negative
384  if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
385  std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
386  std::cerr << "text " << print_matrix(state->getTextMat());
387  }
388  }
389  if (n > 0 && // don't start a new string, unless there is already a string
390  // TODO: the following line assumes that text is flowing left to
391  // right, which will not necessarily be the case, e.g. if rotated;
392  // It assesses whether or not two characters are close enough to
393  // be part of the same string
394  fabs(x1 - curStr->xRight[n-1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) &&
395  // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
396  // sin q is zero iff there is no rotation, or 180 deg. rotation;
397  // for 180 rotation, cos q will be negative
398  !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat()))
399  {
400  endString();
401  beginString(state, NULL);
402  }
403  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
404  0, &dx2, &dy2);
405  dx -= dx2;
406  dy -= dy2;
407  state->transformDelta(dx, dy, &w1, &h1);
408  if (uLen != 0) {
409  w1 /= uLen;
410  h1 /= uLen;
411  }
412  for (i = 0; i < uLen; ++i) {
413  curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
414  }
415 }
416 
418  HtmlString *p1, *p2;
419  double h, y1, y2;
420 
421  // throw away zero-length strings -- they don't have valid xMin/xMax
422  // values, and they're useless anyway
423  if (curStr->len == 0) {
424  delete curStr;
425  curStr = NULL;
426  return;
427  }
428 
429  curStr->endString();
430 
431 #if 0 //~tmp
432  if (curStr->yMax - curStr->yMin > 20) {
433  delete curStr;
434  curStr = NULL;
435  return;
436  }
437 #endif
438 
439  // insert string in y-major list
440  h = curStr->yMax - curStr->yMin;
441  y1 = curStr->yMin + 0.5 * h;
442  y2 = curStr->yMin + 0.8 * h;
443  if (rawOrder) {
444  p1 = yxCur1;
445  p2 = NULL;
446  } else if ((!yxCur1 ||
447  (y1 >= yxCur1->yMin &&
448  (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
449  (!yxCur2 ||
450  (y1 < yxCur2->yMin ||
451  (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
452  p1 = yxCur1;
453  p2 = yxCur2;
454  } else {
455  for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
456  if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
457  break;
458  }
459  yxCur2 = p2;
460  }
461  yxCur1 = curStr;
462  if (p1)
463  p1->yxNext = curStr;
464  else
465  yxStrings = curStr;
466  curStr->yxNext = p2;
467  curStr = NULL;
468 }
469 
// Returns a pointer to the last occurrence of ss within s, or NULL when
// ss does not occur. An empty ss matches at the end of s, so the result
// is then a pointer to the terminating NUL of s.
static const char *strrstr( const char *s, const char *ss )
{
  // An empty needle matches everywhere; answering directly avoids
  // stepping strstr() past the terminating NUL of s.
  if( *ss == '\0' ){
    return s + strlen( s );
  }

  const char *last = NULL;
  // each hit is at least one character long, so hit+1 stays inside s
  for( const char *hit = strstr( s, ss ); hit != NULL; hit = strstr( hit+1, ss ) ){
    last = hit;
  }
  return last;
}
478 
479 static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
480 {
481  const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( htext->getCString(), "<i>" ) : NULL;
482  const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( htext->getCString(), "<b>" ) : NULL;
483  const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL;
484  if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? last_italic : last_bold ) ){
485  htext->append("</a>", 4);
486  finish_a = false;
487  }
488  if( finish_italic && finish_bold && last_italic > last_bold ){
489  htext->append("</i>", 4);
490  finish_italic = false;
491  }
492  if( finish_bold )
493  htext->append("</b>", 4);
494  if( finish_italic )
495  htext->append("</i>", 4);
496  if( finish_a )
497  htext->append("</a>");
498 }
499 
500 // Strings are lines of text;
501 // This function aims to combine strings into lines and paragraphs if !noMerge
502 // It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect
504  HtmlString *str1, *str2;
505  HtmlFont *hfont1, *hfont2;
506  double space, horSpace, vertSpace, vertOverlap;
507  GBool addSpace, addLineBreak;
508  int n, i;
509  double curX, curY;
510 
511 #if 0 //~ for debugging
512  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
513  printf("x=%f..%f y=%f..%f size=%2d '",
514  str1->xMin, str1->xMax, str1->yMin, str1->yMax,
515  (int)(str1->yMax - str1->yMin));
516  for (i = 0; i < str1->len; ++i) {
517  fputc(str1->text[i] & 0xff, stdout);
518  }
519  printf("'\n");
520  }
521  printf("\n------------------------------------------------------------\n\n");
522 #endif
523  str1 = yxStrings;
524 
525  if( !str1 ) return;
526 
527  //----- discard duplicated text (fake boldface, drop shadows)
528  if( !complexMode )
529  { /* if not in complex mode get rid of duplicate strings */
530  HtmlString *str3;
531  GBool found;
532  while (str1)
533  {
534  double size = str1->yMax - str1->yMin;
535  double xLimit = str1->xMin + size * 0.2;
536  found = gFalse;
537  for (str2 = str1, str3 = str1->yxNext;
538  str3 && str3->xMin < xLimit;
539  str2 = str3, str3 = str2->yxNext)
540  {
541  if (str3->len == str1->len &&
542  !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
543  fabs(str3->yMin - str1->yMin) < size * 0.2 &&
544  fabs(str3->yMax - str1->yMax) < size * 0.2 &&
545  fabs(str3->xMax - str1->xMax) < size * 0.2)
546  {
547  found = gTrue;
548  //printf("found duplicate!\n");
549  break;
550  }
551  }
552  if (found)
553  {
554  str2->xyNext = str3->xyNext;
555  str2->yxNext = str3->yxNext;
556  delete str3;
557  }
558  else
559  {
560  str1 = str1->yxNext;
561  }
562  }
563  } /*- !complexMode */
564 
565  str1 = yxStrings;
566 
567  hfont1 = getFont(str1);
568  if( hfont1->isBold() )
569  str1->htext->insert(0,"<b>",3);
570  if( hfont1->isItalic() )
571  str1->htext->insert(0,"<i>",3);
572  if( str1->getLink() != NULL ) {
573  GooString *ls = str1->getLink()->getLinkStart();
574  str1->htext->insert(0, ls);
575  delete ls;
576  }
577  curX = str1->xMin; curY = str1->yMin;
578 
579  while (str1 && (str2 = str1->yxNext)) {
580  hfont2 = getFont(str2);
581  space = str1->yMax - str1->yMin; // the height of the font's bounding box
582  horSpace = str2->xMin - str1->xMax;
583  // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
584  addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
585  vertSpace = str2->yMin - str1->yMax;
586 
587 //printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
588 
589  if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
590  {
591  vertOverlap = str1->yMax - str2->yMin;
592  } else
593  if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
594  {
595  vertOverlap = str2->yMax - str1->yMin;
596  } else
597  {
598  vertOverlap = 0;
599  }
600 
601  // Combine strings if:
602  // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
603  // 1. They appear to be part of the same line of text
604  // 2. They appear to be subsequent lines of a paragraph
605  // We assume (1) or (2) above, respectively, based on:
606  // (1) strings overlap vertically AND
607  // horizontal space between end of str1 and start of str2 is consistent with a single space or less;
608  // when rawOrder, the strings have to overlap vertically by at least 50%
609  // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left
610  if (
611  (
612  (
613  (
614  (rawOrder && vertOverlap > 0.5 * space)
615  ||
616  (!rawOrder && str2->yMin < str1->yMax)
617  ) &&
618  (horSpace > -0.5 * space && horSpace < space)
619  ) ||
620  (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
621  ) &&
622  (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter
623  str1->dir == str2->dir // text direction the same
624  )
625  {
626 // printf("yes\n");
627  n = str1->len + str2->len;
628  if ((addSpace = horSpace > wordBreakThreshold * space)) {
629  ++n;
630  }
631  if (addLineBreak) {
632  ++n;
633  }
634 
635  str1->size = (n + 15) & ~15;
636  str1->text = (Unicode *)grealloc(str1->text,
637  str1->size * sizeof(Unicode));
638  str1->xRight = (double *)grealloc(str1->xRight,
639  str1->size * sizeof(double));
640  if (addSpace) {
641  str1->text[str1->len] = 0x20;
642  str1->htext->append(xml?" ":"&#160;");
643  str1->xRight[str1->len] = str2->xMin;
644  ++str1->len;
645  }
646  if (addLineBreak) {
647  str1->text[str1->len] = '\n';
648  str1->htext->append("<br/>");
649  str1->xRight[str1->len] = str2->xMin;
650  ++str1->len;
651  str1->yMin = str2->yMin;
652  str1->yMax = str2->yMax;
653  str1->xMax = str2->xMax;
654  int fontLineSize = hfont1->getLineSize();
655  int curLineSize = (int)(vertSpace + space);
656  if( curLineSize != fontLineSize )
657  {
658  HtmlFont *newfnt = new HtmlFont(*hfont1);
659  newfnt->setLineSize(curLineSize);
660  str1->fontpos = fonts->AddFont(*newfnt);
661  delete newfnt;
662  hfont1 = getFont(str1);
663  // we have to reget hfont2 because it's location could have
664  // changed on resize
665  hfont2 = getFont(str2);
666  }
667  }
668  for (i = 0; i < str2->len; ++i) {
669  str1->text[str1->len] = str2->text[i];
670  str1->xRight[str1->len] = str2->xRight[i];
671  ++str1->len;
672  }
673 
674  /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
675  HtmlLink *hlink1 = str1->getLink();
676  HtmlLink *hlink2 = str2->getLink();
677  bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
678  GBool finish_a = switch_links && hlink1 != NULL;
679  GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
680  GBool finish_bold = hfont1->isBold() && ( !hfont2->isBold() || finish_a || finish_italic );
681  CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
682  if( switch_links && hlink2 != NULL ) {
683  GooString *ls = hlink2->getLinkStart();
684  str1->htext->append(ls);
685  delete ls;
686  }
687  if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
688  str1->htext->append("<i>", 3);
689  if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
690  str1->htext->append("<b>", 3);
691 
692 
693  str1->htext->append(str2->htext);
694  // str1 now contains href for link of str2 (if it is defined)
695  str1->link = str2->link;
696  hfont1 = hfont2;
697  if (str2->xMax > str1->xMax) {
698  str1->xMax = str2->xMax;
699  }
700  if (str2->yMax > str1->yMax) {
701  str1->yMax = str2->yMax;
702  }
703  str1->yxNext = str2->yxNext;
704  delete str2;
705  } else { // keep strings separate
706 // printf("no\n");
707  GBool finish_a = str1->getLink() != NULL;
708  GBool finish_bold = hfont1->isBold();
709  GBool finish_italic = hfont1->isItalic();
710  CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
711 
712  str1->xMin = curX; str1->yMin = curY;
713  str1 = str2;
714  curX = str1->xMin; curY = str1->yMin;
715  hfont1 = hfont2;
716  if( hfont1->isBold() )
717  str1->htext->insert(0,"<b>",3);
718  if( hfont1->isItalic() )
719  str1->htext->insert(0,"<i>",3);
720  if( str1->getLink() != NULL ) {
721  GooString *ls = str1->getLink()->getLinkStart();
722  str1->htext->insert(0, ls);
723  delete ls;
724  }
725  }
726  }
727  str1->xMin = curX; str1->yMin = curY;
728 
729  GBool finish_bold = hfont1->isBold();
730  GBool finish_italic = hfont1->isItalic();
731  GBool finish_a = str1->getLink() != NULL;
732  CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
733 
734 #if 0 //~ for debugging
735  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
736  printf("x=%3d..%3d y=%3d..%3d size=%2d ",
737  (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
738  (int)(str1->yMax - str1->yMin));
739  printf("'%s'\n", str1->htext->getCString());
740  }
741  printf("\n------------------------------------------------------------\n\n");
742 #endif
743 
744 }
745 
746 void HtmlPage::dumpAsXML(FILE* f,int page){
747  fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
748  fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
749 
750  for(int i=fontsPageMarker;i < fonts->size();i++) {
751  GooString *fontCSStyle = fonts->CSStyle(i);
752  fprintf(f,"\t%s\n",fontCSStyle->getCString());
753  delete fontCSStyle;
754  }
755 
756  int listlen=imgList->getLength();
757  for (int i = 0; i < listlen; i++) {
758  HtmlImage *img = (HtmlImage*)imgList->del(0);
759  fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
760  fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
761  fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
762  delete img;
763  }
764 
765  for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
766  if (tmp->htext){
767  fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
768  fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
769  fprintf(f,"font=\"%d\">", tmp->fontpos);
770  fputs(tmp->htext->getCString(),f);
771  fputs("</text>\n",f);
772  }
773  }
774  fputs("</page>\n",f);
775 }
776 
// Writes a <style> element to f defining the xflip, yflip and xyflip
// classes used to mirror images.
// Source of the CSS:
// http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
// tested in Chrome, Fx (Linux) and IE9 (W7)
static void printCSS(FILE *f)
{
  static const char flipStyles[] =
    "<style type=\"text/css\">\n"
    "<!--\n"
    ".xflip {\n"
    " -moz-transform: scaleX(-1);\n"
    " -webkit-transform: scaleX(-1);\n"
    " -o-transform: scaleX(-1);\n"
    " transform: scaleX(-1);\n"
    " filter: fliph;\n"
    "}\n"
    ".yflip {\n"
    " -moz-transform: scaleY(-1);\n"
    " -webkit-transform: scaleY(-1);\n"
    " -o-transform: scaleY(-1);\n"
    " transform: scaleY(-1);\n"
    " filter: flipv;\n"
    "}\n"
    ".xyflip {\n"
    " -moz-transform: scaleX(-1) scaleY(-1);\n"
    " -webkit-transform: scaleX(-1) scaleY(-1);\n"
    " -o-transform: scaleX(-1) scaleY(-1);\n"
    " transform: scaleX(-1) scaleY(-1);\n"
    " filter: fliph + flipv;\n"
    "}\n"
    "-->\n"
    "</style>\n";

  // the text contains no embedded NUL, so fputs emits all of it
  fputs( flipStyles, f );
}
812 
813 int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) {
814  GooString* tmp;
815 
816  if( !noframes )
817  {
818  GooString* pgNum=GooString::fromInt(page);
819  tmp = new GooString(DocName);
820  if (!singleHtml){
821  tmp->append('-')->append(pgNum)->append(".html");
822  pageFile = fopen(tmp->getCString(), "w");
823  } else {
824  tmp->append("-html")->append(".html");
825  pageFile = fopen(tmp->getCString(), "a");
826  }
827  delete pgNum;
828  if (!pageFile) {
829  error(errIO, -1, "Couldn't open html file '{0:t}'", tmp);
830  delete tmp;
831  return 1;
832  }
833 
834  if (!singleHtml)
835  fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
836  else
837  fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, tmp->getCString());
838 
839  delete tmp;
840 
841  GooString *htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
842  if (!singleHtml)
843  fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
844  else
845  fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString());
846  delete htmlEncoding;
847  }
848  else
849  {
850  pageFile = file;
851  fprintf(pageFile,"<!-- Page %d -->\n", page);
852  fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
853  }
854 
855  return 0;
856 }
857 
858 void HtmlPage::dumpComplex(FILE *file, int page){
859  FILE* pageFile;
860  GooString* tmp;
861 
862  if( firstPage == -1 ) firstPage = page;
863 
864  if (dumpComplexHeaders(file, pageFile, page)) { error(errIO, -1, "Couldn't write headers."); return; }
865 
866  tmp=basename(DocName);
867 
868  fputs("<style type=\"text/css\">\n<!--\n",pageFile);
869  fputs("\tp {margin: 0; padding: 0;}",pageFile);
870  for(int i=fontsPageMarker;i!=fonts->size();i++) {
871  GooString *fontCSStyle;
872  if (!singleHtml)
873  fontCSStyle = fonts->CSStyle(i);
874  else
875  fontCSStyle = fonts->CSStyle(i,page);
876  fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
877  delete fontCSStyle;
878  }
879 
880  fputs("-->\n</style>\n",pageFile);
881 
882  if( !noframes )
883  {
884  fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
885  }
886 
887  fprintf(pageFile,"<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
888  page, pageWidth, pageHeight);
889 
890  if( !ignore )
891  {
892  fprintf(pageFile,
893  "<img width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
894  pageWidth, pageHeight, tmp->getCString(),
895  (page-firstPage+1), imgExt->getCString());
896  }
897 
898  delete tmp;
899 
900  for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
901  if (tmp1->htext){
902  fprintf(pageFile,
903  "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
904  xoutRound(tmp1->yMin),
905  xoutRound(tmp1->xMin));
906  if (!singleHtml) {
907  fputc('0', pageFile);
908  } else {
909  fprintf(pageFile, "%d", page);
910  }
911  fprintf(pageFile,"%d\">", tmp1->fontpos);
912  fputs(tmp1->htext->getCString(), pageFile);
913  fputs("</p>\n", pageFile);
914  }
915  }
916 
917  fputs("</div>\n", pageFile);
918 
919  if( !noframes )
920  {
921  fputs("</body>\n</html>\n",pageFile);
922  fclose(pageFile);
923  }
924 }
925 
926 
927 void HtmlPage::dump(FILE *f, int pageNum)
928 {
929  if (complexMode || singleHtml)
930  {
931  if (xml) dumpAsXML(f, pageNum);
932  if (!xml) dumpComplex(f, pageNum);
933  }
934  else
935  {
936  fprintf(f,"<a name=%d></a>",pageNum);
937  // Loop over the list of image names on this page
938  int listlen=imgList->getLength();
939  for (int i = 0; i < listlen; i++) {
940  HtmlImage *img = (HtmlImage*)imgList->del(0);
941 
942  // see printCSS() for class names
943  const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
944  int style_index=0;
945  if (img->xMin > img->xMax) style_index += 1; // xFlip
946  if (img->yMin > img->yMax) style_index += 2; // yFlip
947 
948  fprintf(f,"<img%s src=\"%s\"/><br/>\n",styles[style_index],img->fName->getCString());
949  delete img;
950  }
951 
952  GooString* str;
953  for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
954  if (tmp->htext){
955  str=new GooString(tmp->htext);
956  fputs(str->getCString(),f);
957  delete str;
958  fputs("<br/>\n",f);
959  }
960  }
961  fputs("<hr/>\n",f);
962  }
963 }
964 
965 
966 
968  HtmlString *p1, *p2;
969 
970  if (curStr) {
971  delete curStr;
972  curStr = NULL;
973  }
974  for (p1 = yxStrings; p1; p1 = p2) {
975  p2 = p1->yxNext;
976  delete p1;
977  }
978  yxStrings = NULL;
979  xyStrings = NULL;
980  yxCur1 = yxCur2 = NULL;
981 
982  if( !noframes )
983  {
984  delete fonts;
985  fonts=new HtmlFontAccu();
986  fontsPageMarker = 0;
987  }
988  else
989  {
990  fontsPageMarker = fonts->size();
991  }
992 
993  delete links;
994  links=new HtmlLinks();
995 
996 
997 }
998 
999 void HtmlPage::setDocName(char *fname){
1000  DocName=new GooString(fname);
1001 }
1002 
1003 void HtmlPage::addImage(GooString *fname, GfxState *state) {
1004  HtmlImage *img = new HtmlImage(fname, state);
1005  imgList->append(img);
1006 }
1007 
1008 //------------------------------------------------------------------------
1009 // HtmlMetaVar
1010 //------------------------------------------------------------------------
1011 
1012 HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
1013 {
1014  name = new GooString(_name);
1015  content = new GooString(_content);
1016 }
1017 
1019 {
1020  delete name;
1021  delete content;
1022 }
1023 
1025 {
1026  GooString *result = new GooString("<meta name=\"");
1027  result->append(name);
1028  result->append("\" content=\"");
1029  result->append(content);
1030  result->append("\"/>");
1031  return result;
1032 }
1033 
1034 //------------------------------------------------------------------------
1035 // HtmlOutputDev
1036 //------------------------------------------------------------------------
1037 
1038 static const char* HtmlEncodings[][2] = {
1039  {"Latin1", "ISO-8859-1"},
1040  {NULL, NULL}
1041 };
1042 
1043 GooString* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
1044 {
1045  GooString* enc = encoding;
1046  for(int i = 0; HtmlEncodings[i][0] != NULL; i++)
1047  {
1048  if( enc->cmp(HtmlEncodings[i][0]) == 0 )
1049  {
1050  delete enc;
1051  return new GooString(HtmlEncodings[i][1]);
1052  }
1053  }
1054  return enc;
1055 }
1056 
1057 void HtmlOutputDev::doFrame(int firstPage){
1058  GooString* fName=new GooString(Docname);
1059  GooString* htmlEncoding;
1060  fName->append(".html");
1061 
1062  if (!(fContentsFrame = fopen(fName->getCString(), "w"))){
1063  error(errIO, -1, "Couldn't open html file '{0:t}'", fName);
1064  delete fName;
1065  return;
1066  }
1067 
1068  delete fName;
1069 
1070  fName=basename(Docname);
1071  fputs(DOCTYPE, fContentsFrame);
1072  fputs("\n<html>",fContentsFrame);
1073  fputs("\n<head>",fContentsFrame);
1074  fprintf(fContentsFrame,"\n<title>%s</title>",docTitle->getCString());
1075  htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1076  fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
1077  dumpMetaVars(fContentsFrame);
1078  fprintf(fContentsFrame, "</head>\n");
1079  fputs("<frameset cols=\"100,*\">\n",fContentsFrame);
1080  fprintf(fContentsFrame,"<frame name=\"links\" src=\"%s_ind.html\"/>\n",fName->getCString());
1081  fputs("<frame name=\"contents\" src=",fContentsFrame);
1082  if (complexMode)
1083  fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
1084  else
1085  fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
1086 
1087  fputs("/>\n</frameset>\n</html>\n",fContentsFrame);
1088 
1089  delete fName;
1090  delete htmlEncoding;
1091  fclose(fContentsFrame);
1092 }
1093 
1094 HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
1095  char *author, char *keywords, char *subject, char *date,
1096  char *extension,
1097  GBool rawOrder, int firstPage, GBool outline)
1098 {
1099  catalog = catalogA;
1100  fContentsFrame = NULL;
1101  docTitle = new GooString(title);
1102  pages = NULL;
1103  dumpJPEG=gTrue;
1104  //write = gTrue;
1105  this->rawOrder = rawOrder;
1106  this->doOutline = outline;
1107  ok = gFalse;
1108  //this->firstPage = firstPage;
1109  //pageNum=firstPage;
1110  // open file
1111  needClose = gFalse;
1112  pages = new HtmlPage(rawOrder, extension);
1113 
1114  glMetaVars = new GooList();
1115  glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));
1116  if( author ) glMetaVars->append(new HtmlMetaVar("author", author));
1117  if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));
1118  if( date ) glMetaVars->append(new HtmlMetaVar("date", date));
1119  if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
1120 
1121  maxPageWidth = 0;
1122  maxPageHeight = 0;
1123 
1124  pages->setDocName(fileName);
1125  Docname=new GooString (fileName);
1126 
1127  // for non-xml output (complex or simple) with frames generate the left frame
1128  if(!xml && !noframes)
1129  {
1130  if (!singleHtml)
1131  {
1132  GooString* left=new GooString(fileName);
1133  left->append("_ind.html");
1134 
1135  doFrame(firstPage);
1136 
1137  if (!(fContentsFrame = fopen(left->getCString(), "w")))
1138  {
1139  error(errIO, -1, "Couldn't open html file '{0:t}'", left);
1140  delete left;
1141  return;
1142  }
1143  delete left;
1144  fputs(DOCTYPE, fContentsFrame);
1145  fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);
1146 
1147  if (doOutline)
1148  {
1149  GooString *str = basename(Docname);
1150  fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
1151  delete str;
1152  }
1153  }
1154  if (!complexMode)
1155  { /* not in complex mode */
1156 
1157  GooString* right=new GooString(fileName);
1158  right->append("s.html");
1159 
1160  if (!(page=fopen(right->getCString(),"w"))){
1161  error(errIO, -1, "Couldn't open html file '{0:t}'", right);
1162  delete right;
1163  return;
1164  }
1165  delete right;
1166  fputs(DOCTYPE, page);
1167  fputs("<html>\n<head>\n<title></title>\n",page);
1168  printCSS(page);
1169  fputs("</head>\n<body>\n",page);
1170  }
1171  }
1172 
1173  if (noframes) {
1174  if (stout) page=stdout;
1175  else {
1176  GooString* right=new GooString(fileName);
1177  if (!xml) right->append(".html");
1178  if (xml) right->append(".xml");
1179  if (!(page=fopen(right->getCString(),"w"))){
1180  error(errIO, -1, "Couldn't open html file '{0:t}'", right);
1181  delete right;
1182  return;
1183  }
1184  delete right;
1185  }
1186 
1187  GooString *htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1188  if (xml)
1189  {
1190  fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding->getCString());
1191  fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
1192  fprintf(page,"<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
1193  }
1194  else
1195  {
1196  fprintf(page,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->getCString());
1197 
1198  fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
1199 
1200  dumpMetaVars(page);
1201  printCSS(page);
1202  fprintf(page,"</head>\n");
1203  fprintf(page,"<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
1204  }
1205  delete htmlEncoding;
1206  }
1207  ok = gTrue;
1208 }
1209 
1211  HtmlFont::clear();
1212 
1213  delete Docname;
1214  delete docTitle;
1215 
1216  deleteGooList(glMetaVars, HtmlMetaVar);
1217 
1218  if (fContentsFrame){
1219  fputs("</body>\n</html>\n",fContentsFrame);
1220  fclose(fContentsFrame);
1221  }
1222  if (page != NULL) {
1223  if (xml) {
1224  fputs("</pdf2xml>\n",page);
1225  fclose(page);
1226  } else
1227  if ( !complexMode || xml || noframes )
1228  {
1229  fputs("</body>\n</html>\n",page);
1230  fclose(page);
1231  }
1232  }
1233  if (pages)
1234  delete pages;
1235 }
1236 
1237 void HtmlOutputDev::startPage(int pageNum, GfxState *state, XRef *xref) {
1238 #if 0
1239  if (mode&&!xml){
1240  if (write){
1241  write=gFalse;
1242  GooString* fname=Dirname(Docname);
1243  fname->append("image.log");
1244  if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){
1245  printf("Error : can not open %s",fname);
1246  exit(1);
1247  }
1248  delete fname;
1249  // if(state->getRotation()!=0)
1250  // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
1251  // else
1252  fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
1253  }
1254  }
1255 #endif
1256 
1257  this->pageNum = pageNum;
1258  GooString *str=basename(Docname);
1259  pages->clear();
1260  if(!noframes)
1261  {
1262  if (fContentsFrame)
1263  {
1264  if (complexMode)
1265  fprintf(fContentsFrame,"<a href=\"%s-%d.html\"",str->getCString(),pageNum);
1266  else
1267  fprintf(fContentsFrame,"<a href=\"%ss.html#%d\"",str->getCString(),pageNum);
1268  fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum);
1269  }
1270  }
1271 
1272  pages->pageWidth=static_cast<int>(state->getPageWidth());
1273  pages->pageHeight=static_cast<int>(state->getPageHeight());
1274 
1275  delete str;
1276 }
1277 
1278 
1280  Links *linksList = docPage->getLinks();
1281  for (int i = 0; i < linksList->getNumLinks(); ++i)
1282  {
1283  doProcessLink(linksList->getLink(i));
1284  }
1285  delete linksList;
1286 
1287  pages->conv();
1288  pages->coalesce();
1289  pages->dump(page, pageNum);
1290 
1291  // I don't yet know what to do in the case when there are pages of different
1292  // sizes and we want complex output: running ghostscript many times
1293  // seems very inefficient. So for now I'll just use last page's size
1294  maxPageWidth = pages->pageWidth;
1295  maxPageHeight = pages->pageHeight;
1296 
1297  //if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
1298  if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum));
1299 }
1300 
1302  pages->updateFont(state);
1303 }
1304 
1306  pages->beginString(state, s);
1307 }
1308 
1310  pages->endString();
1311 }
1312 
1313 void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
1314  double dx, double dy,
1315  double originX, double originY,
1316  CharCode code, int /*nBytes*/, Unicode *u, int uLen)
1317 {
1318  if ( !showHidden && (state->getRender() & 3) == 3) {
1319  return;
1320  }
1321  pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
1322 }
1323 
1324 void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
1325 {
1326  FILE *f1;
1327  int c;
1328 
1329  // open the image file
1330  GooString *fName=createImageFileName("jpg");
1331  if (!(f1 = fopen(fName->getCString(), "wb"))) {
1332  error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
1333  delete fName;
1334  return;
1335  }
1336 
1337  // initialize stream
1338  str = str->getNextStream();
1339  str->reset();
1340 
1341  // copy the stream
1342  while ((c = str->getChar()) != EOF)
1343  fputc(c, f1);
1344 
1345  fclose(f1);
1346 
1347  if (fName) {
1348  pages->addImage(fName, state);
1349  }
1350 }
1351 
1352 void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height,
1353  GfxImageColorMap *colorMap, GBool isMask)
1354 {
1355 #ifdef ENABLE_LIBPNG
1356  FILE *f1;
1357 
1358  if (!colorMap && !isMask) {
1359  error(errInternal, -1, "Can't have color image without a color map");
1360  return;
1361  }
1362 
1363  // open the image file
1364  GooString *fName=createImageFileName("png");
1365  if (!(f1 = fopen(fName->getCString(), "wb"))) {
1366  error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
1367  delete fName;
1368  return;
1369  }
1370 
1371  PNGWriter *writer = new PNGWriter( isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB );
1372  // TODO can we calculate the resolution of the image?
1373  if (!writer->init(f1, width, height, 72, 72)) {
1374  error(errInternal, -1, "Can't init PNG for image '{0:t}'", fName);
1375  delete writer;
1376  fclose(f1);
1377  return;
1378  }
1379 
1380  if (!isMask) {
1381  Guchar *p;
1382  GfxRGB rgb;
1383  png_byte *row = (png_byte *) gmalloc(3 * width); // 3 bytes/pixel: RGB
1384  png_bytep *row_pointer= &row;
1385 
1386  // Initialize the image stream
1387  ImageStream *imgStr = new ImageStream(str, width,
1388  colorMap->getNumPixelComps(), colorMap->getBits());
1389  imgStr->reset();
1390 
1391  // For each line...
1392  for (int y = 0; y < height; y++) {
1393 
1394  // Convert into a PNG row
1395  p = imgStr->getLine();
1396  if (!p) {
1397  error(errIO, -1, "Failed to read PNG. '{0:t}' will be incorrect", fName);
1398  delete writer;
1399  delete imgStr;
1400  fclose(f1);
1401  return;
1402  }
1403  for (int x = 0; x < width; x++) {
1404  colorMap->getRGB(p, &rgb);
1405  // Write the RGB pixels into the row
1406  row[3*x]= colToByte(rgb.r);
1407  row[3*x+1]= colToByte(rgb.g);
1408  row[3*x+2]= colToByte(rgb.b);
1409  p += colorMap->getNumPixelComps();
1410  }
1411 
1412  if (!writer->writeRow(row_pointer)) {
1413  error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
1414  delete writer;
1415  delete imgStr;
1416  fclose(f1);
1417  return;
1418  }
1419  }
1420  gfree(row);
1421  imgStr->close();
1422  delete imgStr;
1423  }
1424  else { // isMask == true
1425  int size = (width + 7)/8;
1426 
1427  // PDF masks use 0 = draw current color, 1 = leave unchanged.
1428  // We invert this to provide the standard interpretation of alpha
1429  // (0 = transparent, 1 = opaque). If the colorMap already inverts
1430  // the mask we leave the data unchanged.
1431  int invert_bits = 0xff;
1432  if (colorMap) {
1433  GfxGray gray;
1434  Guchar zero = 0;
1435  colorMap->getGray(&zero, &gray);
1436  if (colToByte(gray) == 0)
1437  invert_bits = 0x00;
1438  }
1439 
1440  str->reset();
1441  Guchar *png_row = (Guchar *)gmalloc(size);
1442 
1443  for (int ri = 0; ri < height; ++ri)
1444  {
1445  for(int i = 0; i < size; i++)
1446  png_row[i] = str->getChar() ^ invert_bits;
1447 
1448  if (!writer->writeRow( &png_row ))
1449  {
1450  error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
1451  delete writer;
1452  fclose(f1);
1453  gfree(png_row);
1454  return;
1455  }
1456  }
1457  str->close();
1458  gfree(png_row);
1459  }
1460 
1461  str->close();
1462 
1463  writer->close();
1464  delete writer;
1465  fclose(f1);
1466 
1467  pages->addImage(fName, state);
1468 #else
1469  return;
1470 #endif
1471 }
1472 
1473 GooString *HtmlOutputDev::createImageFileName(const char *ext)
1474 {
1475  GooString *fName=new GooString(Docname);
1476  fName->append("-");
1477  GooString *pgNum= GooString::fromInt(pageNum);
1478  GooString *imgnum= GooString::fromInt(pages->getNumImages()+1);
1479 
1480  fName->append(pgNum)->append("_")->append(imgnum)->append(".")->append(ext);
1481  delete pgNum;
1482  delete imgnum;
1483 
1484  return fName;
1485 }
1486 
1488  int width, int height, GBool invert,
1489  GBool interpolate, GBool inlineImg) {
1490 
1491  if (ignore||(complexMode && !xml)) {
1492  OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1493  return;
1494  }
1495 
1496  // dump JPEG file
1497  if (dumpJPEG && str->getKind() == strDCT) {
1498  drawJpegImage(state, str);
1499  }
1500  else {
1501 #ifdef ENABLE_LIBPNG
1502  drawPngImage(state, str, width, height, NULL, gTrue);
1503 #else
1504  OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1505 #endif
1506  }
1507 }
1508 
1510  int width, int height, GfxImageColorMap *colorMap,
1511  GBool interpolate, int *maskColors, GBool inlineImg) {
1512 
1513  if (ignore||(complexMode && !xml)) {
1514  OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
1515  maskColors, inlineImg);
1516  return;
1517  }
1518 
1519  /*if( !globalParams->getErrQuiet() )
1520  printf("image stream of kind %d\n", str->getKind());*/
1521  // dump JPEG file
1522  if (dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 ||
1523  colorMap->getNumPixelComps() == 3) && !inlineImg) {
1524  drawJpegImage(state, str);
1525  }
1526  else {
1527 #ifdef ENABLE_LIBPNG
1528  drawPngImage(state, str, width, height, colorMap );
1529 #else
1530  OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
1531  maskColors, inlineImg);
1532 #endif
1533  }
1534 }
1535 
1536 
1537 
1538 void HtmlOutputDev::doProcessLink(AnnotLink* link){
1539  double _x1,_y1,_x2,_y2;
1540  int x1,y1,x2,y2;
1541 
1542  link->getRect(&_x1,&_y1,&_x2,&_y2);
1543  cvtUserToDev(_x1,_y1,&x1,&y1);
1544 
1545  cvtUserToDev(_x2,_y2,&x2,&y2);
1546 
1547 
1548  GooString* _dest=getLinkDest(link);
1549  HtmlLink t((double) x1,(double) y2,(double) x2,(double) y1,_dest);
1550  pages->AddLink(t);
1551  delete _dest;
1552 }
1553 
1554 GooString* HtmlOutputDev::getLinkDest(AnnotLink *link){
1555  char *p;
1556  if (!link->getAction())
1557  return new GooString();
1558  switch(link->getAction()->getKind())
1559  {
1560  case actionGoTo:
1561  {
1562  GooString* file=basename(Docname);
1563  int page=1;
1564  LinkGoTo *ha=(LinkGoTo *)link->getAction();
1565  LinkDest *dest=NULL;
1566  if (ha->getDest()!=NULL)
1567  dest=ha->getDest()->copy();
1568  else if (ha->getNamedDest()!=NULL)
1569  dest=catalog->findDest(ha->getNamedDest());
1570 
1571  if (dest){
1572  if (dest->isPageRef()){
1573  Ref pageref=dest->getPageRef();
1574  page=catalog->findPage(pageref.num,pageref.gen);
1575  }
1576  else {
1577  page=dest->getPageNum();
1578  }
1579 
1580  delete dest;
1581 
1582  GooString *str=GooString::fromInt(page);
1583  /* complex simple
1584  frames file-4.html files.html#4
1585  noframes file.html#4 file.html#4
1586  */
1587  if (noframes)
1588  {
1589  file->append(".html#");
1590  file->append(str);
1591  }
1592  else
1593  {
1594  if( complexMode )
1595  {
1596  file->append("-");
1597  file->append(str);
1598  file->append(".html");
1599  }
1600  else
1601  {
1602  file->append("s.html#");
1603  file->append(str);
1604  }
1605  }
1606 
1607  if (printCommands) printf(" link to page %d ",page);
1608  delete str;
1609  return file;
1610  }
1611  else
1612  {
1613  return new GooString();
1614  }
1615  }
1616  case actionGoToR:
1617  {
1618  LinkGoToR *ha=(LinkGoToR *) link->getAction();
1619  LinkDest *dest=NULL;
1620  int page=1;
1621  GooString *file=new GooString();
1622  if (ha->getFileName()){
1623  delete file;
1624  file=new GooString(ha->getFileName()->getCString());
1625  }
1626  if (ha->getDest()!=NULL) dest=ha->getDest()->copy();
1627  if (dest&&file){
1628  if (!(dest->isPageRef())) page=dest->getPageNum();
1629  delete dest;
1630 
1631  if (printCommands) printf(" link to page %d ",page);
1632  if (printHtml){
1633  p=file->getCString()+file->getLength()-4;
1634  if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){
1635  file->del(file->getLength()-4,4);
1636  file->append(".html");
1637  }
1638  file->append('#');
1639  file->append(GooString::fromInt(page));
1640  }
1641  }
1642  if (printCommands && file) printf("filename %s\n",file->getCString());
1643  return file;
1644  }
1645  case actionURI:
1646  {
1647  LinkURI *ha=(LinkURI *) link->getAction();
1648  GooString* file=new GooString(ha->getURI()->getCString());
1649  // printf("uri : %s\n",file->getCString());
1650  return file;
1651  }
1652  case actionLaunch:
1653  {
1654  LinkLaunch *ha=(LinkLaunch *) link->getAction();
1655  GooString* file=new GooString(ha->getFileName()->getCString());
1656  if (printHtml) {
1657  p=file->getCString()+file->getLength()-4;
1658  if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){
1659  file->del(file->getLength()-4,4);
1660  file->append(".html");
1661  }
1662  if (printCommands) printf("filename %s",file->getCString());
1663 
1664  return file;
1665 
1666  }
1667  }
1668  default:
1669  return new GooString();
1670  }
1671 }
1672 
1673 void HtmlOutputDev::dumpMetaVars(FILE *file)
1674 {
1675  GooString *var;
1676 
1677  for(int i = 0; i < glMetaVars->getLength(); i++)
1678  {
1679  HtmlMetaVar *t = (HtmlMetaVar*)glMetaVars->get(i);
1680  var = t->toString();
1681  fprintf(file, "%s\n", var->getCString());
1682  delete var;
1683  }
1684 }
1685 
1687 {
1688 #ifdef DISABLE_OUTLINE
1689  return gFalse;
1690 #else
1691  FILE * output = NULL;
1692  GBool bClose = gFalse;
1693  Catalog *catalog = doc->getCatalog();
1694 
1695  if (!ok)
1696  return gFalse;
1697 
1698  Outline *outline = doc->getOutline();
1699  if (!outline)
1700  return gFalse;
1701 
1702  GooList *outlines = outline->getItems();
1703  if (!outlines)
1704  return gFalse;
1705 
1706  if (!complexMode || xml)
1707  {
1708  output = page;
1709  }
1710  else if (complexMode && !xml)
1711  {
1712  if (noframes)
1713  {
1714  output = page;
1715  fputs("<hr/>\n", output);
1716  }
1717  else
1718  {
1719  GooString *str = Docname->copy();
1720  str->append("-outline.html");
1721  output = fopen(str->getCString(), "w");
1722  delete str;
1723  if (output == NULL)
1724  return gFalse;
1725  bClose = gTrue;
1726 
1727  GooString *htmlEncoding =
1728  HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
1729 
1730  fprintf(output, "<html xmlns=\"http://www.w3.org/1999/xhtml\" " \
1731  "lang=\"\" xml:lang=\"\">\n" \
1732  "<head>\n" \
1733  "<title>Document Outline</title>\n" \
1734  "<meta http-equiv=\"Content-Type\" content=\"text/html; " \
1735  "charset=%s\"/>\n" \
1736  "</head>\n<body>\n", htmlEncoding->getCString());
1737  delete htmlEncoding;
1738  }
1739  }
1740 
1741  if (!xml)
1742  {
1743  GBool done = newHtmlOutlineLevel(output, outlines, catalog);
1744  if (done && !complexMode)
1745  fputs("<hr/>\n", output);
1746 
1747  if (bClose)
1748  {
1749  fputs("</body>\n</html>\n", output);
1750  fclose(output);
1751  }
1752  }
1753  else
1754  newXmlOutlineLevel(output, outlines, catalog);
1755 
1756  return gTrue;
1757 #endif
1758 }
1759 
1760 GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
1761 {
1762 #ifdef DISABLE_OUTLINE
1763  return gFalse;
1764 #else
1765  GBool atLeastOne = gFalse;
1766 
1767  if (level == 1)
1768  {
1769  fputs("<a name=\"outline\"></a>", output);
1770  fputs("<h1>Document Outline</h1>\n", output);
1771  }
1772  fputs("<ul>\n",output);
1773 
1774  for (int i = 0; i < outlines->getLength(); i++)
1775  {
1776  OutlineItem *item = (OutlineItem*)outlines->get(i);
1777  GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
1778  item->getTitleLength());
1779 
1780  GooString *linkName = NULL;;
1781  int page = getOutlinePageNum(item);
1782  if (page > 0)
1783  {
1784  /* complex simple
1785  frames file-4.html files.html#4
1786  noframes file.html#4 file.html#4
1787  */
1788  linkName=basename(Docname);
1789  GooString *str=GooString::fromInt(page);
1790  if (noframes) {
1791  linkName->append(".html#");
1792  linkName->append(str);
1793  } else {
1794  if( complexMode ) {
1795  linkName->append("-");
1796  linkName->append(str);
1797  linkName->append(".html");
1798  } else {
1799  linkName->append("s.html#");
1800  linkName->append(str);
1801  }
1802  }
1803  delete str;
1804  }
1805 
1806  fputs("<li>",output);
1807  if (linkName)
1808  fprintf(output,"<a href=\"%s\">", linkName->getCString());
1809  fputs(titleStr->getCString(),output);
1810  if (linkName) {
1811  fputs("</a>",output);
1812  delete linkName;
1813  }
1814  delete titleStr;
1815  atLeastOne = gTrue;
1816 
1817  item->open();
1818  if (item->hasKids() && item->getKids())
1819  {
1820  fputs("\n",output);
1821  newHtmlOutlineLevel(output, item->getKids(), catalog, level+1);
1822  }
1823  item->close();
1824  fputs("</li>\n",output);
1825  }
1826  fputs("</ul>\n",output);
1827 
1828  return atLeastOne;
1829 #endif
1830 }
1831 
1832 void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog)
1833 {
1834 #ifndef DISABLE_OUTLINE
1835  fputs("<outline>\n", output);
1836 
1837  for (int i = 0; i < outlines->getLength(); i++)
1838  {
1839  OutlineItem *item = (OutlineItem*)outlines->get(i);
1840  GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
1841  item->getTitleLength());
1842  int page = getOutlinePageNum(item);
1843  if (page > 0)
1844  {
1845  fprintf(output, "<item page=\"%d\">%s</item>\n",
1846  page, titleStr->getCString());
1847  }
1848  else
1849  {
1850  fprintf(output, "<item>%s</item>\n", titleStr->getCString());
1851  }
1852  delete titleStr;
1853 
1854  item->open();
1855  if (item->hasKids() && item->getKids())
1856  {
1857  newXmlOutlineLevel(output, item->getKids(), catalog);
1858  }
1859  item->close();
1860  }
1861 
1862  fputs("</outline>\n", output);
1863 #endif
1864 }
1865 
1866 #ifndef DISABLE_OUTLINE
1867 int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
1868 {
1869  LinkAction *action = item->getAction();
1870  LinkGoTo *link = NULL;
1871  LinkDest *linkdest = NULL;
1872  int pagenum = -1;
1873 
1874  if (!action || action->getKind() != actionGoTo)
1875  return pagenum;
1876 
1877  link = dynamic_cast<LinkGoTo*>(action);
1878 
1879  if (!link || !link->isOk())
1880  return pagenum;
1881 
1882  if (link->getDest())
1883  linkdest = link->getDest()->copy();
1884  else if (link->getNamedDest())
1885  linkdest = catalog->findDest(link->getNamedDest());
1886 
1887  if (!linkdest)
1888  return pagenum;
1889 
1890  if (linkdest->isPageRef()) {
1891  Ref pageref = linkdest->getPageRef();
1892  pagenum = catalog->findPage(pageref.num, pageref.gen);
1893  } else {
1894  pagenum = linkdest->getPageNum();
1895  }
1896 
1897  delete linkdest;
1898  return pagenum;
1899 }
1900 #endif
Definition: Object.h:123
HtmlString(GfxState *state, double fontSize, HtmlFontAccu *fonts)
void conv()
void transform(double x1, double y1, double *x2, double *y2)
Definition: GfxState.h:1487
friend class HtmlPage
HtmlImage(GooString *_fName, GfxState *state)
GBool hasKids()
Definition: Outline.h:72
virtual void reset()=0
HtmlMetaVar(const char *_name, const char *_content)
Definition: Error.h:47
Guchar * getLine()
Definition: Stream.cc:512
GBool isBold() const
Definition: HtmlFonts.h:84
PDFRectangle * getRect() const
Definition: Annot.h:603
double getPageHeight()
Definition: GfxState.h:1421
static GooString * HtmlFilter(Unicode *u, int uLen)
Definition: HtmlFonts.cc:242
GooString * getURI()
Definition: Link.h:248
GfxColorComp b
Definition: GfxState.h:156
virtual void close()
Definition: Stream.cc:129
double getHorizScaling()
Definition: GfxState.h:1467
virtual int getChar()=0
GBool complexMode
Definition: pdftohtml.cc:74
void getRGB(Guchar *x, GfxRGB *rgb)
Definition: GfxState.cc:6058
void addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, Unicode *u, int uLen)
char * getCString()
Definition: GooString.h:121
HtmlOutputDev(Catalog *catalogA, char *fileName, char *title, char *author, char *keywords, char *subject, char *date, char *extension, GBool rawOrder, int firstPage=1, GBool outline=0)
GfxFontType getType()
Definition: GfxFont.h:207
GooString * getTextEncodingName()
Definition: Object.h:77
void endString()
void transformDelta(double x1, double y1, double *x2, double *y2)
Definition: GfxState.h:1490
Definition: XRef.h:94
GfxColorComp GfxGray
Definition: GfxState.h:149
Definition: PDFDoc.h:80
GBool isEqualIgnoreBold(const HtmlFont &x) const
Definition: HtmlFonts.cc:214
GooString * toString()
double * getTextMat()
Definition: GfxState.h:1464
GBool dumpDocOutline(PDFDoc *doc)
void setRotMat(const double *const mat)
Definition: HtmlFonts.h:89
virtual LinkActionKind getKind()=0
GBool isMatRotOrSkew(const double *const mat)
Definition: HtmlUtils.h:38
GBool noframes
Definition: pdftohtml.cc:79
Definition: Link.h:234
double getCurY()
Definition: GfxState.h:1475
int getRender()
Definition: GfxState.h:1470
LinkDest * getDest()
Definition: Link.h:191
GBool xml
Definition: pdftohtml.cc:81
GBool singleHtml
Definition: pdftohtml.cc:75
Outline * getOutline()
Definition: PDFDoc.cc:1899
const HtmlFont & getFont() const
Definition: HtmlOutputDev.h:92
int AddFont(const HtmlFont &font)
Definition: HtmlFonts.cc:299
double getAscent()
Definition: GfxFont.h:247
GooString * del(int i, int n=1)
Definition: GooString.cc:789
void gfree(void *p)
Definition: gmem.cc:251
#define DEBUG
int cmp(GooString *str) const
Definition: GooString.cc:824
int findPage(int num, int gen)
Definition: Catalog.cc:441
void * get(int i)
Definition: GooList.h:60
void updateFont(GfxState *state)
#define gTrue
Definition: gtypes.h:35
GooString * getNamedDest()
Definition: Link.h:161
#define xoutRound(x)
Definition: HtmlFonts.cc:66
GooString * insert(int i, char c)
Definition: GooString.cc:770
GooString * getFileName()
Definition: Link.h:190
GBool stout
Definition: pdftohtml.cc:80
Definition: Link.h:96
void normalizeRotMat(double *mat)
Definition: HtmlUtils.h:45
Ref getPageRef()
Definition: Link.h:112
unsigned char Guchar
Definition: gtypes.h:46
virtual void drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
Definition: Stream.h:62
void * grealloc(void *p, size_t size)
Definition: gmem.cc:172
GBool getErrQuiet()
double xMin
GooList * getItems()
Definition: Outline.h:46
GfxFont * getFont()
Definition: GfxState.h:1462
void dump(FILE *f, int pageNum)
int getPageNum()
Definition: Link.h:111
int getLength()
Definition: GooList.h:51
LinkDest * findDest(GooString *name)
Definition: Catalog.cc:452
void reset()
Definition: Stream.cc:489
virtual GBool isOk()
Definition: Link.h:156
unsigned int CharCode
Definition: CharTypes.h:22
int gen
Definition: Object.h:79
virtual Stream * getNextStream()
Definition: Stream.h:224
void setLineSize(int _lineSize)
Definition: HtmlFonts.h:88
void getGray(Guchar *x, GfxGray *gray)
Definition: GfxState.cc:6041
virtual void startPage(int pageNum, GfxState *state, XRef *xref)
static void clear()
Definition: HtmlFonts.cc:191
double * getFontMatrix()
Definition: GfxFont.h:241
unsigned int Unicode
Definition: CharTypes.h:13
Definition: Link.h:146
GBool noMerge
Definition: pdftohtml.cc:87
GBool showHidden
Definition: pdftohtml.cc:86
double getY1()
Definition: GfxState.h:1417
double getTransformedFontSize()
Definition: GfxState.cc:6988
GooString * fName
int size() const
Definition: HtmlFonts.h:114
double getCharSpace()
Definition: GfxState.h:1465
void endString()
static GooString * fromInt(int x)
Definition: GooString.cc:238
void coalesce()
GBool is_within(double a, double thresh, double b)
Definition: HtmlUtils.h:26
void textTransformDelta(double x1, double y1, double *x2, double *y2)
Definition: GfxState.h:1496
LinkAction * getAction()
Definition: Outline.h:70
HtmlPage(GBool rawOrder, char *imgExtVal)
LinkDest * copy()
Definition: Link.h:103
GooString * CSStyle(int i, int j=0)
Definition: HtmlFonts.cc:314
void open()
Definition: Outline.cc:177
Unicode * getTitle()
Definition: Outline.h:68
int getLineSize() const
Definition: HtmlFonts.h:87
int getNumPixelComps()
Definition: GfxState.h:1173
#define SLASH
Definition: HtmlOutputDev.h:55
Catalog * getCatalog()
Definition: PDFDoc.h:121
void addImage(GooString *fname, GfxState *state)
void beginString(GfxState *state, GooString *s)
bool GBool
Definition: gtypes.h:34
virtual void updateFont(GfxState *state)
double xMax
const char * name
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg)
Definition: OutputDev.cc:98
double yMin
double wordBreakThreshold
Definition: pdftohtml.cc:84
void close()
Definition: Stream.cc:493
LinkDest * getDest()
Definition: Link.h:160
#define DOCTYPE
Definition: HtmlOutputDev.h:60
virtual void beginString(GfxState *state, GooString *s)
#define deleteGooList(list, T)
Definition: GooList.h:108
HtmlLink * getLink()
Definition: HtmlOutputDev.h:91
GlobalParams * globalParams
GfxColorComp r
Definition: GfxState.h:156
double getX1()
Definition: GfxState.h:1416
GooString * append(char c)
Definition: GooString.cc:275
void addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u)
static GooString * simple(HtmlFont *font, Unicode *content, int uLen)
Definition: HtmlFonts.cc:276
GfxColorComp g
Definition: GfxState.h:156
void close()
Definition: Outline.cc:183
double getDescent()
Definition: GfxFont.h:248
virtual void endPage()
double getPageWidth()
Definition: GfxState.h:1420
int getTitleLength()
Definition: Outline.h:69
GBool printHtml
Definition: pdftohtml.cc:73
GBool isItalic() const
Definition: HtmlFonts.h:83
virtual void endString(GfxState *state)
GBool isPageRef()
Definition: Link.h:110
virtual StreamKind getKind()=0
int getLength() const
Definition: GooString.h:118
int num
Definition: Object.h:78
double getCurX()
Definition: GfxState.h:1474
virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen)
GBool ignore
Definition: pdftohtml.cc:76
void CDECL error(ErrorCategory category, Goffset pos, const char *msg,...)
Definition: Error.cc:62
void * gmalloc(size_t size)
Definition: gmem.cc:119
double yMax
GBool rot_matrices_equal(const double *const mat0, const double *const mat1)
Definition: HtmlUtils.h:30
static GooString * format(const char *fmt,...) GOOSTRING_FORMAT
Definition: GooString.cc:246
void getFillRGB(GfxRGB *rgb)
Definition: GfxState.h:1429
GBool printCommands
Definition: pdftohtml.cc:71
HtmlFont * Get(int i)
Definition: HtmlFonts.h:110
GooList * getKids()
Definition: Outline.h:73
virtual ~HtmlOutputDev()
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg)
GooString * getFileName()
Definition: Link.h:221
virtual void drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
Definition: OutputDev.cc:123
void clear()
Definition: Stream.h:98
#define gFalse
Definition: gtypes.h:36