"Fossies" - the Fresh Open Source Software Archive 
Member "cb2bib-2.0.1/xpdf/HTMLGen.cc" (12 Feb 2021, 8472 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "HTMLGen.cc" see the
Fossies "Dox" file reference documentation.
1 //========================================================================
2 //
3 // Modified pdftohtml (constans@molspaces.com, 2014)
4 //
5 // HTMLGen.cc
6 //
7 // Copyright 2010 Glyph & Cog, LLC
8 //
9 //========================================================================
10
11 #include <aconf.h>
12
13 #ifdef USE_GCC_PRAGMAS
14 #pragma implementation
15 #endif
16
17 #include <stdlib.h>
18
19 #include "gmem.h"
20 #include "GString.h"
21 #include "GList.h"
22 #include "PDFDoc.h"
23 #include "TextOutputDev.h"
24 #include "SplashOutputDev.h"
25 #include "ErrorCodes.h"
26
27 #include "HTMLGen.h"
28
29 #ifdef _WIN32
30 # define strcasecmp stricmp
31 # define strncasecmp strnicmp
32 #endif
33
34 //------------------------------------------------------------------------
35
// Map Unicode indexes from the private use area, following the Adobe
// Glyph list.
//
// The table covers the code points privateUnicodeMapStart through
// privateUnicodeMapEnd inclusive; the entry for code point u lives at
// index (u - privateUnicodeMapStart).  A zero entry means the table
// offers no replacement for that code point.  The table is read-only,
// hence const.
#define privateUnicodeMapStart 0xf6f9
#define privateUnicodeMapEnd   0xf7ff
static const int
privateUnicodeMap[privateUnicodeMapEnd - privateUnicodeMapStart + 1] =
{
    0x0141, 0x0152, 0, 0, 0x0160, 0, 0x017d,                        // f6f9
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f700
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f708
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f710
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f718
    0, 0x0021, 0, 0, 0x0024, 0, 0x0026, 0,                          // f720
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f728
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // f730
    0x0038, 0x0039, 0, 0, 0, 0, 0, 0x003f,                          // f738
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f740
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f748
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f750
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f758
    0, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,      // f760
    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, // f768
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // f770
    0x0058, 0x0059, 0x005a, 0, 0, 0, 0, 0,                          // f778
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f780
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f788
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f790
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f798
    0, 0x00a1, 0x00a2, 0, 0, 0, 0, 0,                               // f7a0
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7a8
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7b0
    0, 0, 0, 0, 0, 0, 0, 0x00bf,                                    // f7b8
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7c0
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7c8
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7d0
    0, 0, 0, 0, 0, 0, 0, 0,                                         // f7d8
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // f7e0
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // f7e8
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0,      // f7f0
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178  // f7f8
};
77
78 //------------------------------------------------------------------------
79
80
81
82 //------------------------------------------------------------------------
83
84 HTMLGen::HTMLGen()
85 {
86 TextOutputControl textOutControl;
87 SplashColor paperColor;
88
89 ok = gTrue;
90
91 backgroundResolution = 150;
92 drawInvisibleText = gTrue;
93
94 // set up the TextOutputDev
95 textOutControl.mode = textOutReadingOrder;
96 textOutControl.html = gTrue;
97 textOut = new TextOutputDev(NULL, &textOutControl, gFalse);
98 if (!textOut->isOk())
99 {
100 ok = gFalse;
101 }
102
103 // set up the SplashOutputDev
104 paperColor[0] = paperColor[1] = paperColor[2] = 0xff;
105 splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor);
106 splashOut->setSkipText(gTrue, gFalse);
107 }
108
109 HTMLGen::~HTMLGen()
110 {
111 delete textOut;
112 delete splashOut;
113 }
114
115 void HTMLGen::startDoc(PDFDoc* docA)
116 {
117 doc = docA;
118 splashOut->startDoc(doc->getXRef());
119 }
120
121 int HTMLGen::convertPage(int pg, int (*writeHTML)(void* stream, const char* data, int size), void* htmlStream)
122 {
123 TextPage* text;
124 GList* cols, *pars, *lines, *words;
125 TextColumn* col;
126 TextParagraph* par;
127 TextLine* line;
128 TextWord* word1;
129 GString* s;
130 double base;
131 int colIdx, parIdx, lineIdx, wordIdx;
132 int i, u;
133 const char parsep('\n');
134
135 // get the PDF text
136 doc->displayPage(textOut, pg, 72, 72, 0, gFalse, gTrue, gFalse);
137 text = textOut->takeText();
138
139 // generate the PDF text
140 cols = text->makeColumns();
141 for (colIdx = 0; colIdx < cols->getLength(); ++colIdx)
142 {
143 col = (TextColumn*)cols->get(colIdx);
144 pars = col->getParagraphs();
145 for (parIdx = 0; parIdx < pars->getLength(); ++parIdx)
146 {
147 par = (TextParagraph*)pars->get(parIdx);
148 lines = par->getLines();
149 for (lineIdx = 0; lineIdx < lines->getLength(); ++lineIdx)
150 {
151 line = (TextLine*)lines->get(lineIdx);
152 words = line->getWords();
153 base = line->getBaseline();
154 s = new GString();
155 for (wordIdx = 0; wordIdx < words->getLength(); ++wordIdx)
156 {
157 word1 = (TextWord*)words->get(wordIdx);
158 if (word1->getBaseline() - base < -0.55)
159 s->append(' '); // superscript
160 for (i = 0; i < word1->getLength(); ++i)
161 {
162 u = word1->getChar(i);
163 if (u >= privateUnicodeMapStart &&
164 u <= privateUnicodeMapEnd &&
165 privateUnicodeMap[u - privateUnicodeMapStart])
166 {
167 u = privateUnicodeMap[u - privateUnicodeMapStart];
168 }
169 if (u <= 0x7f)
170 {
171 s->append((char)u);
172 }
173 else if (u <= 0x7ff)
174 {
175 s->append((char)(0xc0 + (u >> 6)));
176 s->append((char)(0x80 + (u & 0x3f)));
177 }
178 else if (u <= 0xffff)
179 {
180 s->append((char)0xe0 + (u >> 12));
181 s->append((char)0x80 + ((u >> 6) & 0x3f));
182 s->append((char)0x80 + (u & 0x3f));
183 }
184 else if (u <= 0x1fffff)
185 {
186 s->append((char)0xf0 + (u >> 18));
187 s->append((char)0x80 + ((u >> 12) & 0x3f));
188 s->append((char)0x80 + ((u >> 6) & 0x3f));
189 s->append((char)0x80 + (u & 0x3f));
190 }
191 else if (u <= 0x3ffffff)
192 {
193 s->append((char)0xf8 + (u >> 24));
194 s->append((char)0x80 + ((u >> 18) & 0x3f));
195 s->append((char)0x80 + ((u >> 12) & 0x3f));
196 s->append((char)0x80 + ((u >> 6) & 0x3f));
197 s->append((char)0x80 + (u & 0x3f));
198 }
199 else if (u <= 0x7fffffff)
200 {
201 s->append((char)0xfc + (u >> 30));
202 s->append((char)0x80 + ((u >> 24) & 0x3f));
203 s->append((char)0x80 + ((u >> 18) & 0x3f));
204 s->append((char)0x80 + ((u >> 12) & 0x3f));
205 s->append((char)0x80 + ((u >> 6) & 0x3f));
206 s->append((char)0x80 + (u & 0x3f));
207 }
208 }
209 if (word1->getSpaceAfter())
210 s->append(' ');
211 }
212 s->append('\n');
213 writeHTML(htmlStream, s->getCString(), s->getLength());
214 delete s;
215 }
216 writeHTML(htmlStream, &parsep, 1);
217 }
218 }
219 delete text;
220 deleteGList(cols, TextColumn);
221
222 return errNone;
223 }