"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/xpdf/HTMLGen.cc" (12 Feb 2021, 8472 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "HTMLGen.cc", see the Fossies "Dox" file reference documentation.

    1 //========================================================================
    2 //
    3 // Modified pdftohtml (constans@molspaces.com, 2014)
    4 //
    5 // HTMLGen.cc
    6 //
    7 // Copyright 2010 Glyph & Cog, LLC
    8 //
    9 //========================================================================
   10 
   11 #include <aconf.h>
   12 
   13 #ifdef USE_GCC_PRAGMAS
   14 #pragma implementation
   15 #endif
   16 
   17 #include <stdlib.h>
   18 
   19 #include "gmem.h"
   20 #include "GString.h"
   21 #include "GList.h"
   22 #include "PDFDoc.h"
   23 #include "TextOutputDev.h"
   24 #include "SplashOutputDev.h"
   25 #include "ErrorCodes.h"
   26 
   27 #include "HTMLGen.h"
   28 
   29 #ifdef _WIN32
   30 #  define strcasecmp stricmp
   31 #  define strncasecmp strnicmp
   32 #endif
   33 
   34 //------------------------------------------------------------------------
   35 
   36 // Map Unicode indexes from the private use area, following the Adobe
   37 // Glyph list.
   38 #define privateUnicodeMapStart 0xf6f9
   39 #define privateUnicodeMapEnd   0xf7ff
   40 static int
   41 privateUnicodeMap[privateUnicodeMapEnd - privateUnicodeMapStart + 1] =
   42 {
   43     0x0141, 0x0152, 0,      0,      0x0160, 0,      0x017d,         // f6f9
   44     0,      0,      0,      0,      0,      0,      0,      0,      // f700
   45     0,      0,      0,      0,      0,      0,      0,      0,
   46     0,      0,      0,      0,      0,      0,      0,      0,      // f710
   47     0,      0,      0,      0,      0,      0,      0,      0,
   48     0,      0x0021, 0,      0,      0x0024, 0,      0x0026, 0,      // f720
   49     0,      0,      0,      0,      0,      0,      0,      0,
   50     0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // f730
   51     0x0038, 0x0039, 0,      0,      0,      0,      0,      0x003f,
   52     0,      0,      0,      0,      0,      0,      0,      0,      // f740
   53     0,      0,      0,      0,      0,      0,      0,      0,
   54     0,      0,      0,      0,      0,      0,      0,      0,      // f750
   55     0,      0,      0,      0,      0,      0,      0,      0,
   56     0,      0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // f760
   57     0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
   58     0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // f770
   59     0x0058, 0x0059, 0x005a, 0,      0,      0,      0,      0,
   60     0,      0,      0,      0,      0,      0,      0,      0,      // f780
   61     0,      0,      0,      0,      0,      0,      0,      0,
   62     0,      0,      0,      0,      0,      0,      0,      0,      // f790
   63     0,      0,      0,      0,      0,      0,      0,      0,
   64     0,      0x00a1, 0x00a2, 0,      0,      0,      0,      0,      // f7a0
   65     0,      0,      0,      0,      0,      0,      0,      0,
   66     0,      0,      0,      0,      0,      0,      0,      0,      // f7b0
   67     0,      0,      0,      0,      0,      0,      0,      0x00bf,
   68     0,      0,      0,      0,      0,      0,      0,      0,      // f7c0
   69     0,      0,      0,      0,      0,      0,      0,      0,
   70     0,      0,      0,      0,      0,      0,      0,      0,      // f7d0
   71     0,      0,      0,      0,      0,      0,      0,      0,
   72     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // f7e0
   73     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   74     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0,      // f7f0
   75     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178
   76 };
   77 
   78 //------------------------------------------------------------------------
   79 
   80 
   81 
   82 //------------------------------------------------------------------------
   83 
   84 HTMLGen::HTMLGen()
   85 {
   86     TextOutputControl textOutControl;
   87     SplashColor paperColor;
   88 
   89     ok = gTrue;
   90 
   91     backgroundResolution = 150;
   92     drawInvisibleText = gTrue;
   93 
   94     // set up the TextOutputDev
   95     textOutControl.mode = textOutReadingOrder;
   96     textOutControl.html = gTrue;
   97     textOut = new TextOutputDev(NULL, &textOutControl, gFalse);
   98     if (!textOut->isOk())
   99     {
  100         ok = gFalse;
  101     }
  102 
  103     // set up the SplashOutputDev
  104     paperColor[0] = paperColor[1] = paperColor[2] = 0xff;
  105     splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor);
  106     splashOut->setSkipText(gTrue, gFalse);
  107 }
  108 
  109 HTMLGen::~HTMLGen()
  110 {
  111     delete textOut;
  112     delete splashOut;
  113 }
  114 
  115 void HTMLGen::startDoc(PDFDoc* docA)
  116 {
  117     doc = docA;
  118     splashOut->startDoc(doc->getXRef());
  119 }
  120 
  121 int HTMLGen::convertPage(int pg, int (*writeHTML)(void* stream, const char* data, int size), void* htmlStream)
  122 {
  123     TextPage* text;
  124     GList* cols, *pars, *lines, *words;
  125     TextColumn* col;
  126     TextParagraph* par;
  127     TextLine* line;
  128     TextWord* word1;
  129     GString* s;
  130     double base;
  131     int colIdx, parIdx, lineIdx, wordIdx;
  132     int i, u;
  133     const char parsep('\n');
  134 
  135     // get the PDF text
  136     doc->displayPage(textOut, pg, 72, 72, 0, gFalse, gTrue, gFalse);
  137     text = textOut->takeText();
  138 
  139     // generate the PDF text
  140     cols = text->makeColumns();
  141     for (colIdx = 0; colIdx < cols->getLength(); ++colIdx)
  142     {
  143         col = (TextColumn*)cols->get(colIdx);
  144         pars = col->getParagraphs();
  145         for (parIdx = 0; parIdx < pars->getLength(); ++parIdx)
  146         {
  147             par = (TextParagraph*)pars->get(parIdx);
  148             lines = par->getLines();
  149             for (lineIdx = 0; lineIdx < lines->getLength(); ++lineIdx)
  150             {
  151                 line = (TextLine*)lines->get(lineIdx);
  152                 words = line->getWords();
  153                 base = line->getBaseline();
  154                 s = new GString();
  155                 for (wordIdx = 0; wordIdx < words->getLength(); ++wordIdx)
  156                 {
  157                     word1 = (TextWord*)words->get(wordIdx);
  158                     if (word1->getBaseline() - base < -0.55)
  159                         s->append(' ');  // superscript
  160                     for (i = 0; i < word1->getLength(); ++i)
  161                     {
  162                         u = word1->getChar(i);
  163                         if (u >= privateUnicodeMapStart &&
  164                             u <= privateUnicodeMapEnd &&
  165                             privateUnicodeMap[u - privateUnicodeMapStart])
  166                         {
  167                             u = privateUnicodeMap[u - privateUnicodeMapStart];
  168                         }
  169                         if (u <= 0x7f)
  170                         {
  171                             s->append((char)u);
  172                         }
  173                         else if (u <= 0x7ff)
  174                         {
  175                             s->append((char)(0xc0 + (u >> 6)));
  176                             s->append((char)(0x80 + (u & 0x3f)));
  177                         }
  178                         else if (u <= 0xffff)
  179                         {
  180                             s->append((char)0xe0 + (u >> 12));
  181                             s->append((char)0x80 + ((u >> 6) & 0x3f));
  182                             s->append((char)0x80 + (u & 0x3f));
  183                         }
  184                         else if (u <= 0x1fffff)
  185                         {
  186                             s->append((char)0xf0 + (u >> 18));
  187                             s->append((char)0x80 + ((u >> 12) & 0x3f));
  188                             s->append((char)0x80 + ((u >> 6) & 0x3f));
  189                             s->append((char)0x80 + (u & 0x3f));
  190                         }
  191                         else if (u <= 0x3ffffff)
  192                         {
  193                             s->append((char)0xf8 + (u >> 24));
  194                             s->append((char)0x80 + ((u >> 18) & 0x3f));
  195                             s->append((char)0x80 + ((u >> 12) & 0x3f));
  196                             s->append((char)0x80 + ((u >> 6) & 0x3f));
  197                             s->append((char)0x80 + (u & 0x3f));
  198                         }
  199                         else if (u <= 0x7fffffff)
  200                         {
  201                             s->append((char)0xfc + (u >> 30));
  202                             s->append((char)0x80 + ((u >> 24) & 0x3f));
  203                             s->append((char)0x80 + ((u >> 18) & 0x3f));
  204                             s->append((char)0x80 + ((u >> 12) & 0x3f));
  205                             s->append((char)0x80 + ((u >> 6) & 0x3f));
  206                             s->append((char)0x80 + (u & 0x3f));
  207                         }
  208                     }
  209                     if (word1->getSpaceAfter())
  210                         s->append(' ');
  211                 }
  212                 s->append('\n');
  213                 writeHTML(htmlStream, s->getCString(), s->getLength());
  214                 delete s;
  215             }
  216             writeHTML(htmlStream, &parsep, 1);
  217         }
  218     }
  219     delete text;
  220     deleteGList(cols, TextColumn);
  221 
  222     return errNone;
  223 }