"Fossies" - the Fresh Open Source Software Archive

Member "libextractor-1.11/src/plugins/ole2_extractor.c" (30 Jan 2021, 27803 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "ole2_extractor.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.8_vs_1.9.

    1 /*
    2      This file is part of libextractor.
    3      Copyright (C) 2004, 2005, 2006, 2007, 2009, 2012, 2018 Vidyut Samanta and Christian Grothoff
    4 
    5      libextractor is free software; you can redistribute it and/or modify
    6      it under the terms of the GNU General Public License as published
    7      by the Free Software Foundation; either version 3, or (at your
    8      option) any later version.
    9 
   10      libextractor is distributed in the hope that it will be useful, but
   11      WITHOUT ANY WARRANTY; without even the implied warranty of
   12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13      General Public License for more details.
   14 
   15      You should have received a copy of the GNU General Public License
   16      along with libextractor; see the file COPYING.  If not, write to the
   17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   18      Boston, MA 02110-1301, USA.
   19 
   20      This code makes extensive use of libgsf
   21      -- the Gnome Structured File Library
   22      Copyright Copyright (C) 2002-2004 Jody Goldberg (jody@gnome.org)
   23 
   24      Part of this code was adapted from wordleaker.
   25 */
   26 /**
   27  * @file plugins/ole2_extractor.c
   28  * @brief plugin to support OLE2 (DOC, XLS, etc.) files
   29  * @author Christian Grothoff
   30  */
   31 #include "platform.h"
   32 #include "extractor.h"
   33 #include "convert.h"
   34 #include <glib-object.h>
   35 #include <string.h>
   36 #include <stdio.h>
   37 #include <ctype.h>
   38 #include <gsf/gsf-utils.h>
   39 #include <gsf/gsf-input-impl.h>
   40 #include <gsf/gsf-input-memory.h>
   41 #include <gsf/gsf-impl-utils.h>
   42 #include <gsf/gsf-infile.h>
   43 #include <gsf/gsf-infile-msole.h>
   44 #include <gsf/gsf-msole-utils.h>
   45 
   46 
   47 /**
   48  * Set to 1 to use our own GsfInput subclass which supports seeking
   49  * and thus can handle very large files.  Set to 0 to use the simple
   50  * gsf in-memory buffer (which can only access the first ~16k) for
   51  * debugging.
   52  */
   53 #define USE_LE_INPUT 1
   54 
   55 
   56 /**
   57  * Give the given UTF8 string to LE by calling 'proc'.
   58  *
   59  * @param proc callback to invoke
   60  * @param proc_cls closure for proc
   61  * @param phrase metadata string to pass; may include spaces
   62  *        just double-quotes or just a space in a double quote;
   63  *        in those cases, nothing should be done
   64  * @param type meta data type to use
   65  * @return if 'proc' returned 1, otherwise 0
   66  */
   67 static int
   68 add_metadata (EXTRACTOR_MetaDataProcessor proc,
   69               void *proc_cls,
   70               const char *phrase,
   71               enum EXTRACTOR_MetaType type)
   72 {
   73   char *tmp;
   74   int ret;
   75 
   76   if (0 == strlen (phrase))
   77     return 0;
   78   if (0 == strcmp (phrase, "\"\""))
   79     return 0;
   80   if (0 == strcmp (phrase, "\" \""))
   81     return 0;
   82   if (0 == strcmp (phrase, " "))
   83     return 0;
   84   if (NULL == (tmp = strdup (phrase)))
   85     return 0;
   86 
   87   while ( (strlen (tmp) > 0) &&
   88           (isblank ((unsigned char) tmp [strlen (tmp) - 1])) )
   89     tmp [strlen (tmp) - 1] = '\0';
   90   ret = proc (proc_cls,
   91               "ole2",
   92               type,
   93               EXTRACTOR_METAFORMAT_UTF8,
   94               "text/plain",
   95               tmp,
   96               strlen (tmp) + 1);
   97   free (tmp);
   98   return ret;
   99 }
  100 
  101 
  102 /**
  103  * Entry in the map from OLE meta type  strings
  104  * to LE types.
  105  */
  106 struct Matches
  107 {
  108   /**
  109    * OLE description.
  110    */
  111   const char *text;
  112 
  113   /**
  114    * Corresponding LE type.
  115    */
  116   enum EXTRACTOR_MetaType type;
  117 };
  118 
  119 
  120 static struct Matches tmap[] = {
  121   { "Title", EXTRACTOR_METATYPE_TITLE },
  122   { "PresentationFormat", EXTRACTOR_METATYPE_FORMAT },
  123   { "Category", EXTRACTOR_METATYPE_SECTION },
  124   { "Manager", EXTRACTOR_METATYPE_MANAGER },
  125   { "Company", EXTRACTOR_METATYPE_COMPANY },
  126   { "Subject", EXTRACTOR_METATYPE_SUBJECT },
  127   { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME },
  128   { "Keywords", EXTRACTOR_METATYPE_KEYWORDS },
  129   { "Comments", EXTRACTOR_METATYPE_COMMENT },
  130   { "Template", EXTRACTOR_METATYPE_TEMPLATE },
  131   { "NumPages", EXTRACTOR_METATYPE_PAGE_COUNT },
  132   { "AppName", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE },
  133   { "RevisionNumber", EXTRACTOR_METATYPE_REVISION_NUMBER },
  134   { "NumBytes", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE },
  135   { "CreatedTime", EXTRACTOR_METATYPE_CREATION_DATE },
  136   { "LastSavedTime", EXTRACTOR_METATYPE_MODIFICATION_DATE },
  137   { "gsf:company", EXTRACTOR_METATYPE_COMPANY },
  138   { "gsf:character-count", EXTRACTOR_METATYPE_CHARACTER_COUNT },
  139   { "gsf:page-count", EXTRACTOR_METATYPE_PAGE_COUNT },
  140   { "gsf:line-count", EXTRACTOR_METATYPE_LINE_COUNT },
  141   { "gsf:word-count", EXTRACTOR_METATYPE_WORD_COUNT },
  142   { "gsf:paragraph-count", EXTRACTOR_METATYPE_PARAGRAPH_COUNT },
  143   { "gsf:last-saved-by", EXTRACTOR_METATYPE_LAST_SAVED_BY },
  144   { "gsf:manager", EXTRACTOR_METATYPE_MANAGER },
  145   { "dc:title", EXTRACTOR_METATYPE_TITLE },
  146   { "dc:creator", EXTRACTOR_METATYPE_CREATOR },
  147   { "dc:date", EXTRACTOR_METATYPE_UNKNOWN_DATE },
  148   { "dc:subject", EXTRACTOR_METATYPE_SUBJECT },
  149   { "dc:keywords", EXTRACTOR_METATYPE_KEYWORDS },
  150   { "dc:last-printed", EXTRACTOR_METATYPE_LAST_PRINTED },
  151   { "dc:description", EXTRACTOR_METATYPE_DESCRIPTION },
  152   { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE },
  153   { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
  154   { "meta:template", EXTRACTOR_METATYPE_TEMPLATE },
  155   { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES },
  156   /* { "Dictionary", EXTRACTOR_METATYPE_LANGUAGE },  */
  157   /* { "gsf:security", EXTRACTOR_SECURITY }, */
  158   /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */
  159   /* { "meta:editing-duration", EXTRACTOR_METATYPE_TOTAL_EDITING_TIME }, // encoding? */
  160   /* { "msole:codepage", EXTRACTOR_CHARACTER_SET }, */
  161   { NULL, 0 }
  162 };
  163 
  164 
  165 /**
  166  * Closure for 'process_metadata'.
  167  */
  168 struct ProcContext
  169 {
  170   /**
  171    * Function to call for meta data that was found.
  172    */
  173   EXTRACTOR_MetaDataProcessor proc;
  174 
  175   /**
  176    * Closure for @e proc.
  177    */
  178   void *proc_cls;
  179 
  180   /**
  181    * Return value; 0 to continue to extract, 1 if we are done
  182    */
  183   int ret;
  184 };
  185 
  186 
  187 /**
  188  * Function invoked by 'gst_msole_metadata_read' with
  189  * metadata found in the document.
  190  *
  191  * @param key 'const char *' describing the meta data
  192  * @param value the UTF8 representation of the meta data
  193  * @param user_data our 'struct ProcContext' (closure)
  194  */
  195 static void
  196 process_metadata (gpointer key,
  197                   gpointer value,
  198                   gpointer user_data)
  199 {
  200   const char *type = key;
  201   const GsfDocProp *prop = value;
  202   struct ProcContext *pc = user_data;
  203   const GValue *gval;
  204   char *contents;
  205   int pos;
  206 
  207   if ( (NULL == key) ||
  208        (NULL == value) )
  209     return;
  210   if (0 != pc->ret)
  211     return;
  212   gval = gsf_doc_prop_get_val (prop);
  213 
  214   if (G_VALUE_TYPE (gval) == G_TYPE_STRING)
  215   {
  216     const char *gvals;
  217 
  218     gvals = g_value_get_string (gval);
  219     if (NULL == gvals)
  220       return;
  221     contents = strdup (gvals);
  222   }
  223   else
  224   {
  225     /* convert other formats? */
  226     contents = g_strdup_value_contents (gval);
  227   }
  228   if (NULL == contents)
  229     return;
  230   if (0 == strcmp (type,
  231                    "meta:generator"))
  232   {
  233     const char *mimetype = "application/vnd.ms-files";
  234     struct
  235     {
  236       const char *v;
  237       const char *m;
  238     } mm[] = {
  239       { "Microsoft Word", "application/msword" },
  240       { "Microsoft Office Word", "application/msword" },
  241       { "Microsoft Excel", "application/vnd.ms-excel" },
  242       { "Microsoft Office Excel", "application/vnd.ms-excel" },
  243       { "Microsoft PowerPoint", "application/vnd.ms-powerpoint" },
  244       { "Microsoft Office PowerPoint", "application/vnd.ms-powerpoint"},
  245       { "Microsoft Project", "application/vnd.ms-project" },
  246       { "Microsoft Visio", "application/vnd.visio" },
  247       { "Microsoft Office", "application/vnd.ms-office" },
  248       { NULL, NULL }
  249     };
  250     int i;
  251 
  252     for (i = 0; NULL != mm[i].v; i++)
  253       if (0 == strncmp (value,
  254                         mm[i].v,
  255                         strlen (mm[i].v) + 1))
  256       {
  257         mimetype = mm[i].m;
  258         break;
  259       }
  260     if (0 != add_metadata (pc->proc,
  261                            pc->proc_cls,
  262                            mimetype,
  263                            EXTRACTOR_METATYPE_MIMETYPE))
  264     {
  265       free (contents);
  266       pc->ret = 1;
  267       return;
  268     }
  269   }
  270   for (pos = 0; NULL != tmap[pos].text; pos++)
  271     if (0 == strcmp (tmap[pos].text,
  272                      type))
  273       break;
  274   if ( (NULL != tmap[pos].text) &&
  275        (0 != add_metadata (pc->proc, pc->proc_cls,
  276                            contents,
  277                            tmap[pos].type)) )
  278   {
  279     free (contents);
  280     pc->ret = 1;
  281     return;
  282   }
  283   free (contents);
  284 }
  285 
  286 
  287 /**
  288  * Function called on (Document)SummaryInformation OLE
  289  * streams.
  290  *
  291  * @param in the input OLE stream
  292  * @param proc function to call on meta data found
  293  * @param proc_cls closure for proc
  294  * @return 0 to continue to extract, 1 if we are done
  295  */
  296 static int
  297 process (GsfInput *in,
  298          EXTRACTOR_MetaDataProcessor proc,
  299          void *proc_cls)
  300 {
  301   struct ProcContext pc;
  302   GsfDocMetaData *sections;
  303   GError *error;
  304 
  305   pc.proc = proc;
  306   pc.proc_cls = proc_cls;
  307   pc.ret = 0;
  308   sections = gsf_doc_meta_data_new ();
  309 #ifdef HAVE_GSF_DOC_META_DATA_READ_FROM_MSOLE
  310   error = gsf_doc_meta_data_read_from_msole (sections, in);
  311 #else
  312   error = gsf_msole_metadata_read (in, sections);
  313 #endif
  314   if (NULL == error)
  315   {
  316     gsf_doc_meta_data_foreach (sections,
  317                                &process_metadata,
  318                                &pc);
  319   }
  320   else
  321   {
  322     g_error_free (error);
  323   }
  324   g_object_unref (G_OBJECT (sections));
  325   return pc.ret;
  326 }
  327 
  328 
  329 /**
  330  * Function called on SfxDocumentInfo OLE
  331  * streams.
  332  *
  333  * @param in the input OLE stream
  334  * @param proc function to call on meta data found
  335  * @param proc_cls closure for proc
  336  * @return 0 to continue to extract, 1 if we are done
  337  */
  338 static int
  339 process_star_office (GsfInput *src,
  340                      EXTRACTOR_MetaDataProcessor proc,
  341                      void *proc_cls)
  342 {
  343   off_t size = gsf_input_size (src);
  344 
  345   if ( (size < 0x374) ||
  346        (size > 4 * 1024 * 1024) ) /* == 0x375?? */
  347     return 0;
  348   {
  349     char buf[size];
  350 
  351     gsf_input_read (src, size, (unsigned char*) buf);
  352     if ( (buf[0] != 0x0F) ||
  353          (buf[1] != 0x0) ||
  354          (0 != strncmp (&buf[2],
  355                         "SfxDocumentInfo",
  356                         strlen ("SfxDocumentInfo"))) ||
  357          (buf[0x11] != 0x0B) ||
  358          (buf[0x13] != 0x00) || /* pw protected! */
  359          (buf[0x12] != 0x00) )
  360       return 0;
  361     buf[0xd3] = '\0';
  362     if ( (buf[0x94] + buf[0x93] > 0) &&
  363          (0 != add_metadata (proc, proc_cls,
  364                              &buf[0x95],
  365                              EXTRACTOR_METATYPE_TITLE)) )
  366       return 1;
  367     buf[0x114] = '\0';
  368     if ( (buf[0xd5] + buf[0xd4] > 0) &&
  369          (0 != add_metadata (proc, proc_cls,
  370                              &buf[0xd6],
  371                              EXTRACTOR_METATYPE_SUBJECT)) )
  372       return 1;
  373     buf[0x215] = '\0';
  374     if ( (buf[0x115] + buf[0x116] > 0) &&
  375          (0 != add_metadata (proc, proc_cls,
  376                              &buf[0x117],
  377                              EXTRACTOR_METATYPE_COMMENT)) )
  378       return 1;
  379     buf[0x296] = '\0';
  380     if ( (buf[0x216] + buf[0x217] > 0) &&
  381          (0 != add_metadata (proc, proc_cls,
  382                              &buf[0x218],
  383                              EXTRACTOR_METATYPE_KEYWORDS)) )
  384       return 1;
  385     /* fixme: do timestamps,
  386        mime-type, user-defined info's */
  387   }
  388   return 0;
  389 }
  390 
  391 
  392 /**
  393  * We use "__" to translate using iso-639.
  394  *
  395  * @param a string to translate
  396  * @return translated string
  397  */
  398 #define __(a) dgettext ("iso-639", a)
  399 
  400 
  401 /**
  402  * Get the language string for the given language ID (lid)
  403  * value.
  404  *
  405  * @param lid language id value
  406  * @return language string corresponding to the lid
  407  */
  408 static const char *
  409 lid_to_language (unsigned int lid)
  410 {
  411   switch (lid)
  412   {
  413   case 0x0400:
  414     return _ ("No Proofing");
  415   case 0x0401:
  416     return __ ("Arabic");
  417   case 0x0402:
  418     return __ ("Bulgarian");
  419   case 0x0403:
  420     return __ ("Catalan");
  421   case 0x0404:
  422     return _ ("Traditional Chinese");
  423   case 0x0804:
  424     return _ ("Simplified Chinese");
  425   case 0x0405:
  426     return __ ("Chechen");
  427   case 0x0406:
  428     return __ ("Danish");
  429   case 0x0407:
  430     return __ ("German");
  431   case 0x0807:
  432     return _ ("Swiss German");
  433   case 0x0408:
  434     return __ ("Greek");
  435   case 0x0409:
  436     return _ ("U.S. English");
  437   case 0x0809:
  438     return _ ("U.K. English");
  439   case 0x0c09:
  440     return _ ("Australian English");
  441   case 0x040a:
  442     return _ ("Castilian Spanish");
  443   case 0x080a:
  444     return _ ("Mexican Spanish");
  445   case 0x040b:
  446     return __ ("Finnish");
  447   case 0x040c:
  448     return __ ("French");
  449   case 0x080c:
  450     return _ ("Belgian French");
  451   case 0x0c0c:
  452     return _ ("Canadian French");
  453   case 0x100c:
  454     return _ ("Swiss French");
  455   case 0x040d:
  456     return __ ("Hebrew");
  457   case 0x040e:
  458     return __ ("Hungarian");
  459   case 0x040f:
  460     return __ ("Icelandic");
  461   case 0x0410:
  462     return __ ("Italian");
  463   case 0x0810:
  464     return _ ("Swiss Italian");
  465   case 0x0411:
  466     return __ ("Japanese");
  467   case 0x0412:
  468     return __ ("Korean");
  469   case 0x0413:
  470     return __ ("Dutch");
  471   case 0x0813:
  472     return _ ("Belgian Dutch");
  473   case 0x0414:
  474     return _ ("Norwegian Bokmal");
  475   case 0x0814:
  476     return __ ("Norwegian Nynorsk");
  477   case 0x0415:
  478     return __ ("Polish");
  479   case 0x0416:
  480     return __ ("Brazilian Portuguese");
  481   case 0x0816:
  482     return __ ("Portuguese");
  483   case 0x0417:
  484     return _ ("Rhaeto-Romanic");
  485   case 0x0418:
  486     return __ ("Romanian");
  487   case 0x0419:
  488     return __ ("Russian");
  489   case 0x041a:
  490     return _ ("Croato-Serbian (Latin)");
  491   case 0x081a:
  492     return _ ("Serbo-Croatian (Cyrillic)");
  493   case 0x041b:
  494     return __ ("Slovak");
  495   case 0x041c:
  496     return __ ("Albanian");
  497   case 0x041d:
  498     return __ ("Swedish");
  499   case 0x041e:
  500     return __ ("Thai");
  501   case 0x041f:
  502     return __ ("Turkish");
  503   case 0x0420:
  504     return __ ("Urdu");
  505   case 0x0421:
  506     return __ ("Bahasa");
  507   case 0x0422:
  508     return __ ("Ukrainian");
  509   case 0x0423:
  510     return __ ("Byelorussian");
  511   case 0x0424:
  512     return __ ("Slovenian");
  513   case 0x0425:
  514     return __ ("Estonian");
  515   case 0x0426:
  516     return __ ("Latvian");
  517   case 0x0427:
  518     return __ ("Lithuanian");
  519   case 0x0429:
  520     return _ ("Farsi");
  521   case 0x042D:
  522     return __ ("Basque");
  523   case 0x042F:
  524     return __ ("Macedonian");
  525   case 0x0436:
  526     return __ ("Afrikaans");
  527   case 0x043E:
  528     return __ ("Malayalam");
  529   default:
  530     return NULL;
  531   }
  532 }
  533 
  534 
  535 /**
  536  * Extract editing history from XTable stream.
  537  *
  538  * @param stream OLE stream to process
  539  * @param lcSttbSavedBy length of the revision history in bytes
  540  * @param fcSttbSavedBy offset of the revision history in the stream
  541  * @param proc function to call on meta data found
  542  * @param proc_cls closure for proc
  543  * @return 0 to continue to extract, 1 if we are done
  544  */
  545 static int
  546 history_extract (GsfInput *stream,
  547                  unsigned int lcbSttbSavedBy,
  548                  unsigned int fcSttbSavedBy,
  549                  EXTRACTOR_MetaDataProcessor proc,
  550                  void *proc_cls)
  551 {
  552   unsigned int where;
  553   unsigned char *lbuffer;
  554   unsigned int i;
  555   unsigned int length;
  556   char *author;
  557   char *filename;
  558   char *rbuf;
  559   unsigned int nRev;
  560   int ret;
  561 
  562   /* goto offset of revision information */
  563   gsf_input_seek (stream, fcSttbSavedBy, G_SEEK_SET);
  564   if (gsf_input_remaining (stream) < lcbSttbSavedBy)
  565     return 0;
  566   if (NULL == (lbuffer = malloc (lcbSttbSavedBy)))
  567     return 0;
  568   /* read all the revision history */
  569   gsf_input_read (stream, lcbSttbSavedBy, lbuffer);
  570   /* there are n strings, so n/2 revisions (author & file) */
  571   nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2;
  572   where = 6;
  573   ret = 0;
  574   for (i = 0; i < nRev; i++)
  575   {
  576     if (where >= lcbSttbSavedBy)
  577       break;
  578     length = lbuffer[where++];
  579     if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
  580          (where + 2 * length + 2 <= where) )
  581       break;
  582     author = EXTRACTOR_common_convert_to_utf8 ((const char*) &lbuffer[where],
  583                                                length * 2,
  584                                                "UTF-16BE");
  585     where += length * 2 + 1;
  586     length = lbuffer[where++];
  587     if ( (where + 2 * length >= lcbSttbSavedBy) ||
  588          (where + 2 * length + 1 <= where) )
  589     {
  590       if (NULL != author)
  591         free (author);
  592       break;
  593     }
  594     filename = EXTRACTOR_common_convert_to_utf8 ((const char*) &lbuffer[where],
  595                                                  length * 2,
  596                                                  "UTF-16BE");
  597     where += length * 2 + 1;
  598     if ( (NULL != author) &&
  599          (NULL != filename) )
  600     {
  601       size_t bsize;
  602 
  603       bsize = strlen (author) + strlen (filename) + 512;
  604       if (NULL != (rbuf = malloc (bsize)))
  605       {
  606         int snret;
  607 
  608         snret = snprintf (rbuf,
  609                           bsize,
  610                           _ ("Revision #%u: Author `%s' worked on `%s'"),
  611                           i,
  612                           author,
  613                           filename);
  614         if ( (-1 != snret) &&
  615              (bsize > (size_t) snret) )
  616         {
  617           ret = add_metadata (proc,
  618                               proc_cls,
  619                               rbuf,
  620                               EXTRACTOR_METATYPE_REVISION_HISTORY);
  621         }
  622         free (rbuf);
  623       }
  624     }
  625     if (NULL != author)
  626       free (author);
  627     if (NULL != filename)
  628       free (filename);
  629     if (0 != ret)
  630       break;
  631   }
  632   free (lbuffer);
  633   return ret;
  634 }
  635 
  636 
  637 /* *************************** custom GSF input method ***************** */
  638 
  639 #define LE_TYPE_INPUT                  (le_input_get_type ())
  640 #define LE_INPUT(obj)                  (G_TYPE_CHECK_INSTANCE_CAST ((obj), \
  641                                                                     LE_TYPE_INPUT, \
  642                                                                     LeInput))
  643 #define LE_INPUT_CLASS(klass)          (G_TYPE_CHECK_CLASS_CAST ((klass), \
  644                                                                  LE_TYPE_INPUT, \
  645                                                                  LeInputClass))
  646 #define IS_LE_INPUT(obj)               (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
  647                                                                     LE_TYPE_INPUT))
  648 #define IS_LE_INPUT_CLASS(klass)       (G_TYPE_CHECK_CLASS_TYPE ((klass), \
  649                                                                  LE_TYPE_INPUT))
  650 #define LE_INPUT_GET_CLASS(obj)        (G_TYPE_INSTANCE_GET_CLASS ((obj), \
  651                                                                    LE_TYPE_INPUT, \
  652                                                                    LeInputClass))
  653 
  654 /**
  655  * Internal state of an "LeInput" object.
  656  */
  657 typedef struct _LeInputPrivate
  658 {
  659   /**
  660    * Our extraction context.
  661    */
  662   struct EXTRACTOR_ExtractContext *ec;
  663 } LeInputPrivate;
  664 
  665 
  666 /**
  667  * Overall state of an "LeInput" object.
  668  */
  669 typedef struct _LeInput
  670 {
  671   /**
  672    * Inherited state from parent (GsfInput).
  673    */
  674   GsfInput input;
  675 
  676   /*< private > */
  677   /**
  678    * Private state of the LeInput.
  679    */
  680   LeInputPrivate *priv;
  681 } LeInput;
  682 
  683 
  684 /**
  685  * LeInput's class state.
  686  */
  687 typedef struct _LeInputClass
  688 {
  689   /**
  690    * GsfInput is our parent class.
  691    */
  692   GsfInputClass parent_class;
  693 
  694   /* Padding for future expansion */
  695   void (*_gtk_reserved1)(void);
  696   void (*_gtk_reserved2)(void);
  697   void (*_gtk_reserved3)(void);
  698   void (*_gtk_reserved4)(void);
  699 } LeInputClass;
  700 
  701 
  702 /**
  703  * Constructor for LeInput objects.
  704  *
  705  * @param ec extraction context to use
  706  * @return the LeInput, NULL on error
  707  */
  708 GsfInput *
  709 le_input_new (struct EXTRACTOR_ExtractContext *ec);
  710 
  711 
  712 /**
  713  * Class initializer for the "LeInput" class.
  714  *
  715  * @param class class object to initialize
  716  */
  717 static void
  718 le_input_class_init (LeInputClass *class);
  719 
  720 
  721 /**
  722  * Initialize internal state of fresh input object.
  723  *
  724  * @param input object to initialize
  725  */
  726 static void
  727 le_input_init (LeInput *input);
  728 
  729 
  730 /**
  731  * Macro to create LeInput type definition and register the class.
  732  */
  733 GSF_CLASS (LeInput, le_input, le_input_class_init, le_input_init,
  734            GSF_INPUT_TYPE)
  735 
  736 
  737 /**
  738  * Duplicate input, leaving the new one at the same offset.
  739  *
  740  * @param input the input to duplicate
  741  * @param err location for error reporting, can be NULL
  742  * @return NULL on error (always)
  743  */
  744 static GsfInput *
  745 le_input_dup (GsfInput * input,
  746               GError * *err)
  747 {
  748   if (NULL != err)
  749     *err = g_error_new (gsf_input_error_id (), 0,
  750                         "dup not supported on LeInput");
  751   return NULL;
  752 }
  753 
  754 
  755 /**
  756  * Read at least num_bytes. Does not change the current position if
  757  * there is an error. Will only read if the entire amount can be
  758  * read. Invalidates the buffer associated with previous calls to
  759  * gsf_input_read.
  760  *
  761  * @param input
  762  * @param num_bytes
  763  * @param optional_buffer
  764  * @return buffer where num_bytes data are available, or NULL on error
  765  */
  766 static const guint8 *
  767 le_input_read (GsfInput *input,
  768                size_t num_bytes,
  769                guint8 *optional_buffer)
  770 {
  771   LeInput *li = LE_INPUT (input);
  772   struct EXTRACTOR_ExtractContext *ec;
  773   void *buf;
  774   uint64_t old_off;
  775   ssize_t ret;
  776 
  777   ec = li->priv->ec;
  778   old_off = ec->seek (ec->cls, 0, SEEK_CUR);
  779   if (num_bytes
  780       != (ret = ec->read (ec->cls,
  781                           &buf,
  782                           num_bytes)))
  783   {
  784     /* we don't support partial reads;
  785  most other GsfInput implementations in this case
  786  allocate some huge temporary buffer just to avoid
  787  the partial read; we might need to do that as well!? */
  788     ec->seek (ec->cls, SEEK_SET, old_off);
  789     return NULL;
  790   }
  791   if (NULL != optional_buffer)
  792   {
  793     memcpy (optional_buffer, buf, num_bytes);
  794     return optional_buffer;
  795   }
  796   return buf;
  797 }
  798 
  799 
  800 /**
  801  * Move the current location in an input stream
  802  *
  803  * @param input stream to seek
  804  * @param offset target offset
  805  * @param whence determines to what the offset is relative to
  806  * @return TRUE on error
  807  */
  808 static gboolean
  809 le_input_seek (GsfInput *input,
  810                gsf_off_t offset,
  811                GSeekType whence)
  812 {
  813   LeInput *li = LE_INPUT (input);
  814   struct EXTRACTOR_ExtractContext *ec;
  815   int w;
  816   int64_t ret;
  817 
  818   ec = li->priv->ec;
  819   switch (whence)
  820   {
  821   case G_SEEK_SET:
  822     w = SEEK_SET;
  823     break;
  824   case G_SEEK_CUR:
  825     w = SEEK_CUR;
  826     break;
  827   case G_SEEK_END:
  828     w = SEEK_END;
  829     break;
  830   default:
  831     return TRUE;
  832   }
  833   if (-1 ==
  834       (ret = ec->seek (ec->cls,
  835                        offset,
  836                        w)))
  837     return TRUE;
  838   return FALSE;
  839 }
  840 
  841 
  842 /**
  843  * Class initializer for the "LeInput" class.
  844  *
  845  * @param class class object to initialize
  846  */
  847 static void
  848 le_input_class_init (LeInputClass *class)
  849 {
  850   GsfInputClass *input_class;
  851 
  852   input_class = (GsfInputClass *) class;
  853   input_class->Dup = le_input_dup;
  854   input_class->Read = le_input_read;
  855   input_class->Seek = le_input_seek;
  856   g_type_class_add_private (class, sizeof (LeInputPrivate));
  857 }
  858 
  859 
  860 /**
  861  * Initialize internal state of fresh input object.
  862  *
  863  * @param input object to initialize
  864  */
  865 static void
  866 le_input_init (LeInput *input)
  867 {
  868   LeInputPrivate *priv;
  869 
  870   input->priv =
  871     G_TYPE_INSTANCE_GET_PRIVATE (input, LE_TYPE_INPUT,
  872                                  LeInputPrivate);
  873   priv = input->priv;
  874   priv->ec = NULL;
  875 }
  876 
  877 
  878 /**
  879  * Creates a new LeInput object.
  880  *
  881  * @param ec extractor context to wrap
  882  * @return NULL on error
  883  */
  884 GsfInput *
  885 le_input_new (struct EXTRACTOR_ExtractContext *ec)
  886 {
  887   LeInput *input;
  888 
  889   input = g_object_new (LE_TYPE_INPUT, NULL);
  890   gsf_input_set_size (GSF_INPUT (input),
  891                       ec->get_size (ec->cls));
  892   gsf_input_seek_emulate (GSF_INPUT (input),
  893                           0);
  894   input->input.name = NULL;
  895   input->input.container = NULL;
  896   input->priv->ec = ec;
  897 
  898   return GSF_INPUT (input);
  899 }
  900 
  901 
  902 /* *********************** end of custom GSF input method ************* */
  903 
  904 
  905 /**
  906  * Main entry method for the OLE2 extraction plugin.
  907  *
  908  * @param ec extraction context provided to the plugin
  909  */
  910 void
  911 EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec)
  912 {
  913   GsfInput *input;
  914   GsfInfile *infile;
  915   GsfInput *src;
  916   const char *name;
  917   unsigned int i;
  918   unsigned int lcb;
  919   unsigned int fcb;
  920   const unsigned char *data512;
  921   unsigned int lid;
  922   const char *lang;
  923   int ret;
  924   void *data;
  925   uint64_t fsize;
  926   ssize_t data_size;
  927 
  928   fsize = ec->get_size (ec->cls);
  929   if (fsize < 512 + 898)
  930   {
  931     /* File too small for OLE2 */
  932     return;   /* can hardly be OLE2 */
  933   }
  934   if (512 + 898 > (data_size = ec->read (ec->cls, &data, fsize)))
  935   {
  936     /* Failed to read minimum file size to buffer */
  937     return;
  938   }
  939   data512 = (const unsigned char*) data + 512;
  940   lid = data512[6] + (data512[7] << 8);
  941   if ( (NULL != (lang = lid_to_language (lid))) &&
  942        (0 != (ret = add_metadata (ec->proc, ec->cls,
  943                                   lang,
  944                                   EXTRACTOR_METATYPE_LANGUAGE))) )
  945     return;
  946   lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16)
  947         + (data512[729] << 24);
  948   fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16)
  949         + (data512[725] << 24);
  950   if (0 != ec->seek (ec->cls, 0, SEEK_SET))
  951   {
  952     /* seek failed!? */
  953     return;
  954   }
  955 #if USE_LE_INPUT
  956   if (NULL == (input = le_input_new (ec)))
  957   {
  958     fprintf (stderr, "le_input_new failed\n");
  959     return;
  960   }
  961 #else
  962   input = gsf_input_memory_new ((const guint8 *) data,
  963                                 data_size,
  964                                 FALSE);
  965 #endif
  966   if (NULL == (infile = gsf_infile_msole_new (input, NULL)))
  967   {
  968     g_object_unref (G_OBJECT (input));
  969     return;
  970   }
  971   ret = 0;
  972   for (i = 0; i<gsf_infile_num_children (infile); i++)
  973   {
  974     if (0 != ret)
  975       break;
  976     if (NULL == (name = gsf_infile_name_by_index (infile, i)))
  977       continue;
  978     src = NULL;
  979     if ( ( (0 == strcmp (name, "\005SummaryInformation")) ||
  980            (0 == strcmp (name, "\005DocumentSummaryInformation")) ) &&
  981          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
  982       ret = process (src,
  983                      ec->proc,
  984                      ec->cls);
  985     if ( (0 == strcmp (name, "SfxDocumentInfo")) &&
  986          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
  987       ret = process_star_office (src,
  988                                  ec->proc,
  989                                  ec->cls);
  990     if (NULL != src)
  991       g_object_unref (G_OBJECT (src));
  992   }
  993   if (0 != ret)
  994     goto CLEANUP;
  995 
  996   if (lcb < 6)
  997     goto CLEANUP;
  998   for (i = 0; i<gsf_infile_num_children (infile); i++)
  999   {
 1000     if (ret != 0)
 1001       break;
 1002     if (NULL == (name = gsf_infile_name_by_index (infile, i)))
 1003       continue;
 1004     if ( ( (0 == strcmp (name, "1Table")) ||
 1005            (0 == strcmp (name, "0Table")) ) &&
 1006          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
 1007     {
 1008       ret = history_extract (src,
 1009                              lcb,
 1010                              fcb,
 1011                              ec->proc, ec->cls);
 1012       g_object_unref (G_OBJECT (src));
 1013     }
 1014   }
 1015 CLEANUP:
 1016   g_object_unref (G_OBJECT (infile));
 1017   g_object_unref (G_OBJECT (input));
 1018 }
 1019 
 1020 
 1021 /**
 1022  * Custom log function we give to GSF to disable logging.
 1023  *
 1024  * @param log_domain unused
 1025  * @param log_level unused
 1026  * @param message unused
 1027  * @param user_data unused
 1028  */
 1029 static void
 1030 nolog (const gchar *log_domain,
 1031        GLogLevelFlags log_level,
 1032        const gchar *message,
 1033        gpointer user_data)
 1034 {
 1035   /* do nothing */
 1036 }
 1037 
 1038 
 1039 /**
 1040  * OLE2 plugin constructor. Initializes glib and gsf, in particular
 1041  * gsf logging is disabled.
 1042  */
 1043 void __attribute__ ((constructor))
 1044 ole2_ltdl_init ()
 1045 {
 1046 #if ! GLIB_CHECK_VERSION (2, 35, 0)
 1047   g_type_init ();
 1048 #endif
 1049 #ifdef HAVE_GSF_INIT
 1050   gsf_init ();
 1051 #endif
 1052   /* disable logging -- thanks, Jody! */
 1053   g_log_set_handler ("libgsf:msole",
 1054                      G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING,
 1055                      &nolog, NULL);
 1056 }
 1057 
 1058 
 1059 /**
 1060  * OLE2 plugin destructor.  Shutdown of gsf.
 1061  */
 1062 void __attribute__ ((destructor))
 1063 ole2_ltdl_fini ()
 1064 {
 1065 #ifdef HAVE_GSF_INIT
 1066   gsf_shutdown ();
 1067 #endif
 1068 }
 1069 
 1070 
 1071 /* end of ole2_extractor.c */