"Fossies" - the Fresh Open Source Software Archive

Member "libextractor-1.11/src/plugins/ps_extractor.c" (30 Jan 2021, 5929 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ps_extractor.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.5_vs_1.6.

    1 /*
    2      This file is part of libextractor.
    3      Copyright (C) 2002, 2003, 2009, 2012 Vidyut Samanta and Christian Grothoff
    4 
    5      libextractor is free software; you can redistribute it and/or modify
    6      it under the terms of the GNU General Public License as published
    7      by the Free Software Foundation; either version 3, or (at your
    8      option) any later version.
    9 
   10      libextractor is distributed in the hope that it will be useful, but
   11      WITHOUT ANY WARRANTY; without even the implied warranty of
   12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13      General Public License for more details.
   14 
   15      You should have received a copy of the GNU General Public License
   16      along with libextractor; see the file COPYING.  If not, write to the
   17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   18      Boston, MA 02110-1301, USA.
   19  */
   20 /**
   21  * @file plugins/ps_extractor.c
   22  * @brief plugin to support PostScript files
   23  * @author Christian Grothoff
   24  */
   25 #include "platform.h"
   26 #include "extractor.h"
   27 
   28 
   29 /**
   30  * Maximum length of a single line in the PostScript file we're
   31  * willing to look at.  While the body of the file can have longer
   32  * lines, this should be a sane limit for the lines in the header with
   33  * the meta data.
   34  */
   35 #define MAX_LINE (1024)
   36 
   37 /**
   38  * Header of a PostScript file.
   39  */
   40 #define PS_HEADER "%!PS-Adobe"
   41 
   42 
   43 /**
   44  * Pair with prefix in the PS header and corresponding LE type.
   45  */
   46 struct Matches
   47 {
   48   /**
   49    * PS header prefix.
   50    */
   51   const char *prefix;
   52 
   53   /**
   54    * Corresponding LE type.
   55    */
   56   enum EXTRACTOR_MetaType type;
   57 };
   58 
   59 
   60 /**
   61  * Map of PS prefixes to LE types.
   62  */
   63 static struct Matches tests[] = {
   64   { "%%Title: ", EXTRACTOR_METATYPE_TITLE },
   65   { "% Subject: ", EXTRACTOR_METATYPE_SUBJECT },
   66   { "%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
   67   { "% From: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
   68   { "%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER },
   69   { "%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
   70   { "%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE },
   71   { "% Date: ", EXTRACTOR_METATYPE_UNKNOWN_DATE },
   72   { "%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT },
   73   { "%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION },
   74   { "%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE },
   75   { "%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER },
   76   { "%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION },
   77   { "%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION },
   78 
   79   /* Also widely used but not supported since they
   80      probably make no sense:
   81      "%%BoundingBox: ",
   82      "%%DocumentNeededResources: ",
   83      "%%DocumentSuppliedResources: ",
   84      "%%DocumentProcSets: ",
   85      "%%DocumentData: ", */
   86 
   87   { NULL, 0 }
   88 };
   89 
   90 
   91 /**
   92  * Read a single ('\n'-terminated) line of input.
   93  *
   94  * @param ec context for IO
   95  * @return NULL on end-of-file (or if next line exceeds limit)
   96  */
   97 static char *
   98 readline (struct EXTRACTOR_ExtractContext *ec)
   99 {
  100   int64_t pos;
  101   ssize_t ret;
  102   char *res;
  103   void *data;
  104   const char *cdata;
  105   const char *eol;
  106 
  107   pos = ec->seek (ec->cls, 0, SEEK_CUR);
  108   if (0 >= (ret = ec->read (ec->cls, &data, MAX_LINE)))
  109     return NULL;
  110   cdata = data;
  111   if (NULL == (eol = memchr (cdata, '\n', ret)))
  112     return NULL; /* no end-of-line found */
  113   if (NULL == (res = malloc (eol - cdata + 1)))
  114     return NULL;
  115   memcpy (res, cdata, eol - cdata);
  116   res[eol - cdata] = '\0';
  117   ec->seek (ec->cls, pos + eol - cdata + 1, SEEK_SET);
  118   return res;
  119 }
  120 
  121 
  122 /**
  123  * Main entry method for the 'application/postscript' extraction plugin.
  124  *
  125  * @param ec extraction context provided to the plugin
  126  */
  127 void
  128 EXTRACTOR_ps_extract_method (struct EXTRACTOR_ExtractContext *ec)
  129 {
  130   unsigned int i;
  131   char *line;
  132   char *next;
  133   char *acc;
  134   const char *match;
  135 
  136   if (NULL == (line = readline (ec)))
  137     return;
  138   if ( (strlen (line) < strlen (PS_HEADER)) ||
  139        (0 != memcmp (PS_HEADER,
  140                      line,
  141                      strlen (PS_HEADER))) )
  142   {
  143     free (line);
  144     return;
  145   }
  146   free (line);
  147   if (0 != ec->proc (ec->cls,
  148                      "ps",
  149                      EXTRACTOR_METATYPE_MIMETYPE,
  150                      EXTRACTOR_METAFORMAT_UTF8,
  151                      "text/plain",
  152                      "application/postscript",
  153                      strlen ("application/postscript") + 1))
  154     return;
  155 
  156   line = NULL;
  157   next = readline (ec);
  158   while ( (NULL != next) &&
  159           ('%' == next[0]) )
  160   {
  161     line = next;
  162     next = readline (ec);
  163     for (i = 0; NULL != tests[i].prefix; i++)
  164     {
  165       match = tests[i].prefix;
  166       if ( (strlen (line) < strlen (match)) ||
  167            (0 != strncmp (line, match, strlen (match))) )
  168         continue;
  169       /* %%+ continues previous meta-data type... */
  170       while ( (NULL != next) &&
  171               (0 == strncmp (next, "%%+", strlen ("%%+"))) )
  172       {
  173         if (NULL == (acc = malloc (strlen (line) + strlen (next) - 1)))
  174           break;
  175         strcpy (acc, line);
  176         strcat (acc, " ");
  177         strcat (acc, next + 3);
  178         free (line);
  179         line = acc;
  180         free (next);
  181         next = readline (ec);
  182       }
  183       if ( (line[strlen (line) - 1] == ')') &&
  184            (line[strlen (match)] == '(') )
  185       {
  186         acc = &line[strlen (match) + 1];
  187         acc[strlen (acc) - 1] = '\0'; /* remove ")" */
  188       }
  189       else
  190       {
  191         acc = &line[strlen (match)];
  192       }
  193       while (isspace ((unsigned char) acc[0]))
  194         acc++;
  195       if ( (strlen (acc) > 0) &&
  196            (0 != ec->proc (ec->cls,
  197                            "ps",
  198                            tests[i].type,
  199                            EXTRACTOR_METAFORMAT_UTF8,
  200                            "text/plain",
  201                            acc,
  202                            strlen (acc) + 1)) )
  203       {
  204         free (line);
  205         if (NULL != next)
  206           free (next);
  207         return;
  208       }
  209       break;
  210     }
  211     free (line);
  212   }
  213   if (NULL != next)
  214     free (next);
  215 }
  216 
  217 
  218 /* end of ps_extractor.c */