libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

ps_extractor.c
Go to the documentation of this file.
1 /*
2  This file is part of libextractor.
3  Copyright (C) 2002, 2003, 2009, 2012 Vidyut Samanta and Christian Grothoff
4 
5  libextractor is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published
7  by the Free Software Foundation; either version 3, or (at your
8  option) any later version.
9 
10  libextractor is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with libextractor; see the file COPYING. If not, write to the
17  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19  */
20 /**
21  * @file plugins/ps_extractor.c
22  * @brief plugin to support PostScript files
23  * @author Christian Grothoff
24  */
25 #include "platform.h"
26 #include "extractor.h"
27 
28 
/**
 * Upper bound, in bytes, on how much input is requested per line read.
 * Lines in the body of a PostScript file may be longer, but the header
 * comments carrying the meta data are expected to fit within this
 * limit; a header line whose newline does not appear within this many
 * bytes is not returned by the line reader.
 */
#define MAX_LINE (1024)

/**
 * Prefix that the first line of the file must start with for the
 * input to be treated as PostScript.
 */
#define PS_HEADER "%!PS-Adobe"
41 
42 
43 /**
44  * Pair with prefix in the PS header and corresponding LE type.
45  */
46 struct Matches
47 {
48  /**
49  * PS header prefix.
50  */
51  const char *prefix;
52 
53  /**
54  * Corresponding LE type.
55  */
57 };
58 
59 
60 /**
61  * Map of PS prefixes to LE types.
62  */
63 static struct Matches tests[] = {
64  { "%%Title: ", EXTRACTOR_METATYPE_TITLE },
65  { "% Subject: ", EXTRACTOR_METATYPE_SUBJECT },
66  { "%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
67  { "% From: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
68  { "%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER },
69  { "%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
70  { "%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE },
71  { "% Date: ", EXTRACTOR_METATYPE_UNKNOWN_DATE },
72  { "%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT },
73  { "%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION },
74  { "%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE },
75  { "%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER },
76  { "%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION },
77  { "%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION },
78 
79  /* Also widely used but not supported since they
80  probably make no sense:
81  "%%BoundingBox: ",
82  "%%DocumentNeededResources: ",
83  "%%DocumentSuppliedResources: ",
84  "%%DocumentProcSets: ",
85  "%%DocumentData: ", */
86 
87  { NULL, 0 }
88 };
89 
90 
91 /**
92  * Read a single ('\n'-terminated) line of input.
93  *
94  * @param ec context for IO
95  * @return NULL on end-of-file (or if next line exceeds limit)
96  */
97 static char *
99 {
100  int64_t pos;
101  ssize_t ret;
102  char *res;
103  void *data;
104  const char *cdata;
105  const char *eol;
106 
107  pos = ec->seek (ec->cls, 0, SEEK_CUR);
108  if (0 >= (ret = ec->read (ec->cls, &data, MAX_LINE)))
109  return NULL;
110  cdata = data;
111  if (NULL == (eol = memchr (cdata, '\n', ret)))
112  return NULL; /* no end-of-line found */
113  if (NULL == (res = malloc (eol - cdata + 1)))
114  return NULL;
115  memcpy (res, cdata, eol - cdata);
116  res[eol - cdata] = '\0';
117  ec->seek (ec->cls, pos + eol - cdata + 1, SEEK_SET);
118  return res;
119 }
120 
121 
122 /**
123  * Main entry method for the 'application/postscript' extraction plugin.
124  *
125  * @param ec extraction context provided to the plugin
126  */
127 void
129 {
130  unsigned int i;
131  char *line;
132  char *next;
133  char *acc;
134  const char *match;
135 
136  if (NULL == (line = readline (ec)))
137  return;
138  if ( (strlen (line) < strlen (PS_HEADER)) ||
139  (0 != memcmp (PS_HEADER,
140  line,
141  strlen (PS_HEADER))) )
142  {
143  free (line);
144  return;
145  }
146  free (line);
147  if (0 != ec->proc (ec->cls,
148  "ps",
151  "text/plain",
152  "application/postscript",
153  strlen ("application/postscript") + 1))
154  return;
155 
156  line = NULL;
157  next = readline (ec);
158  while ( (NULL != next) &&
159  ('%' == next[0]) )
160  {
161  line = next;
162  next = readline (ec);
163  for (i = 0; NULL != tests[i].prefix; i++)
164  {
165  match = tests[i].prefix;
166  if ( (strlen (line) < strlen (match)) ||
167  (0 != strncmp (line, match, strlen (match))) )
168  continue;
169  /* %%+ continues previous meta-data type... */
170  while ( (NULL != next) &&
171  (0 == strncmp (next, "%%+", strlen ("%%+"))) )
172  {
173  if (NULL == (acc = malloc (strlen (line) + strlen (next) - 1)))
174  break;
175  strcpy (acc, line);
176  strcat (acc, " ");
177  strcat (acc, next + 3);
178  free (line);
179  line = acc;
180  free (next);
181  next = readline (ec);
182  }
183  if ( (line[strlen (line) - 1] == ')') &&
184  (line[strlen (match)] == '(') )
185  {
186  acc = &line[strlen (match) + 1];
187  acc[strlen (acc) - 1] = '\0'; /* remove ")" */
188  }
189  else
190  {
191  acc = &line[strlen (match)];
192  }
193  while (isspace ((unsigned char) acc[0]))
194  acc++;
195  if ( (strlen (acc) > 0) &&
196  (0 != ec->proc (ec->cls,
197  "ps",
198  tests[i].type,
200  "text/plain",
201  acc,
202  strlen (acc) + 1)) )
203  {
204  free (line);
205  if (NULL != next)
206  free (next);
207  return;
208  }
209  break;
210  }
211  free (line);
212  }
213  if (NULL != next)
214  free (next);
215 }
216 
217 
218 /* end of ps_extractor.c */
@ EXTRACTOR_METAFORMAT_UTF8
Definition: extractor.h:102
#define NULL
Definition: getopt1.c:60
EXTRACTOR_MetaType
Definition: extractor.h:126
@ EXTRACTOR_METATYPE_UNKNOWN_DATE
Definition: extractor.h:195
@ EXTRACTOR_METATYPE_PAGE_ORDER
Definition: extractor.h:295
@ EXTRACTOR_METATYPE_REVISION_NUMBER
Definition: extractor.h:274
@ EXTRACTOR_METATYPE_AUTHOR_NAME
Definition: extractor.h:143
@ EXTRACTOR_METATYPE_FORMAT_VERSION
Definition: extractor.h:191
@ EXTRACTOR_METATYPE_TITLE
Definition: extractor.h:134
@ EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE
Definition: extractor.h:194
@ EXTRACTOR_METATYPE_PAGE_ORIENTATION
Definition: extractor.h:269
@ EXTRACTOR_METATYPE_MAGNIFICATION
Definition: extractor.h:254
@ EXTRACTOR_METATYPE_CREATION_DATE
Definition: extractor.h:196
@ EXTRACTOR_METATYPE_PAGE_COUNT
Definition: extractor.h:141
@ EXTRACTOR_METATYPE_PAPER_SIZE
Definition: extractor.h:270
@ EXTRACTOR_METATYPE_MIMETYPE
Definition: extractor.h:129
@ EXTRACTOR_METATYPE_SUBJECT
Definition: extractor.h:188
platform specifics
static char * readline(struct EXTRACTOR_ExtractContext *ec)
Definition: ps_extractor.c:98
static struct Matches tests[]
Definition: ps_extractor.c:63
void EXTRACTOR_ps_extract_method(struct EXTRACTOR_ExtractContext *ec)
Definition: ps_extractor.c:128
#define MAX_LINE
Definition: ps_extractor.c:35
#define PS_HEADER
Definition: ps_extractor.c:40
int64_t(* seek)(void *cls, int64_t pos, int whence)
Definition: extractor.h:509
EXTRACTOR_MetaDataProcessor proc
Definition: extractor.h:525
ssize_t(* read)(void *cls, void **data, size_t size)
Definition: extractor.h:494
enum EXTRACTOR_MetaType type
Definition: deb_extractor.c:82
const char * prefix
Definition: ps_extractor.c:51