"Fossies" - the Fresh Open Source Software Archive 
Member "libextractor-1.11/src/plugins/ps_extractor.c" (30 Jan 2021, 5929 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "ps_extractor.c" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
1.5_vs_1.6.
1 /*
2 This file is part of libextractor.
3 Copyright (C) 2002, 2003, 2009, 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19 */
20 /**
21 * @file plugins/ps_extractor.c
22 * @brief plugin to support PostScript files
23 * @author Christian Grothoff
24 */
25 #include "platform.h"
26 #include "extractor.h"
27
28
/**
 * Upper bound (in bytes) on the amount of input requested when reading
 * one line.  Lines in the body of a PostScript file may well be longer,
 * but the header lines carrying the meta data are expected to fit.
 */
#define MAX_LINE (1024)
36
/**
 * Magic prefix that the first line of a PostScript file must start with.
 */
#define PS_HEADER "%!PS-Adobe"
41
42
43 /**
44 * Pair with prefix in the PS header and corresponding LE type.
45 */
46 struct Matches
47 {
48 /**
49 * PS header prefix.
50 */
51 const char *prefix;
52
53 /**
54 * Corresponding LE type.
55 */
56 enum EXTRACTOR_MetaType type;
57 };
58
59
60 /**
61 * Map of PS prefixes to LE types.
62 */
63 static struct Matches tests[] = {
64 { "%%Title: ", EXTRACTOR_METATYPE_TITLE },
65 { "% Subject: ", EXTRACTOR_METATYPE_SUBJECT },
66 { "%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
67 { "% From: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
68 { "%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER },
69 { "%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
70 { "%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE },
71 { "% Date: ", EXTRACTOR_METATYPE_UNKNOWN_DATE },
72 { "%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT },
73 { "%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION },
74 { "%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE },
75 { "%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER },
76 { "%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION },
77 { "%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION },
78
79 /* Also widely used but not supported since they
80 probably make no sense:
81 "%%BoundingBox: ",
82 "%%DocumentNeededResources: ",
83 "%%DocumentSuppliedResources: ",
84 "%%DocumentProcSets: ",
85 "%%DocumentData: ", */
86
87 { NULL, 0 }
88 };
89
90
91 /**
92 * Read a single ('\n'-terminated) line of input.
93 *
94 * @param ec context for IO
95 * @return NULL on end-of-file (or if next line exceeds limit)
96 */
97 static char *
98 readline (struct EXTRACTOR_ExtractContext *ec)
99 {
100 int64_t pos;
101 ssize_t ret;
102 char *res;
103 void *data;
104 const char *cdata;
105 const char *eol;
106
107 pos = ec->seek (ec->cls, 0, SEEK_CUR);
108 if (0 >= (ret = ec->read (ec->cls, &data, MAX_LINE)))
109 return NULL;
110 cdata = data;
111 if (NULL == (eol = memchr (cdata, '\n', ret)))
112 return NULL; /* no end-of-line found */
113 if (NULL == (res = malloc (eol - cdata + 1)))
114 return NULL;
115 memcpy (res, cdata, eol - cdata);
116 res[eol - cdata] = '\0';
117 ec->seek (ec->cls, pos + eol - cdata + 1, SEEK_SET);
118 return res;
119 }
120
121
122 /**
123 * Main entry method for the 'application/postscript' extraction plugin.
124 *
125 * @param ec extraction context provided to the plugin
126 */
127 void
128 EXTRACTOR_ps_extract_method (struct EXTRACTOR_ExtractContext *ec)
129 {
130 unsigned int i;
131 char *line;
132 char *next;
133 char *acc;
134 const char *match;
135
136 if (NULL == (line = readline (ec)))
137 return;
138 if ( (strlen (line) < strlen (PS_HEADER)) ||
139 (0 != memcmp (PS_HEADER,
140 line,
141 strlen (PS_HEADER))) )
142 {
143 free (line);
144 return;
145 }
146 free (line);
147 if (0 != ec->proc (ec->cls,
148 "ps",
149 EXTRACTOR_METATYPE_MIMETYPE,
150 EXTRACTOR_METAFORMAT_UTF8,
151 "text/plain",
152 "application/postscript",
153 strlen ("application/postscript") + 1))
154 return;
155
156 line = NULL;
157 next = readline (ec);
158 while ( (NULL != next) &&
159 ('%' == next[0]) )
160 {
161 line = next;
162 next = readline (ec);
163 for (i = 0; NULL != tests[i].prefix; i++)
164 {
165 match = tests[i].prefix;
166 if ( (strlen (line) < strlen (match)) ||
167 (0 != strncmp (line, match, strlen (match))) )
168 continue;
169 /* %%+ continues previous meta-data type... */
170 while ( (NULL != next) &&
171 (0 == strncmp (next, "%%+", strlen ("%%+"))) )
172 {
173 if (NULL == (acc = malloc (strlen (line) + strlen (next) - 1)))
174 break;
175 strcpy (acc, line);
176 strcat (acc, " ");
177 strcat (acc, next + 3);
178 free (line);
179 line = acc;
180 free (next);
181 next = readline (ec);
182 }
183 if ( (line[strlen (line) - 1] == ')') &&
184 (line[strlen (match)] == '(') )
185 {
186 acc = &line[strlen (match) + 1];
187 acc[strlen (acc) - 1] = '\0'; /* remove ")" */
188 }
189 else
190 {
191 acc = &line[strlen (match)];
192 }
193 while (isspace ((unsigned char) acc[0]))
194 acc++;
195 if ( (strlen (acc) > 0) &&
196 (0 != ec->proc (ec->cls,
197 "ps",
198 tests[i].type,
199 EXTRACTOR_METAFORMAT_UTF8,
200 "text/plain",
201 acc,
202 strlen (acc) + 1)) )
203 {
204 free (line);
205 if (NULL != next)
206 free (next);
207 return;
208 }
209 break;
210 }
211 free (line);
212 }
213 if (NULL != next)
214 free (next);
215 }
216
217
218 /* end of ps_extractor.c */