"Fossies" - the Fresh Open Source Software Archive 
Member "libextractor-1.11/src/plugins/man_extractor.c" (30 Jan 2021, 7055 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "man_extractor.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
1.10_vs_1.11.
1 /*
2 This file is part of libextractor.
3 Copyright (C) 2002, 2003, 2004, 2009, 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19 */
20 /**
21 * @file plugins/man_extractor.c
22 * @brief plugin to support man pages
23 * @author Christian Grothoff
24 */
25 #include "platform.h"
26 #include "extractor.h"
27 #include <ctype.h>
28
29
/**
 * Duplicate a fixed-length prefix of a character buffer as a C string
 * (local stand-in for 'strndup').
 *
 * Exactly 'n' bytes are copied; the input is not scanned for an
 * earlier 0-terminator.
 *
 * @param str buffer to copy from; must provide at least 'n' readable bytes
 * @param n number of bytes to copy (the result holds one more byte for
 *        the 0-terminator)
 * @return newly allocated, 0-terminated copy, NULL if the allocation failed
 */
static char *
stndup (const char *str, size_t n)
{
  char *copy = malloc (n + 1);

  if (NULL != copy)
  {
    memcpy (copy, str, n);
    copy[n] = '\0';
  }
  return copy;
}
48
49
50 /**
51 * Give a metadata item to LE. Removes double-quotes and
52 * makes sure we don't pass empty strings or NULL pointers.
53 *
54 * @param type metadata type to use
55 * @param keyword metadata value; freed in the process
56 * @param proc function to call with meta data
57 * @param proc_cls closure for 'proc'
58 * @return 0 to continue extracting, 1 if we are done
59 */
60 static int
61 add_keyword (enum EXTRACTOR_MetaType type,
62 char *keyword,
63 EXTRACTOR_MetaDataProcessor proc,
64 void *proc_cls)
65 {
66 int ret;
67 char *value;
68
69 if (NULL == keyword)
70 return 0;
71 if ( (keyword[0] == '\"') &&
72 (keyword[strlen (keyword) - 1] == '\"') )
73 {
74 keyword[strlen (keyword) - 1] = '\0';
75 value = &keyword[1];
76 }
77 else
78 value = keyword;
79 if (0 == strlen (value))
80 {
81 free (keyword);
82 return 0;
83 }
84 ret = proc (proc_cls,
85 "man",
86 type,
87 EXTRACTOR_METAFORMAT_UTF8,
88 "text/plain",
89 value,
90 strlen (value) + 1);
91 free (keyword);
92 return ret;
93 }
94
95
/**
 * Locate the end of the token starting at '*end'.
 *
 * A token runs up to (not including) the first space that is outside
 * of double-quotes, or up to 'size' if there is no such space.
 *
 * @param end on entry, offset of the first character of the token;
 *        on return, offset just past its last character, or
 *        'size + 1' if a double-quote opened inside the token is
 *        never closed
 * @param buf input buffer with the characters
 * @param size number of bytes in 'buf'
 */
static void
find_end_of_token (size_t *end,
                   const char *buf,
                   const size_t size)
{
  size_t idx = *end;
  int in_quotes = 0;

  for (; idx < size; idx++)
  {
    const char c = buf[idx];

    if ( (! in_quotes) && (' ' == c) )
      break;
    if ('\"' == c)
      in_quotes = ! in_quotes;
  }
  *end = in_quotes ? size + 1 : idx;
}
122
123
/**
 * Upper bound, in bytes, on how much data is requested from the
 * beginning of the file when looking for the man page header.
 */
#define MAX_READ (16 * 1024)
129
130
/**
 * Report a keyword via 'add_keyword' using the processor and closure
 * of the enclosing function's 'ec'; returns from the enclosing (void)
 * function if the processor signals that extraction should stop.
 *
 * @param t metadata type to use
 * @param s heap-allocated keyword; ownership passes to 'add_keyword'
 */
#define ADD(t, s) \
  do \
  { \
    if (0 != add_keyword ((t), (s), ec->proc, ec->cls)) \
      return; \
  } while (0)
139
140
141 /**
142 * Main entry method for the man page extraction plugin.
143 *
144 * @param ec extraction context provided to the plugin
145 */
146 void
147 EXTRACTOR_man_extract_method (struct EXTRACTOR_ExtractContext *ec)
148 {
149 const size_t xlen = strlen (".TH ");
150 size_t pos;
151 size_t xsize;
152 size_t end;
153 void *data;
154 ssize_t size;
155 char *buf;
156
157 if (0 >= (size = ec->read (ec->cls, &data, MAX_READ)))
158 return;
159 buf = data;
160 pos = 0;
161 if (size < xlen)
162 return;
163 /* find actual beginning of the man page (.TH);
164 abort if we find non-printable characters */
165 while ( (pos < size - xlen) &&
166 ( (0 != strncmp (".TH ",
167 &buf[pos],
168 xlen)) ||
169 ( (0 != pos) &&
170 (buf[pos - 1] != '\n') ) ) )
171 {
172 if ( (! isgraph ((unsigned char) buf[pos])) &&
173 (! isspace ((unsigned char) buf[pos])) )
174 return;
175 pos++;
176 }
177 if (0 != strncmp (".TH ", &buf[pos], xlen))
178 return;
179
180 /* find end of ".TH"-line */
181 xsize = pos;
182 while ( (xsize < size) && ('\n' != buf[xsize]) )
183 xsize++;
184 /* limit processing to ".TH" line */
185 size = xsize;
186
187 /* skip over ".TH" */
188 pos += xlen;
189
190 /* first token is the title */
191 end = pos;
192 find_end_of_token (&end, buf, size);
193 if (end > size)
194 return;
195 if (end > pos)
196 {
197 ADD (EXTRACTOR_METATYPE_TITLE, stndup (&buf[pos], end - pos));
198 pos = end + 1;
199 }
200 if (pos >= size)
201 return;
202
203 /* next token is the section */
204 end = pos;
205 find_end_of_token (&end, buf, size);
206 if (end > size)
207 return;
208 if ('\"' == buf[pos])
209 pos++;
210 if ((end - pos >= 1) && (end - pos <= 4))
211 {
212 switch (buf[pos])
213 {
214 case '1':
215 ADD (EXTRACTOR_METATYPE_SECTION,
216 strdup (_ ("Commands")));
217 break;
218 case '2':
219 ADD (EXTRACTOR_METATYPE_SECTION,
220 strdup (_ ("System calls")));
221 break;
222 case '3':
223 ADD (EXTRACTOR_METATYPE_SECTION,
224 strdup (_ ("Library calls")));
225 break;
226 case '4':
227 ADD (EXTRACTOR_METATYPE_SECTION,
228 strdup (_ ("Special files")));
229 break;
230 case '5':
231 ADD (EXTRACTOR_METATYPE_SECTION,
232 strdup (_ ("File formats and conventions")));
233 break;
234 case '6':
235 ADD (EXTRACTOR_METATYPE_SECTION,
236 strdup (_ ("Games")));
237 break;
238 case '7':
239 ADD (EXTRACTOR_METATYPE_SECTION,
240 strdup (_ ("Conventions and miscellaneous")));
241 break;
242 case '8':
243 ADD (EXTRACTOR_METATYPE_SECTION,
244 strdup (_ ("System management commands")));
245 break;
246 case '9':
247 ADD (EXTRACTOR_METATYPE_SECTION,
248 strdup (_ ("Kernel routines")));
249 break;
250 default:
251 ADD (EXTRACTOR_METATYPE_SECTION,
252 stndup (&buf[pos], 1));
253 }
254 pos = end + 1;
255 }
256 end = pos;
257
258 /* next token is the modification date */
259 find_end_of_token (&end, buf, size);
260 if (end > size)
261 return;
262 if (end > pos)
263 {
264 ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, stndup (&buf[pos], end - pos));
265 pos = end + 1;
266 }
267
268 /* next token is the source of the man page */
269 end = pos;
270 find_end_of_token (&end, buf, size);
271 if (end > size)
272 return;
273 if (end > pos)
274 {
275 ADD (EXTRACTOR_METATYPE_SOURCE,
276 stndup (&buf[pos], end - pos));
277 pos = end + 1;
278 }
279
280 /* last token is the title of the book the man page belongs to */
281 end = pos;
282 find_end_of_token (&end, buf, size);
283 if (end > size)
284 return;
285 if (end > pos)
286 {
287 ADD (EXTRACTOR_METATYPE_BOOK_TITLE,
288 stndup (&buf[pos], end - pos));
289 pos = end + 1;
290 }
291 }
292
293
294 /* end of man_extractor.c */