libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Loading...
Searching...
No Matches
extractor_plugin_main.c
Go to the documentation of this file.
1/*
2 This file is part of libextractor.
3 Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19 */
20/**
21 * @file main/extractor_plugin_main.c
22 * @brief main loop for an out-of-process plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "extractor.h"
27#include "extractor_common.h"
29#include "extractor_plugins.h"
30#include "extractor_ipc.h"
31#include "extractor_logging.h"
33#include <dirent.h>
34#include <sys/types.h>
35#if GNU_LINUX
36#include <sys/wait.h>
37#include <sys/shm.h>
38#include <signal.h>
39#endif
40
41#if WINDOWS
42#define SHM_ID HANDLE
43#define INVALID_SHM_ID NULL
44#else
45#define SHM_ID int
46#define INVALID_SHM_ID -1
47#endif
48
49/**
50 * Closure we use for processing requests inside the helper process.
51 */
53{
54 /**
55 * Our plugin handle.
56 */
58
59 /**
60 * Shared memory area.
61 */
62 void *shm;
63
64 /**
65 * Overall size of the file.
66 */
67 uint64_t file_size;
68
69 /**
70 * Current read offset when reading from the SHM.
71 */
72 uint64_t read_position;
73
74 /**
75 * Current offset of the SHM in the file.
76 */
77 uint64_t shm_off;
78
79 /**
80 * Handle to the shared memory.
81 */
83
84 /**
85 * Size of the shared memory map.
86 */
87 uint32_t shm_map_size;
88
89 /**
90 * Number of bytes ready in SHM.
91 */
93
94 /**
95 * Input stream.
96 */
97 int in;
98
99 /**
100 * Output stream.
101 */
102 int out;
103};
104
105
106/**
107 * Moves current absolute buffer position to 'pos' in 'whence' mode.
108 * Will move logical position without shifting the buffer, if possible.
109 * Will not move beyond the end of file.
110 *
111 * @param cls closure, the 'struct ProcessingContext'
112 * @param pos position to move to
113 * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END)
114 * @return new absolute position, -1 on error
115 */
116static int64_t
118 int64_t pos,
119 int whence)
120{
121 struct ProcessingContext *pc = cls;
122 struct SeekRequestMessage srm = {
124 };
125 struct UpdateMessage um;
126 uint64_t npos;
127 unsigned char reply;
128 uint16_t wval;
129
130 switch (whence)
131 {
132 case SEEK_CUR:
133 if ( (pos < 0) && (pc->read_position < -pos) )
134 {
135 LOG ("Invalid seek operation\n");
136 return -1;
137 }
138 if ((pos > 0) && ((pc->read_position + pos < pc->read_position) ||
139 (pc->read_position + pos > pc->file_size)))
140 {
141 LOG ("Invalid seek operation\n");
142 return -1;
143 }
144 npos = (uint64_t) (pc->read_position + pos);
145 wval = 0;
146 break;
147 case SEEK_END:
148 if (pos > 0)
149 {
150 LOG ("Invalid seek operation\n");
151 return -1;
152 }
153 if (UINT64_MAX == pc->file_size)
154 {
155 wval = 2;
156 npos = (uint64_t) -pos;
157 break;
158 }
159 pos = (int64_t) (pc->file_size + pos);
160 /* fall-through! */
161 case SEEK_SET:
162 if ( (pos < 0) || (pc->file_size < pos) )
163 {
164 LOG ("Invalid seek operation\n");
165 return -1;
166 }
167 npos = (uint64_t) pos;
168 wval = 0;
169 break;
170 default:
171 LOG ("Invalid seek operation\n");
172 return -1;
173 }
174 if ( (pc->shm_off <= npos) &&
175 (pc->shm_off + pc->shm_ready_bytes > npos) &&
176 (0 == wval) )
177 {
178 pc->read_position = npos;
179 return (int64_t) npos;
180 }
181 /* need to seek */
182 srm.opcode = MESSAGE_SEEK;
183 srm.reserved = 0;
184 srm.whence = wval;
186 if (0 == wval)
187 {
188 if (srm.requested_bytes > pc->file_size - npos)
189 srm.requested_bytes = pc->file_size - npos;
190 }
191 else
192 {
193 srm.requested_bytes = npos;
194 }
195 srm.file_offset = npos;
196 if (-1 == EXTRACTOR_write_all_ (pc->out, &srm, sizeof (srm)))
197 {
198 LOG ("Failed to send MESSAGE_SEEK\n");
199 return -1;
200 }
201 if (-1 ==
203 &reply, sizeof (reply)))
204 {
205 LOG ("Plugin `%s' failed to read response to MESSAGE_SEEK\n",
206 pc->plugin->short_libname);
207 return -1;
208 }
209 if (MESSAGE_UPDATED_SHM != reply)
210 {
211 LOG ("Unexpected reply %d to seek\n", reply);
212 return -1; /* was likely a MESSAGE_DISCARD_STATE */
213 }
214 if (-1 == EXTRACTOR_read_all_ (pc->in, &um.reserved, sizeof (um) - 1))
215 {
216 LOG ("Failed to read MESSAGE_UPDATED_SHM\n");
217 return -1;
218 }
219 pc->shm_off = um.shm_off;
221 pc->file_size = um.file_size;
222 if (2 == wval)
223 {
224 /* convert offset to be absolute from beginning of the file */
225 npos = pc->file_size - npos;
226 }
227 if ( (pc->shm_off <= npos) &&
228 ((pc->shm_off + pc->shm_ready_bytes > npos) ||
229 (pc->file_size == pc->shm_off)) )
230 {
231 pc->read_position = npos;
232 return (int64_t) npos;
233 }
234 /* oops, serious misunderstanding, we asked to seek
235 and then were notified about a different position!? */
236 LOG (
237 "Plugin `%s' got invalid MESSAGE_UPDATED_SHM in response to my %d-seek (%llu not in %llu-%llu)\n",
239 (int) wval,
240 (unsigned long long) npos,
241 (unsigned long long) pc->shm_off,
242 (unsigned long long) pc->shm_off + pc->shm_ready_bytes);
243 return -1;
244}
245
246
247/**
248 * Fills @a data with a pointer to the data buffer.
249 *
250 * @param cls closure, the 'struct ProcessingContext'
251 * @param data location to store data pointer
252 * @param count number of bytes to read
253 * @return number of bytes (<= count) available in @a data, -1 on error
254 */
255static ssize_t
257 void **data,
258 size_t count)
259{
260 struct ProcessingContext *pc = cls;
261 unsigned char *dp;
262
263 *data = NULL;
264 if ( (count + pc->read_position > pc->file_size) ||
265 (count + pc->read_position < pc->read_position) )
266 count = pc->file_size - pc->read_position;
267 if ( ( ( (pc->read_position >= pc->shm_off + pc->shm_ready_bytes) &&
268 (pc->read_position < pc->file_size)) ||
269 (pc->read_position < pc->shm_off) ) &&
270 (-1 == plugin_env_seek (pc, pc->read_position, SEEK_SET) ) )
271 {
272 LOG ("Failed to seek to satisfy read\n");
273 return -1;
274 }
275 if (pc->read_position + count > pc->shm_off + pc->shm_ready_bytes)
276 count = pc->shm_off + pc->shm_ready_bytes - pc->read_position;
277 dp = pc->shm;
278 *data = &dp[pc->read_position - pc->shm_off];
279 pc->read_position += count;
280 return count;
281}
282
283
284/**
285 * Provide the overall file size to plugins.
286 *
287 * @param cls the 'struct ProcessingContext'
288 * @return overall file size of the current file
289 */
290static uint64_t
292{
293 struct ProcessingContext *pc = cls;
294
295 return pc->file_size;
296}
297
298
299/**
300 * Function called by a plugin in a child process. Transmits
301 * the meta data back to the parent process.
302 *
303 * @param cls closure, "struct ProcessingContext" with the FD for transmission
304 * @param plugin_name name of the plugin that produced this value;
305 * special values can be used (i.e. '<zlib>' for zlib being
306 * used in the main libextractor library and yielding
307 * meta data).
308 * @param type libextractor-type describing the meta data
309 * @param format basic format information about data
310 * @param data_mime_type mime-type of data (not of the original file);
311 * can be NULL (if mime-type is not known)
312 * @param data actual meta-data found
313 * @param data_len number of bytes in data
314 * @return 0 to continue extracting, 1 to abort (transmission error)
315 */
316static int
318 const char *plugin_name,
320 enum EXTRACTOR_MetaFormat format,
321 const char *data_mime_type,
322 const char *data,
323 size_t data_len)
324{
325 struct ProcessingContext *pc = cls;
326 struct MetaMessage mm;
327 size_t mime_len;
328 unsigned char reply;
329
330 if (data_len > MAX_META_DATA)
331 return 0; /* skip, too large */
332 if (NULL == data_mime_type)
333 mime_len = 0;
334 else
335 mime_len = strlen (data_mime_type) + 1;
336 if (mime_len > UINT16_MAX)
337 mime_len = UINT16_MAX;
338 mm.opcode = MESSAGE_META;
339 mm.reserved = 0;
340 mm.meta_type = type;
341 mm.meta_format = (uint16_t) format;
342 mm.mime_length = (uint16_t) mime_len;
343 mm.value_size = (uint32_t) data_len;
344 if ( (sizeof (mm) !=
346 &mm, sizeof (mm))) ||
347 (mime_len !=
349 data_mime_type, mime_len)) ||
350 (data_len !=
352 data, data_len)) )
353 {
354 LOG ("Failed to send meta message\n");
355 return 1;
356 }
357 if (-1 ==
359 &reply, sizeof (reply)))
360 {
361 LOG ("Failed to read response to meta message\n");
362 return 1;
363 }
364 if (MESSAGE_DISCARD_STATE == reply)
365 return 1;
366 if (MESSAGE_CONTINUE_EXTRACTING != reply)
367 {
368 LOG ("Received unexpected reply to meta data: %d\n", reply);
369 return 1;
370 }
371 return 0;
372}
373
374
375/**
376 * Handle an init message. The opcode itself has already been read.
377 *
378 * @param pc processing context
379 * @return 0 on success, -1 on error
380 */
381static int
383{
384 struct InitMessage init = {
386 };
387
388 if (NULL != pc->shm)
389 {
390 LOG ("Cannot handle 'init' message, have already been initialized\n");
391 return -1;
392 }
393 if (sizeof (struct InitMessage) - 1
394 != EXTRACTOR_read_all_ (pc->in,
395 &init.reserved,
396 sizeof (struct InitMessage) - 1))
397 {
398 LOG ("Failed to read 'init' message\n");
399 return -1;
400 }
401 if (init.shm_name_length > MAX_SHM_NAME)
402 {
403 LOG ("Invalid 'init' message\n");
404 return -1;
405 }
406 {
407 char shm_name[init.shm_name_length + 1];
408
409 if (init.shm_name_length
410 != EXTRACTOR_read_all_ (pc->in,
411 shm_name,
412 init.shm_name_length))
413 {
414 LOG ("Failed to read 'init' message\n");
415 return -1;
416 }
417 shm_name[init.shm_name_length] = '\0';
418
419 pc->shm_map_size = init.shm_map_size;
420#if WINDOWS
421 /* FIXME: storing pointer in an int */
422 pc->shm_id = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
423 if (NULL == pc->shm_id)
424 return -1;
425 pc->shm = MapViewOfFile (pc->shm_id, FILE_MAP_READ, 0, 0, 0);
426 if (NULL == pc->shm)
427 {
428 CloseHandle (pc->shm_id);
429 return -1;
430 }
431#else
432 pc->shm_id = shm_open (shm_name, O_RDONLY, 0);
433 if (-1 == pc->shm_id)
434 {
435 LOG_STRERROR_FILE ("open", shm_name);
436 return -1;
437 }
438 pc->shm = mmap (NULL,
439 pc->shm_map_size,
440 PROT_READ,
441 MAP_SHARED,
442 pc->shm_id, 0);
443 if ( ((void*) -1) == pc->shm)
444 {
445 LOG_STRERROR_FILE ("mmap", shm_name);
446 return -1;
447 }
448#endif
449 }
450 return 0;
451}
452
453
454/**
455 * Handle a start message. The opcode itself has already been read.
456 *
457 * @param pc processing context
458 * @return 0 on success, -1 on error
459 */
460static int
462{
463 struct StartMessage start = {
465 };
466 struct EXTRACTOR_ExtractContext ec;
467 char done;
468
469 if (sizeof (struct StartMessage) - 1
470 != EXTRACTOR_read_all_ (pc->in,
471 &start.reserved,
472 sizeof (struct StartMessage) - 1))
473 {
474 LOG ("Failed to read 'start' message\n");
475 return -1;
476 }
478 pc->file_size = start.file_size;
479 pc->read_position = 0;
480 pc->shm_off = 0;
481 ec.cls = pc;
482 ec.config = pc->plugin->plugin_options;
483 ec.read = &plugin_env_read;
484 ec.seek = &plugin_env_seek;
487 pc->plugin->extract_method (&ec);
488 done = MESSAGE_DONE;
489 if (-1 == EXTRACTOR_write_all_ (pc->out, &done, sizeof (done)))
490 {
491 LOG ("Failed to write 'done' message\n");
492 return -1;
493 }
494 if ( (NULL != pc->plugin->specials) &&
495 (NULL != strstr (pc->plugin->specials, "force-kill")) )
496 {
497 /* we're required to die after each file since this
498 plugin only supports a single file at a time */
499#if ! WINDOWS
500 fsync (pc->out);
501#else
502 _commit (pc->out);
503#endif
504 _exit (0);
505 }
506 return 0;
507}
508
509
510/**
511 * Main loop function for plugins. Reads a message from the plugin
512 * input pipe and acts on it.
513 *
514 * @param pc processing context
515 */
516static void
518{
519 while (1)
520 {
521 unsigned char code;
522
523 if (1 != EXTRACTOR_read_all_ (pc->in, &code, 1))
524 {
525 LOG ("Failed to read next request\n");
526 break;
527 }
528 switch (code)
529 {
531 if (0 != handle_init_message (pc))
532 {
533 LOG ("Failure to handle INIT\n");
534 return;
535 }
536 break;
538 if (0 != handle_start_message (pc))
539 {
540 LOG ("Failure to handle START\n");
541 return;
542 }
543 break;
545 LOG ("Illegal message\n");
546 /* not allowed here, we're not waiting for SHM to move! */
547 return;
549 /* odd, we're already in the start state... */
550 continue;
551 default:
552 LOG ("Received invalid message %d\n", (int) code);
553 /* error, unexpected message */
554 return;
555 }
556 }
557}
558
559
/**
 * Open the platform's null device and make the result the given
 * file descriptor.  This is best effort: failures are logged and
 * the function simply returns.
 *
 * @param target_fd desired FD to point to the null device
 * @param flags open flags (O_RDONLY, O_WRONLY)
 */
static void
open_dev_null (int target_fd,
               int flags)
{
  /* Test WINDOWS by value, like the rest of this file does, so that
     a 'WINDOWS' macro defined as 0 still selects the POSIX path. */
#if ! WINDOWS
  static const char null_path[] = "/dev/null";
#else
  static const char null_path[] = "\\\\?\\NUL";
#endif
  int fd;

  fd = open (null_path, flags);
  if (-1 == fd)
  {
    /* report the path we actually tried to open */
    LOG_STRERROR_FILE ("open", null_path);
    return; /* good luck */
  }
  if (fd == target_fd)
    return; /* already done */
  if (-1 == dup2 (fd, target_fd))
  {
    LOG_STRERROR ("dup2");
    (void) close (fd);
    return; /* good luck */
  }
  /* close original result from 'open' */
  if (0 != close (fd))
    LOG_STRERROR ("close");
}
595
596
597/**
598 * 'main' function of the child process. Loads the plugin,
599 * sets up its in and out pipes, then runs the request serving function.
600 *
601 * @param plugin extractor plugin to use
602 * @param in stream to read from
603 * @param out stream to write to
604 */
605void
607 int in, int out)
608{
609 struct ProcessingContext pc;
610
612 {
613#if DEBUG
614 fprintf (stderr, "Plugin `%s' failed to load!\n",
616#endif
617 return;
618 }
619 if ( (NULL != plugin->specials) &&
620 (NULL != strstr (plugin->specials, "close-stderr")))
621 {
622 if (0 != close (2))
623 LOG_STRERROR ("close");
624 open_dev_null (2, O_WRONLY);
625 }
626 if ( (NULL != plugin->specials) &&
627 (NULL != strstr (plugin->specials, "close-stdout")))
628 {
629 if (0 != close (1))
630 LOG_STRERROR ("close");
631 open_dev_null (1, O_WRONLY);
632 }
633 pc.plugin = plugin;
634 pc.in = in;
635 pc.out = out;
637 pc.shm = NULL;
638 pc.shm_map_size = 0;
639 process_requests (&pc);
640 LOG ("IPC error; plugin `%s' terminates!\n",
642#if WINDOWS
643 if (NULL != pc.shm)
644 UnmapViewOfFile (pc.shm);
645 if (NULL != pc.shm_id)
646 CloseHandle (pc.shm_id);
647#else
648 if ( (NULL != pc.shm) &&
649 (((void*) 1) != pc.shm) )
650 munmap (pc.shm, pc.shm_map_size);
651 if (-1 != pc.shm_id)
652 {
653 if (0 != close (pc.shm_id))
654 LOG_STRERROR ("close");
655 }
656#endif
657}
658
659
660#if WINDOWS
661/**
662 * Reads plugin data from the LE server process.
663 * Also initializes allocation granularity (duh...).
664 *
665 * @param fd the pipe to read from
666 * @return newly allocated plugin context
667 */
668static struct EXTRACTOR_PluginList *
669read_plugin_data (int fd)
670{
671 struct EXTRACTOR_PluginList *ret;
672 SYSTEM_INFO si;
673 size_t i;
674
675 // FIXME: check for errors from 'EXTRACTOR_read_all_'!
676 if (NULL == (ret = malloc (sizeof (struct EXTRACTOR_PluginList))))
677 {
678 LOG_STRERROR ("malloc");
679 return NULL;
680 }
681 memset (ret, 0, sizeof (struct EXTRACTOR_PluginList));
682 /*GetSystemInfo (&si);
683 ret->allocation_granularity = si.dwAllocationGranularity;*/
684 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t));
685 if (NULL == (ret->libname = malloc (i)))
686 {
687 free (ret);
688 return NULL;
689 }
690 EXTRACTOR_read_all_ (fd, ret->libname, i);
691 ret->libname[i - 1] = '\0';
692 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t));
693 if (NULL == (ret->short_libname = malloc (i)))
694 {
695 free (ret->libname);
696 free (ret);
697 return NULL;
698 }
700 ret->short_libname[i - 1] = '\0';
701 EXTRACTOR_read_all_ (fd, &i, sizeof (size_t));
702 if (0 == i)
703 {
704 ret->plugin_options = NULL;
705 return ret;
706 }
707 if (NULL == (ret->plugin_options = malloc (i)))
708 {
709 free (ret->short_libname);
710 free (ret->libname);
711 free (ret);
712 return NULL;
713 }
715 ret->plugin_options[i - 1] = '\0';
716 return ret;
717}
718
719
720/**
721 * FIXME: document.
722 */
723void CALLBACK
724RundllEntryPoint (HWND hwnd,
725 HINSTANCE hinst,
726 LPSTR lpszCmdLine,
727 int nCmdShow)
728{
729 struct EXTRACTOR_PluginList *plugin;
730 intptr_t in_h;
731 intptr_t out_h;
732 int in;
733 int out;
734
735 sscanf (lpszCmdLine, "%lu %lu", &in_h, &out_h);
736 in = _open_osfhandle (in_h, _O_RDONLY);
737 out = _open_osfhandle (out_h, 0);
738 setmode (in, _O_BINARY);
739 setmode (out, _O_BINARY);
740 if (NULL == (plugin = read_plugin_data (in)))
741 {
742 close (in);
743 close (out);
744 return;
745 }
746 EXTRACTOR_plugin_main_ (plugin, in, out);
747 close (in);
748 close (out);
749 /* libgobject may crash us hard if we LoadLibrary() it directly or
750 * indirectly, and then exit normally (causing FreeLibrary() to be
751 * called by the OS) or call FreeLibrary() on it directly or
752 * indirectly.
753 * By terminating here we alleviate that problem.
754 */TerminateProcess (GetCurrentProcess (), 0);
755}
756
757
758/**
759 * FIXME: document.
760 */
761void CALLBACK
762RundllEntryPointA (HWND hwnd,
763 HINSTANCE hinst,
764 LPSTR lpszCmdLine,
765 int nCmdShow)
766{
767 return RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow);
768}
769
770
771#endif
EXTRACTOR_MetaFormat
Definition: extractor.h:92
ssize_t EXTRACTOR_write_all_(int fd, const void *buf, size_t size)
ssize_t EXTRACTOR_read_all_(int fd, void *buf, size_t size)
commonly used functions within the library
random access and possibly decompression of data from buffer in memory or file on disk
IPC with plugin (OS-independent API)
#define MAX_SHM_NAME
Definition: extractor_ipc.h:82
#define MESSAGE_META
#define MESSAGE_SEEK
#define MESSAGE_DONE
#define MESSAGE_DISCARD_STATE
#define MAX_META_DATA
Definition: extractor_ipc.h:77
#define MESSAGE_INIT_STATE
Definition: extractor_ipc.h:87
#define MESSAGE_UPDATED_SHM
#define MESSAGE_EXTRACT_START
#define MESSAGE_CONTINUE_EXTRACTING
logging API for GNU libextractor
#define LOG(...)
#define LOG_STRERROR(syscall)
#define LOG_STRERROR_FILE(syscall, filename)
void EXTRACTOR_plugin_main_(struct EXTRACTOR_PluginList *plugin, int in, int out)
static ssize_t plugin_env_read(void *cls, void **data, size_t count)
static void process_requests(struct ProcessingContext *pc)
static int handle_init_message(struct ProcessingContext *pc)
#define INVALID_SHM_ID
static void open_dev_null(int target_fd, int flags)
static int plugin_env_send_proc(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
static int64_t plugin_env_seek(void *cls, int64_t pos, int whence)
#define SHM_ID
static int handle_start_message(struct ProcessingContext *pc)
static uint64_t plugin_env_get_size(void *cls)
int EXTRACTOR_plugin_load_(struct EXTRACTOR_PluginList *plugin)
code to load plugins
#define NULL
Definition: getopt1.c:60
EXTRACTOR_MetaType
Definition: extractor.h:126
enum EXTRACTOR_MetaType type
platform specifics
int64_t(* seek)(void *cls, int64_t pos, int whence)
Definition: extractor.h:509
uint64_t(* get_size)(void *cls)
Definition: extractor.h:520
EXTRACTOR_MetaDataProcessor proc
Definition: extractor.h:525
ssize_t(* read)(void *cls, void **data, size_t size)
Definition: extractor.h:494
EXTRACTOR_extract_method extract_method
unsigned char reserved
uint32_t shm_map_size
unsigned char opcode
Definition: extractor_ipc.h:97
uint32_t shm_name_length
uint32_t value_size
uint16_t meta_type
uint16_t mime_length
uint16_t meta_format
unsigned char reserved
unsigned char opcode
struct EXTRACTOR_PluginList * plugin
uint32_t requested_bytes
unsigned char opcode
unsigned char reserved
unsigned char reserved
uint32_t shm_ready_bytes
unsigned char opcode
uint64_t file_size
uint32_t shm_ready_bytes
uint64_t shm_off
unsigned char reserved
uint64_t file_size