libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

extractor_ipc.h
Go to the documentation of this file.
1 /*
2  This file is part of libextractor.
3  Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
4 
5  libextractor is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published
7  by the Free Software Foundation; either version 3, or (at your
8  option) any later version.
9 
10  libextractor is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with libextractor; see the file COPYING. If not, write to the
17  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19  */
20 /**
21  * @file main/extractor_ipc.h
22  * @brief IPC with plugin (OS-independent API)
23  * @author Christian Grothoff
24  *
25  * @details
26  * The IPC communication between plugins and the main library works
27  * as follows. Each message begins with a 1-character opcode which
28  * specifies the message type. The main library starts the plugins
29  * by forking the helper process and establishes two pipes for
30  * communication in both directions.
31  * First, the main library sends an 'INIT_STATE' message
32  * to the plugin. The start message specifies the name (and size)
33  * of a shared memory segment which will contain parts of the (uncompressed)
34  * data of the file that is being processed. The same shared memory
35  * segment is used throughout the lifetime of the plugin.
36  *
37  * Then, the following messages are exchanged for each file.
38  * First, an EXTRACT_START message is sent with the specific
39  * size of the file (or -1 if unknown) and the number of bytes
40  * ready in the shared memory segment. The plugin then answers
41  * with either:
42  * 1) MESSAGE_DONE to indicate that no further processing is
43  * required for this file; the IPC continues with the
44  * EXTRACT_START message for the next file afterwards;
45  * 2) MESSAGE_SEEK to indicate that the plugin would like to
46  * read data at a different offset; the main library can
47  * then either
48  * a) respond with a MESSAGE_DISCARD_STATE to
49  * tell the plugin to abort processing (the next message will
50  * then be another EXTRACT_START)
51  * b) respond with a MESSAGE_UPDATED_SHM which notifies the
52  * plugin that the shared memory segment was moved to a
53  * different location in the overall file; the target of the
54  * seek should now be within the new range (but does NOT have
55  * to be at the beginning of the seek)
56  * 3) MESSAGE_META to provide extracted meta data to the main
57  * library. The main library can then either:
58  * a) respond with a MESSAGE_DISCARD_STATE to
59  * tell the plugin to abort processing (the next message will
60  * then be another EXTRACT_START)
61  * b) respond with a MESSAGE_CONTINUE_EXTRACTING to
62  * tell the plugin to continue extracting meta data; in this
63  * case, the plugin is then expected to produce another
64  * MESSAGE_DONE, MESSAGE_SEEK or MESSAGE_META round of messages.
65  */
66 #ifndef EXTRACTOR_IPC_H
67 #define EXTRACTOR_IPC_H
68 
69 #include "extractor_datasource.h"
70 
71 
/**
 * How long do we allow an individual meta data object to be?
 * Used to guard against (broken) plugins causing us to use
 * excessive amounts of memory.
 *
 * The expansion is parenthesized so that the macro acts as a
 * single value when used inside a larger expression (for
 * example as the right-hand side of a division or modulo).
 */
#define MAX_META_DATA (32 * 1024 * 1024)
78 
79 /**
80  * Maximum length of a shared memory object name
81  */
82 #define MAX_SHM_NAME 255
83 
84 /**
85  * Sent from LE to a plugin to initialize it (opens shm).
86  */
87 #define MESSAGE_INIT_STATE 0x00
88 
89 /**
90  * IPC message sent to plugin to initialize SHM.
91  */
93 {
94  /**
95  * Set to #MESSAGE_INIT_STATE.
96  */
97  unsigned char opcode;
98 
99  /**
100  * Always zero.
101  */
102  unsigned char reserved;
103 
104  /**
105  * Always zero.
106  */
107  uint16_t reserved2;
108 
109  /**
110  * Length of the shared-memory name.
111  */
112  uint32_t shm_name_length;
113 
114  /**
115  * Maximum size of the shm map.
116  */
117  uint32_t shm_map_size;
118 
119  /* followed by name of the SHM */
120 };
121 
122 
123 /**
124  * Sent from LE to a plugin to tell it extracting
125  * can now start. The SHM will point to offset 0
126  * of the file.
127  */
128 #define MESSAGE_EXTRACT_START 0x01
129 
130 /**
131  * IPC message sent to plugin to start extracting.
132  */
134 {
135  /**
136  * Set to #MESSAGE_EXTRACT_START.
137  */
138  unsigned char opcode;
139 
140  /**
141  * Always zero.
142  */
143  unsigned char reserved;
144 
145  /**
146  * Always zero.
147  */
148  uint16_t reserved2;
149 
150  /**
151  * Number of bytes ready in SHM.
152  */
153  uint32_t shm_ready_bytes;
154 
155  /**
156  * Overall size of the file.
157  */
158  uint64_t file_size;
159 
160 };
161 
162 /**
163  * Sent from LE to a plugin to tell it that shm contents
164  * were updated.
165  */
166 #define MESSAGE_UPDATED_SHM 0x02
167 
168 /**
169  * IPC message sent to plugin to notify it about a change in the SHM.
170  */
172 {
173  /**
174  * Set to #MESSAGE_UPDATED_SHM.
175  */
176  unsigned char opcode;
177 
178  /**
179  * Always zero.
180  */
181  unsigned char reserved;
182 
183  /**
184  * Always zero.
185  */
186  uint16_t reserved2;
187 
188  /**
189  * Number of bytes ready in SHM.
190  */
191  uint32_t shm_ready_bytes;
192 
193  /**
194  * Offset of the shm in the overall file.
195  */
196  uint64_t shm_off;
197 
198  /**
199  * Overall size of the file.
200  */
201  uint64_t file_size;
202 
203 };
204 
205 /**
206  * Sent from plugin to LE to tell LE that plugin is done
207  * analyzing current file and will send no more data.
208  * No message format as this is only one byte.
209  */
210 #define MESSAGE_DONE 0x03
211 
212 /**
213  * Sent from plugin to LE to tell LE that plugin needs
214  * to read a different part of the source file.
215  */
216 #define MESSAGE_SEEK 0x04
217 
218 /**
219  * IPC message sent from plugin to LE to request a seek.
220  */
222 {
223  /**
224  * Set to #MESSAGE_SEEK.
225  */
226  unsigned char opcode;
227 
228  /**
229  * Always zero.
230  */
231  unsigned char reserved;
232 
233  /**
234  * 'whence' value for the seek operation;
235  * 0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END.
236  * Note that 'SEEK_CUR' is never used here.
237  */
238  uint16_t whence;
239 
240  /**
241  * Number of bytes requested for SHM.
242  */
243  uint32_t requested_bytes;
244 
245  /**
246  * Requested offset; a positive value from the end of the
247  * file is used if 'whence' is SEEK_END; a positive value
248  * from the start is used if 'whence' is SEEK_SET.
249  * 'SEEK_CUR' is never used.
250  */
251  uint64_t file_offset;
252 
253 };
254 
255 /**
256  * Sent from plugin to LE to tell LE about metadata discovered.
257  */
258 #define MESSAGE_META 0x05
259 
260 /**
261  * Plugin to parent: metadata discovered
262  */
264 {
265  /**
266  * Set to #MESSAGE_META.
267  */
268  unsigned char opcode;
269 
270  /**
271  * Always zero.
272  */
273  unsigned char reserved;
274 
275  /**
276  * An 'enum EXTRACTOR_MetaFormat' in 16 bits.
277  */
278  uint16_t meta_format;
279 
280  /**
281  * An 'enum EXTRACTOR_MetaType' in 16 bits.
282  */
283  uint16_t meta_type;
284 
285  /**
286  * Length of the mime type string.
287  */
288  uint16_t mime_length;
289 
290  /**
291  * Size of the value.
292  */
293  uint32_t value_size;
294 
295  /* followed by mime_length bytes of 0-terminated
296  mime-type (unless mime_length is 0) */
297 
298  /* followed by value_size bytes of value */
299 
300 };
301 
302 /**
303  * Sent from LE to plugin to make plugin discard its state
304  * (extraction aborted by application). Only one byte.
305  * Plugin should get ready for next 'StartMessage' after this.
306  * (sent in response to META data or SEEK requests).
307  */
308 #define MESSAGE_DISCARD_STATE 0x06
309 
310 /**
311  * Sent from LE to plugin to make plugin continue extraction.
312  * (sent in response to META data).
313  */
314 #define MESSAGE_CONTINUE_EXTRACTING 0x07
315 
316 
317 /**
318  * Definition of an IPC communication channel with
319  * some plugin.
320  */
321 struct EXTRACTOR_Channel;
322 
323 /**
324  * Definition of a shared memory area.
325  */
327 
328 
329 /**
330  * Create a shared memory area.
331  *
332  * @param size size of the shared area
333  * @return NULL on error
334  */
335 struct EXTRACTOR_SharedMemory *
337 
338 
339 /**
340  * Destroy shared memory area.
341  *
342  * @param shm memory area to destroy
343  * (no return value)
344  */
345 void
347 
348 
349 /**
350  * Change the reference counter for this shm instance.
351  *
352  * @param shm instance to update
353  * @param delta value to change RC by
354  * @return new RC
355  */
356 unsigned int
358  int delta);
359 
360 
361 /**
362  * Initialize shared memory area from data source.
363  *
364  * @param shm memory area to initialize
365  * @param ds data source to use for initialization
366  * @param off offset to use in data source
367  * @param size number of bytes to copy
368  * @return -1 on error, otherwise number of bytes copied
369  */
370 ssize_t
372  struct EXTRACTOR_Datasource *ds,
373  uint64_t off,
374  size_t size);
375 
376 
377 /**
378  * Query datasource for current position
379  *
380  * @param ds data source to query
381  * @return current position in the datasource or UINT_MAX on error
382  */
383 uint64_t
385 
386 
387 /**
388  * Create a channel to communicate with a process wrapping
389  * the plugin of the given name. Starts the process as well.
390  *
391  * @param plugin the plugin
392  * @param shm memory to share with the process
393  * @return NULL on error, otherwise IPC channel
394  */
395 struct EXTRACTOR_Channel *
397  struct EXTRACTOR_SharedMemory *shm);
398 
399 
400 /**
401  * Destroy communication channel with a plugin/process. Also
402  * destroys the process.
403  *
404  * @param channel channel to communicate with the plugin
405  */
406 void
408 
409 
410 /**
411  * Send data via the given IPC channel (blocking).
412  *
413  * @param channel channel to communicate with the plugin
414  * @param data data to send
415  * @param size number of bytes in data to send
416  * @return -1 on error, number of bytes sent on success
417  * (never does partial writes)
418  */
419 ssize_t
421  const void *data,
422  size_t size);
423 
424 
425 /**
426  * Handler for a message from one of the plugins.
427  *
428  * @param cls closure
429  * @param plugin plugin of the channel sending the message
430  * @param meta_type type of the meta data
431  * @param meta_format format of the meta data
432  * @param mime mime string sent from the plugin
433  * @param value 'data' sent from the plugin
434  * @param value_len number of bytes in 'value'
435  */
436 typedef void (*EXTRACTOR_ChannelMessageProcessor) (void *cls,
437  struct EXTRACTOR_PluginList *
438  plugin,
439  enum EXTRACTOR_MetaType
440  meta_type,
442  meta_format,
443  const char *mime,
444  const void *value,
445  size_t value_len);
446 
447 
448 /**
449  * Process a reply from channel (seek request, metadata and done message)
450  *
451  * @param plugin plugin this communication is about
452  * @param data buffer with data from IPC channel
453  * @param size number of bytes in buffer
454  * @param proc metadata callback
455  * @param proc_cls callback cls
456  * @return number of bytes processed, -1 on error
457  */
458 ssize_t
460  const void *data,
461  size_t size,
463  void *proc_cls);
464 
465 
466 /**
467  * Receive data from any of the given IPC channels (blocking).
468  * Wait for one of the plugins to reply.
469  *
470  * @param channels array of channels, channels that break may be set to NULL
471  * @param num_channels length of the 'channels' array
472  * @param proc function to call to process messages (may be called
473  * more than once)
474  * @param proc_cls closure for 'proc'
475  * @return -1 on error (i.e. no response in 10s), 1 on success
476  */
477 int
479  unsigned int num_channels,
481  void *proc_cls);
482 
483 
484 #endif
EXTRACTOR_MetaFormat
Definition: extractor.h:92
random access and possibly decompression of data from buffer in memory or file on disk
struct EXTRACTOR_SharedMemory * EXTRACTOR_IPC_shared_memory_create_(size_t size)
struct EXTRACTOR_Channel * EXTRACTOR_IPC_channel_create_(struct EXTRACTOR_PluginList *plugin, struct EXTRACTOR_SharedMemory *shm)
unsigned int EXTRACTOR_IPC_shared_memory_change_rc_(struct EXTRACTOR_SharedMemory *shm, int delta)
ssize_t EXTRACTOR_IPC_channel_send_(struct EXTRACTOR_Channel *channel, const void *data, size_t size)
void EXTRACTOR_IPC_shared_memory_destroy_(struct EXTRACTOR_SharedMemory *shm)
ssize_t EXTRACTOR_IPC_shared_memory_set_(struct EXTRACTOR_SharedMemory *shm, struct EXTRACTOR_Datasource *ds, uint64_t off, size_t size)
void(* EXTRACTOR_ChannelMessageProcessor)(void *cls, struct EXTRACTOR_PluginList *plugin, enum EXTRACTOR_MetaType meta_type, enum EXTRACTOR_MetaFormat meta_format, const char *mime, const void *value, size_t value_len)
ssize_t EXTRACTOR_IPC_process_reply_(struct EXTRACTOR_PluginList *plugin, const void *data, size_t size, EXTRACTOR_ChannelMessageProcessor proc, void *proc_cls)
Definition: extractor_ipc.c:42
int EXTRACTOR_IPC_channel_recv_(struct EXTRACTOR_Channel **channels, unsigned int num_channels, EXTRACTOR_ChannelMessageProcessor proc, void *proc_cls)
void EXTRACTOR_IPC_channel_destroy_(struct EXTRACTOR_Channel *channel)
uint64_t EXTRACTOR_datasource_get_pos_(struct EXTRACTOR_Datasource *ds)
EXTRACTOR_MetaType
Definition: extractor.h:126
struct EXTRACTOR_SharedMemory * shm
struct EXTRACTOR_PluginList * plugin
uint16_t reserved2
unsigned char reserved
uint32_t shm_map_size
unsigned char opcode
Definition: extractor_ipc.h:97
uint32_t shm_name_length
uint32_t value_size
uint16_t meta_type
uint16_t mime_length
uint16_t meta_format
unsigned char reserved
unsigned char opcode
uint32_t requested_bytes
unsigned char opcode
unsigned char reserved
uint16_t reserved2
unsigned char reserved
uint32_t shm_ready_bytes
unsigned char opcode
uint64_t file_size
unsigned char opcode
uint32_t shm_ready_bytes
uint64_t shm_off
unsigned char reserved
uint16_t reserved2
uint64_t file_size