libextractor
1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
![]() ![]() |
#include "platform.h"
#include "extractor.h"
#include <dirent.h>
#include <sys/types.h>
#include <signal.h>
#include <ltdl.h>
#include "extractor_datasource.h"
#include "extractor_ipc.h"
#include "extractor_logging.h"
#include "extractor_plugpath.h"
#include "extractor_plugins.h"
Go to the source code of this file.
Data Structures | |
struct | PluginReplyProcessor |
struct | InProcessContext |
Macros | |
#define | DEFAULT_SHM_SIZE (16 * 1024) |
Functions | |
static void | send_update_message (struct EXTRACTOR_PluginList *plugin, int64_t shm_off, size_t data_available, struct EXTRACTOR_Datasource *ds) |
static void | send_discard_message (struct EXTRACTOR_PluginList *plugin) |
static void | abort_all_channels (struct EXTRACTOR_PluginList *plugins) |
static void | process_plugin_reply (void *cls, struct EXTRACTOR_PluginList *plugin, enum EXTRACTOR_MetaType meta_type, enum EXTRACTOR_MetaFormat meta_format, const char *mime, const void *value, size_t value_len) |
static ssize_t | in_process_read (void *cls, void **data, size_t size) |
static int64_t | in_process_seek (void *cls, int64_t pos, int whence) |
static uint64_t | in_process_get_size (void *cls) |
static int | in_process_proc (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len) |
static void | do_extract (struct EXTRACTOR_PluginList *plugins, struct EXTRACTOR_SharedMemory *shm, struct EXTRACTOR_Datasource *ds, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
void | EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, const char *filename, const void *data, size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
void | EXTRACTOR_ltdl_init () |
void | EXTRACTOR_ltdl_fini () |
#define DEFAULT_SHM_SIZE (16 * 1024) |
Size used for the shared memory segment.
Definition at line 37 of file extractor.c.
|
static |
We had some serious trouble. Abort all channels.
plugins | list of plugins with channels to abort |
Definition at line 128 of file extractor.c.
References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_PluginList::next, and NULL.
Referenced by do_extract().
|
static |
Extract keywords using the given set of plugins.
plugins | the list of plugins to use |
shm | shared memory object used by the plugins (NULL if all plugins are in-process) |
ds | data to process |
proc | function to call for each meta data item found |
proc_cls | cls argument to proc |
Definition at line 364 of file extractor.c.
References abort_all_channels(), EXTRACTOR_PluginList::channel, EXTRACTOR_ExtractContext::cls, EXTRACTOR_ExtractContext::config, DEFAULT_SHM_SIZE, InProcessContext::ds, EXTRACTOR_PluginList::extract_method, EXTRACTOR_datasource_get_size_(), EXTRACTOR_datasource_seek_(), EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_recv_(), EXTRACTOR_IPC_channel_send_(), EXTRACTOR_IPC_shared_memory_set_(), EXTRACTOR_OPTION_IN_PROCESS, EXTRACTOR_plugin_load_(), PluginReplyProcessor::file_finished, StartMessage::file_size, InProcessContext::finished, EXTRACTOR_PluginList::flags, EXTRACTOR_ExtractContext::get_size, in_process_get_size(), in_process_proc(), in_process_read(), in_process_seek(), LOG, MESSAGE_EXTRACT_START, EXTRACTOR_PluginList::next, NULL, StartMessage::opcode, InProcessContext::plugin, EXTRACTOR_PluginList::plugin_options, EXTRACTOR_ExtractContext::proc, PluginReplyProcessor::proc, InProcessContext::proc, PluginReplyProcessor::proc_cls, InProcessContext::proc_cls, process_plugin_reply(), EXTRACTOR_ExtractContext::read, StartMessage::reserved, StartMessage::reserved2, EXTRACTOR_PluginList::round_finished, EXTRACTOR_ExtractContext::seek, EXTRACTOR_PluginList::seek_request, EXTRACTOR_PluginList::seek_whence, send_discard_message(), send_update_message(), EXTRACTOR_Channel::shm, and StartMessage::shm_ready_bytes.
Referenced by EXTRACTOR_extract().
void EXTRACTOR_extract | ( | struct EXTRACTOR_PluginList * | plugins, |
const char * | filename, | ||
const void * | data, | ||
size_t | size, | ||
EXTRACTOR_MetaDataProcessor | proc, | ||
void * | proc_cls | ||
) |
Extract keywords from a file using the given set of plugins. If needed, opens the file and loads its data (via mmap). Then decompresses it if the data is compressed. Finally runs the plugins on the (now possibly decompressed) data.
plugins | the list of plugins to use |
filename | the name of the file, can be NULL if data is not NULL |
data | data of the file in memory; can be NULL if filename is not NULL (in which case libextractor will open the file) |
size | number of bytes in data, ignored if data is NULL |
proc | function to call for each meta data item found |
proc_cls | cls argument to proc |
Definition at line 597 of file extractor.c.
References EXTRACTOR_PluginList::channel, DEFAULT_SHM_SIZE, do_extract(), EXTRACTOR_datasource_create_from_buffer_(), EXTRACTOR_datasource_create_from_file_(), EXTRACTOR_datasource_destroy_(), EXTRACTOR_IPC_channel_create_(), EXTRACTOR_IPC_shared_memory_change_rc_(), EXTRACTOR_IPC_shared_memory_create_(), EXTRACTOR_OPTION_IN_PROCESS, EXTRACTOR_PluginList::flags, LOG, EXTRACTOR_PluginList::next, NULL, EXTRACTOR_PluginList::round_finished, and EXTRACTOR_PluginList::shm.
Referenced by main().
void EXTRACTOR_ltdl_fini | ( | ) |
Deinit.
Definition at line 696 of file extractor.c.
void EXTRACTOR_ltdl_init | ( | ) |
Initialize gettext and libltdl (and W32 if needed).
Definition at line 667 of file extractor.c.
References _, and bindtextdomain.
|
static |
Determine the overall size of the file. Callback used for in-process plugins.
cls | a struct InProcessContext |
Definition at line 300 of file extractor.c.
References InProcessContext::ds, and EXTRACTOR_datasource_get_size_().
Referenced by do_extract().
|
static |
Type of a function that libextractor calls for each meta data item found. Callback used for in-process plugins.
cls | a 'struct InProcessContext' |
plugin_name | name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data). |
type | libextractor-type describing the meta data |
format | basic format information about data |
data_mime_type | mime-type of data (not of the original file); can be NULL (if mime-type is not known) |
data | actual meta-data found |
data_len | number of bytes in data |
Definition at line 327 of file extractor.c.
References InProcessContext::finished, InProcessContext::proc, InProcessContext::proc_cls, and type.
Referenced by do_extract().
|
static |
Obtain a pointer to up to size bytes of data from the file to process. Callback used for in-process plugins.
cls | a struct InProcessContext |
data | pointer to set to the file data, set to NULL on error |
size | maximum number of bytes requested |
Definition at line 246 of file extractor.c.
References InProcessContext::buf, InProcessContext::ds, EXTRACTOR_datasource_read_(), and NULL.
Referenced by do_extract().
|
static |
Seek in the file. Use 'SEEK_CUR' for whence and pos of 0 to obtain the current position in the file. Callback used for in-process plugins.
cls | a 'struct InProcessContext' |
pos | position to seek (see 'man lseek') |
whence | how to seek (absolute to start, relative, absolute to end) |
Definition at line 280 of file extractor.c.
References InProcessContext::ds, and EXTRACTOR_datasource_seek_().
Referenced by do_extract().
|
static |
Handler for a message from one of the plugins.
cls | closure with our 'struct PluginReplyProcessor' |
plugin | plugin of the channel sending the message |
meta_type | type of the meta data |
meta_format | format of the meta data |
mime | mime string sent from the plugin |
value | 'data' sent from the plugin |
value_len | number of bytes in 'value' |
Definition at line 154 of file extractor.c.
References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), PluginReplyProcessor::file_finished, LOG, MESSAGE_CONTINUE_EXTRACTING, NULL, PluginReplyProcessor::proc, PluginReplyProcessor::proc_cls, EXTRACTOR_PluginList::round_finished, send_discard_message(), and EXTRACTOR_PluginList::short_libname.
Referenced by do_extract().
|
static |
Send a 'discard state' message to the plugin and mark it as finished for this round.
plugin | plugin to notify |
Definition at line 105 of file extractor.c.
References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), LOG, MESSAGE_DISCARD_STATE, NULL, and EXTRACTOR_PluginList::round_finished.
Referenced by do_extract(), and process_plugin_reply().
|
static |
Send an 'update' message to the plugin.
plugin | plugin to notify |
shm_off | new offset for the SHM |
data_available | number of bytes available in shm |
ds | datastore backend we are using |
Definition at line 72 of file extractor.c.
References EXTRACTOR_PluginList::channel, EXTRACTOR_datasource_get_size_(), EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), UpdateMessage::file_size, LOG, MESSAGE_UPDATED_SHM, NULL, UpdateMessage::opcode, UpdateMessage::reserved, UpdateMessage::reserved2, EXTRACTOR_PluginList::round_finished, UpdateMessage::shm_off, and UpdateMessage::shm_ready_bytes.
Referenced by do_extract().