libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

extractor.c File Reference
#include "platform.h"
#include "extractor.h"
#include <dirent.h>
#include <sys/types.h>
#include <signal.h>
#include <ltdl.h>
#include "extractor_datasource.h"
#include "extractor_ipc.h"
#include "extractor_logging.h"
#include "extractor_plugpath.h"
#include "extractor_plugins.h"
Include dependency graph for extractor.c:

Go to the source code of this file.

Data Structures

struct  PluginReplyProcessor
 
struct  InProcessContext
 

Macros

#define DEFAULT_SHM_SIZE   (16 * 1024)
 

Functions

static void send_update_message (struct EXTRACTOR_PluginList *plugin, int64_t shm_off, size_t data_available, struct EXTRACTOR_Datasource *ds)
 
static void send_discard_message (struct EXTRACTOR_PluginList *plugin)
 
static void abort_all_channels (struct EXTRACTOR_PluginList *plugins)
 
static void process_plugin_reply (void *cls, struct EXTRACTOR_PluginList *plugin, enum EXTRACTOR_MetaType meta_type, enum EXTRACTOR_MetaFormat meta_format, const char *mime, const void *value, size_t value_len)
 
static ssize_t in_process_read (void *cls, void **data, size_t size)
 
static int64_t in_process_seek (void *cls, int64_t pos, int whence)
 
static uint64_t in_process_get_size (void *cls)
 
static int in_process_proc (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
 
static void do_extract (struct EXTRACTOR_PluginList *plugins, struct EXTRACTOR_SharedMemory *shm, struct EXTRACTOR_Datasource *ds, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
void EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, const char *filename, const void *data, size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
void EXTRACTOR_ltdl_init ()
 
void EXTRACTOR_ltdl_fini ()
 

Macro Definition Documentation

◆ DEFAULT_SHM_SIZE

#define DEFAULT_SHM_SIZE   (16 * 1024)

Size used for the shared memory segment.

Definition at line 37 of file extractor.c.

Function Documentation

◆ abort_all_channels()

static void abort_all_channels ( struct EXTRACTOR_PluginList *  plugins)
static

We had some serious trouble. Abort all channels.

Parameters
plugins: list of plugins with channels to abort

Definition at line 128 of file extractor.c.

References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_PluginList::next, and NULL.

Referenced by do_extract().

◆ do_extract()

static void do_extract ( struct EXTRACTOR_PluginList *  plugins,
struct EXTRACTOR_SharedMemory *  shm,
struct EXTRACTOR_Datasource *  ds,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)
static

Extract keywords using the given set of plugins.

Parameters
plugins: the list of plugins to use
shm: shared memory object used by the plugins (NULL if all plugins are in-process)
ds: data to process
proc: function to call for each meta data item found
proc_cls: cls argument to proc

Definition at line 364 of file extractor.c.

References abort_all_channels(), EXTRACTOR_PluginList::channel, EXTRACTOR_ExtractContext::cls, EXTRACTOR_ExtractContext::config, DEFAULT_SHM_SIZE, InProcessContext::ds, EXTRACTOR_PluginList::extract_method, EXTRACTOR_datasource_get_size_(), EXTRACTOR_datasource_seek_(), EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_recv_(), EXTRACTOR_IPC_channel_send_(), EXTRACTOR_IPC_shared_memory_set_(), EXTRACTOR_OPTION_IN_PROCESS, EXTRACTOR_plugin_load_(), PluginReplyProcessor::file_finished, StartMessage::file_size, InProcessContext::finished, EXTRACTOR_PluginList::flags, EXTRACTOR_ExtractContext::get_size, in_process_get_size(), in_process_proc(), in_process_read(), in_process_seek(), LOG, MESSAGE_EXTRACT_START, EXTRACTOR_PluginList::next, NULL, StartMessage::opcode, InProcessContext::plugin, EXTRACTOR_PluginList::plugin_options, EXTRACTOR_ExtractContext::proc, PluginReplyProcessor::proc, InProcessContext::proc, PluginReplyProcessor::proc_cls, InProcessContext::proc_cls, process_plugin_reply(), EXTRACTOR_ExtractContext::read, StartMessage::reserved, StartMessage::reserved2, EXTRACTOR_PluginList::round_finished, EXTRACTOR_ExtractContext::seek, EXTRACTOR_PluginList::seek_request, EXTRACTOR_PluginList::seek_whence, send_discard_message(), send_update_message(), EXTRACTOR_Channel::shm, and StartMessage::shm_ready_bytes.

Referenced by EXTRACTOR_extract().

◆ EXTRACTOR_extract()

void EXTRACTOR_extract ( struct EXTRACTOR_PluginList *  plugins,
const char *  filename,
const void *  data,
size_t  size,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)

Extract keywords from a file using the given set of plugins. If needed, opens the file and loads its data (via mmap). Then decompresses it if the data is compressed. Finally runs the plugins on the (now possibly decompressed) data.

Parameters
plugins: the list of plugins to use
filename: the name of the file, can be NULL if data is not NULL
data: data of the file in memory, can be NULL (in which case libextractor will open the file) if filename is not NULL
size: number of bytes in data, ignored if data is NULL
proc: function to call for each meta data item found
proc_cls: cls argument to proc

Definition at line 597 of file extractor.c.

References EXTRACTOR_PluginList::channel, DEFAULT_SHM_SIZE, do_extract(), EXTRACTOR_datasource_create_from_buffer_(), EXTRACTOR_datasource_create_from_file_(), EXTRACTOR_datasource_destroy_(), EXTRACTOR_IPC_channel_create_(), EXTRACTOR_IPC_shared_memory_change_rc_(), EXTRACTOR_IPC_shared_memory_create_(), EXTRACTOR_OPTION_IN_PROCESS, EXTRACTOR_PluginList::flags, LOG, EXTRACTOR_PluginList::next, NULL, EXTRACTOR_PluginList::round_finished, and EXTRACTOR_PluginList::shm.

Referenced by main().

◆ EXTRACTOR_ltdl_fini()

void EXTRACTOR_ltdl_fini ( )

Deinit.

Definition at line 696 of file extractor.c.

◆ EXTRACTOR_ltdl_init()

void EXTRACTOR_ltdl_init ( )

Initialize gettext and libltdl (and W32 if needed).

Definition at line 667 of file extractor.c.

References _, and bindtextdomain.

◆ in_process_get_size()

static uint64_t in_process_get_size ( void *  cls)
static

Determine the overall size of the file. Callback used for in-process plugins.

Parameters
cls: a struct InProcessContext
Returns
overall file size, UINT64_MAX on error (e.g. IPC failure)

Definition at line 300 of file extractor.c.

References InProcessContext::ds, and EXTRACTOR_datasource_get_size_().

Referenced by do_extract().

◆ in_process_proc()

static int in_process_proc ( void *  cls,
const char *  plugin_name,
enum EXTRACTOR_MetaType  type,
enum EXTRACTOR_MetaFormat  format,
const char *  data_mime_type,
const char *  data,
size_t  data_len 
)
static

Type of a function that libextractor calls for each meta data item found. Callback used for in-process plugins.

Parameters
cls: a 'struct InProcessContext'
plugin_name: name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data).
type: libextractor-type describing the meta data
format: basic format information about data
data_mime_type: mime-type of data (not of the original file); can be NULL (if mime-type is not known)
data: actual meta-data found
data_len: number of bytes in data
Returns
0 to continue extracting, 1 to abort

Definition at line 327 of file extractor.c.

References InProcessContext::finished, InProcessContext::proc, InProcessContext::proc_cls, and type.

Referenced by do_extract().

◆ in_process_read()

static ssize_t in_process_read ( void *  cls,
void **  data,
size_t  size 
)
static

Obtain a pointer to up to size bytes of data from the file to process. Callback used for in-process plugins.

Parameters
cls: a struct InProcessContext
data: pointer to set to the file data, set to NULL on error
size: maximum number of bytes requested
Returns
number of bytes now available in data (can be smaller than size), -1 on error

Definition at line 246 of file extractor.c.

References InProcessContext::buf, InProcessContext::ds, EXTRACTOR_datasource_read_(), and NULL.

Referenced by do_extract().

◆ in_process_seek()

static int64_t in_process_seek ( void *  cls,
int64_t  pos,
int  whence 
)
static

Seek in the file. Use 'SEEK_CUR' for whence and pos of 0 to obtain the current position in the file. Callback used for in-process plugins.

Parameters
cls: a 'struct InProcessContext'
pos: position to seek (see 'man lseek')
whence: how to seek (absolute to start, relative, absolute to end)
Returns
new absolute position, -1 on error (e.g. desired position does not exist)

Definition at line 280 of file extractor.c.

References InProcessContext::ds, and EXTRACTOR_datasource_seek_().

Referenced by do_extract().

◆ process_plugin_reply()

static void process_plugin_reply ( void *  cls,
struct EXTRACTOR_PluginList *  plugin,
enum EXTRACTOR_MetaType  meta_type,
enum EXTRACTOR_MetaFormat  meta_format,
const char *  mime,
const void *  value,
size_t  value_len 
)
static

Handler for a message from one of the plugins.

Parameters
cls: closure with our 'struct PluginReplyProcessor'
plugin: plugin of the channel sending the message
meta_type: type of the meta data
meta_format: format of the meta data
mime: mime string sent from the plugin
value: 'data' sent from the plugin
value_len: number of bytes in 'value'

Definition at line 154 of file extractor.c.

References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), PluginReplyProcessor::file_finished, LOG, MESSAGE_CONTINUE_EXTRACTING, NULL, PluginReplyProcessor::proc, PluginReplyProcessor::proc_cls, EXTRACTOR_PluginList::round_finished, send_discard_message(), and EXTRACTOR_PluginList::short_libname.

Referenced by do_extract().

◆ send_discard_message()

static void send_discard_message ( struct EXTRACTOR_PluginList *  plugin)
static

Send a 'discard state' message to the plugin and mark it as finished for this round.

Parameters
plugin: plugin to notify

Definition at line 105 of file extractor.c.

References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), LOG, MESSAGE_DISCARD_STATE, NULL, and EXTRACTOR_PluginList::round_finished.

Referenced by do_extract(), and process_plugin_reply().

◆ send_update_message()

static void send_update_message ( struct EXTRACTOR_PluginList *  plugin,
int64_t  shm_off,
size_t  data_available,
struct EXTRACTOR_Datasource *  ds 
)
static

Send an 'update' message to the plugin.

Parameters
plugin: plugin to notify
shm_off: new offset for the SHM
data_available: number of bytes available in shm
ds: datasource backend we are using

Definition at line 72 of file extractor.c.

References EXTRACTOR_PluginList::channel, EXTRACTOR_datasource_get_size_(), EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_channel_send_(), UpdateMessage::file_size, LOG, MESSAGE_UPDATED_SHM, NULL, UpdateMessage::opcode, UpdateMessage::reserved, UpdateMessage::reserved2, EXTRACTOR_PluginList::round_finished, UpdateMessage::shm_off, and UpdateMessage::shm_ready_bytes.

Referenced by do_extract().