libextractor
1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
![]() ![]() |
#include <stdint.h>
#include <stdio.h>
Go to the source code of this file.
Data Structures | |
struct | EXTRACTOR_ExtractContext |
Macros | |
#define | EXTRACTOR_VERSION 0x010B0000 |
#define | _EXTRACTOR_EXTERN extern |
Typedefs | |
typedef int(* | EXTRACTOR_MetaDataProcessor) (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len) |
typedef void(* | EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec) |
#define _EXTRACTOR_EXTERN extern |
Definition at line 49 of file extractor.h.
#define EXTRACTOR_VERSION 0x010B0000 |
Version number of libextractor, encoded in hexadecimal. Examples: 0.2.6-1 => 0x00020601; 4.5.2-0 => 0x04050200.
Definition at line 38 of file extractor.h.
typedef void(* EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec) |
Signature of the extract method that each plugin must provide.
ec | extraction context provided to the plugin |
Definition at line 536 of file extractor.h.
typedef int(* EXTRACTOR_MetaDataProcessor) (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len) |
Type of a function that libextractor calls for each meta data item found.
cls | closure (user-defined) |
plugin_name | name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data). |
type | libextractor-type describing the meta data |
format | basic format information about data |
data_mime_type | mime-type of data (not of the original file); can be NULL (if mime-type is not known) |
data | actual meta-data found |
data_len | number of bytes in data |
Definition at line 459 of file extractor.h.
enum EXTRACTOR_MetaFormat |
Format in which the extracted meta data is presented.
Definition at line 91 of file extractor.h.
enum EXTRACTOR_Options |
Options for how plugin execution should be done.
Definition at line 56 of file extractor.h.
void EXTRACTOR_extract | ( | struct EXTRACTOR_PluginList * | plugins, |
const char * | filename, | ||
const void * | data, | ||
size_t | size, | ||
EXTRACTOR_MetaDataProcessor | proc, | ||
void * | proc_cls | ||
) |
Extract keywords from a file using the given set of plugins.
plugins | the list of plugins to use |
filename | the name of the file, can be NULL if data is not NULL |
data | data of the file in memory, can be NULL (in which case libextractor will open file) if filename is not NULL |
size | number of bytes in data, ignored if data is NULL |
proc | function to call for each meta data item found |
proc_cls | cls argument to proc |
Extract keywords from a file using the given set of plugins. If needed, opens the file and loads its data (via mmap). Then decompresses it if the data is compressed. Finally runs the plugins on the (now possibly decompressed) data.
plugins | the list of plugins to use |
filename | the name of the file, can be NULL if data is not NULL |
data | data of the file in memory, can be NULL (in which case libextractor will open file) if filename is not NULL |
size | number of bytes in data, ignored if data is NULL |
proc | function to call for each meta data item found |
proc_cls | cls argument to proc |
Definition at line 597 of file extractor.c.
References EXTRACTOR_PluginList::channel, DEFAULT_SHM_SIZE, do_extract(), EXTRACTOR_datasource_create_from_buffer_(), EXTRACTOR_datasource_create_from_file_(), EXTRACTOR_datasource_destroy_(), EXTRACTOR_IPC_channel_create_(), EXTRACTOR_IPC_shared_memory_change_rc_(), EXTRACTOR_IPC_shared_memory_create_(), EXTRACTOR_OPTION_IN_PROCESS, EXTRACTOR_PluginList::flags, LOG, EXTRACTOR_PluginList::next, NULL, EXTRACTOR_PluginList::round_finished, and EXTRACTOR_PluginList::shm.
Referenced by main().
int EXTRACTOR_meta_data_print | ( | void * | handle, |
const char * | plugin_name, | ||
enum EXTRACTOR_MetaType | type, | ||
enum EXTRACTOR_MetaFormat | format, | ||
const char * | data_mime_type, | ||
const char * | data, | ||
size_t | data_len | ||
) |
Simple EXTRACTOR_MetaDataProcessor implementation that prints the extracted meta data to the given file. Only prints those keywords that are in UTF-8 format.
handle | the file to write to (stdout, stderr), must NOT be NULL, must be of type FILE *. |
plugin_name | name of the plugin that produced this value |
type | libextractor-type describing the meta data |
format | basic format information about data |
data_mime_type | mime-type of data (not of the original file); can be NULL (if mime-type is not known) |
data | actual meta-data found |
data_len | number of bytes in data |
Simple EXTRACTOR_MetaDataProcessor implementation that prints the extracted meta data to the given file. Only prints those keywords that are in UTF-8 format.
handle | the file to write to (stdout, stderr), must NOT be NULL, must be of type "FILE *". |
plugin_name | name of the plugin that produced this value |
type | libextractor-type describing the meta data |
format | basic format information about data |
data_mime_type | mime-type of data (not of the original file); can be NULL (if mime-type is not known) |
data | actual meta-data found |
data_len | number of bytes in data |
Definition at line 49 of file extractor_print.c.
References dgettext, EXTRACTOR_METAFORMAT_UTF8, EXTRACTOR_metatype_to_string(), gettext_noop, iconv_helper(), LOG_STRERROR, NULL, and type.
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add | ( | struct EXTRACTOR_PluginList * | prev, |
const char * | library, | ||
const char * | options, | ||
enum EXTRACTOR_Options | flags | ||
) |
Add a library for keyword extraction.
prev | the previous list of libraries, may be NULL |
library | the name of the library (short handle, e.g. "mime") |
options | options to give to the library |
flags | options to use |
Add a library for keyword extraction.
prev | the previous list of libraries, may be NULL |
library | the name of the library |
options | options to pass to the plugin |
flags | options to use |
Definition at line 206 of file extractor_plugins.c.
References EXTRACTOR_find_plugin_(), EXTRACTOR_PluginList::flags, EXTRACTOR_PluginList::libname, LOG, EXTRACTOR_PluginList::next, NULL, EXTRACTOR_PluginList::plugin_options, EXTRACTOR_PluginList::seek_request, and EXTRACTOR_PluginList::short_libname.
Referenced by EXTRACTOR_plugin_add_config(), and load_plugins_from_dir().
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add_config | ( | struct EXTRACTOR_PluginList * | prev, |
const char * | config, | ||
enum EXTRACTOR_Options | flags | ||
) |
Load multiple libraries as specified by the user.
config | a string given by the user that defines which libraries should be loaded. Has the format "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*". For example, 'mp3:ogg' loads the mp3 and the ogg plugins. The '-' before the LIBRARYNAME indicates that the library should be removed from the library list. |
prev | the previous list of libraries, may be NULL |
flags | options to use |
Load multiple libraries as specified by the user.
config | a string given by the user that defines which libraries should be loaded. Has the format "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*". For example, 'mp3:ogg' loads the mp3 and the ogg plugins. The '-' before the LIBRARYNAME indicates that the library should be removed from the library list. |
prev | the previous list of libraries, may be NULL |
flags | options to use |
Definition at line 261 of file extractor_plugins.c.
References ABORT, EXTRACTOR_plugin_add(), EXTRACTOR_plugin_remove(), EXTRACTOR_PluginList::flags, and NULL.
Referenced by EXTRACTOR_plugin_add_defaults(), and main().
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add_defaults | ( | enum EXTRACTOR_Options | flags | ) |
Load the default set of plugins. The default can be changed by setting the LIBEXTRACTOR_LIBRARIES environment variable. If it is set to "env", then this function will return EXTRACTOR_plugin_add_config (NULL, env, flags).
If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt to locate the installed plugins and load all of them. The directory where the code will search for plugins is typically automatically determined; it can be specified explicitly using the "LIBEXTRACTOR_PREFIX" environment variable.
This environment variable must be set to the precise directory with the plugins (e.g. "/usr/lib/libextractor", not "/usr"). Note that setting the environment variable will disable all of the methods that are typically used to determine the location of plugins. Multiple paths can be specified using ':' to separate them.
flags | options for all of the plugins loaded |
Load the default set of plugins. The default can be changed by setting the LIBEXTRACTOR_LIBRARIES environment variable. If it is set to "env", then this function will return EXTRACTOR_plugin_add_config(NULL, env, flags). Otherwise, it will load all of the installed plugins and return them.
flags | options for all of the plugins loaded |
Definition at line 683 of file extractor_plugpath.c.
References EXTRACTOR_plugin_add_config(), DefaultLoaderContext::flags, get_installation_paths(), getenv(), load_plugins_from_dir(), NULL, and DefaultLoaderContext::res.
Referenced by main().
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_remove | ( | struct EXTRACTOR_PluginList * | prev, |
const char * | library | ||
) |
Remove a plugin from a list.
prev | the current list of plugins |
library | the name of the plugin to remove (short handle) |
Remove a plugin from a list.
prev | the current list of plugins |
library | the name of the plugin to remove |
Definition at line 342 of file extractor_plugins.c.
References EXTRACTOR_PluginList::channel, EXTRACTOR_IPC_channel_destroy_(), EXTRACTOR_IPC_shared_memory_change_rc_(), EXTRACTOR_IPC_shared_memory_destroy_(), EXTRACTOR_PluginList::libname, EXTRACTOR_PluginList::libraryHandle, LOG, EXTRACTOR_PluginList::next, NULL, EXTRACTOR_PluginList::plugin_options, EXTRACTOR_PluginList::shm, and EXTRACTOR_PluginList::short_libname.
Referenced by EXTRACTOR_plugin_add_config(), and EXTRACTOR_plugin_remove_all().
void EXTRACTOR_plugin_remove_all | ( | struct EXTRACTOR_PluginList * | plugins | ) |
Remove all plugins from the given list (destroys the list).
plugins | the list of plugins |
Definition at line 390 of file extractor_plugins.c.
References EXTRACTOR_plugin_remove(), NULL, and EXTRACTOR_PluginList::short_libname.
Referenced by main().