libextractor
1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
![]() ![]() |
plugin to support OLE2 (DOC, XLS, etc.) files More...
#include "platform.h"
#include "extractor.h"
#include "convert.h"
#include <glib-object.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <gsf/gsf-utils.h>
#include <gsf/gsf-input-impl.h>
#include <gsf/gsf-input-memory.h>
#include <gsf/gsf-impl-utils.h>
#include <gsf/gsf-infile.h>
#include <gsf/gsf-infile-msole.h>
#include <gsf/gsf-msole-utils.h>
Go to the source code of this file.
Data Structures | |
struct | Matches |
struct | ProcContext |
struct | _LeInputPrivate |
struct | _LeInput |
struct | _LeInputClass |
Macros | |
#define | USE_LE_INPUT 1 |
#define | __(a) dgettext ("iso-639", a) |
#define | LE_TYPE_INPUT (le_input_get_type ()) |
#define | LE_INPUT(obj) |
#define | LE_INPUT_CLASS(klass) |
#define | IS_LE_INPUT(obj) |
#define | IS_LE_INPUT_CLASS(klass) |
#define | LE_INPUT_GET_CLASS(obj) |
Typedefs | |
typedef struct _LeInputPrivate | LeInputPrivate |
typedef struct _LeInput | LeInput |
typedef struct _LeInputClass | LeInputClass |
Functions | |
static int | add_metadata (EXTRACTOR_MetaDataProcessor proc, void *proc_cls, const char *phrase, enum EXTRACTOR_MetaType type) |
static void | process_metadata (gpointer key, gpointer value, gpointer user_data) |
static int | process (GsfInput *in, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
static int | process_star_office (GsfInput *src, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
static const char * | lid_to_language (unsigned int lid) |
static int | history_extract (GsfInput *stream, unsigned int lcbSttbSavedBy, unsigned int fcSttbSavedBy, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
GsfInput * | le_input_new (struct EXTRACTOR_ExtractContext *ec) |
static void | le_input_class_init (LeInputClass *class) |
static void | le_input_init (LeInput *input) |
GSF_CLASS (LeInput, le_input, le_input_class_init, le_input_init, GSF_INPUT_TYPE) | |
static const guint8 * | le_input_read (GsfInput *input, size_t num_bytes, guint8 *optional_buffer) |
static gboolean | le_input_seek (GsfInput *input, gsf_off_t offset, GSeekType whence) |
void | EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) |
static void | nolog (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer user_data) |
void | ole2_ltdl_init () |
void | ole2_ltdl_fini () |
Variables | |
static struct Matches | tmap [] |
plugin to support OLE2 (DOC, XLS, etc.) files
Definition in file ole2_extractor.c.
#define __ | ( | a | ) | dgettext ("iso-639", a) |
We use "__" to translate using iso-639.
a | string to translate |
Definition at line 398 of file ole2_extractor.c.
#define IS_LE_INPUT | ( | obj | ) |
Definition at line 646 of file ole2_extractor.c.
#define IS_LE_INPUT_CLASS | ( | klass | ) |
Definition at line 648 of file ole2_extractor.c.
#define LE_INPUT | ( | obj | ) |
Definition at line 640 of file ole2_extractor.c.
#define LE_INPUT_CLASS | ( | klass | ) |
Definition at line 643 of file ole2_extractor.c.
#define LE_INPUT_GET_CLASS | ( | obj | ) |
Definition at line 650 of file ole2_extractor.c.
#define LE_TYPE_INPUT (le_input_get_type ()) |
Definition at line 639 of file ole2_extractor.c.
#define USE_LE_INPUT 1 |
Set to 1 to use our own GsfInput subclass which supports seeking and thus can handle very large files. Set to 0 to use the simple gsf in-memory buffer (which can only access the first ~16k) for debugging.
Definition at line 53 of file ole2_extractor.c.
typedef struct _LeInputClass LeInputClass |
LeInput's class state.
typedef struct _LeInputPrivate LeInputPrivate |
Internal state of an "LeInput" object.
|
static |
Give the given UTF8 string to LE by calling 'proc'.
proc | callback to invoke |
proc_cls | closure for proc |
phrase | metadata string to pass; may consist only of spaces, only double-quotes, or a single space inside double quotes; in those cases, nothing should be done |
type | meta data type to use |
Definition at line 68 of file ole2_extractor.c.
References EXTRACTOR_METAFORMAT_UTF8, NULL, and type.
Referenced by EXTRACTOR_ole2_extract_method(), history_extract(), process_metadata(), and process_star_office().
void EXTRACTOR_ole2_extract_method | ( | struct EXTRACTOR_ExtractContext * | ec | ) |
Main entry method for the OLE2 extraction plugin.
ec | extraction context provided to the plugin |
Definition at line 911 of file ole2_extractor.c.
References add_metadata(), EXTRACTOR_ExtractContext::cls, EXTRACTOR_METATYPE_LANGUAGE, EXTRACTOR_ExtractContext::get_size, history_extract(), le_input_new(), lid_to_language(), name, NULL, EXTRACTOR_ExtractContext::proc, process(), process_star_office(), EXTRACTOR_ExtractContext::read, and EXTRACTOR_ExtractContext::seek.
GSF_CLASS | ( | LeInput | , |
le_input | , | ||
le_input_class_init | , | ||
le_input_init | , | ||
GSF_INPUT_TYPE | |||
) |
Macro to create LeInput type definition and register the class.
Definition at line 733 of file ole2_extractor.c.
References NULL.
|
static |
Extract editing history from XTable stream.
stream | OLE stream to process |
lcbSttbSavedBy | length of the revision history in bytes |
fcSttbSavedBy | offset of the revision history in the stream |
proc | function to call on meta data found |
proc_cls | closure for proc |
Definition at line 546 of file ole2_extractor.c.
References _, add_metadata(), EXTRACTOR_common_convert_to_utf8(), EXTRACTOR_METATYPE_REVISION_HISTORY, NULL, ProcContext::proc, ProcContext::proc_cls, and ProcContext::ret.
Referenced by EXTRACTOR_ole2_extract_method().
|
static |
Class initializer for the "LeInput" class.
class | class object to initialize |
Definition at line 848 of file ole2_extractor.c.
References le_input_read(), and le_input_seek().
|
static |
Initialize internal state of fresh input object.
input | object to initialize |
Definition at line 866 of file ole2_extractor.c.
References _LeInputPrivate::ec, LE_TYPE_INPUT, NULL, and _LeInput::priv.
GsfInput * le_input_new | ( | struct EXTRACTOR_ExtractContext * | ec | ) |
Constructor for LeInput objects.
ec | extraction context to use |
Creates a new LeInput object.
ec | extractor context to wrap |
Definition at line 885 of file ole2_extractor.c.
References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, EXTRACTOR_ExtractContext::get_size, _LeInput::input, LE_TYPE_INPUT, NULL, and _LeInput::priv.
Referenced by EXTRACTOR_ole2_extract_method().
|
static |
Read at least num_bytes. Does not change the current position if there is an error. Will only read if the entire amount can be read. Invalidates the buffer associated with previous calls to gsf_input_read.
input | |
num_bytes | |
optional_buffer |
Definition at line 767 of file ole2_extractor.c.
References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, LE_INPUT, NULL, _LeInput::priv, EXTRACTOR_ExtractContext::read, and EXTRACTOR_ExtractContext::seek.
Referenced by le_input_class_init().
|
static |
Move the current location in an input stream.
input | stream to seek |
offset | target offset |
whence | determines what the offset is relative to |
Definition at line 809 of file ole2_extractor.c.
References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, LE_INPUT, _LeInput::priv, and EXTRACTOR_ExtractContext::seek.
Referenced by le_input_class_init().
|
static |
Get the language string for the given language ID (lid) value.
lid | language id value |
Definition at line 409 of file ole2_extractor.c.
Referenced by EXTRACTOR_ole2_extract_method().
|
static |
Custom log function we give to GSF to disable logging.
log_domain | unused |
log_level | unused |
message | unused |
user_data | unused |
Definition at line 1030 of file ole2_extractor.c.
Referenced by ole2_ltdl_init().
void ole2_ltdl_fini | ( | ) |
OLE2 plugin destructor. Shutdown of gsf.
Definition at line 1063 of file ole2_extractor.c.
void ole2_ltdl_init | ( | ) |
OLE2 plugin constructor. Initializes glib and gsf, in particular gsf logging is disabled.
Definition at line 1044 of file ole2_extractor.c.
|
static |
Function called on (Document)SummaryInformation OLE streams.
in | the input OLE stream |
proc | function to call on meta data found |
proc_cls | closure for proc |
Definition at line 297 of file ole2_extractor.c.
References NULL, ProcContext::proc, ProcContext::proc_cls, process_metadata(), and ProcContext::ret.
Referenced by EXTRACTOR_ole2_extract_method().
|
static |
Function invoked by 'gsf_msole_metadata_read' with metadata found in the document.
key | 'const char *' describing the meta data |
value | the UTF8 representation of the meta data |
user_data | our 'struct ProcContext' (closure) |
Definition at line 196 of file ole2_extractor.c.
References add_metadata(), EXTRACTOR_METATYPE_MIMETYPE, NULL, ProcContext::proc, ProcContext::proc_cls, ProcContext::ret, Matches::text, tmap, Matches::type, and type.
Referenced by process().
|
static |
Function called on SfxDocumentInfo OLE streams.
src | the input OLE stream |
proc | function to call on meta data found |
proc_cls | closure for proc |
Definition at line 339 of file ole2_extractor.c.
References add_metadata(), EXTRACTOR_METATYPE_COMMENT, EXTRACTOR_METATYPE_KEYWORDS, EXTRACTOR_METATYPE_SUBJECT, EXTRACTOR_METATYPE_TITLE, ProcContext::proc, and ProcContext::proc_cls.
Referenced by EXTRACTOR_ole2_extract_method().
|
static |
Definition at line 68 of file ole2_extractor.c.
Referenced by process_metadata().