libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

ole2_extractor.c File Reference

plugin to support OLE2 (DOC, XLS, etc.) files More...

#include "platform.h"
#include "extractor.h"
#include "convert.h"
#include <glib-object.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <gsf/gsf-utils.h>
#include <gsf/gsf-input-impl.h>
#include <gsf/gsf-input-memory.h>
#include <gsf/gsf-impl-utils.h>
#include <gsf/gsf-infile.h>
#include <gsf/gsf-infile-msole.h>
#include <gsf/gsf-msole-utils.h>
Include dependency graph for ole2_extractor.c:

Go to the source code of this file.

Data Structures

struct  Matches
 
struct  ProcContext
 
struct  _LeInputPrivate
 
struct  _LeInput
 
struct  _LeInputClass
 

Macros

#define USE_LE_INPUT   1
 
#define __(a)   dgettext ("iso-639", a)
 
#define LE_TYPE_INPUT   (le_input_get_type ())
 
#define LE_INPUT(obj)
 
#define LE_INPUT_CLASS(klass)
 
#define IS_LE_INPUT(obj)
 
#define IS_LE_INPUT_CLASS(klass)
 
#define LE_INPUT_GET_CLASS(obj)
 

Typedefs

typedef struct _LeInputPrivate LeInputPrivate
 
typedef struct _LeInput LeInput
 
typedef struct _LeInputClass LeInputClass
 

Functions

static int add_metadata (EXTRACTOR_MetaDataProcessor proc, void *proc_cls, const char *phrase, enum EXTRACTOR_MetaType type)
 
static void process_metadata (gpointer key, gpointer value, gpointer user_data)
 
static int process (GsfInput *in, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
static int process_star_office (GsfInput *src, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
static const char * lid_to_language (unsigned int lid)
 
static int history_extract (GsfInput *stream, unsigned int lcbSttbSavedBy, unsigned int fcSttbSavedBy, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
GsfInput * le_input_new (struct EXTRACTOR_ExtractContext *ec)
 
static void le_input_class_init (LeInputClass *class)
 
static void le_input_init (LeInput *input)
 
 GSF_CLASS (LeInput, le_input, le_input_class_init, le_input_init, GSF_INPUT_TYPE)
 
static const guint8 * le_input_read (GsfInput *input, size_t num_bytes, guint8 *optional_buffer)
 
static gboolean le_input_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
 
void EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec)
 
static void nolog (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer user_data)
 
void ole2_ltdl_init ()
 
void ole2_ltdl_fini ()
 

Variables

static struct Matches tmap []
 

Detailed Description

plugin to support OLE2 (DOC, XLS, etc.) files

Author
Christian Grothoff

Definition in file ole2_extractor.c.

Macro Definition Documentation

◆ __

#define __ (   a)    dgettext ("iso-639", a)

We use "__" to translate using iso-639.

Parameters
a: string to translate
Returns
translated string

Definition at line 398 of file ole2_extractor.c.

◆ IS_LE_INPUT

#define IS_LE_INPUT (   obj)
Value:
(G_TYPE_CHECK_INSTANCE_TYPE ((obj), LE_TYPE_INPUT))

Definition at line 646 of file ole2_extractor.c.

◆ IS_LE_INPUT_CLASS

#define IS_LE_INPUT_CLASS (   klass)
Value:
(G_TYPE_CHECK_CLASS_TYPE ((klass), LE_TYPE_INPUT))

Definition at line 648 of file ole2_extractor.c.

◆ LE_INPUT

#define LE_INPUT (   obj)
Value:
(G_TYPE_CHECK_INSTANCE_CAST ((obj), LE_TYPE_INPUT, LeInput))

Definition at line 640 of file ole2_extractor.c.

◆ LE_INPUT_CLASS

#define LE_INPUT_CLASS (   klass)
Value:
(G_TYPE_CHECK_CLASS_CAST ((klass), LE_TYPE_INPUT, LeInputClass))

Definition at line 643 of file ole2_extractor.c.

◆ LE_INPUT_GET_CLASS

#define LE_INPUT_GET_CLASS (   obj)
Value:
(G_TYPE_INSTANCE_GET_CLASS ((obj), LE_TYPE_INPUT, LeInputClass))

Definition at line 650 of file ole2_extractor.c.

◆ LE_TYPE_INPUT

#define LE_TYPE_INPUT   (le_input_get_type ())

Definition at line 639 of file ole2_extractor.c.

◆ USE_LE_INPUT

#define USE_LE_INPUT   1

Set to 1 to use our own GsfInput subclass which supports seeking and thus can handle very large files. Set to 0 to use the simple gsf in-memory buffer (which can only access the first ~16k) for debugging.

Definition at line 53 of file ole2_extractor.c.

Typedef Documentation

◆ LeInput

typedef struct _LeInput LeInput

Overall state of an "LeInput" object.

◆ LeInputClass

typedef struct _LeInputClass LeInputClass

LeInput's class state.

◆ LeInputPrivate

typedef struct _LeInputPrivate LeInputPrivate

Internal state of an "LeInput" object.

Function Documentation

◆ add_metadata()

static int add_metadata ( EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls,
const char *  phrase,
enum EXTRACTOR_MetaType  type 
)
static

Give the given UTF8 string to LE by calling 'proc'.

Parameters
proc: callback to invoke
proc_cls: closure for proc
phrase: metadata string to pass; may consist of just spaces, just double quotes, or just a space inside double quotes; in those cases, nothing should be done
type: meta data type to use
Returns
1 if 'proc' returned 1, otherwise 0

Definition at line 68 of file ole2_extractor.c.

References EXTRACTOR_METAFORMAT_UTF8, NULL, and type.

Referenced by EXTRACTOR_ole2_extract_method(), history_extract(), process_metadata(), and process_star_office().

◆ EXTRACTOR_ole2_extract_method()

void EXTRACTOR_ole2_extract_method ( struct EXTRACTOR_ExtractContext *  ec)

◆ GSF_CLASS()

GSF_CLASS ( LeInput  ,
le_input  ,
le_input_class_init  ,
le_input_init  ,
GSF_INPUT_TYPE   
)

Macro to create LeInput type definition and register the class.

Definition at line 733 of file ole2_extractor.c.

References NULL.

◆ history_extract()

static int history_extract ( GsfInput *  stream,
unsigned int  lcbSttbSavedBy,
unsigned int  fcSttbSavedBy,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)
static

Extract editing history from XTable stream.

Parameters
stream: OLE stream to process
lcbSttbSavedBy: length of the revision history in bytes
fcSttbSavedBy: offset of the revision history in the stream
proc: function to call on meta data found
proc_cls: closure for proc
Returns
0 to continue to extract, 1 if we are done

Definition at line 546 of file ole2_extractor.c.

References _, add_metadata(), EXTRACTOR_common_convert_to_utf8(), EXTRACTOR_METATYPE_REVISION_HISTORY, NULL, ProcContext::proc, ProcContext::proc_cls, and ProcContext::ret.

Referenced by EXTRACTOR_ole2_extract_method().

◆ le_input_class_init()

static void le_input_class_init ( LeInputClass *  class)
static

Class initializer for the "LeInput" class.

Parameters
class: class object to initialize

Definition at line 848 of file ole2_extractor.c.

References le_input_read(), and le_input_seek().

◆ le_input_init()

static void le_input_init ( LeInput *  input)
static

Initialize internal state of fresh input object.

Parameters
input: object to initialize

Definition at line 866 of file ole2_extractor.c.

References _LeInputPrivate::ec, LE_TYPE_INPUT, NULL, and _LeInput::priv.

◆ le_input_new()

GsfInput * le_input_new ( struct EXTRACTOR_ExtractContext *  ec)

Constructor for LeInput objects.

Parameters
ec: extraction context to use
Returns
the LeInput, NULL on error

Creates a new LeInput object.

Parameters
ec: extractor context to wrap
Returns
NULL on error

Definition at line 885 of file ole2_extractor.c.

References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, EXTRACTOR_ExtractContext::get_size, _LeInput::input, LE_TYPE_INPUT, NULL, and _LeInput::priv.

Referenced by EXTRACTOR_ole2_extract_method().

◆ le_input_read()

static const guint8* le_input_read ( GsfInput *  input,
size_t  num_bytes,
guint8 *  optional_buffer 
)
static

Read at least num_bytes. Does not change the current position if there is an error. Will only read if the entire amount can be read. Invalidates the buffer associated with previous calls to gsf_input_read.

Parameters
input
num_bytes
optional_buffer
Returns
buffer where num_bytes data are available, or NULL on error

Definition at line 767 of file ole2_extractor.c.

References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, LE_INPUT, NULL, _LeInput::priv, EXTRACTOR_ExtractContext::read, and EXTRACTOR_ExtractContext::seek.

Referenced by le_input_class_init().

◆ le_input_seek()

static gboolean le_input_seek ( GsfInput *  input,
gsf_off_t  offset,
GSeekType  whence 
)
static

Move the current location in an input stream

Parameters
input: stream to seek
offset: target offset
whence: determines what the offset is relative to
Returns
TRUE on error

Definition at line 809 of file ole2_extractor.c.

References EXTRACTOR_ExtractContext::cls, _LeInputPrivate::ec, LE_INPUT, _LeInput::priv, and EXTRACTOR_ExtractContext::seek.

Referenced by le_input_class_init().

◆ lid_to_language()

static const char* lid_to_language ( unsigned int  lid)
static

Get the language string for the given language ID (lid) value.

Parameters
lid: language id value
Returns
language string corresponding to the lid

Definition at line 409 of file ole2_extractor.c.

References _, __, and NULL.

Referenced by EXTRACTOR_ole2_extract_method().

◆ nolog()

static void nolog ( const gchar *  log_domain,
GLogLevelFlags  log_level,
const gchar *  message,
gpointer  user_data 
)
static

Custom log function we give to GSF to disable logging.

Parameters
log_domain: unused
log_level: unused
message: unused
user_data: unused

Definition at line 1030 of file ole2_extractor.c.

Referenced by ole2_ltdl_init().

◆ ole2_ltdl_fini()

void ole2_ltdl_fini ( )

OLE2 plugin destructor. Shutdown of gsf.

Definition at line 1063 of file ole2_extractor.c.

◆ ole2_ltdl_init()

void ole2_ltdl_init ( )

OLE2 plugin constructor. Initializes glib and gsf, in particular gsf logging is disabled.

Definition at line 1044 of file ole2_extractor.c.

References nolog(), and NULL.

◆ process()

static int process ( GsfInput *  in,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)
static

Function called on (Document)SummaryInformation OLE streams.

Parameters
in: the input OLE stream
proc: function to call on meta data found
proc_cls: closure for proc
Returns
0 to continue to extract, 1 if we are done

Definition at line 297 of file ole2_extractor.c.

References NULL, ProcContext::proc, ProcContext::proc_cls, process_metadata(), and ProcContext::ret.

Referenced by EXTRACTOR_ole2_extract_method().

◆ process_metadata()

static void process_metadata ( gpointer  key,
gpointer  value,
gpointer  user_data 
)
static

Function invoked by 'gsf_msole_metadata_read' with metadata found in the document.

Parameters
key: 'const char *' describing the meta data
value: the UTF8 representation of the meta data
user_data: our 'struct ProcContext' (closure)

Definition at line 196 of file ole2_extractor.c.

References add_metadata(), EXTRACTOR_METATYPE_MIMETYPE, NULL, ProcContext::proc, ProcContext::proc_cls, ProcContext::ret, Matches::text, tmap, Matches::type, and type.

Referenced by process().

◆ process_star_office()

static int process_star_office ( GsfInput *  src,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)
static

Function called on SfxDocumentInfo OLE streams.

Parameters
src: the input OLE stream
proc: function to call on meta data found
proc_cls: closure for proc
Returns
0 to continue to extract, 1 if we are done

Definition at line 339 of file ole2_extractor.c.

References add_metadata(), EXTRACTOR_METATYPE_COMMENT, EXTRACTOR_METATYPE_KEYWORDS, EXTRACTOR_METATYPE_SUBJECT, EXTRACTOR_METATYPE_TITLE, ProcContext::proc, and ProcContext::proc_cls.

Referenced by EXTRACTOR_ole2_extract_method().

Variable Documentation

◆ tmap

struct Matches tmap[]
static

Definition at line 68 of file ole2_extractor.c.

Referenced by process_metadata().