libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

extract.c File Reference

command-line tool to run GNU libextractor More...

#include "platform.h"
#include "extractor.h"
#include "getopt.h"
#include <signal.h>
Include dependency graph for extract.c:

Go to the source code of this file.

Data Structures

struct  Help
 
struct  BibTexMap
 

Macros

#define YES   1
 
#define NO   0
 
#define BORDER   29
 

Functions

static void ignore_sigpipe ()
 
static void format_help (const char *general, const char *description, const struct Help *opt)
 
static void print_help ()
 
static int print_selected_keywords (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
 
static int print_selected_keywords_grep_friendly (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
 
static void cleanup_bibtex ()
 
static int print_bibtex (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
 
static void finish_bibtex (const char *fn)
 
static char ** _make_continuous_arg_copy (int argc, char *const *argv)
 
static int _get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv)
 
int main (int argc, char *argv[])
 

Variables

static int * print
 
static int verbose
 
static int in_process
 
static int from_memory
 
static char * entry_type
 
static struct BibTexMap btm []
 

Detailed Description

command-line tool to run GNU libextractor

Author
Christian Grothoff

Definition in file extract.c.

Macro Definition Documentation

◆ BORDER

#define BORDER   29

Indentation for descriptions.

Definition at line 111 of file extract.c.

◆ NO

#define NO   0

Definition at line 31 of file extract.c.

◆ YES

#define YES   1

Definition at line 30 of file extract.c.

Function Documentation

◆ _get_utf8_args()

static int _get_utf8_args ( int  argc,
char *const *  argv,
int *  u8argc,
char ***  u8argv 
)
static

Returns utf-8 encoded arguments. Returned argv has u8argv[u8argc] == NULL. Returned argv is a single memory block, and can be freed with a single free () call.

Parameters
argc: argc (as given by main())
argv: argv (as given by main())
u8argc: a location to store the new argc in (though it's the same as argc)
u8argv: a location to store the new argv in
Returns
0 on success, -1 on failure

Definition at line 690 of file extract.c.

References _make_continuous_arg_copy(), and NULL.

Referenced by main().

◆ _make_continuous_arg_copy()

static char** _make_continuous_arg_copy ( int  argc,
char *const *  argv 
)
static

Makes a copy of argv that consists of a single memory chunk that can be freed with a single call to free ().

Definition at line 654 of file extract.c.

References NULL.

Referenced by _get_utf8_args().

◆ cleanup_bibtex()

static void cleanup_bibtex ( )
static

Clean up the bibtex processor in preparation for the next round.

Definition at line 498 of file extract.c.

References BibTexMap::bibTexName, btm, entry_type, NULL, and BibTexMap::value.

Referenced by main().

◆ finish_bibtex()

static void finish_bibtex ( const char *  fn)
static

Print the computed bibTeX entry.

Parameters
fn: file for which the entry was created.

Definition at line 564 of file extract.c.

References BibTexMap::bibTexName, btm, entry_type, NULL, and BibTexMap::value.

Referenced by main().

◆ format_help()

static void format_help ( const char *  general,
const char *  description,
const struct Help *  opt 
)
static

Display help text (--help).

Parameters
general: binary name
description: program description
opt: program options (NULL-terminated array)

Definition at line 122 of file extract.c.

References _, BORDER, gettext, and NULL.

Referenced by print_help().

◆ ignore_sigpipe()

static void ignore_sigpipe ( )
static

Install a signal handler to ignore SIGPIPE.

Definition at line 59 of file extract.c.

Referenced by main().

◆ main()

int main ( int  argc,
char *  argv[] 
)

◆ print_bibtex()

static int print_bibtex ( void *  cls,
const char *  plugin_name,
enum EXTRACTOR_MetaType  type,
enum EXTRACTOR_MetaFormat  format,
const char *  data_mime_type,
const char *  data,
size_t  data_len 
)
static

Callback function for printing meta data in bibtex format.

Parameters
cls: closure, not used
plugin_name: name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data).
type: libextractor-type describing the meta data
format: basic format information about data
data_mime_type: mime-type of data (not of the original file); can be NULL (if mime-type is not known)
data: actual meta-data found
data_len: number of bytes in data
Returns
0 to continue extracting (always)

Definition at line 529 of file extract.c.

References BibTexMap::bibTexName, btm, entry_type, EXTRACTOR_METAFORMAT_UTF8, EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE, BibTexMap::le_type, NULL, print, type, BibTexMap::value, and YES.

Referenced by main().

◆ print_help()

static void print_help ( )
static

Run --help.

Definition at line 215 of file extract.c.

References _, format_help(), gettext_noop, and NULL.

Referenced by main().

◆ print_selected_keywords()

static int print_selected_keywords ( void *  cls,
const char *  plugin_name,
enum EXTRACTOR_MetaType  type,
enum EXTRACTOR_MetaFormat  format,
const char *  data_mime_type,
const char *  data,
size_t  data_len 
)
static

Print a keyword list to a file.

Parameters
cls: closure, not used
plugin_name: name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data).
type: libextractor-type describing the meta data
format: basic format information about data
data_mime_type: mime-type of data (not of the original file); can be NULL (if mime-type is not known)
data: actual meta-data found
data_len: number of bytes in data
Returns
0 to continue extracting, 1 to abort

Definition at line 275 of file extract.c.

References _, EXTRACTOR_METAFORMAT_BINARY, EXTRACTOR_METAFORMAT_C_STRING, EXTRACTOR_METAFORMAT_UNKNOWN, EXTRACTOR_METAFORMAT_UTF8, EXTRACTOR_metatype_to_string(), gettext, iconv_helper(), NULL, print, type, verbose, and YES.

Referenced by main().

◆ print_selected_keywords_grep_friendly()

static int print_selected_keywords_grep_friendly ( void *  cls,
const char *  plugin_name,
enum EXTRACTOR_MetaType  type,
enum EXTRACTOR_MetaFormat  format,
const char *  data_mime_type,
const char *  data,
size_t  data_len 
)
static

Print a keyword list to a file without new lines.

Parameters
cls: closure, not used
plugin_name: name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data).
type: libextractor-type describing the meta data
format: basic format information about data
data_mime_type: mime-type of data (not of the original file); can be NULL (if mime-type is not known)
data: actual meta-data found
data_len: number of bytes in data
Returns
0 to continue extracting, 1 to abort

Definition at line 368 of file extract.c.

References EXTRACTOR_METAFORMAT_BINARY, EXTRACTOR_METAFORMAT_C_STRING, EXTRACTOR_METAFORMAT_UNKNOWN, EXTRACTOR_METAFORMAT_UTF8, EXTRACTOR_metatype_to_string(), gettext, gettext_noop, iconv_helper(), NULL, print, type, verbose, and YES.

Referenced by main().

Variable Documentation

◆ btm

struct BibTexMap btm[]
static
Initial value:
= {
{ NULL, 0, NULL }
}
#define NULL
Definition: getopt1.c:60
@ EXTRACTOR_METATYPE_BOOK_EDITION
Definition: extractor.h:136
@ EXTRACTOR_METATYPE_PUBLISHER_SERIES
Definition: extractor.h:149
@ EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION
Definition: extractor.h:148
@ EXTRACTOR_METATYPE_JOURNAL_NAME
Definition: extractor.h:138
@ EXTRACTOR_METATYPE_JOURNAL_NUMBER
Definition: extractor.h:140
@ EXTRACTOR_METATYPE_BOOK_TITLE
Definition: extractor.h:135
@ EXTRACTOR_METATYPE_PUBLICATION_MONTH
Definition: extractor.h:152
@ EXTRACTOR_METATYPE_PUBLICATION_YEAR
Definition: extractor.h:151
@ EXTRACTOR_METATYPE_PUBLISHER_ADDRESS
Definition: extractor.h:147
@ EXTRACTOR_METATYPE_AUTHOR_NAME
Definition: extractor.h:143
@ EXTRACTOR_METATYPE_AUTHOR_INSTITUTION
Definition: extractor.h:145
@ EXTRACTOR_METATYPE_JOURNAL_VOLUME
Definition: extractor.h:139
@ EXTRACTOR_METATYPE_COMMENT
Definition: extractor.h:131
@ EXTRACTOR_METATYPE_TITLE
Definition: extractor.h:134
@ EXTRACTOR_METATYPE_PUBLICATION_TYPE
Definition: extractor.h:150
@ EXTRACTOR_METATYPE_PAGE_RANGE
Definition: extractor.h:142
@ EXTRACTOR_METATYPE_PAGE_COUNT
Definition: extractor.h:141
@ EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER
Definition: extractor.h:137
@ EXTRACTOR_METATYPE_PUBLISHER
Definition: extractor.h:146
@ EXTRACTOR_METATYPE_URL
Definition: extractor.h:159
@ EXTRACTOR_METATYPE_BIBTEX_EPRINT
Definition: extractor.h:155

Mapping between bibTeX strings, libextractor meta data types and values for the current document.

Definition at line 462 of file extract.c.

Referenced by cleanup_bibtex(), finish_bibtex(), and print_bibtex().

◆ entry_type

char* entry_type
static

Type of the entry for bibtex.

Definition at line 462 of file extract.c.

Referenced by cleanup_bibtex(), finish_bibtex(), and print_bibtex().

◆ from_memory

int from_memory
static

Read file contents into memory, then feed them to extractor.

Definition at line 52 of file extract.c.

Referenced by main().

◆ in_process

int in_process
static

Run plugins in-process.

Definition at line 47 of file extract.c.

Referenced by main().

◆ print

int* print
static

Which keyword types should we print?

Definition at line 37 of file extract.c.

Referenced by main(), print_bibtex(), print_selected_keywords(), and print_selected_keywords_grep_friendly().

◆ verbose

int verbose
static

How verbose are we supposed to be?

Definition at line 42 of file extract.c.

Referenced by main(), print_selected_keywords(), and print_selected_keywords_grep_friendly().