libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

extractor_datasource.c File Reference

random access and possibly decompression of data from buffer in memory or file on disk More...

#include "platform.h"
#include "extractor_logging.h"
#include "extractor_datasource.h"
Include dependency graph for extractor_datasource.c:

Go to the source code of this file.

Data Structures

struct  BufferedFileDataSource
 
struct  CompressedFileSource
 
struct  EXTRACTOR_Datasource
 

Macros

#define MIN_COMPRESSED_HEADER   -1
 
#define O_LARGEFILE   0
 
#define MAX_READ   (4 * 1024 * 1024)
 
#define COM_CHUNK_SIZE   (16 * 1024)
 

Enumerations

enum  ExtractorCompressionType { COMP_TYPE_UNDEFINED = -1 , COMP_TYPE_INVALID = 0 , COMP_TYPE_ZLIB = 1 , COMP_TYPE_BZ2 = 2 }
 

Functions

static int bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, uint64_t pos)
 
static struct BufferedFileDataSource * bfds_new (const void *data, int fd, int64_t fsize)
 
static void bfds_delete (struct BufferedFileDataSource *bfds)
 
static int64_t bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence)
 
static ssize_t bfds_read (struct BufferedFileDataSource *bfds, void *buf_ptr, size_t count)
 
static int cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
static int cfs_deinit_decompressor (struct CompressedFileSource *cfs)
 
static int cfs_reset_stream (struct CompressedFileSource *cfs)
 
static void cfs_destroy (struct CompressedFileSource *cfs)
 
struct CompressedFileSource * cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
static ssize_t cfs_read (struct CompressedFileSource *cfs, void *data, size_t size)
 
static int64_t cfs_seek (struct CompressedFileSource *cfs, int64_t position, int whence)
 
static enum ExtractorCompressionType get_compression_type (struct BufferedFileDataSource *bfds)
 
struct EXTRACTOR_Datasource * EXTRACTOR_datasource_create_from_file_ (const char *filename, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
struct EXTRACTOR_Datasource * EXTRACTOR_datasource_create_from_buffer_ (const char *buf, size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 
void EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds)
 
ssize_t EXTRACTOR_datasource_read_ (void *cls, void *data, size_t size)
 
int64_t EXTRACTOR_datasource_seek_ (void *cls, int64_t pos, int whence)
 
int64_t EXTRACTOR_datasource_get_size_ (void *cls, int force)
 

Detailed Description

random access and possibly decompression of data from buffer in memory or file on disk

Author
Christian Grothoff

Definition in file extractor_datasource.c.

Macro Definition Documentation

◆ COM_CHUNK_SIZE

#define COM_CHUNK_SIZE   (16 * 1024)

Data is read from the source and shoved into decompressor in chunks this big.

Definition at line 62 of file extractor_datasource.c.

◆ MAX_READ

#define MAX_READ   (4 * 1024 * 1024)

Maximum size of an IO buffer.

Definition at line 56 of file extractor_datasource.c.

◆ MIN_COMPRESSED_HEADER

#define MIN_COMPRESSED_HEADER   -1

Definition at line 46 of file extractor_datasource.c.

◆ O_LARGEFILE

#define O_LARGEFILE   0

Definition at line 50 of file extractor_datasource.c.

Enumeration Type Documentation

◆ ExtractorCompressionType

Enum with the various possible types of compression supported.

Enumerator
COMP_TYPE_UNDEFINED 

We cannot tell from the data (header incomplete).

COMP_TYPE_INVALID 

Invalid header (likely uncompressed)

COMP_TYPE_ZLIB 

libz / gzip compression.

COMP_TYPE_BZ2 

bz2 compression

Definition at line 68 of file extractor_datasource.c.

Function Documentation

◆ bfds_delete()

static void bfds_delete ( struct BufferedFileDataSource *  bfds)
static

Deallocates bfds

Parameters
bfds: bfds to deallocate

Definition at line 303 of file extractor_datasource.c.

Referenced by EXTRACTOR_datasource_create_from_buffer_(), EXTRACTOR_datasource_create_from_file_(), and EXTRACTOR_datasource_destroy_().

◆ bfds_new()

static struct BufferedFileDataSource* bfds_new ( const void *  data,
int  fd,
int64_t  fsize 
)
static

Creates a bfds

Parameters
data: data buffer to use as a source (NULL if fd != -1)
fd: file descriptor to use as a source (-1 if data != NULL)
fsize: size of the file (or the buffer)
Returns
newly allocated bfds

Definition at line 260 of file extractor_datasource.c.

References bfds_pick_next_buffer_at(), BufferedFileDataSource::buffer, BufferedFileDataSource::buffer_bytes, BufferedFileDataSource::buffer_size, BufferedFileDataSource::data, BufferedFileDataSource::fd, BufferedFileDataSource::fsize, LOG, LOG_STRERROR, MAX_READ, and NULL.

Referenced by EXTRACTOR_datasource_create_from_buffer_(), and EXTRACTOR_datasource_create_from_file_().

◆ bfds_pick_next_buffer_at()

static int bfds_pick_next_buffer_at ( struct BufferedFileDataSource *  bfds,
uint64_t  pos 
)
static

Makes bfds seek to 'pos' and read a chunk of bytes there. Changes bfds->fpos, bfds->buffer_bytes and bfds->buffer_pos. Does almost nothing for memory-backed bfds.

Parameters
bfds: bfds
pos: position
Returns
0 on success, -1 on error

Definition at line 216 of file extractor_datasource.c.

References BufferedFileDataSource::buffer, BufferedFileDataSource::buffer_bytes, BufferedFileDataSource::buffer_pos, BufferedFileDataSource::buffer_size, BufferedFileDataSource::fd, BufferedFileDataSource::fpos, BufferedFileDataSource::fsize, LOG, LOG_STRERROR, and NULL.

Referenced by bfds_new(), bfds_read(), and bfds_seek().

◆ bfds_read()

static ssize_t bfds_read ( struct BufferedFileDataSource *  bfds,
void *  buf_ptr,
size_t  count 
)
static

Fills 'buf_ptr' with a chunk of data. Will fail if 'count' exceeds buffer size.

Parameters
bfds: bfds
buf_ptr: location to store data
count: number of bytes to read
Returns
number of bytes (<= count) available at location pointed by buf_ptr, 0 for end of stream, -1 on error

Definition at line 409 of file extractor_datasource.c.

References bfds_pick_next_buffer_at(), BufferedFileDataSource::buffer_bytes, BufferedFileDataSource::buffer_pos, BufferedFileDataSource::data, BufferedFileDataSource::fpos, BufferedFileDataSource::fsize, and LOG.

Referenced by EXTRACTOR_datasource_read_().

◆ bfds_seek()

static int64_t bfds_seek ( struct BufferedFileDataSource *  bfds,
int64_t  pos,
int  whence 
)
static

Makes bfds seek to 'pos' in 'whence' mode. Will try to seek within the buffer, will move the buffer location if the seek request falls outside of the buffer range.

Parameters
bfds: bfds
pos: position to seek to
whence: one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END)
Returns
new absolute position, -1 on error

Definition at line 320 of file extractor_datasource.c.

References ASSERT, bfds_pick_next_buffer_at(), BufferedFileDataSource::buffer, BufferedFileDataSource::buffer_bytes, BufferedFileDataSource::buffer_pos, BufferedFileDataSource::fpos, BufferedFileDataSource::fsize, LOG, and NULL.

Referenced by EXTRACTOR_datasource_seek_().

◆ cfs_deinit_decompressor()

static int cfs_deinit_decompressor ( struct CompressedFileSource *  cfs)
static

Deinitializes decompression object.

Parameters
cfs: cfs to deinitialize
Returns
1 on success, -1 on error

Definition at line 700 of file extractor_datasource.c.

References COMP_TYPE_BZ2, COMP_TYPE_ZLIB, CompressedFileSource::compression_type, and LOG.

Referenced by cfs_destroy(), and cfs_reset_stream().

◆ cfs_destroy()

static void cfs_destroy ( struct CompressedFileSource *  cfs)
static

Destroy compressed file source.

Parameters
cfs: source to destroy

Definition at line 743 of file extractor_datasource.c.

References cfs_deinit_decompressor().

Referenced by EXTRACTOR_datasource_destroy_().

◆ cfs_init_decompressor()

static int cfs_init_decompressor ( struct CompressedFileSource *  cfs,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)
static

Initializes decompression object. Might report metadata about the compressed stream, if available. Resets the stream to the beginning.

Parameters
cfs: cfs to initialize
proc: callback for metadata
proc_cls: callback cls
Returns
1 on success, 0 to terminate extraction, -1 on error

Definition at line 635 of file extractor_datasource.c.

References COMP_TYPE_BZ2, COMP_TYPE_ZLIB, CompressedFileSource::compression_type, CompressedFileSource::fpos, LOG, and CompressedFileSource::result_pos.

Referenced by cfs_new(), and cfs_reset_stream().

◆ cfs_new()

struct CompressedFileSource* cfs_new ( struct BufferedFileDataSource *  bfds,
int64_t  fsize,
enum ExtractorCompressionType  compression_type,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)

Allocates and initializes new cfs object.

Parameters
bfds: data source to use
fsize: size of the source
compression_type: type of compression used
proc: metadata callback to call with meta data found upon opening
proc_cls: callback cls
Returns
newly allocated cfs on success, NULL on error

Definition at line 761 of file extractor_datasource.c.

References CompressedFileSource::bfds, cfs_init_decompressor(), CompressedFileSource::compression_type, CompressedFileSource::fsize, LOG_STRERROR, NULL, and CompressedFileSource::uncompressed_size.

Referenced by EXTRACTOR_datasource_create_from_buffer_(), and EXTRACTOR_datasource_create_from_file_().

◆ cfs_read()

static ssize_t cfs_read ( struct CompressedFileSource *  cfs,
void *  data,
size_t  size 
)
static

Fills 'data' with new uncompressed data. Does the actual decompression. Will set uncompressed_size on the end of compressed stream.

Parameters
cfs: cfs to read from
data: where to copy the data
size: number of bytes available in data
Returns
number of bytes in data. 0 if no more data can be uncompressed, -1 on error

Definition at line 997 of file extractor_datasource.c.

References COMP_TYPE_BZ2, COMP_TYPE_ZLIB, CompressedFileSource::compression_type, and LOG.

Referenced by cfs_seek(), EXTRACTOR_datasource_get_size_(), and EXTRACTOR_datasource_read_().

◆ cfs_reset_stream()

static int cfs_reset_stream ( struct CompressedFileSource *  cfs)
static

Resets the compression stream to begin uncompressing from the beginning. Used at initialization time, and when seeking backward.

Parameters
cfs: cfs to reset
Returns
1 on success, 0 to terminate extraction, -1 on error

Definition at line 729 of file extractor_datasource.c.

References cfs_deinit_decompressor(), cfs_init_decompressor(), and NULL.

Referenced by cfs_seek().

◆ cfs_seek()

static int64_t cfs_seek ( struct CompressedFileSource *  cfs,
int64_t  position,
int  whence 
)
static

Moves the buffer to 'position' in the uncompressed stream. If position requires seeking backwards beyond the boundaries of the buffer, resets the stream and repeats decompression from the beginning to 'position'.

Parameters
cfs: cfs to seek on
position: new starting point for the buffer
whence: one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END)
Returns
new absolute buffer position, -1 on error or EOS

Definition at line 1029 of file extractor_datasource.c.

References ASSERT, cfs_read(), cfs_reset_stream(), COM_CHUNK_SIZE, CompressedFileSource::fpos, LOG, CompressedFileSource::result_pos, and CompressedFileSource::uncompressed_size.

Referenced by EXTRACTOR_datasource_get_size_(), and EXTRACTOR_datasource_seek_().

◆ EXTRACTOR_datasource_create_from_buffer_()

struct EXTRACTOR_Datasource* EXTRACTOR_datasource_create_from_buffer_ ( const char *  buf,
size_t  size,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)

Create a datasource from a buffer in memory.

Parameters
buf: data in memory
size: number of bytes in 'buf'
proc: metadata callback to call with meta data found upon opening
proc_cls: callback cls
Returns
handle to the datasource

Definition at line 1282 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, bfds_delete(), bfds_new(), EXTRACTOR_Datasource::cfs, cfs_new(), COMP_TYPE_BZ2, COMP_TYPE_ZLIB, EXTRACTOR_Datasource::fd, get_compression_type(), LOG, LOG_STRERROR, and NULL.

Referenced by EXTRACTOR_extract().

◆ EXTRACTOR_datasource_create_from_file_()

struct EXTRACTOR_Datasource* EXTRACTOR_datasource_create_from_file_ ( const char *  filename,
EXTRACTOR_MetaDataProcessor  proc,
void *  proc_cls 
)

Create a datasource from a file on disk.

Parameters
filename: name of the file on disk
proc: metadata callback to call with meta data found upon opening
proc_cls: callback cls
Returns
handle to the datasource, NULL on error

Definition at line 1202 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, bfds_delete(), bfds_new(), EXTRACTOR_Datasource::cfs, cfs_new(), COMP_TYPE_BZ2, COMP_TYPE_ZLIB, EXTRACTOR_Datasource::fd, get_compression_type(), LOG, LOG_STRERROR, LOG_STRERROR_FILE, NULL, and O_LARGEFILE.

Referenced by EXTRACTOR_extract().

◆ EXTRACTOR_datasource_destroy_()

void EXTRACTOR_datasource_destroy_ ( struct EXTRACTOR_Datasource *  ds)

Destroy a data source.

Parameters
ds: source to destroy

Definition at line 1330 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, bfds_delete(), EXTRACTOR_Datasource::cfs, cfs_destroy(), EXTRACTOR_Datasource::fd, and NULL.

Referenced by EXTRACTOR_extract().

◆ EXTRACTOR_datasource_get_size_()

int64_t EXTRACTOR_datasource_get_size_ ( void *  cls,
int  force 
)

Determine the overall size of the data source (after decompression).

Parameters
cls: must be a 'struct EXTRACTOR_Datasource'
force: force computing the size if it is unavailable
Returns
overall file size, UINT64_MAX on error or unknown

Definition at line 1404 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, EXTRACTOR_Datasource::cfs, cfs_read(), cfs_seek(), CompressedFileSource::fpos, BufferedFileDataSource::fsize, LOG, NULL, and CompressedFileSource::uncompressed_size.

Referenced by do_extract(), EXTRACTOR_datasource_seek_(), in_process_get_size(), and send_update_message().

◆ EXTRACTOR_datasource_read_()

ssize_t EXTRACTOR_datasource_read_ ( void *  cls,
void *  data,
size_t  size 
)

Make 'size' bytes of data from the data source available at 'data'.

Parameters
cls: must be a 'struct EXTRACTOR_Datasource'
data: where the data should be copied to
size: maximum number of bytes requested
Returns
number of bytes now available in data (can be smaller than 'size'), -1 on error

Definition at line 1351 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, bfds_read(), EXTRACTOR_Datasource::cfs, cfs_read(), and NULL.

Referenced by EXTRACTOR_IPC_shared_memory_set_(), and in_process_read().

◆ EXTRACTOR_datasource_seek_()

int64_t EXTRACTOR_datasource_seek_ ( void *  cls,
int64_t  pos,
int  whence 
)

Seek in the datasource. Use 'SEEK_CUR' for whence and 'pos' of 0 to obtain the current position in the file.

Parameters
cls: must be a 'struct EXTRACTOR_Datasource'
pos: position to seek (see 'man lseek')
whence: how to seek (absolute to start, relative, absolute to end)
Returns
new absolute position, UINT64_MAX on error (i.e. desired position does not exist)

Definition at line 1374 of file extractor_datasource.c.

References EXTRACTOR_Datasource::bfds, bfds_seek(), EXTRACTOR_Datasource::cfs, cfs_seek(), EXTRACTOR_datasource_get_size_(), NULL, and CompressedFileSource::uncompressed_size.

Referenced by do_extract(), EXTRACTOR_datasource_get_pos_(), EXTRACTOR_IPC_shared_memory_set_(), and in_process_seek().

◆ get_compression_type()

static enum ExtractorCompressionType get_compression_type ( struct BufferedFileDataSource *  bfds)
static

Detect if we have compressed data on our hands.

Parameters
data: pointer to a data buffer or NULL (in case fd is not -1)
fd: a file to read data from, or -1 (if data is not NULL)
fsize: size of data (if data is not NULL) or of file (if fd is not -1)
Returns
-1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression

Definition at line 1029 of file extractor_datasource.c.

Referenced by EXTRACTOR_datasource_create_from_buffer_(), and EXTRACTOR_datasource_create_from_file_().