"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "xdelta3.c" between
xdelta3-3.0.10.tar.gz and xdelta3-3.0.11.tar.gz

About: xdelta is a tool and library for differential compression (supports VCDIFF encoding and decoding).

xdelta3.c  (xdelta3-3.0.10):xdelta3.c  (xdelta3-3.0.11)
skipping to change at line 523 skipping to change at line 523
#if XD3_DEBUG #if XD3_DEBUG
static void xd3_verify_run_state (xd3_stream *stream, static void xd3_verify_run_state (xd3_stream *stream,
const uint8_t *inp, const uint8_t *inp,
usize_t x_run_l, usize_t x_run_l,
uint8_t *x_run_c); uint8_t *x_run_c);
static void xd3_verify_large_state (xd3_stream *stream, static void xd3_verify_large_state (xd3_stream *stream,
const uint8_t *inp, const uint8_t *inp,
uint32_t x_cksum); uint32_t x_cksum);
static void xd3_verify_small_state (xd3_stream *stream, static void xd3_verify_small_state (xd3_stream *stream,
const uint8_t *inp, const uint8_t *inp,
uint32_t x_cksum); uint32_t x_cksum);
#endif /* XD3_DEBUG */ #endif /* XD3_DEBUG */
#endif /* XD3_ENCODER */ #endif /* XD3_ENCODER */
static int xd3_decode_allocate (xd3_stream *stream, usize_t size, static int xd3_decode_allocate (xd3_stream *stream, usize_t size,
uint8_t **copied1, usize_t *alloc1); uint8_t **copied1, usize_t *alloc1);
static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size);
static void xd3_free (xd3_stream *stream, void *ptr); static void xd3_free (xd3_stream *stream, void *ptr);
skipping to change at line 1105 skipping to change at line 1105
return i; return i;
} }
static usize_t static usize_t
xd3_round_blksize (usize_t sz, usize_t blksz) xd3_round_blksize (usize_t sz, usize_t blksz)
{ {
usize_t mod = sz & (blksz-1); usize_t mod = sz & (blksz-1);
XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
return mod ? (sz + (blksz - mod)) : sz; if (mod == 0)
{
return sz;
}
if (sz > USIZE_T_MAXBLKSZ)
{
return USIZE_T_MAXBLKSZ;
}
return sz + (blksz - mod);
} }
/*********************************************************************** /***********************************************************************
Adler32 stream function: code copied from Zlib, defined in RFC1950 Adler32 stream function: code copied from Zlib, defined in RFC1950
***********************************************************************/ ***********************************************************************/
#define A32_BASE 65521L /* Largest prime smaller than 2^16 */ #define A32_BASE 65521L /* Largest prime smaller than 2^16 */
#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(B ASE-1) <= 2^32-1 */ #define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(B ASE-1) <= 2^32-1 */
#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} #define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;}
skipping to change at line 1633 skipping to change at line 1643
} }
#endif #endif
xd3_free (stream, stream->acache.near_array); xd3_free (stream, stream->acache.near_array);
xd3_free (stream, stream->acache.same_array); xd3_free (stream, stream->acache.same_array);
xd3_free (stream, stream->inst_sect.copied1); xd3_free (stream, stream->inst_sect.copied1);
xd3_free (stream, stream->addr_sect.copied1); xd3_free (stream, stream->addr_sect.copied1);
xd3_free (stream, stream->data_sect.copied1); xd3_free (stream, stream->data_sect.copied1);
if (stream->dec_lastwin != stream->dec_buffer)
{
xd3_free (stream, (uint8_t*) stream->dec_lastwin);
}
xd3_free (stream, stream->dec_buffer); xd3_free (stream, stream->dec_buffer);
xd3_free (stream, (uint8_t*) stream->dec_lastwin);
xd3_free (stream, stream->buf_in); xd3_free (stream, stream->buf_in);
xd3_free (stream, stream->dec_appheader); xd3_free (stream, stream->dec_appheader);
xd3_free (stream, stream->dec_codetbl); xd3_free (stream, stream->dec_codetbl);
xd3_free (stream, stream->code_table_alloc); xd3_free (stream, stream->code_table_alloc);
#if SECONDARY_ANY #if SECONDARY_ANY
xd3_free (stream, stream->inst_sect.copied2); xd3_free (stream, stream->inst_sect.copied2);
xd3_free (stream, stream->addr_sect.copied2); xd3_free (stream, stream->addr_sect.copied2);
xd3_free (stream, stream->data_sect.copied2); xd3_free (stream, stream->data_sect.copied2);
skipping to change at line 1880 skipping to change at line 1893
return 0; return 0;
} }
/*********************************************************** /***********************************************************
Getblk interface Getblk interface
***********************************************************/ ***********************************************************/
inline inline
xoff_t xd3_source_eof(const xd3_source *src) xoff_t xd3_source_eof(const xd3_source *src)
{ {
xoff_t r = (src->blksize * src->max_blkno) + (xoff_t)src->onlastblk; xoff_t r = (src->max_blkno << src->shiftby) + (xoff_t)src->onlastblk;
return r; return r;
} }
inline inline
usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno) usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno)
{ {
usize_t r = (blkno == src->max_blkno ? usize_t r = (blkno == src->max_blkno ?
src->onlastblk : src->onlastblk :
src->blksize); src->blksize);
return r; return r;
} }
/* This function interfaces with the client getblk function, checks /* This function interfaces with the client getblk function, checks
* its results, updates frontier_blkno, max_blkno, onlastblk, eof_known. */ * its results, updates max_blkno, onlastblk, eof_known. */
static int static int
xd3_getblk (xd3_stream *stream, xoff_t blkno) xd3_getblk (xd3_stream *stream, xoff_t blkno)
{ {
int ret; int ret;
xd3_source *source = stream->src; xd3_source *source = stream->src;
if (source->curblk == NULL || blkno != source->curblkno) if (source->curblk == NULL || blkno != source->curblkno)
{ {
source->getblkno = blkno; source->getblkno = blkno;
if (stream->getblk == NULL) if (stream->getblk == NULL)
{ {
IF_DEBUG2 (DP(RINT "[getblk] XD3_GETSRCBLK %"Q"u\n", blkno));
stream->msg = "getblk source input"; stream->msg = "getblk source input";
return XD3_GETSRCBLK; return XD3_GETSRCBLK;
} }
ret = stream->getblk (stream, source, blkno); ret = stream->getblk (stream, source, blkno);
if (ret != 0) if (ret != 0)
{ {
IF_DEBUG1 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n", IF_DEBUG2 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n",
blkno, xd3_strerror (ret))); blkno, xd3_strerror (ret)));
return ret; return ret;
} }
IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk " IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk "
"%u blksize %u\n", blkno, source->onblk, source->blksize)); "%u blksize %u max_blkno %"Q"u\n", blkno, source->onblk,
source->blksize, source->max_blkno));
} }
if (blkno >= source->frontier_blkno) if (blkno > source->max_blkno)
{ {
if (blkno > source->max_blkno) source->max_blkno = blkno;
{
source->max_blkno = blkno;
source->onlastblk = source->onblk;
}
if (source->onblk == source->blksize) if (source->onblk == source->blksize)
{ {
source->frontier_blkno = blkno + 1; IF_DEBUG1 (DP(RINT "[getblk] full source blkno %"Q"u: "
IF_DEBUG2 (DP(RINT "[getblk] full source blkno %"Q"u: "
"source length unknown %"Q"u\n", "source length unknown %"Q"u\n",
blkno, blkno,
xd3_source_eof (source))); xd3_source_eof (source)));
} }
else else if (!source->eof_known)
{ {
source->frontier_blkno = blkno; IF_DEBUG1 (DP(RINT "[getblk] eof block has %d bytes; "
"source length known %"Q"u\n",
if (xd3_bytes_on_srcblk (source, blkno) != 0) xd3_bytes_on_srcblk (source, blkno),
{ xd3_source_eof (source)));
source->frontier_blkno += 1; source->eof_known = 1;
}
if (!source->eof_known)
{
IF_DEBUG2 (DP(RINT "[getblk] eof block has %d bytes; "
"source length known %"Q"u\n",
xd3_bytes_on_srcblk (source, blkno),
xd3_source_eof (source)));
source->eof_known = 1;
}
} }
} }
XD3_ASSERT (source->curblk != NULL); XD3_ASSERT (source->curblk != NULL);
if (blkno == source->max_blkno) if (blkno == source->max_blkno)
{ {
/* In case the application sets the source as 1 block w/ a /* In case the application sets the source as 1 block w/ a
preset buffer. */ * preset buffer. */
source->onlastblk = source->onblk; source->onlastblk = source->onblk;
if (source->onblk == source->blksize)
{
source->frontier_blkno = blkno + 1;
}
} }
return 0; return 0;
} }
/*********************************************************** /***********************************************************
Stream open/close Stream open/close
***************************************************************/ ***************************************************************/
int int
xd3_set_source (xd3_stream *stream, xd3_set_source (xd3_stream *stream,
skipping to change at line 2007 skipping to change at line 2002
src->shiftby = shiftby; src->shiftby = shiftby;
src->maskby = (1 << shiftby) - 1; src->maskby = (1 << shiftby) - 1;
if (xd3_check_pow2 (src->max_winsize, NULL) != 0) if (xd3_check_pow2 (src->max_winsize, NULL) != 0)
{ {
src->max_winsize = xd3_xoff_roundup(src->max_winsize); src->max_winsize = xd3_xoff_roundup(src->max_winsize);
IF_DEBUG1 (DP(RINT "raising src_maxsize to %u\n", src->blksize)); IF_DEBUG1 (DP(RINT "raising src_maxsize to %u\n", src->blksize));
} }
src->max_winsize = xd3_max (src->max_winsize, XD3_ALLOCSIZE); src->max_winsize = xd3_max (src->max_winsize, XD3_ALLOCSIZE);
return 0; return 0;
} }
int int
xd3_set_source_and_size (xd3_stream *stream, xd3_set_source_and_size (xd3_stream *stream,
xd3_source *user_source, xd3_source *user_source,
xoff_t source_size) { xoff_t source_size) {
int ret = xd3_set_source (stream, user_source); int ret = xd3_set_source (stream, user_source);
if (ret == 0) if (ret == 0)
{ {
stream->src->eof_known = 1; stream->src->eof_known = 1;
IF_DEBUG2 (DP(RINT "[set source] size known %"Q"u\n",
source_size));
xd3_blksize_div(source_size, xd3_blksize_div(source_size,
stream->src, stream->src,
&stream->src->max_blkno, &stream->src->max_blkno,
&stream->src->onlastblk); &stream->src->onlastblk);
IF_DEBUG1 (DP(RINT "[set source] size known %"Q"u max_blkno %"Q"u\n",
source_size, stream->src->max_blkno));
} }
return ret; return ret;
} }
void void
xd3_abort_stream (xd3_stream *stream) xd3_abort_stream (xd3_stream *stream)
{ {
stream->dec_state = DEC_ABORTED; stream->dec_state = DEC_ABORTED;
stream->enc_state = ENC_ABORTED; stream->enc_state = ENC_ABORTED;
} }
skipping to change at line 2079 skipping to change at line 2074
case DEC_VCHEAD: case DEC_VCHEAD:
case DEC_WININD: case DEC_WININD:
/* TODO: Address the zero-byte ambiguity. Does the encoder /* TODO: Address the zero-byte ambiguity. Does the encoder
* emit a window or not? If so, then catch an error here. * emit a window or not? If so, then catch an error here.
* If not, need another routine to say * If not, need another routine to say
* decode_at_least_one_if_empty. */ * decode_at_least_one_if_empty. */
case DEC_ABORTED: case DEC_ABORTED:
break; break;
default: default:
/* If decoding, should be ready for the next window. */ /* If decoding, should be ready for the next window. */
stream->msg = "EOF in decode"; stream->msg = "eof in decode";
return XD3_INTERNAL; return XD3_INVALID_INPUT;
} }
} }
return 0; return 0;
} }
/************************************************************** /**************************************************************
Application header Application header
****************************************************************/ ****************************************************************/
skipping to change at line 2239 skipping to change at line 2234
DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n",
cnt++, cnt++,
stream->total_in + inst->pos, stream->total_in + inst->pos,
stream->total_in + inst->pos + inst->size, stream->total_in + inst->pos + inst->size,
inst->addr, inst->addr + inst->size, inst->size); inst->addr, inst->addr + inst->size, inst->size);
}); });
break; break;
} }
case XD3_RUN: case XD3_RUN:
{ {
XD3_ASSERT (inst->size >= MIN_MATCH);
if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { r eturn ret; } if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { r eturn ret; }
stream->n_run += 1; stream->n_run += 1;
stream->l_run += inst->size; stream->l_run += inst->size;
IF_DEBUG2 ({ IF_DEBUG2 ({
static int cnt; static int cnt;
DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
}); });
break; break;
skipping to change at line 3215 skipping to change at line 3208
{ {
if ((ret = xd3_source_extend_match (stream)) != 0) if ((ret = xd3_source_extend_match (stream)) != 0)
{ {
return ret; return ret;
} }
/* The search has to make forward progress here /* The search has to make forward progress here
* or else it can get stuck in a match-backward * or else it can get stuck in a match-backward
* (getsrcblk) then match-forward (getsrcblk), * (getsrcblk) then match-forward (getsrcblk),
* find insufficient match length, then repeat * find insufficient match length, then repeat
* exactly the same search. * exactly the same search.
*/ */
stream->input_position += stream->match_fwd; stream->input_position += stream->match_fwd;
} }
case MATCH_SEARCHING: case MATCH_SEARCHING:
/* Continue string matching. (It's possible that the /* Continue string matching. (It's possible that the
* initial match continued through the entire input, in * initial match continued through the entire input, in
* which case we're still in MATCH_FORWARD and should * which case we're still in MATCH_FORWARD and should
* remain so for the next input window.) */ * remain so for the next input window.) */
skipping to change at line 3240 skipping to change at line 3234
if (stream->avail_in != 0 && if (stream->avail_in != 0 &&
(ret = stream->smatcher.string_match (stream))) (ret = stream->smatcher.string_match (stream)))
{ {
return ret; return ret;
} }
stream->enc_state = ENC_INSTR; stream->enc_state = ENC_INSTR;
case ENC_INSTR: case ENC_INSTR:
/* Note: Jump here to encode VCDIFF deltas w/o using this /* Note: Jump here to encode VCDIFF deltas w/o using this
* string-matching code. Merging code code enters here. */ * string-matching code. Merging code enters here. */
/* Flush the instrution buffer, then possibly add one more /* Flush the instrution buffer, then possibly add one more
* instruction, then emit the header. */ * instruction, then emit the header. */
if ((ret = xd3_iopt_flush_instructions (stream, 1)) || if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
(ret = xd3_iopt_add_finalize (stream))) (ret = xd3_iopt_add_finalize (stream)))
{ {
return ret; return ret;
} }
stream->enc_state = ENC_FLUSH; stream->enc_state = ENC_FLUSH;
skipping to change at line 3693 skipping to change at line 3687
{ {
src->srcbase = stream->match_minaddr; src->srcbase = stream->match_minaddr;
src->srclen = (usize_t) length; src->srclen = (usize_t) length;
XD3_ASSERT (src->srclen); XD3_ASSERT (src->srclen);
goto done; goto done;
} }
/* Otherwise, we have to make a guess. More copies may still be /* Otherwise, we have to make a guess. More copies may still be
* issued, but we have to decide the source window base and length * issued, but we have to decide the source window base and length
* now. */ * now. */
/* TODO: This may not working well in practice, more testing needed. */
src->srcbase = stream->match_minaddr; src->srcbase = stream->match_minaddr;
src->srclen = xd3_max ((usize_t) length, src->srclen = xd3_max ((usize_t) length,
stream->avail_in + (stream->avail_in >> 2)); stream->avail_in + (stream->avail_in >> 2));
if (src->eof_known) if (src->eof_known)
{ {
/* Note: if the source size is known, we must reduce srclen or /* Note: if the source size is known, we must reduce srclen or
* code that expects to pass a single block w/ getblk == NULL * code that expects to pass a single block w/ getblk == NULL
* will not function, as the code will return GETSRCBLK asking * will not function, as the code will return GETSRCBLK asking
* for the second block. */ * for the second block. */
skipping to change at line 3726 skipping to change at line 3721
/* Sets the bounding region for a newly discovered source match, prior /* Sets the bounding region for a newly discovered source match, prior
* to calling xd3_source_extend_match(). This sets the match_maxfwd, * to calling xd3_source_extend_match(). This sets the match_maxfwd,
* match_maxback variables. Note: srcpos is an absolute position * match_maxback variables. Note: srcpos is an absolute position
* (xoff_t) but the match_maxfwd, match_maxback variables are usize_t. * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t.
* Returns 0 if the setup succeeds, or 1 if the source position lies * Returns 0 if the setup succeeds, or 1 if the source position lies
* outside an already-decided srcbase/srclen window. */ * outside an already-decided srcbase/srclen window. */
static int static int
xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
{ {
xd3_source *src = stream->src; xd3_source *const src = stream->src;
usize_t greedy_or_not; usize_t greedy_or_not;
xoff_t frontier_pos;
stream->match_maxback = 0; stream->match_maxback = 0;
stream->match_maxfwd = 0; stream->match_maxfwd = 0;
stream->match_back = 0; stream->match_back = 0;
stream->match_fwd = 0; stream->match_fwd = 0;
/* This avoids a non-blocking endless loop caused by scanning /* This avoids a non-blocking endless loop caused by scanning
* backwards across a block boundary, only to find not enough * backwards across a block boundary, only to find not enough
* matching bytes to beat the current min_match due to a better lazy * matching bytes to beat the current min_match due to a better lazy
* target match: the re-entry to xd3_string_match() repeats the same * target match: the re-entry to xd3_string_match() repeats the same
skipping to change at line 3752 skipping to change at line 3746
* TestNonBlockingProgress test! */ * TestNonBlockingProgress test! */
if (srcpos != 0 && srcpos == stream->match_last_srcpos) if (srcpos != 0 && srcpos == stream->match_last_srcpos)
{ {
IF_DEBUG2(DP(RINT "[match_setup] looping failure\n")); IF_DEBUG2(DP(RINT "[match_setup] looping failure\n"));
goto bad; goto bad;
} }
/* Implement src->max_winsize, which prevents the encoder from seeking /* Implement src->max_winsize, which prevents the encoder from seeking
* back further than the LRU cache maintaining FIFO discipline, (to * back further than the LRU cache maintaining FIFO discipline, (to
* avoid seeking). */ * avoid seeking). */
frontier_pos = stream->src->frontier_blkno * stream->src->blksize; if (srcpos < stream->srcwin_cksum_pos &&
IF_DEBUG2(DP(RINT "[match_setup] frontier_pos %"Q"u, srcpos %"Q"u, " stream->srcwin_cksum_pos - srcpos > src->max_winsize)
{
IF_DEBUG2(DP(RINT "[match_setup] rejected due to src->max_winsize "
"distance eof=%"Q"u srcpos=%"Q"u max_winsz=%"Q"u\n",
xd3_source_eof (src),
srcpos, src->max_winsize));
goto bad;
}
/* There are cases where the above test does not reject a match that
* will experience XD3_TOOFARBACK at the first xd3_getblk call
* because the input may have advanced up to one block beyond the
* actual EOF. */
IF_DEBUG2(DP(RINT "[match_setup] %"Q"u srcpos %"Q"u, "
"src->max_winsize %"Q"u\n", "src->max_winsize %"Q"u\n",
frontier_pos, srcpos, stream->src->max_winsize)); stream->total_in + stream->input_position,
if (srcpos < frontier_pos && srcpos, src->max_winsize));
frontier_pos - srcpos > stream->src->max_winsize) {
IF_DEBUG1(DP(RINT "[match_setup] rejected due to src->max_winsize "
"distance eof=%"Q"u srcpos=%"Q"u maxsz=%"Q"u\n",
xd3_source_eof (stream->src),
srcpos, stream->src->max_winsize));
goto bad;
}
/* Going backwards, the 1.5-pass algorithm allows some /* Going backwards, the 1.5-pass algorithm allows some
* already-matched input may be covered by a longer source match. * already-matched input may be covered by a longer source match.
* The greedy algorithm does not allow this. */ * The greedy algorithm does not allow this. */
if (stream->flags & XD3_BEGREEDY) if (stream->flags & XD3_BEGREEDY)
{ {
/* The greedy algorithm allows backward matching to the last /* The greedy algorithm allows backward matching to the last
matched position. */ matched position. */
greedy_or_not = xd3_iopt_last_matched (stream); greedy_or_not = xd3_iopt_last_matched (stream);
} }
skipping to change at line 3805 skipping to change at line 3805
{ {
/* Unrestricted case: the match can cover the entire source, /* Unrestricted case: the match can cover the entire source,
* 0--src->size. We compare the usize_t * 0--src->size. We compare the usize_t
* match_maxfwd/match_maxback against the xoff_t * match_maxfwd/match_maxback against the xoff_t
* src->size/srcpos values and take the min. */ * src->size/srcpos values and take the min. */
if (srcpos < (xoff_t) stream->match_maxback) if (srcpos < (xoff_t) stream->match_maxback)
{ {
stream->match_maxback = (usize_t) srcpos; stream->match_maxback = (usize_t) srcpos;
} }
if (stream->src->eof_known) if (src->eof_known)
{ {
xoff_t srcavail = xd3_source_eof (stream->src) - srcpos; xoff_t srcavail = xd3_source_eof (src) - srcpos;
if (srcavail < (xoff_t) stream->match_maxfwd) if (srcavail < (xoff_t) stream->match_maxfwd)
{ {
stream->match_maxfwd = (usize_t) srcavail; stream->match_maxfwd = (usize_t) srcavail;
} }
} }
IF_DEBUG2(DP(RINT IF_DEBUG2(DP(RINT
"[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) "
"unrestricted maxback %u maxfwd %u\n", "unrestricted maxback %u maxfwd %u\n",
skipping to change at line 3851 skipping to change at line 3851
{ {
stream->match_maxback = srcavail; stream->match_maxback = srcavail;
} }
srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos);
if (srcavail < stream->match_maxfwd) if (srcavail < stream->match_maxfwd)
{ {
stream->match_maxfwd = srcavail; stream->match_maxfwd = srcavail;
} }
IF_DEBUG1(DP(RINT IF_DEBUG2(DP(RINT
"[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) "
"restricted maxback %u maxfwd %u\n", "restricted maxback %u maxfwd %u\n",
srcpos, srcpos,
stream->total_in + stream->input_position, stream->total_in + stream->input_position,
stream->match_maxback, stream->match_maxback,
stream->match_maxfwd)); stream->match_maxfwd));
goto good; goto good;
} }
good: good:
stream->match_state = MATCH_BACKWARD; stream->match_state = MATCH_BACKWARD;
stream->match_srcpos = srcpos; stream->match_srcpos = srcpos;
stream->match_last_srcpos = srcpos; stream->match_last_srcpos = srcpos;
return 0; return 0;
bad: bad:
stream->match_state = MATCH_SEARCHING; stream->match_state = MATCH_SEARCHING;
stream->match_last_srcpos = srcpos;
return 1; return 1;
} }
static inline int static inline int
xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, int n) xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, int n)
{ {
int i = 0; int i = 0;
#if UNALIGNED_OK #if UNALIGNED_OK
int nint = n / sizeof(int); int nint = n / sizeof(int);
skipping to change at line 3920 skipping to change at line 3921
* function, the string_matching routine when a checksum match is * function, the string_matching routine when a checksum match is
* discovered, and xd3_encode_input whenever a continuing (or initial) * discovered, and xd3_encode_input whenever a continuing (or initial)
* match is suspected. The two callers do different things with the * match is suspected. The two callers do different things with the
* input_position, thus this function leaves that variable untouched. * input_position, thus this function leaves that variable untouched.
* If a match is taken the resulting stream->match_fwd is left * If a match is taken the resulting stream->match_fwd is left
* non-zero. */ * non-zero. */
static int static int
xd3_source_extend_match (xd3_stream *stream) xd3_source_extend_match (xd3_stream *stream)
{ {
int ret; int ret;
xd3_source *src = stream->src; xd3_source *const src = stream->src;
xoff_t matchoff; /* matchoff is the current right/left-boundary of xoff_t matchoff; /* matchoff is the current right/left-boundary of
the source match being tested. */ the source match being tested. */
usize_t streamoff; /* streamoff is the current right/left-boundary usize_t streamoff; /* streamoff is the current right/left-boundary
of the input match being tested. */ of the input match being tested. */
xoff_t tryblk; /* tryblk, tryoff are the block, offset position xoff_t tryblk; /* tryblk, tryoff are the block, offset position
of matchoff */ of matchoff */
usize_t tryoff; usize_t tryoff;
usize_t tryrem; /* tryrem is the number of matchable bytes */ usize_t tryrem; /* tryrem is the number of matchable bytes */
usize_t matched; usize_t matched;
IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"u\n",
stream->match_srcpos));
XD3_ASSERT (src != NULL); XD3_ASSERT (src != NULL);
/* Does it make sense to compute backward match AFTER forward match? */ /* Does it make sense to compute backward match AFTER forward match? */
if (stream->match_state == MATCH_BACKWARD) if (stream->match_state == MATCH_BACKWARD)
{ {
/* Note: this code is practically duplicated below, substituting /* Note: this code is practically duplicated below, substituting
* match_fwd/match_back and direction. TODO: Consolidate? */ * match_fwd/match_back and direction. */
matchoff = stream->match_srcpos - stream->match_back; matchoff = stream->match_srcpos - stream->match_back;
streamoff = stream->input_position - stream->match_back; streamoff = stream->input_position - stream->match_back;
xd3_blksize_div (matchoff, src, &tryblk, &tryoff); xd3_blksize_div (matchoff, src, &tryblk, &tryoff);
/* this loops backward over source blocks */ /* this loops backward over source blocks */
while (stream->match_back < stream->match_maxback) while (stream->match_back < stream->match_maxback)
{ {
/* see if we're backing across a source block boundary */ /* see if we're backing across a source block boundary */
if (tryoff == 0) if (tryoff == 0)
{ {
tryoff = src->blksize; tryoff = src->blksize;
tryblk -= 1; tryblk -= 1;
} }
if ((ret = xd3_getblk (stream, tryblk))) if ((ret = xd3_getblk (stream, tryblk)))
{ {
/* if search went too far back, continue forward. */
if (ret == XD3_TOOFARBACK) if (ret == XD3_TOOFARBACK)
{ {
break; IF_DEBUG2(DP(RINT "[maxback] %"Q"u TOOFARBACK: %u INP %"Q"u CKS
UM %"Q"u\n",
tryblk, stream->match_back,
stream->total_in + stream->input_position,
stream->srcwin_cksum_pos));
/* the starting position is too far back. */
if (stream->match_back == 0)
{
XD3_ASSERT(stream->match_fwd == 0);
goto donefwd;
}
/* search went too far back, continue forward. */
goto doneback;
} }
/* could be a XD3_GETSRCBLK failure. */ /* could be a XD3_GETSRCBLK failure. */
return ret; return ret;
} }
tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back); tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back);
IF_DEBUG2(DP(RINT "[maxback] maxback %u trysrc %"Q"u/%u tgt %u tryrem % u\n", IF_DEBUG2(DP(RINT "[maxback] maxback %u trysrc %"Q"u/%u tgt %u tryrem % u\n",
stream->match_maxback, tryblk, tryoff, streamoff, tryrem)) ; stream->match_maxback, tryblk, tryoff, streamoff, tryrem)) ;
skipping to change at line 4006 skipping to change at line 4016
while (stream->match_fwd < stream->match_maxfwd) while (stream->match_fwd < stream->match_maxfwd)
{ {
if (tryoff == src->blksize) if (tryoff == src->blksize)
{ {
tryoff = 0; tryoff = 0;
tryblk += 1; tryblk += 1;
} }
if ((ret = xd3_getblk (stream, tryblk))) if ((ret = xd3_getblk (stream, tryblk)))
{ {
XD3_ASSERT (ret != XD3_TOOFARBACK); if (ret == XD3_TOOFARBACK)
{
IF_DEBUG2(DP(RINT "[maxfwd] %"Q"u TOOFARBACK: %u INP %"Q"u CKSUM %"
Q"u\n",
tryblk, stream->match_fwd,
stream->total_in + stream->input_position,
stream->srcwin_cksum_pos));
goto donefwd;
}
/* could be a XD3_GETSRCBLK failure. */ /* could be a XD3_GETSRCBLK failure. */
return ret; return ret;
} }
tryrem = xd3_min(stream->match_maxfwd - stream->match_fwd, tryrem = xd3_min(stream->match_maxfwd - stream->match_fwd,
src->onblk - tryoff); src->onblk - tryoff);
if (tryrem == 0) if (tryrem == 0)
{ {
skipping to change at line 4037 skipping to change at line 4054
tryoff += matched; tryoff += matched;
streamoff += matched; streamoff += matched;
stream->match_fwd += matched; stream->match_fwd += matched;
if (tryrem != matched) if (tryrem != matched)
{ {
break; break;
} }
} }
donefwd:
stream->match_state = MATCH_SEARCHING; stream->match_state = MATCH_SEARCHING;
IF_DEBUG2(DP(RINT "[extend match] input %"Q"u srcpos %"Q"u len %u\n",
stream->input_position + stream->total_in,
stream->match_srcpos,
stream->match_fwd));
/* If the match ends short of the last instruction end, we probably /* If the match ends short of the last instruction end, we probably
* don't want it. There is the possibility that a copy ends short * don't want it. There is the possibility that a copy ends short
* of the last copy but also goes further back, in which case we * of the last copy but also goes further back, in which case we
* might want it. This code does not implement such: if so we would * might want it. This code does not implement such: if so we would
* need more complicated xd3_iopt_erase logic. */ * need more complicated xd3_iopt_erase logic. */
if (stream->match_fwd < stream->min_match) if (stream->match_fwd < stream->min_match)
{ {
stream->match_fwd = 0; stream->match_fwd = 0;
} }
else else
skipping to change at line 4090 skipping to change at line 4113
} }
if (match_end > stream->maxsrcaddr) if (match_end > stream->maxsrcaddr)
{ {
/* Note: across windows */ /* Note: across windows */
stream->maxsrcaddr = match_end; stream->maxsrcaddr = match_end;
} }
IF_DEBUG2 ({ IF_DEBUG2 ({
static int x = 0; static int x = 0;
DP(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n", DP(RINT "[source match:%d] length %u <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n",
x++, x++,
match_length,
stream->total_in + target_position, stream->total_in + target_position,
stream->total_in + target_position + match_length, stream->total_in + target_position + match_length,
match_position, match_position,
match_position + match_length, match_position + match_length,
(stream->total_in + target_position == match_position) ? "same" : "dif f", (stream->total_in + target_position == match_position) ? "same" : "dif f",
match_length); match_length);
}); });
if ((ret = xd3_found_match (stream, if ((ret = xd3_found_match (stream,
/* decoder position */ target_position, /* decoder position */ target_position,
skipping to change at line 4332 skipping to change at line 4356
XD3_ASSERT (run_l == 0 || run_c == *x_run_c); XD3_ASSERT (run_l == 0 || run_c == *x_run_c);
XD3_ASSERT (x_run_l > slook || run_l == x_run_l); XD3_ASSERT (x_run_l > slook || run_l == x_run_l);
} }
#endif /* XD3_DEBUG */ #endif /* XD3_DEBUG */
/* This function computes more source checksums to advance the window. /* This function computes more source checksums to advance the window.
* Called at every entrance to the string-match loop and each time * Called at every entrance to the string-match loop and each time
* stream->input_position reaches the value returned as * stream->input_position reaches the value returned as
* *next_move_point. NB: this is one of the most expensive functions * *next_move_point. NB: this is one of the most expensive functions
* in this code and also the most critical for good compression. * in this code and also the most critical for good compression.
* TODO: optimize the inner loop
*/ */
static int static int
xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
{ {
xoff_t logical_input_cksum_pos; /* the source file is indexed until this point */
xoff_t source_size; xoff_t target_cksum_pos;
/* the absolute target file input position */
xoff_t absolute_input_pos;
if (stream->src->eof_known) if (stream->src->eof_known)
{ {
source_size = xd3_source_eof (stream->src); xoff_t source_size = xd3_source_eof (stream->src);
XD3_ASSERT(stream->srcwin_cksum_pos <= source_size); XD3_ASSERT(stream->srcwin_cksum_pos <= source_size);
if (stream->srcwin_cksum_pos == source_size) if (stream->srcwin_cksum_pos == source_size)
{ {
*next_move_point = USIZE_T_MAX; *next_move_point = USIZE_T_MAX;
return 0; return 0;
} }
} }
/* Begin by advancing at twice the input rate, up to half the absolute_input_pos = stream->total_in + stream->input_position;
* maximum window size. */
logical_input_cksum_pos = xd3_min((stream->total_in + stream->input_position) /* Immediately read the entire window.
* 2, *
(stream->total_in + stream->input_position) + * Note: this reverses a long held policy, at this point in the
(stream->src->max_winsize / 2)); * code, of advancing relatively slowly as the input is read, which
* results in better compression for very-similar inputs, but worse
/* If srcwin_cksum_pos is already greater, wait until the difference * compression where data is deleted near the beginning of the file.
* is met. */ *
if (stream->srcwin_cksum_pos > logical_input_cksum_pos) * The new policy is simpler, somewhat slower and can benefit, or
* slightly worsen, compression performance. */
if (absolute_input_pos < stream->src->max_winsize / 2)
{ {
*next_move_point = stream->input_position + target_cksum_pos = stream->src->max_winsize;
(usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); }
return 0; else
{
/* TODO: The addition of 2 blocks here is arbitrary. Do a
* better job of stream alignment based on observed source copy
* addresses, and when both input sizes are known, the
* difference in size. */
target_cksum_pos = absolute_input_pos +
stream->src->max_winsize / 2 +
stream->src->blksize * 2;
target_cksum_pos &= ~stream->src->maskby;
} }
/* A long match may have extended past srcwin_cksum_pos. Don't /* A long match may have extended past srcwin_cksum_pos. Don't
* start checksumming already-matched source data. */ * start checksumming already-matched source data. */
if (stream->maxsrcaddr > stream->srcwin_cksum_pos) if (stream->maxsrcaddr > stream->srcwin_cksum_pos)
{ {
stream->srcwin_cksum_pos = stream->maxsrcaddr; stream->srcwin_cksum_pos = stream->maxsrcaddr;
} }
if (logical_input_cksum_pos < stream->srcwin_cksum_pos) if (target_cksum_pos < stream->srcwin_cksum_pos)
{ {
logical_input_cksum_pos = stream->srcwin_cksum_pos; target_cksum_pos = stream->srcwin_cksum_pos;
} }
/* Advance at least one source block. With the command-line while (stream->srcwin_cksum_pos < target_cksum_pos &&
* defaults this means:
*
* if (src->size <= src->max_winsize), index the entire source at once
* using the position of the first non-match. This is good for
* small inputs, especially when the content may have moved anywhere
* in the file (e.g., tar files).
*
* if (src->size > src->max_winsize), index at least one block ahead
* of the logical position. This is good for different reasons:
* when a long match spanning several source blocks is encountered,
* this avoids computing checksums for those blocks.
*/
logical_input_cksum_pos += stream->src->blksize;
while (stream->srcwin_cksum_pos < logical_input_cksum_pos &&
(!stream->src->eof_known || (!stream->src->eof_known ||
stream->srcwin_cksum_pos < xd3_source_eof (stream->src))) stream->srcwin_cksum_pos < xd3_source_eof (stream->src)))
{ {
xoff_t blkno; xoff_t blkno;
xoff_t blkbaseoffset; xoff_t blkbaseoffset;
usize_t blkrem; usize_t blkrem;
ssize_t oldpos; /* Using ssize_t because of a */ ssize_t oldpos; /* Using ssize_t because of a */
ssize_t blkpos; /* do { blkpos-- } ssize_t blkpos; /* do { blkpos-- }
while (blkpos >= oldpos); */ while (blkpos >= oldpos); */
int ret; int ret;
skipping to change at line 4416 skipping to change at line 4438
stream->src, &blkno, &blkrem); stream->src, &blkno, &blkrem);
oldpos = blkrem; oldpos = blkrem;
if ((ret = xd3_getblk (stream, blkno))) if ((ret = xd3_getblk (stream, blkno)))
{ {
/* TOOFARBACK should never occur here, since we read forward. */ /* TOOFARBACK should never occur here, since we read forward. */
if (ret == XD3_TOOFARBACK) if (ret == XD3_TOOFARBACK)
{ {
ret = XD3_INTERNAL; ret = XD3_INTERNAL;
} }
IF_DEBUG1 (DP(RINT IF_DEBUG1 (DP(RINT
"[srcwin_move_point] async getblk return for %"Q"u\n", "[srcwin_move_point] async getblk return for %"Q"u: %s\n"
blkno)); ,
blkno, xd3_strerror (ret)));
return ret; return ret;
} }
IF_DEBUG1 (DP(RINT IF_DEBUG1 (DP(RINT
"[srcwin_move_point] T=%"Q"u{%"Q"u} S=%"Q"u EOF=%"Q"u %s\n", "[srcwin_move_point] block %"Q"u T=%"Q"u S=%"Q"u L=%"Q"u EOF=
%"Q"u %s\n",
blkno,
stream->total_in + stream->input_position, stream->total_in + stream->input_position,
logical_input_cksum_pos,
stream->srcwin_cksum_pos, stream->srcwin_cksum_pos,
target_cksum_pos,
xd3_source_eof (stream->src), xd3_source_eof (stream->src),
stream->src->eof_known ? "known" : "unknown")); stream->src->eof_known ? "known" : "unknown"));
blkpos = xd3_bytes_on_srcblk (stream->src, blkno); blkpos = xd3_bytes_on_srcblk (stream->src, blkno);
if (blkpos < (ssize_t) stream->smatcher.large_look) if (blkpos < (ssize_t) stream->smatcher.large_look)
{ {
stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
IF_DEBUG1 (DP(RINT "[srcwin_move_point] continue (end-of-block)\n")); IF_DEBUG2 (DP(RINT "[srcwin_move_point] continue (end-of-block): %"Q"u\ n", blkpos));
continue; continue;
} }
/* This inserts checksums for the entire block, in reverse, /* This inserts checksums for the entire block, in reverse,
* starting from the end of the block. This logic does not test * starting from the end of the block. This logic does not test
* stream->srcwin_cksum_pos because it always advances it to the * stream->srcwin_cksum_pos because it always advances it to the
* start of the next block. * start of the next block.
* *
* oldpos is the srcwin_cksum_pos within this block. blkpos is * oldpos is the srcwin_cksum_pos within this block. blkpos is
* the number of bytes available. Each iteration inspects * the number of bytes available. Each iteration inspects
skipping to change at line 4470 skipping to change at line 4494
IF_DEBUG (stream->large_ckcnt += 1); IF_DEBUG (stream->large_ckcnt += 1);
blkpos -= stream->smatcher.large_step; blkpos -= stream->smatcher.large_step;
} }
while (blkpos >= oldpos); while (blkpos >= oldpos);
stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
} }
IF_DEBUG1 (DP(RINT
"[srcwin_move_point] exited loop T=%"Q"u{%"Q"u} "
"S=%"Q"u EOF=%"Q"u %s\n",
stream->total_in + stream->input_position,
logical_input_cksum_pos,
stream->srcwin_cksum_pos,
xd3_source_eof (stream->src),
stream->src->eof_known ? "known" : "unknown"));
if (stream->src->eof_known) if (stream->src->eof_known)
{ {
source_size = xd3_source_eof (stream->src); xoff_t source_size = xd3_source_eof (stream->src);
if (stream->srcwin_cksum_pos >= source_size) if (stream->srcwin_cksum_pos >= source_size)
{ {
/* This invariant is needed for xd3_source_cksum_offset() */ /* This invariant is needed for xd3_source_cksum_offset() */
stream->srcwin_cksum_pos = source_size; stream->srcwin_cksum_pos = source_size;
*next_move_point = USIZE_T_MAX; *next_move_point = USIZE_T_MAX;
IF_DEBUG1 (DP(RINT IF_DEBUG1 (DP(RINT
"[srcwin_move_point] finished with source input\n")); "[srcwin_move_point] finished with source input\n"));
return 0; return 0;
} }
} }
/* How long until this function should be called again. */ /* How long until this function should be called again. */
XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos); XD3_ASSERT(stream->srcwin_cksum_pos >= target_cksum_pos);
*next_move_point = stream->input_position + 1 +
(usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); *next_move_point = stream->input_position +
stream->src->blksize -
((stream->srcwin_cksum_pos - target_cksum_pos) & stream->src->maskby);
IF_DEBUG2 (DP(RINT
"[srcwin_move_point] finished T=%"Q"u "
"S=%"Q"u L=%"Q"u EOF=%"Q"u %s again in %u\n",
stream->total_in + stream->input_position,
stream->srcwin_cksum_pos,
target_cksum_pos,
xd3_source_eof (stream->src),
stream->src->eof_known ? "known" : "unknown",
*next_move_point - stream->input_position));
return 0; return 0;
} }
#endif /* XD3_ENCODER */ #endif /* XD3_ENCODER */
/******************************************************************** /********************************************************************
TEMPLATE pass TEMPLATE pass
*********************************************************************/ *********************************************************************/
#endif /* __XDELTA3_C_INLINE_PASS__ */ #endif /* __XDELTA3_C_INLINE_PASS__ */
skipping to change at line 4556 skipping to change at line 4583
uint32_t lcksum = 0; uint32_t lcksum = 0;
usize_t sinx; usize_t sinx;
usize_t linx; usize_t linx;
uint8_t run_c; uint8_t run_c;
usize_t run_l; usize_t run_l;
int ret; int ret;
usize_t match_length; usize_t match_length;
usize_t match_offset = 0; usize_t match_offset = 0;
usize_t next_move_point; usize_t next_move_point;
IF_DEBUG2(DP(RINT "[string_match] initial entry %u\n", stream->input_position)
);
/* If there will be no compression due to settings or short input, /* If there will be no compression due to settings or short input,
* skip it entirely. */ * skip it entirely. */
if (! (DO_SMALL || DO_LARGE || DO_RUN) || if (! (DO_SMALL || DO_LARGE || DO_RUN) ||
stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
if ((ret = xd3_string_match_init (stream))) { return ret; } if ((ret = xd3_string_match_init (stream))) { return ret; }
/* The restartloop label is reached when the incremental loop state /* The restartloop label is reached when the incremental loop state
* needs to be reset. */ * needs to be reset. */
restartloop: restartloop:
IF_DEBUG2(DP(RINT "[string_match] restartloop %u\n", stream->input_position));
/* If there is not enough input remaining for any kind of match, /* If there is not enough input remaining for any kind of match,
skip it. */ skip it. */
if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
/* Now reset the incremental loop state: */ /* Now reset the incremental loop state: */
/* The min_match variable is updated to avoid matching the same lazy /* The min_match variable is updated to avoid matching the same lazy
* match over and over again. For example, if you find a (small) * match over and over again. For example, if you find a (small)
* match of length 9 at one position, you will likely find a match * match of length 9 at one position, you will likely find a match
* of length 8 at the next position. */ * of length 8 at the next position. */
skipping to change at line 4610 skipping to change at line 4641
run_l = xd3_comprun (inp, SLOOK, & run_c); run_l = xd3_comprun (inp, SLOOK, & run_c);
} }
/* Large match state. We continue the loop even after not enough /* Large match state. We continue the loop even after not enough
* bytes for LLOOK remain, so always check stream->input_position in * bytes for LLOOK remain, so always check stream->input_position in
* DO_LARGE code. */ * DO_LARGE code. */
if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in))
{ {
/* Source window: next_move_point is the point that /* Source window: next_move_point is the point that
* stream->input_position must reach before computing more * stream->input_position must reach before computing more
* source checksum. */ * source checksum. Note: this is called unconditionally
* the first time after reentry, subsequent calls will be
* avoided if next_move_point is > input_position */
if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
{ {
return ret; return ret;
} }
lcksum = xd3_lcksum (inp, LLOOK); lcksum = xd3_lcksum (inp, LLOOK);
} }
/* TRYLAZYLEN: True if a certain length match should be followed by /* TRYLAZYLEN: True if a certain length match should be followed by
* lazy search. This checks that LEN is shorter than MAXLAZY and * lazy search. This checks that LEN is shorter than MAXLAZY and
 End of changes. 63 change blocks. 
126 lines changed or deleted 163 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS