"Fossies" - the Fresh Open Source Software Archive 
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /* Copyright (C) 2004-2005 Ghostgum Software Pty Ltd. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under licence and may not be copied,
7 modified or distributed except as expressly authorised under the terms
8 of the licence contained in the file LICENCE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostgum.com.au/ or contact Ghostsgum Software Pty Ltd,
12 218 Gallaghers Rd, Glen Waverley VIC 3150, AUSTRALIA,
13 Fax +61 3 9886 6616.
14 */
15
16 /* $Id: clzw.c,v 1.2 2005/06/10 09:39:24 ghostgum Exp $ */
17 /* LZW compression, compatible with PostScript LZWDecode filter */
18
19 #include <stdlib.h>
20 #include <string.h>
21 #include "clzw.h"
22
23
24 /*
25 * Explanation of LZW by Mark Nelson in Dr. Dobb's Journal, October 1989.
26 * http://www.dogma.net/markn/articles/lzw/lzw.htm
27 *
28 * This is an implementation of 12-bit LZW, compatible with
29 * PostScript LZWDecode filter.
30 */
31
32 #define LZW_HASH_SIZE 5021
33 #define LZW_MAX 4094
34 #define LZW_RESET 256
35 #define LZW_EOD 257
36 #define LZW_FIRST 258
37
38 typedef struct lzw_code_s {
39 short code;
40 short base_code;
41 unsigned char ch;
42 } lzw_code_t;
43
44 struct lzw_state_s {
45 short next_code;
46 short lzwstr;
47 lzw_code_t table[LZW_HASH_SIZE];
48 int code_bit_length; /* length of current codes, 9, 10, 11 or 12 */
49 short code_change; /* code at which code_bit_length increases */
50 int output_bits; /* bits that didn't fit in a whole byte */
51 /* This must be a 32-bit or larger type */
52 int output_bits_count; /* number of bits that didn't fit */
53 int bytes_in; /* for checking compression efficiency */
54 int bytes_out;
55 };
56
57 /* Reset the code table */
58 static void
59 lzw_reset(lzw_state_t *state)
60 {
61 int i;
62 lzw_code_t *table = state->table;
63 state->next_code = LZW_FIRST;
64 state->code_bit_length = 9;
65 state->code_change = (short)((1<<state->code_bit_length)-1);
66 state->bytes_in = 0;
67 state->bytes_out = 0;
68 for (i=0; i<LZW_HASH_SIZE; i++) {
69 table[i].code = -1;
70 table[i].base_code = -1;
71 table[i].ch = 0;
72 }
73 }
74
75 static void
76 lzw_init(lzw_state_t *state)
77 {
78 memset(state, 0, sizeof(lzw_state_t));
79 state->next_code = LZW_FIRST;
80 state->code_bit_length = 9;
81 state->output_bits = 0;
82 state->output_bits_count = 0;
83 state->lzwstr = -1;
84 lzw_reset(state);
85 }
86
87 lzw_state_t *
88 lzw_new(void)
89 {
90 lzw_state_t *state = (lzw_state_t *)malloc(sizeof(lzw_state_t));
91 if (state != (lzw_state_t *)NULL)
92 lzw_init(state);
93 return state;
94 }
95
96
97 /* Returns hash index of code/ch if found, or index of an empty location */
98 static int
99 lzw_find_match(lzw_code_t *table, short code, unsigned char ch)
100 {
101 int i = (ch << 4) ^ code;
102 int hash_offset = (i == 0) ? 1 : LZW_HASH_SIZE - i;
103
104 while (table) { /* while (true) */
105 if (table[i].code == -1)
106 break; /* not in table */
107 else if ((table[i].base_code == code) && (table[i].ch == ch))
108 break; /* found it */
109 else {
110 /* search forwards from here */
111 i += hash_offset;
112 if (i >= LZW_HASH_SIZE)
113 i -= LZW_HASH_SIZE;
114 }
115 }
116 return i;
117 }
118
119 void
120 lzw_compress(lzw_state_t *state,
121 const unsigned char *inbuf, int *inlen,
122 unsigned char *outbuf, int *outlen)
123 {
124 int icount = 0;
125 int ilen = *inlen;
126 int ocount = 0;
127 int olen = *outlen;
128 unsigned char ch;
129 int hash_index;
130 int bits = state->output_bits;
131 int len = state->output_bits_count;
132 int code_len = state->code_bit_length;
133 short lzwstr = state->lzwstr;
134 short next_code = state->next_code;
135 lzw_code_t *table = state->table;
136 int do_reset = 0;
137 int bytes_in = state->bytes_in;
138 int bytes_out = state->bytes_out;
139
140 if (lzwstr == -1) {
141 /* get first char */
142 lzwstr = inbuf[icount++];
143 /* PostScript LZWEncode always starts with LZW_RESET */
144 bits = LZW_RESET;
145 len = code_len;
146 }
147 /* Write out any bits we couldn't fit last time */
148 while ((len >= 8) && (ocount < olen)) {
149 outbuf[ocount++] = (unsigned char)(bits >> (len-8));
150 len -= 8;
151 }
152 while ((icount < ilen) && (ocount < olen)) {
153 ch = inbuf[icount++];
154 hash_index = lzw_find_match(table, lzwstr, ch);
155 if (table[hash_index].code != -1)
156 lzwstr = table[hash_index].code;
157 else {
158 /* Output this code */
159 bits = (bits << code_len) + lzwstr;
160 len += code_len;
161 while ((len >= 8) && (ocount < olen)) {
162 outbuf[ocount++] = (unsigned char)(bits >> (len-8));
163 len -= 8;
164 }
165
166 if (next_code == state->code_change) {
167 state->code_bit_length = ++code_len;
168 state->code_change = (short)((1 << code_len) - 1);
169 /* Monitor compression efficiency */
170 bytes_in = state->bytes_in + icount;
171 bytes_out = state->bytes_out + ocount;
172 if (bytes_out > bytes_in + bytes_in/16) {
173 /* Data is not compressing */
174 /* Reset the table to avoid ratio getting worse */
175 do_reset = 1;
176 }
177 }
178
179 if (do_reset || (next_code >= LZW_MAX)) {
180 /* Table is full or poor efficiency, so start again */
181 bits = (bits << code_len) + LZW_RESET;
182 len += code_len;
183 while ((len >= 8) && (ocount < olen)) {
184 outbuf[ocount++] = (unsigned char)(bits >> (len-8));
185 len -= 8;
186 }
187 lzw_reset(state);
188 lzwstr = ch;
189 next_code = state->next_code;
190 code_len = state->code_bit_length;
191 do_reset = 0;
192 }
193 else {
194 /* Add new code to table */
195 table[hash_index].code = next_code++;
196 table[hash_index].base_code = lzwstr;
197 table[hash_index].ch = ch;
198 lzwstr = ch;
199 }
200 }
201 }
202 if (*inlen == 0) {
203 /* Flush and EOD */
204 bits = (bits << 2*code_len) + (lzwstr << code_len) + LZW_EOD;
205 len += 2*code_len;
206 while ((len >= 8) && (ocount < olen)) {
207 outbuf[ocount++] = (unsigned char)(bits >> (len-8));
208 len -= 8;
209 }
210 if ((len > 0) && (ocount < olen))
211 outbuf[ocount++] = (unsigned char)(bits << (8-len));
212 }
213
214 /* Save state for next time */
215 state->output_bits = bits;
216 state->output_bits_count = len;
217 state->code_bit_length = code_len;
218 state->lzwstr = lzwstr;
219 state->next_code = next_code;
220 state->bytes_in += icount;
221 state->bytes_out += ocount;
222 *outlen = ocount; /* bytes written to output buffer */
223 *inlen = icount; /* input bytes used */
224 }
225
226 void
227 lzw_free(lzw_state_t *state)
228 {
229 free(state);
230 }
231
232
233 #ifdef STANDALONE
234 #include <stdio.h>
235 int main(int argc, char *argv[])
236 {
237 char outbuf[4096];
238 int outlen = sizeof(outbuf);
239 int outcount;
240 char inbuf[4096];
241 int inlen;
242 int incount;
243 int inused;
244 lzw_state_t *lzw;
245 FILE *infile = NULL;
246 FILE *outfile = NULL;
247 int inread=0, outwritten=0;
248
249 if (argc != 3)
250 return 1;
251
252 infile = fopen(argv[1], "rb");
253 if (infile == (FILE*)NULL)
254 return 1;
255 outfile = fopen(argv[2], "wb");
256 if (outfile == (FILE*)NULL)
257 return 1;
258
259 lzw = lzw_new();
260 while ((incount = fread(inbuf, 1, sizeof(inbuf), infile)) != 0) {
261 inread += incount;
262 inused = 0;
263 inlen = incount;
264 while (inused < incount) {
265 outlen = sizeof(outbuf);
266 inlen = incount - inused;
267 lzw_compress(lzw, inbuf+inused, &inlen, outbuf, &outlen);
268 inused += inlen;
269 if (outlen)
270 fwrite(outbuf, 1, outlen, outfile);
271 outwritten += outlen;
272 }
273 }
274 inlen = 0; /* EOD */
275 outlen = sizeof(outbuf);
276 lzw_compress(lzw, inbuf, &inlen, outbuf, &outlen);
277 lzw_free(lzw);
278 if (outlen)
279 fwrite(outbuf, 1, outlen, outfile);
280 outwritten += outlen;
281
282 fprintf(stdout, "in=%d out=%d\n", inread, outwritten);
283
284 fclose(infile);
285 fclose(outfile);
286
287 return 0;
288 }
289 #endif