"Fossies" - the Fresh Open Source Software Archive 
Member "apg-2.2.3/bloom.c" (7 Aug 2003, 12780 Bytes) of package /linux/privat/old/apg-2.2.3.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /*
2 ** Copyright (c) 2001, 2002, 2003
3 ** Adel I. Mirzazhanov. All rights reserved
4 **
5 ** Redistribution and use in source and binary forms, with or without
6 ** modification, are permitted provided that the following conditions
7 ** are met:
8 **
9 ** 1.Redistributions of source code must retain the above copyright notice,
10 ** this list of conditions and the following disclaimer.
11 ** 2.Redistributions in binary form must reproduce the above copyright
12 ** notice, this list of conditions and the following disclaimer in the
13 ** documentation and/or other materials provided with the distribution.
14 ** 3.The name of the author may not be used to endorse or promote products
15 ** derived from this software without specific prior written permission.
16 **
17 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 ** OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ** ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ** GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ** NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 /*
32 ** C module for APG (Automated Password Generator)
33 ** Bloom filter implementation.
34 ** Functions:
35 ** insert_word - insert word in the filter file
36 ** check_word - check word
37 ** create_filter - create initial(empty) filter file
38 ** open_filter - open APG Bloom filter file
39 ** get_filtersize - get APG Bloom filter size
40 ** get_filtermode - get APG Bloom filter mode
41 ** count_words - count words in plain dictionary file
42 ** print_flt_info - print filter info
43 **=============================================================
44 ** hash2bit - generates 5 values (should be 5 values of independent
45 ** hash functions) from input string.
46 ** getbit - get the bit value from file.
47 ** putbit - put the bit in the file.
48 */
49
50 #include "bloom.h"
51 #include "convert.h"
52
53 #define FSIZE_BIT(word_count) ((unsigned long int)(5.0/(1.0-pow( 0.84151068, 1.0/((double)word_count)))))
54 #define FSIZE_BYTE(word_count) ((((unsigned long int)(5.0/(1.0-pow( 0.84151068, 1.0/((double)word_count)))))/8)+1)
55
56 h_val * hash2bit(char * word, h_val *b);
57 int getbit(FILE * f, h_val bitnum);
58 int putbit(FILE * f, h_val bitnum);
59
60 #ifdef APGBFM
61 /*
62 ** print_flt_info - print filter information
63 ** INPUT:
64 ** FILE * filter - filter file descriptor
65 ** OUTPUT:
66 ** int
67 ** 0 - everything OK
68 ** -1 - something wrong
69 */
70 int
71 print_flt_info(FILE * filter)
72 {
73 struct apg_bf_hdr bf_hdr;
74 int i = 0;
75
76 if (fseek (filter, 0, SEEK_SET) == -1)
77 return(-1);
78 if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, filter) != 1)
79 if (ferror (filter) != 0)
80 return(-1);
81 printf ("**************************************\n");
82 printf ("** APGBFM: Bloom-filter information **\n");
83 printf ("**************************************\n");
84 printf ("Filter ID : ");
85 for (i=0; i < sizeof(bf_hdr.id); i++)
86 printf ("%c", bf_hdr.id[i]);
87 printf ("\n");
88 printf ("Filter Version: ");
89 printf ("%c.", bf_hdr.version[0]);
90 printf ("%c.", bf_hdr.version[1]);
91 printf ("%c", bf_hdr.version[2]);
92 printf ("\n");
93 printf ("Filter size : %lu bits\n", (unsigned long int)bf_hdr.fs);
94 printf ("Filter mode : ");
95 if (bf_hdr.mode == 0x00) printf ("PLAIN\n");
96 if (bf_hdr.mode == 0x01) printf ("CASE_INSENSITIVE\n");
97 printf ("**************************************\n");
98 if (fseek (filter, 0, SEEK_SET) == -1)
99 return(-1);
100 return(0);
101 }
102 #endif /* APGBFM */
103
104 /*
105 ** insert_word - insert word in the filter file
106 ** INPUT:
107 ** char *word - word to incert in the filter
108 ** FILE *file - filter file descriptor
109 ** h_val filter_size - filter size in bits
110 ** f_mode mode - filter mode
111 ** OUTPUT:
112 ** int
113 ** 0 - everything OK
114 ** -1 - something wrong
115 */
116 int
117 insert_word(char *word, FILE *file, h_val filter_size, f_mode mode)
118 {
119 h_val h[H_NUM];
120 int i = 0;
121
122 #ifdef APG_DEBUG
123 fprintf (stdout, "DEBUG> insert_word: word to insert: %s\n", word);
124 fflush (stdout);
125 #endif /* APG_DEBUG */
126
127 if ((mode & BF_CASE_INSENSITIVE) > 0)
128 {
129 decapitalize(word);
130 #ifdef APG_DEBUG
131 fprintf (stdout, "DEBUG> insert_word: decapitalized word: %s\n", word);
132 fflush (stdout);
133 #endif /* APG_DEBUG */
134 }
135
136 hash2bit (word, &h[0]);
137 for(i = 0; i < H_NUM; i++)
138 if (putbit (file, h[i] % filter_size)== -1)
139 return (-1);
140 return(0);
141 }
142
143 /*
144 ** check_word - check word
145 ** INPUT:
146 ** char *word - word to check
147 ** FILE *file - filter file descriptor
148 ** h_val filter_size - filter size in bits
149 ** f_mode - filter mode
150 ** OUTPUT:
151 ** int
152 ** 0 - word is not in dictionary
153 ** 1 - word is in dictionary
154 ** -1 - something wrong
155 */
156 int
157 check_word(char *word, FILE *file, h_val filter_size, f_mode mode)
158 {
159 h_val h[H_NUM];
160 int i = 0;
161 char * tmp_word;
162
163 if ((tmp_word = (char *) calloc(1,MAX_DICT_STRLEN)) == NULL)
164 return(-1);
165 (void)memcpy ((void *) tmp_word, (void *) word, strlen(word));
166
167 #ifdef APG_DEBUG
168 fprintf (stdout, "DEBUG> check_word: word to check: %s\n", word);
169 fflush (stdout);
170 #endif /* APG_DEBUG */
171 if ((mode & BF_CASE_INSENSITIVE) > 0)
172 {
173 decapitalize(tmp_word);
174 #ifdef APG_DEBUG
175 fprintf (stdout, "DEBUG> check_word: decapitalized word: %s\n", tmp_word);
176 fflush (stdout);
177 #endif /* APG_DEBUG */
178 }
179
180 hash2bit (tmp_word, &h[0]);
181
182 free ((void *)tmp_word);
183
184 for(i = 0; i < H_NUM; i++)
185 {
186 switch(getbit(file, h[i] % filter_size))
187 {
188 case 0:
189 return(0);
190 break;
191 case -1:
192 return(-1);
193 break;
194 default:
195 break;
196 }
197 }
198 return (1);
199 }
200
201 /*
202 ** open_filter - open APG Bloom filter file
203 ** open filter file and check is this the real bloom filter file
204 ** INPUT:
205 ** char * f_name - filter filename
206 ** const char *mode - "r" or "r+"
207 ** OUTPUT:
208 ** FILE * - file pointer
209 ** NULL - something wrong.
210 */
211 FILE *
212 open_filter(char * f_name, const char *mode)
213 {
214 FILE *f;
215 char etalon_bf_id[] = APGBF_ID;
216 char etalon_bf_ver[] = APGBF_VERSION;
217 struct apg_bf_hdr bf_hdr;
218
219 if ((f = fopen (f_name, mode)) == NULL)
220 return(NULL);
221 if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
222 if (ferror (f) != 0)
223 return(NULL);
224 if ((bf_hdr.id[0] != etalon_bf_id[0]) || (bf_hdr.id[1] != etalon_bf_id[1]) ||
225 (bf_hdr.id[2] != etalon_bf_id[2]) || (bf_hdr.id[3] != etalon_bf_id[3]) ||
226 (bf_hdr.id[4] != etalon_bf_id[4]) )
227 return (NULL);
228 if ((bf_hdr.version[0] != etalon_bf_ver[0]) ||
229 (bf_hdr.version[1] != etalon_bf_ver[1]) ||
230 (bf_hdr.version[2] != etalon_bf_ver[2]) )
231 return (NULL);
232 else
233 {
234 if (fseek (f, 0, SEEK_SET) == -1)
235 return(NULL);
236 return(f);
237 }
238 }
239
240 /*
241 ** close_filter - close APG Bloom filter file
242 ** close filter file
243 ** INPUT:
244 ** FILE * f_dsk - filter file pointer
245 ** OUTPUT:
246 ** int - same as fclose() return value
247 */
248 int
249 close_filter(FILE *f_dsk)
250 {
251 return(fclose(f_dsk));
252 }
253
254 /*
255 ** get_filtersize - get APG Bloom filter size
256 ** INPUT:
257 ** FILE *f - filter file descriptor
258 ** OUTPUT:
259 ** h_val - size of APG Bloom filter.
260 ** 0 - something wrong
261 */
262 h_val
263 get_filtersize(FILE * f)
264 {
265 struct apg_bf_hdr bf_hdr;
266 if (fseek (f, 0, SEEK_SET) == -1)
267 return(0);
268 if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
269 if (ferror (f) != 0)
270 return(0);
271 if (fseek (f, 0, SEEK_SET) == -1)
272 return(0);
273 return( (h_val)bf_hdr.fs);
274 }
275
276 /*
277 ** get_filtermode - get APG Bloom filter mode
278 ** INPUT:
279 ** FILE *f - filter file descriptor
280 ** OUTPUT:
281 ** f_mode - APG Bloom filter mode.
282 ** 0 - something wrong
283 */
284 f_mode
285 get_filtermode(FILE *f)
286 {
287 struct apg_bf_hdr bf_hdr;
288 if (fseek (f, 0, SEEK_SET) == -1)
289 return(0);
290 if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
291 if (ferror (f) != 0)
292 return(0);
293 if (fseek (f, 0, SEEK_SET) == -1)
294 return(0);
295 return( (f_mode)bf_hdr.mode);
296 }
297
298 /*
299 ** create_filter - create initial(empty) filter file
300 ** 5 - number of hash functions
301 ** 0.0001 (0.01%) - probability of false positives
302 ** INPUT:
303 ** char * f_name - filter filename
304 ** unsigned long int n_words - number of words in filter
305 ** OUTPUT:
306 ** FILE * - filter file descriptor
307 ** NULL - something wrong
308 ** NOTES:
309 ** n - number of words in the filter
310 ** N - size of filter(?)
311 **
312 ** a=(1-(4/N))^n
313 ** 0.0001=(1-a)^5 ==> 1-a=0.15849... ==> a=0.84151068 ==>
314 ** 0.84151068=(1-(5/N))^n ==> 0.84151068^(1/n)=1-(5/N) ==>
315 **
316 ** N=5/(1-[0.84151068^(1/n)])
317 **
318 ** 5
319 ** N = -----------------
320 ** 1/n
321 ** 1 - 0.84151068
322 */
323 FILE *
324 create_filter(char * f_name, unsigned long int n_words, f_mode mode)
325 {
326 FILE *f;
327 char zero = 0x00;
328 long int i = 0L;
329 char etalon_bf_id[] = APGBF_ID;
330 char etalon_bf_ver[] = APGBF_VERSION;
331 struct apg_bf_hdr bf_hdr;
332
333 bf_hdr.id[0] = etalon_bf_id[0];
334 bf_hdr.id[1] = etalon_bf_id[1];
335 bf_hdr.id[2] = etalon_bf_id[2];
336 bf_hdr.id[3] = etalon_bf_id[3];
337 bf_hdr.id[4] = etalon_bf_id[4];
338
339 bf_hdr.version[0] = etalon_bf_ver[0];
340 bf_hdr.version[1] = etalon_bf_ver[1];
341 bf_hdr.version[2] = etalon_bf_ver[2];
342
343 bf_hdr.fs = FSIZE_BIT(n_words);
344
345 bf_hdr.mode = mode;
346
347 if ((f = fopen (f_name, "w+")) == NULL)
348 return(NULL);
349
350 if (fwrite ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
351 if (ferror (f) != 0)
352 return(NULL);
353
354 for (i = 0; i < FSIZE_BYTE(n_words); i++)
355 if ( fwrite ( (void *)&zero, 1, 1, f) < 1)
356 if (ferror (f) != 0)
357 return(NULL);
358 if (fseek (f, 0, SEEK_SET) == -1)
359 return (NULL);
360
361 return (f);
362 }
363
364 /*
365 ** count_words - count words in plain dictionary file
366 ** INPUT:
367 ** FILE *dict_file -plain dicionary file descriptor
368 ** OUTPUT:
369 ** h_val - amount of words in dictionary file
370 ** 0 - something wrong
371 */
372 h_val
373 count_words(FILE *dict_file)
374 {
375 h_val i = 0L; /* word counter */
376 char *string; /* temp string holder */
377 char *tmp; /* just tmp char pointer and nothing more it has no memory assigned */
378 if ((string = (char *) calloc(1,MAX_DICT_STRLEN)) == NULL)
379 return(0);
380 while ((fgets(string, MAX_DICT_STRLEN, dict_file) != NULL))
381 {
382 tmp = (char *)strtok (string," \t\n\0");
383 if (tmp != NULL) i++;
384 }
385 if (fseek (dict_file, 0, SEEK_SET) == -1)
386 return (0);
387 free ((void *) string);
388 return (i);
389 }
390
391 /*
392 ** hash2bit - generates 4 values (should be 4 values of independent
393 ** hash functions) from input string.
394 ** INPUT:
395 ** char *word - word to hash
396 ** h_val *b - pointer to bitnumber array
397 ** OUTPUT:
398 ** h_val * - pointer to bitnumber array
399 */
400 h_val *
401 hash2bit(char * word, h_val *b)
402 {
403 apg_SHA_INFO context;
404 BYTE cs[SHA_DIGESTSIZE];
405
406 apg_shaInit (&context);
407 apg_shaUpdate (&context, (BYTE *)word, strlen(word));
408 apg_shaFinal (&context, cs);
409 return ( (h_val *)memcpy( (void *)b, (void *)&cs[0], SHA_DIGESTSIZE));
410 }
411
412 /*
413 ** getbit - get the bit value from file.
414 ** INPUT:
415 ** FILE *f - file descriptor
416 ** h_val bitnum - bit number
417 ** OUTPUT:
418 ** int
419 ** 0,1 - bit value
420 ** -1 - something wrong
421 */
422 int
423 getbit(FILE * f, h_val bitnum)
424 {
425 long int bytenum = 0L;
426 short int bit_in_byte = 0;
427 unsigned char read_byte = 0x00;
428 unsigned char test_byte = 0x01;
429 int i = 0;
430
431 bit_in_byte = bitnum % 8;
432 bytenum = APGBFHDRSIZE + (bitnum/8);
433 if (fseek (f, bytenum, SEEK_SET) == -1)
434 return(-1);
435 if (fread ((void*)&read_byte,1,1,f) < 1)
436 if (ferror(f) != 0)
437 return (-1);
438 for (i=0;i < bit_in_byte;i++)
439 test_byte = test_byte*2;
440 if ((read_byte & test_byte) > 0) return (1);
441 else return (0);
442 }
443
444 /*
445 ** putbit - put the bit in the file.
446 ** INPUT:
447 ** FILE *f - file descriptor
448 ** h_val bitnum - bit number
449 ** OUTPUT:
450 ** int
451 ** 0 - everything OK
452 ** -1 - something wrong
453 */
454 int
455 putbit(FILE * f, h_val bitnum)
456 {
457 long int bytenum = 0L;
458 short int bit_in_byte = 0;
459 unsigned char read_byte = 0x00;
460 unsigned char test_byte = 0x01;
461 int i = 0;
462 bit_in_byte = bitnum % 8;
463 bytenum = APGBFHDRSIZE + (bitnum/8);
464 if (fseek (f, bytenum, SEEK_SET) == -1)
465 return(-1);
466 if (fread ((void*)&read_byte,1,1,f) < 1)
467 if (ferror(f) != 0)
468 return (-1);
469 for (i=0;i < bit_in_byte;i++)
470 test_byte = test_byte*2;
471 read_byte = read_byte | test_byte;
472 if (fseek (f, bytenum, SEEK_SET) == -1)
473 return(-1);
474 if (fwrite ((void*)&read_byte,1,1,f) < 1)
475 if (ferror(f) != 0)
476 return (-1);
477 return (0);
478 }
479 /* END OF bloom.c file */