"Fossies" - the Fresh Open Source Software Archive

Member "apg-2.2.3/bloom.c" (7 Aug 2003, 12780 Bytes) of package /linux/privat/old/apg-2.2.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /*
    2 ** Copyright (c) 2001, 2002, 2003
    3 ** Adel I. Mirzazhanov. All rights reserved
    4 **
    5 ** Redistribution and use in source and binary forms, with or without
    6 ** modification, are permitted provided that the following conditions
    7 ** are met:
    8 ** 
    9 **     1.Redistributions of source code must retain the above copyright notice,
   10 **       this list of conditions and the following disclaimer. 
   11 **     2.Redistributions in binary form must reproduce the above copyright
   12 **       notice, this list of conditions and the following disclaimer in the
   13 **       documentation and/or other materials provided with the distribution. 
   14 **     3.The name of the author may not be used to endorse or promote products
   15 **       derived from this software without specific prior written permission. 
   16 **        
   17 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND ANY EXPRESS
   18 ** OR IMPLIED WARRANTIES, INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED
   19 ** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20 ** ARE DISCLAIMED.  IN  NO  EVENT  SHALL THE AUTHOR BE LIABLE FOR ANY
   21 ** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE
   23 ** GOODS OR SERVICES;  LOSS OF USE,  DATA,  OR  PROFITS;  OR BUSINESS
   24 ** INTERRUPTION)  HOWEVER  CAUSED  AND  ON  ANY  THEORY OF LIABILITY,
   25 ** WHETHER  IN  CONTRACT,   STRICT   LIABILITY,  OR  TORT  (INCLUDING
   26 ** NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   27 ** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   28 */
   29 
   30 
   31 /*
   32 ** C module for APG (Automated Password Generator)
   33 ** Bloom filter implementation.
   34 ** Functions:
   35 **   insert_word    - insert word in the filter file
   36 **   check_word     - check word
   37 **   create_filter  - create initial(empty) filter file 
   38 **   open_filter    - open APG Bloom filter file
   39 **   get_filtersize - get APG Bloom filter size
   40 **   get_filtermode - get APG Bloom filter mode
   41 **   count_words    - count words in plain dictionary file
   42 **   print_flt_info - print filter info
   43 **=============================================================
   44 **   hash2bit       - generates 5 values (should be 5 values of independent
   45 **                    hash functions) from input string.
   46 **   getbit         - get the bit value from file.
   47 **   putbit         - put the bit in the file.
   48 */
   49 
   50 #include "bloom.h"
   51 #include "convert.h"
   52 
   53 #define FSIZE_BIT(word_count) ((unsigned long int)(5.0/(1.0-pow( 0.84151068, 1.0/((double)word_count)))))
   54 #define FSIZE_BYTE(word_count) ((((unsigned long int)(5.0/(1.0-pow( 0.84151068, 1.0/((double)word_count)))))/8)+1)
   55 
   56 h_val * hash2bit(char * word, h_val *b);
   57 int getbit(FILE * f, h_val bitnum);
   58 int putbit(FILE * f, h_val bitnum);
   59 
   60 #ifdef APGBFM
   61 /*
   62 ** print_flt_info - print filter information
   63 ** INPUT:
   64 **    FILE * filter - filter file descriptor
   65 ** OUTPUT:
   66 **    int
   67 **      0 - everything OK
   68 **     -1 - something wrong
   69 */
   70 int
   71 print_flt_info(FILE * filter)
   72 {
   73  struct apg_bf_hdr bf_hdr;
   74  int i = 0;
   75 
   76  if (fseek (filter, 0, SEEK_SET) == -1)
   77     return(-1);
   78  if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, filter) != 1)
   79     if (ferror (filter) != 0)
   80        return(-1);
   81  printf ("**************************************\n");
   82  printf ("** APGBFM: Bloom-filter information **\n");
   83  printf ("**************************************\n");
   84  printf ("Filter ID     : ");
   85  for (i=0; i < sizeof(bf_hdr.id); i++)
   86    printf ("%c", bf_hdr.id[i]);
   87  printf ("\n");
   88  printf ("Filter Version: ");
   89  printf ("%c.", bf_hdr.version[0]);
   90  printf ("%c.", bf_hdr.version[1]);
   91  printf ("%c", bf_hdr.version[2]);
   92  printf ("\n");
   93  printf ("Filter size   : %lu bits\n", (unsigned long int)bf_hdr.fs);
   94  printf ("Filter mode   : ");
   95  if (bf_hdr.mode == 0x00) printf ("PLAIN\n");
   96  if (bf_hdr.mode == 0x01) printf ("CASE_INSENSITIVE\n");
   97  printf ("**************************************\n");
   98  if (fseek (filter, 0, SEEK_SET) == -1)
   99     return(-1);
  100  return(0);
  101 }
  102 #endif /* APGBFM */
  103 
  104 /*
  105 ** insert_word   - insert word in the filter file
  106 ** INPUT:
  107 **    char *word        - word to incert in the filter
  108 **    FILE *file        - filter file descriptor
  109 **    h_val filter_size - filter size in bits
  110 **    f_mode mode       - filter mode
  111 ** OUTPUT:
  112 **    int
  113 **      0 - everything OK
  114 **     -1 - something wrong
  115 */
  116 int
  117 insert_word(char *word, FILE *file, h_val filter_size, f_mode mode)
  118 {
  119  h_val h[H_NUM];
  120  int i = 0;
  121 
  122 #ifdef APG_DEBUG
  123  fprintf (stdout, "DEBUG> insert_word: word to insert: %s\n", word);
  124  fflush (stdout);
  125 #endif /* APG_DEBUG */
  126 
  127  if ((mode & BF_CASE_INSENSITIVE) > 0)
  128   {
  129    decapitalize(word);
  130 #ifdef APG_DEBUG
  131    fprintf (stdout, "DEBUG> insert_word: decapitalized word: %s\n", word);
  132    fflush (stdout);
  133 #endif /* APG_DEBUG */
  134   }
  135 
  136  hash2bit (word, &h[0]);
  137  for(i = 0; i < H_NUM; i++)
  138    if (putbit (file, h[i] % filter_size)== -1)
  139       return (-1);
  140  return(0);
  141 }
  142 
  143 /*
  144 ** check_word    - check word
  145 ** INPUT:
  146 **    char *word        - word to check
  147 **    FILE *file        - filter file descriptor
  148 **    h_val filter_size - filter size in bits
  149 **    f_mode            - filter mode
  150 ** OUTPUT:
  151 **    int
  152 **      0 - word is not in dictionary
  153 **      1 - word is in dictionary
  154 **     -1 - something wrong
  155 */
  156 int
  157 check_word(char *word, FILE *file,  h_val filter_size, f_mode mode)
  158 {
  159  h_val h[H_NUM];
  160  int i = 0;
  161  char * tmp_word;
  162  
  163  if ((tmp_word = (char *) calloc(1,MAX_DICT_STRLEN)) == NULL)
  164     return(-1);
  165  (void)memcpy ((void *) tmp_word, (void *) word, strlen(word));
  166 
  167 #ifdef APG_DEBUG
  168  fprintf (stdout, "DEBUG> check_word: word to check: %s\n", word);
  169  fflush (stdout);
  170 #endif /* APG_DEBUG */
  171  if ((mode & BF_CASE_INSENSITIVE) > 0)
  172   {
  173    decapitalize(tmp_word);
  174 #ifdef APG_DEBUG
  175    fprintf (stdout, "DEBUG> check_word: decapitalized word: %s\n", tmp_word);
  176    fflush (stdout);
  177 #endif /* APG_DEBUG */
  178   }
  179 
  180  hash2bit (tmp_word, &h[0]);
  181 
  182  free ((void *)tmp_word);
  183 
  184  for(i = 0; i < H_NUM; i++)
  185   {
  186    switch(getbit(file, h[i] % filter_size))
  187      {
  188       case 0:
  189         return(0);
  190     break;
  191       case -1:
  192         return(-1);
  193     break;
  194       default:
  195         break;
  196      }
  197   }
  198  return (1);
  199 }
  200 
  201 /*
  202 ** open_filter - open APG Bloom filter file
  203 ** open filter file and check is this the real bloom filter file
  204 ** INPUT:
  205 **    char * f_name - filter filename
  206 **    const char *mode - "r" or "r+"
  207 ** OUTPUT:
  208 **    FILE * - file pointer
  209 **    NULL   - something wrong. 
  210 */
  211 FILE *
  212 open_filter(char * f_name, const char *mode)
  213 {
  214  FILE *f;
  215  char etalon_bf_id[] = APGBF_ID;
  216  char etalon_bf_ver[] = APGBF_VERSION;
  217  struct apg_bf_hdr bf_hdr;
  218  
  219  if ((f = fopen (f_name, mode)) == NULL)
  220    return(NULL);
  221  if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
  222     if (ferror (f) != 0)
  223        return(NULL);
  224  if ((bf_hdr.id[0] != etalon_bf_id[0]) || (bf_hdr.id[1] != etalon_bf_id[1]) ||
  225      (bf_hdr.id[2] != etalon_bf_id[2]) || (bf_hdr.id[3] != etalon_bf_id[3]) ||
  226      (bf_hdr.id[4] != etalon_bf_id[4]) )
  227         return (NULL);
  228  if ((bf_hdr.version[0] != etalon_bf_ver[0]) ||
  229      (bf_hdr.version[1] != etalon_bf_ver[1]) ||
  230      (bf_hdr.version[2] != etalon_bf_ver[2]) )
  231         return (NULL);
  232  else
  233   {
  234    if (fseek (f, 0, SEEK_SET) == -1)
  235       return(NULL);
  236    return(f);
  237   }
  238 }
  239 
  240 /*
  241 ** close_filter - close APG Bloom filter file
  242 ** close filter file
  243 ** INPUT:
  244 **    FILE * f_dsk - filter file pointer
  245 ** OUTPUT:
  246 **    int - same as fclose() return value
  247 */
  248 int
  249 close_filter(FILE *f_dsk)
  250 {
  251  return(fclose(f_dsk));
  252 }
  253 
  254 /*
  255 ** get_filtersize - get APG Bloom filter size
  256 ** INPUT:
  257 **    FILE *f - filter file descriptor
  258 ** OUTPUT:
  259 **    h_val - size of APG Bloom filter.
  260 **    0     - something wrong 
  261 */
  262 h_val
  263 get_filtersize(FILE * f)
  264 {
  265  struct apg_bf_hdr bf_hdr;
  266  if (fseek (f, 0, SEEK_SET) == -1)
  267     return(0);
  268  if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
  269     if (ferror (f) != 0)
  270        return(0);
  271  if (fseek (f, 0, SEEK_SET) == -1)
  272     return(0);
  273  return( (h_val)bf_hdr.fs);
  274 } 
  275 
  276 /*
  277 ** get_filtermode - get APG Bloom filter mode
  278 ** INPUT:
  279 **    FILE *f - filter file descriptor
  280 ** OUTPUT:
  281 **    f_mode - APG Bloom filter mode.
  282 **    0      - something wrong 
  283 */
  284 f_mode
  285 get_filtermode(FILE *f)
  286 {
  287  struct apg_bf_hdr bf_hdr;
  288  if (fseek (f, 0, SEEK_SET) == -1)
  289     return(0);
  290  if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
  291     if (ferror (f) != 0)
  292        return(0);
  293  if (fseek (f, 0, SEEK_SET) == -1)
  294     return(0);
  295  return( (f_mode)bf_hdr.mode);
  296 }
  297 
  298 /*
  299 ** create_filter - create initial(empty) filter file 
  300 ** 5 - number of hash functions
  301 ** 0.0001 (0.01%) - probability of false positives
  302 ** INPUT:
  303 **    char * f_name - filter filename
  304 **    unsigned long int n_words - number of words in filter
  305 ** OUTPUT:
  306 **    FILE * - filter file descriptor
  307 **    NULL   - something wrong
  308 ** NOTES:
  309 **   n - number of words in the filter
  310 **   N - size of filter(?)
  311 **
  312 **   a=(1-(4/N))^n
  313 **   0.0001=(1-a)^5 ==> 1-a=0.15849... ==> a=0.84151068 ==>
  314 **   0.84151068=(1-(5/N))^n ==> 0.84151068^(1/n)=1-(5/N) ==>
  315 **
  316 **   N=5/(1-[0.84151068^(1/n)])
  317 **
  318 **               5
  319 **   N = -----------------
  320 **                     1/n
  321 **       1 - 0.84151068
  322 */
  323 FILE *
  324 create_filter(char * f_name, unsigned long int n_words, f_mode mode) 
  325 {
  326  FILE *f;
  327  char zero = 0x00;
  328  long int i = 0L;
  329  char etalon_bf_id[] = APGBF_ID;
  330  char etalon_bf_ver[] = APGBF_VERSION;
  331  struct apg_bf_hdr bf_hdr;
  332 
  333  bf_hdr.id[0] = etalon_bf_id[0];
  334  bf_hdr.id[1] = etalon_bf_id[1];
  335  bf_hdr.id[2] = etalon_bf_id[2];
  336  bf_hdr.id[3] = etalon_bf_id[3];
  337  bf_hdr.id[4] = etalon_bf_id[4];
  338  
  339  bf_hdr.version[0] = etalon_bf_ver[0];
  340  bf_hdr.version[1] = etalon_bf_ver[1];
  341  bf_hdr.version[2] = etalon_bf_ver[2];
  342 
  343  bf_hdr.fs = FSIZE_BIT(n_words);
  344  
  345  bf_hdr.mode = mode;
  346 
  347  if ((f = fopen (f_name, "w+")) == NULL)
  348    return(NULL);
  349 
  350  if (fwrite ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) != 1)
  351     if (ferror (f) != 0)
  352        return(NULL);
  353 
  354  for (i = 0; i < FSIZE_BYTE(n_words); i++)
  355    if ( fwrite ( (void *)&zero, 1, 1, f) < 1)
  356       if (ferror (f) != 0)
  357          return(NULL);
  358  if (fseek (f, 0, SEEK_SET) == -1)
  359     return (NULL);
  360 
  361  return (f);
  362 }
  363 
  364 /*
  365 ** count_words - count words in plain dictionary file
  366 ** INPUT:
  367 **    FILE *dict_file -plain dicionary file descriptor
  368 ** OUTPUT:
  369 **    h_val - amount of words in dictionary file
  370 **    0     - something wrong
  371 */
  372 h_val
  373 count_words(FILE *dict_file)
  374 {
  375  h_val i = 0L; /* word counter */
  376  char *string; /* temp string holder */
  377  char *tmp;    /* just tmp char pointer and nothing more it has no memory assigned */
  378  if ((string = (char *) calloc(1,MAX_DICT_STRLEN)) == NULL)
  379     return(0);
  380  while ((fgets(string, MAX_DICT_STRLEN, dict_file) != NULL))
  381   {
  382    tmp = (char *)strtok (string," \t\n\0");
  383    if (tmp != NULL) i++;
  384   }
  385  if (fseek (dict_file, 0, SEEK_SET) == -1)
  386     return (0);
  387  free ((void *) string);
  388  return (i);
  389 }
  390 
  391 /*
  392 ** hash2bit      - generates 4 values (should be 4 values of independent
  393 **                 hash functions) from input string.
  394 ** INPUT:
  395 **    char *word - word to hash
  396 **    h_val *b   - pointer to bitnumber array
  397 ** OUTPUT:
  398 **    h_val * - pointer to bitnumber array
  399 */
  400 h_val *
  401 hash2bit(char * word, h_val *b)
  402 {
  403  apg_SHA_INFO context;
  404  BYTE cs[SHA_DIGESTSIZE];
  405 
  406  apg_shaInit (&context);
  407  apg_shaUpdate (&context, (BYTE *)word, strlen(word));
  408  apg_shaFinal (&context, cs);
  409  return ( (h_val *)memcpy( (void *)b, (void *)&cs[0], SHA_DIGESTSIZE));
  410 }
  411 
  412 /*
  413 ** getbit        - get the bit value from file.
  414 ** INPUT:
  415 **    FILE *f - file descriptor
  416 **    h_val bitnum - bit number
  417 ** OUTPUT:
  418 **    int
  419 **      0,1 - bit value
  420 **     -1   - something wrong
  421 */
  422 int
  423 getbit(FILE * f, h_val bitnum)
  424 {
  425  long int bytenum = 0L;
  426  short int bit_in_byte = 0;
  427  unsigned char read_byte = 0x00;
  428  unsigned char test_byte = 0x01;
  429  int i = 0;
  430  
  431  bit_in_byte = bitnum % 8;
  432  bytenum =  APGBFHDRSIZE + (bitnum/8);
  433  if (fseek (f, bytenum, SEEK_SET) == -1)
  434     return(-1);
  435  if (fread ((void*)&read_byte,1,1,f) < 1)
  436     if (ferror(f) != 0)
  437        return (-1);
  438  for (i=0;i < bit_in_byte;i++)
  439    test_byte = test_byte*2;
  440  if ((read_byte & test_byte) > 0) return (1);
  441  else return (0);
  442 }
  443 
  444 /*
  445 ** putbit        - put the bit in the file.
  446 ** INPUT:
  447 **    FILE *f - file descriptor
  448 **    h_val bitnum - bit number
  449 ** OUTPUT:
  450 **    int
  451 **      0 - everything OK
  452 **     -1 - something wrong
  453 */
  454 int
  455 putbit(FILE * f, h_val bitnum)
  456 {
  457  long int bytenum = 0L;
  458  short int bit_in_byte = 0;
  459  unsigned char read_byte = 0x00;
  460  unsigned char test_byte = 0x01;
  461  int i = 0;
  462  bit_in_byte = bitnum % 8;
  463  bytenum =  APGBFHDRSIZE + (bitnum/8);
  464  if (fseek (f, bytenum, SEEK_SET) == -1)
  465     return(-1);
  466  if (fread ((void*)&read_byte,1,1,f) < 1)
  467     if (ferror(f) != 0)
  468        return (-1);
  469  for (i=0;i < bit_in_byte;i++)
  470    test_byte = test_byte*2;
  471  read_byte = read_byte | test_byte;
  472  if (fseek (f, bytenum, SEEK_SET) == -1)
  473     return(-1);
  474  if (fwrite ((void*)&read_byte,1,1,f) < 1)
  475     if (ferror(f) != 0)
  476        return (-1);
  477  return (0);
  478 }
  479 /* END OF bloom.c file */