"Fossies" - the Fresh Open Source Software Archive

Member "ffe-0.3.9/src/anonymize.c" (12 Feb 2017, 15623 Bytes) of package /linux/privat/ffe-0.3.9.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "anonymize.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.3.7_vs_0.3.7-1.

    1 /*
    2  *    ffe - Flat File Extractor
    3  *
    4  *    Copyright (C) 2017 Timo Savinen
    5  *    This file is part of ffe.
    6  * 
    7  *    ffe is free software; you can redistribute it and/or modify
    8  *    it under the terms of the GNU General Public License as published by
    9  *    the Free Software Foundation; either version 2 of the License, or
   10  *    (at your option) any later version.
   11  *
   12  *    ffe is distributed in the hope that it will be useful,
   13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
   14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15  *    GNU General Public License for more details.
   16  *
   17  *    You should have received a copy of the GNU General Public License
   18  *    along with ffe; if not, write to the Free Software
   19  *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   20  *    F607480034
   21  *    HJ9004-2
   22  *
   23  */
   24 
   25 #include "ffe.h"
   26 #include <stdlib.h>
   27 #include <string.h>
   28 
   29 #ifdef HAVE_GCRYPT_H
   30 #include <gcrypt.h>
   31 #endif
   32 
/* Size in bytes of the static work buffers used in this file
 * (normalized field, scramble data and hash/random data). */
#define MAX_NFIELD_LEN 262144

/* Replacement alphabets for text output: digits, a space and ASCII letters, or digits only. */
#define CRYPT_ASCII_CHARS "0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
#define CRYPT_NUMBER_CHARS "0123456789"

/* Nibble value (0..15) -> character. Nibble 0xf maps to NUL, which the BCD reader treats as end of data. */
static uint8_t bcd_to_ascii[]={'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','\000'};

/* Array copies of the alphabets so that sizeof can be used for their lengths. */
static char crypt_ascii_chars[]=CRYPT_ASCII_CHARS;
static char crypt_number_chars[]=CRYPT_NUMBER_CHARS;

/* Alphabet lengths without the terminating NUL of the string literal. */
#define NUM_ASCII_CHARS  (sizeof(crypt_ascii_chars) - 1)
#define NUM_NUMBER_CHARS  (sizeof(crypt_number_chars) - 1)
   45 
   46 static inline uint8_t
   47 bcdtocl(uint8_t bcd)
   48 {
   49         return bcd_to_ascii[bcd & 0x0f];
   50 }
   51 
   52 static inline uint8_t
   53 bcdtocb(uint8_t bcd)
   54 {
   55         return bcd_to_ascii[(bcd >> 4) & 0x0f];
   56 }
   57 
   58 
   59 /* Init libgcrypt
   60  */
   61 void init_libgcrypt()
   62 {
   63 #ifdef HAVE_WORKING_LIBGCRYPT
   64     if (gcry_check_version(GCRYPT_VERSION) == NULL) panic("libgcrypt init error",NULL,NULL);
   65     gcry_control(GCRYCTL_DISABLE_SECMEM,0);
   66     gcry_control(GCRYCTL_INITIALIZATION_FINISHED,0);
   67 #endif
   68 }
   69 
   70 
   71 
   72 /* read field dta from buffer and write it in normalized form to nbuffer,
   73  * return nbuffer data length
   74  * rules:
   75  * - binary data (execpt little endian hex values) and text n fixed and separated data is written as it is
   76  * - bcd values are written as clear text numbers
   77  * - little endian hex vlaues are written in big endian order
   78  */
   79 static int get_normalized_field(struct field *f,char *type,uint8_t quote,int len,uint8_t *buffer,uint8_t *nbuffer)
   80 {
   81    int copy_length = 0;
   82    register int i;
   83    register uint8_t *data;
   84    int inside_quote;
   85    uint8_t separator;
   86    uint8_t *start;
   87    uint8_t c;
   88    uint8_t *data_end;
   89    uint8_t *stop;
   90   
   91 
   92    if(f->const_data != NULL || f->bposition < 0) return 0;   // const data is not anonymized or nothing is done for non existing field
   93 
   94    switch(type[0])
   95    {
   96        case FIXED_LENGTH: 
   97            copy_length = f->length;
   98            if(copy_length > MAX_NFIELD_LEN) copy_length = MAX_NFIELD_LEN;
   99            i = 0;
  100            data = &buffer[f->bposition];
  101            if(copy_length)
  102            {
  103 #ifdef WIN32
  104                while(i < copy_length && data[i] != '\n' && data[i] != '\r' && data[i]) nbuffer[i] = data[i++];
  105 #else
  106                while(i < copy_length && data[i] != '\n' && data[i]) nbuffer[i] = data[i++];
  107 #endif
  108            } else
  109            {
  110 #ifdef WIN32
  111                while(data[i] != '\n' && data[i] != '\r' && data[i]) nbuffer[i] = data[i++];
  112 #else
  113                while(data[i] != '\n' && data[i]) nbuffer[i] = data[i++];
  114 #endif
  115            }
  116            copy_length = i;
  117            break;
  118        case SEPARATED:
  119            separator = type[1];
  120 
  121            if(f->bposition < 0) return 0;
  122                
  123            data = &buffer[f->bposition];
  124 
  125            inside_quote = 0;
  126 
  127            if(*data == quote && quote) {        //skip first quote
  128                data = &buffer[f->bposition + 1];
  129                inside_quote = 1;
  130            }
  131            start = data;
  132 #ifdef WIN32
  133            while((*data != separator || inside_quote) && *data != '\n' && *data != '\r' && (data - start) < MAX_NFIELD_LEN)
  134 #else
  135            while((*data != separator || inside_quote) && *data != '\n' && (data - start) < MAX_NFIELD_LEN)
  136 #endif
  137            {
  138                if(((*data == quote && data[1] == quote) || (*data == '\\' && data[1] == quote)) && quote)
  139                {
  140                    *nbuffer = *data;
  141                    nbuffer++;
  142                    data++;
  143                } else if(*data == quote)
  144                {
  145                    if(inside_quote) inside_quote=0;
  146                }
  147 #ifdef WIN32
  148                if(*data != '\n' && *data != '\r') 
  149 #else
  150                    if(*data != '\n')
  151 #endif
  152                    {
  153                        *nbuffer = *data;
  154                        nbuffer++;
  155                        data++;
  156                    }
  157            }
  158 
  159            if(data > start && data[-1] == quote && quote) data--; // don't take last quote
  160 
  161            copy_length = (int) (data - start);
  162            break;
  163        case BINARY:
  164            switch(f->type)
  165            {
  166                case F_ASC:
  167                case F_CHAR:
  168                case F_DOUBLE:
  169                case F_FLOAT:
  170                case F_INT:
  171                case F_UINT:
  172                case F_HEX:
  173                    copy_length = f->length;
  174                    if(copy_length > MAX_NFIELD_LEN) copy_length = MAX_NFIELD_LEN;
  175                    if(f->type == F_ASC) 
  176                    {
  177                        stop = memccpy(nbuffer,&buffer[f->bposition],0,copy_length);  // stop at null for text
  178                        if(stop != NULL) copy_length = (int) (stop - nbuffer) - 1;
  179                    } else
  180                    {
  181                        memcpy(nbuffer,&buffer[f->bposition],copy_length);
  182                    }
  183                    break;
  184                case F_BCD:
  185                    data = &buffer[f->bposition];
  186                    data_end = data + f->length;
  187                    i = 0;
  188                    switch(f->endianess)
  189                    {
  190                        case F_BIG_ENDIAN:
  191                            do
  192                            {
  193                                c = bcdtocb(*data);
  194                                if(c)
  195                                {
  196                                    nbuffer[i++] = c;
  197                                    c = bcdtocl(*data);
  198                                    if(c) nbuffer[i++] = c;
  199                                }
  200                                data++;
  201                            } while(data < data_end && c);
  202                            break;
  203                        case F_LITTLE_ENDIAN:
  204                            do
  205                            {
  206                                c = bcdtocl(*data);
  207                                if(c)
  208                                {
  209                                    nbuffer[i++] = c;
  210                                    c = bcdtocb(*data);
  211                                    if(c) nbuffer[i++] = c;
  212                                }
  213                                data++;
  214                            } while(data < data_end && c);
  215                            break;
  216                    }
  217                    copy_length = i;
  218            }
  219            break;
  220    }
  221    return copy_length;
  222 }
  223 
  224 static uint8_t ascii_to_bcd(uint8_t asc)
  225 {
  226         return asc - (asc >= '0' && asc <= '9' ? '0' : ('a' - 10));
  227 }
  228 
  229 
  230 /* Write scrambled field back to buffer
  231  */
  232 static void write_scrambled_field(struct field *f,char *type,uint8_t quote,uint8_t *buffer,int scramble_len,uint8_t *scrambled_data)
  233 {
  234     int quoted;
  235     uint8_t *data;
  236     register int i;
  237     register uint8_t c,t;
  238 
  239     switch(type[0])
  240     {
  241         case FIXED_LENGTH: 
  242             memcpy(&buffer[f->bposition],scrambled_data,scramble_len);
  243             break;
  244         case SEPARATED:
  245             quoted = (buffer[f->bposition] == quote && quote) ? 1 : 0 ;     //skip first quote
  246             memcpy(&buffer[f->bposition + quoted],scrambled_data,scramble_len);
  247             break;
  248         case BINARY:
  249             switch(f->type)
  250             {
  251                 case F_ASC:
  252                 case F_CHAR:
  253                 case F_DOUBLE:
  254                 case F_FLOAT:
  255                 case F_INT:
  256                 case F_UINT:
  257                 case F_HEX:
  258                     memcpy(&buffer[f->bposition],scrambled_data,scramble_len);
  259                     break;
  260                 case F_BCD:  // write in big endian and swap after that if little endian
  261                     data = &buffer[f->bposition];
  262                     i = 0;
  263                     c = 0;
  264                     while(i < scramble_len)
  265                     {
  266                         c = (uint8_t) (ascii_to_bcd(scrambled_data[i]) & 0x0f);
  267                         c = c << 4;
  268                         i++;
  269                         if(i < scramble_len)
  270                         {
  271                             c = c | (uint8_t) (ascii_to_bcd(scrambled_data[i]) & 0x0f);
  272                         } else
  273                         {
  274                             c = c | 0x0f;
  275                         }
  276                         i++;
  277                         if(f->endianess == F_LITTLE_ENDIAN)
  278                         {
  279                             t = (c >> 4) & 0x0f;
  280                             c = (c << 4) | t;
  281                         }
  282                         *data = c;
  283                         data++;
  284                     }       
  285                     if((c & 0x0f) != 0x0f && (c & 0xf0) != 0xf0 && scramble_len/2 < f->length) *data=0xff;
  286                     break;
  287             }
  288     }
  289 }
  290 
  291 
  292 
  293 
  294 static void scramble_MASK(uint8_t *scramble,int length,uint8_t mask)
  295 {
  296     if(length < 0) return;
  297     do
  298     {
  299          scramble[--length] = mask;
  300     } while (length >= 0);
  301 }
  302 
  303 static void scramble_HASH(int ftype,int hash_length,unsigned char *hash,int scramble_length,uint8_t *scramble,int num_chars,char *chars)
  304 {
  305     register int i;
  306 
  307     if(hash_length < 1) return;
  308 
  309     switch(ftype)
  310     {
  311         case F_ASC:
  312             i = 0;
  313             while(i < scramble_length) 
  314             {
  315                 scramble[i] = chars[hash[i % hash_length] % num_chars];
  316                 i++;
  317             }
  318             break;
  319         case F_CHAR:
  320         case F_DOUBLE:
  321         case F_FLOAT:
  322         case F_INT:
  323         case F_UINT:
  324         case F_HEX:
  325             i = 0;
  326             while(i < scramble_length) 
  327             {
  328                 scramble[i] = hash[i % hash_length];
  329                 i++;
  330             }
  331             break;
  332         case F_BCD:
  333             i = 0;
  334             while(i < scramble_length)
  335             {
  336                 scramble[i] = '0' +  hash[i % hash_length] % 9;
  337                 i++;
  338             }
  339             break;
  340 
  341     }
  342 }
  343 
  344 /* make a hash using libcgryot 
  345  * write hash to hash and return the hash length
  346  */
  347 static int md_hash(unsigned char *hash,int input_length,uint8_t *input,int bytes)
  348 {
  349     size_t outlen=0;
  350 #ifdef HAVE_WORKING_LIBGCRYPT
  351     int algo;
  352     gcry_md_hd_t hd;
  353     gpg_error_t err;
  354     unsigned char *p;
  355 
  356     switch(bytes)
  357     {
  358         case 32:
  359             algo = GCRY_MD_SHA256;
  360             break;
  361         case 64:
  362             algo = GCRY_MD_SHA512;
  363             break;
  364         case 16:
  365         default:
  366             algo = GCRY_MD_MD5;
  367             break;
  368     }
  369 
  370     outlen = gcry_md_get_algo_dlen(algo);
  371 
  372     err = gcry_md_open(&hd,algo,0);
  373     if(err != GPG_ERR_NO_ERROR) panic("libgcrypt error",NULL,NULL);
  374 
  375     gcry_md_write(hd,input,input_length);
  376 
  377     p = gcry_md_read(hd,algo);   
  378     if (p == NULL) panic("libgcrypt error",NULL,NULL);
  379 
  380     memcpy(hash,p,outlen);
  381 
  382     gcry_md_close(hd);
  383 #else
  384     problem("Libgcrypt not availaible in this system",NULL,NULL);
  385 #endif
  386     return outlen;
  387 }
  388 
  389 static int md_random(unsigned char *rand,int rand_length)
  390 {
  391 #ifdef HAVE_WORKING_LIBGCRYPT
  392     gcry_create_nonce(rand,rand_length);
  393     return rand_length;
  394 #else
  395     problem("Libgcrypt not availaible in this system",NULL,NULL);
  396     return 0;
  397 #endif
  398 }
  399 
  400     
  401     
  402 
  403 /* make scramble data based on anonymization info and normalized input data
  404  * scramble length is the length indicated byt start pos, length and actual data length
  405  * return scramble length
  406  */
  407 #define HASH_BUFFER_LEN (MAX_NFIELD_LEN)
  408 static int create_scramble(int ftype,uint8_t *scramble,int normalized_length,uint8_t *normalized_field,struct anon_field *a)
  409 {
  410     int scramble_length;
  411     int hash_length;
  412     static unsigned char hash[HASH_BUFFER_LEN]; 
  413 
  414     if(a->start >= 0)   // from beginning
  415     {
  416         if(a->start > normalized_length) return 0;
  417         scramble_length = normalized_length - a->start + 1;
  418     } else 
  419     {
  420         if(abs(a->start) > normalized_length) return 0;
  421         scramble_length = normalized_length + a->start + 1;
  422     }
  423     if(scramble_length > a->length && a->length > 0) scramble_length = a->length;    
  424     if(scramble_length > MAX_NFIELD_LEN) scramble_length = MAX_NFIELD_LEN;
  425 
  426     switch(a->method)
  427     {
  428         case A_MASK:
  429             scramble_MASK(scramble,scramble_length,a->key_length > 0 ? a->key[0] : '0');
  430             break;
  431         case A_RANDOM:
  432             hash_length = md_random(hash,scramble_length > HASH_BUFFER_LEN ? HASH_BUFFER_LEN : scramble_length);
  433             if(hash_length) scramble_HASH(ftype,hash_length,hash,scramble_length,scramble,NUM_ASCII_CHARS,crypt_ascii_chars); else scramble_length = 0;
  434             break;
  435         case A_NRANDOM:
  436             hash_length = md_random(hash,scramble_length > HASH_BUFFER_LEN ? HASH_BUFFER_LEN : scramble_length);
  437             if(hash_length) scramble_HASH(ftype,hash_length,hash,scramble_length,scramble,NUM_NUMBER_CHARS,crypt_number_chars); else scramble_length = 0;
  438             break;
  439         case A_HASH:
  440             hash_length = md_hash(hash,normalized_length,normalized_field,a->key_length > 0 ? atoi(a->key) : 16);
  441             if(hash_length) scramble_HASH(ftype,hash_length,hash,scramble_length,scramble,NUM_ASCII_CHARS,crypt_ascii_chars); else scramble_length = 0;
  442         break;
  443         case A_NHASH:
  444             hash_length = md_hash(hash,normalized_length,normalized_field,a->key_length > 0 ? atoi(a->key) : 16);
  445             if(hash_length) scramble_HASH(ftype,hash_length,hash,scramble_length,scramble,NUM_NUMBER_CHARS,crypt_number_chars); else scramble_length = 0;
  446         break;
  447     }
  448     return scramble_length;
  449 }
  450 
  451 /* write scrambled data to normalized field
  452  */
  453 static void scramble_normalized(int scramble_len,uint8_t *scramble,int normalized_length,uint8_t *normalized_field,struct anon_field *a)
  454 {
  455 
  456     if(scramble_len == 0) return;
  457 
  458     if(a->start >= 0)   // from beginning
  459     {
  460         memcpy(&normalized_field[a->start - 1],scramble,scramble_len);
  461     } else
  462     {
  463         memcpy(&normalized_field[normalized_length + a->start - scramble_len + 1],scramble,scramble_len);
  464     }
  465 }
  466 
  467         
  468 
  469 
  470 /* Anonymize those fields which have anonymize info, 
  471  * data will be anonymized in input buffer
  472  */ 
  473 void anonymize_fields(char *type,uint8_t quote,struct record *r,int len,uint8_t *buffer)
  474 {
  475     static uint8_t normalized_field[MAX_NFIELD_LEN];
  476     static uint8_t scramble[MAX_NFIELD_LEN];
  477     struct field *f = r->f;
  478     int normalized_length;
  479     int scramble_len;
  480 
  481     while(f != NULL)
  482     {
  483         if(f->a != NULL)
  484         {
  485             normalized_length = get_normalized_field(f,type,quote,len,buffer,normalized_field);
  486             if(normalized_length)
  487             {
  488                 scramble_len = create_scramble(f->type,scramble,normalized_length,normalized_field,f->a);
  489                 if(scramble_len)
  490                 {
  491                     scramble_normalized(scramble_len,scramble,normalized_length,normalized_field,f->a);
  492                     write_scrambled_field(f,type,quote,buffer,normalized_length,normalized_field);
  493                 }
  494             } 
  495         }
  496         f = f->next;
  497     }
  498 }
  499