"Fossies" - the Fresh Open Source Software Archive

Member "duff-0.5.2/src/dufffile.c" (28 Jan 2012, 7537 Bytes) of package /linux/privat/old/duff-0.5.2.tar.gz:


As a special service, "Fossies" has tried to render the requested source file as HTML, using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /*
    2  * duff - Duplicate file finder
    3  * Copyright (c) 2005 Camilla Berglund <elmindreda@elmindreda.org>
    4  *
    5  * This software is provided 'as-is', without any express or implied
    6  * warranty. In no event will the authors be held liable for any
    7  * damages arising from the use of this software.
    8  *
    9  * Permission is granted to anyone to use this software for any
   10  * purpose, including commercial applications, and to alter it and
   11  * redistribute it freely, subject to the following restrictions:
   12  *
   13  *  1. The origin of this software must not be misrepresented; you
   14  *     must not claim that you wrote the original software. If you use
   15  *     this software in a product, an acknowledgment in the product
   16  *     documentation would be appreciated but is not required.
   17  *
   18  *  2. Altered source versions must be plainly marked as such, and
   19  *     must not be misrepresented as being the original software.
   20  *
   21  *  3. This notice may not be removed or altered from any source
   22  *     distribution.
   23  */
   24 
   25 #if HAVE_CONFIG_H
   26 #include "config.h"
   27 #endif
   28 
   29 #if HAVE_SYS_TYPES_H
   30 #include <sys/types.h>
   31 #endif
   32 
   33 #if HAVE_SYS_STAT_H
   34 #include <sys/stat.h>
   35 #endif
   36 
   37 #if HAVE_ERRNO_H
   38 #include <errno.h>
   39 #endif
   40 
   41 #if HAVE_UNISTD_H
   42 #include <unistd.h>
   43 #endif
   44 
   45 #if HAVE_STDIO_H
   46 #include <stdio.h>
   47 #endif
   48 
   49 #if HAVE_STRING_H
   50 #include <string.h>
   51 #endif
   52 
   53 #if HAVE_STDLIB_H
   54 #include <stdlib.h>
   55 #endif
   56 
   57 #if HAVE_INTTYPES_H
   58 #include <inttypes.h>
   59 #elif HAVE_STDINT_H
   60 #include <stdint.h>
   61 #endif
   62 
   63 #include "duff.h"
   64 
   65 /* These flags are defined and documented in duff.c.
   66  */
   67 extern int quiet_flag;
   68 extern int thorough_flag;
   69 extern off_t sample_limit;
   70 
   71 /* These functions are documented below, where they are defined.
   72  */
   73 static int get_file_sample(File* file);
   74 static int get_file_digest(File* file);
   75 static int compare_file_digests(File* first, File* second);
   76 static int compare_file_samples(File* first, File* second);
   77 static int compare_file_contents(File* first, File* second);
   78 
   79 /* Initialises the specified file.
   80  */
   81 void init_file(File* file, const char* path, const struct stat* sb)
   82 {
   83   file->path = strdup(path);
   84   file->size = sb->st_size;
   85   file->device = sb->st_dev;
   86   file->inode = sb->st_ino;
   87   file->status = UNTOUCHED;
   88   file->digest = NULL;
   89   file->sample = NULL;
   90 }
   91 
   92 /* Frees any memory allocated for the specified file.
   93  */
   94 void free_file(File* file)
   95 {
   96   free(file->digest);
   97   free(file->sample);
   98   free(file->path);
   99 }
  100 
  101 /* This function defines the high-level comparison algorithm, using
  102  * lower level primitives.  This is the place to change or add
  103  * calls to comparison modes.  The general idea is to find proof of
  104  * equality or un-equality as early and as quickly as possible.
  105  */
  106 int compare_files(File* first, File* second)
  107 {
  108   if (first->size != second->size)
  109     return -1;
  110 
  111   if (first->size == 0)
  112     return 0;
  113 
  114   if (first->device == second->device && first->inode == second->inode)
  115     return 0;
  116 
  117   if (first->size >= sample_limit)
  118   {
  119     if (compare_file_samples(first, second) != 0)
  120       return -1;
  121 
  122     if (first->size <= SAMPLE_SIZE)
  123       return 0;
  124   }
  125 
  126   if (thorough_flag)
  127   {
  128     if (compare_file_contents(first, second) != 0)
  129       return -1;
  130   }
  131   else
  132   {
  133     /* NOTE: Skip calculating digests if potential cluster only has two files?
  134      * NOTE: Requires knowledge from higher level */
  135     if (compare_file_digests(first, second) != 0)
  136       return -1;
  137   }
  138 
  139   return 0;
  140 }
  141 
  142 /* Generates the digest for the specified file if it's not already present.
  143  */
  144 void generate_file_digest(File* file)
  145 {
  146   get_file_digest(file);
  147 }
  148 
  149 /* Retrieves sample from a file, if needed.
  150  */
  151 static int get_file_sample(File* file)
  152 {
  153   FILE* stream;
  154   size_t size;
  155   uint8_t* sample;
  156 
  157   if (file->status == SAMPLED || file->status == HASHED)
  158     return 0;
  159 
  160   stream = fopen(file->path, "rb");
  161   if (!stream)
  162   {
  163     if (!quiet_flag)
  164       warning("%s: %s", file->path, strerror(errno));
  165 
  166     file->status = INVALID;
  167     return -1;
  168   }
  169 
  170   size = SAMPLE_SIZE;
  171   if (size > file->size)
  172     size = file->size;
  173 
  174   sample = (uint8_t*) malloc(size);
  175 
  176   if (fread(sample, size, 1, stream) < 1)
  177   {
  178     if (!quiet_flag)
  179       warning("%s: %s", file->path, strerror(errno));
  180 
  181     free(sample);
  182     fclose(stream);
  183 
  184     file->status = INVALID;
  185     return -1;
  186   }
  187 
  188   fclose(stream);
  189 
  190   file->sample = sample;
  191   file->status = SAMPLED;
  192   return 0;
  193 }
  194 
  195 /* Calculates the digest of a file, if needed.
  196  */
  197 static int get_file_digest(File* file)
  198 {
  199   FILE* stream;
  200   size_t size;
  201   char buffer[BUFFER_SIZE];
  202 
  203   if (file->status == HASHED)
  204     return 0;
  205 
  206   digest_init();
  207 
  208   if (file->status == SAMPLED && file->size <= SAMPLE_SIZE)
  209     digest_update(file->sample, file->size);
  210   else if (file->size > 0)
  211   {
  212     stream = fopen(file->path, "rb");
  213     if (!stream)
  214     {
  215       if (!quiet_flag)
  216         warning("%s: %s", file->path, strerror(errno));
  217 
  218       file->status = INVALID;
  219       return -1;
  220     }
  221 
  222     for (;;)
  223     {
  224       size = fread(buffer, 1, sizeof(buffer), stream);
  225       if (ferror(stream))
  226       {
  227         if (!quiet_flag)
  228           warning("%s: %s", file->path, strerror(errno));
  229 
  230         fclose(stream);
  231 
  232         file->status = INVALID;
  233         return -1;
  234       }
  235 
  236       if (size == 0)
  237         break;
  238 
  239       digest_update(buffer, size);
  240     }
  241 
  242     fclose(stream);
  243   }
  244 
  245   file->digest = (uint8_t*) malloc(get_digest_size());
  246   digest_finish(file->digest);
  247 
  248   file->status = HASHED;
  249   return 0;
  250 }
  251 
  252 /* Compares the digests of two files, calculating them if neccessary.
  253  */
  254 static int compare_file_digests(File* first, File* second)
  255 {
  256   if (get_file_digest(first) != 0)
  257     return -1;
  258 
  259   if (get_file_digest(second) != 0)
  260     return -1;
  261 
  262   if (memcmp(first->digest, second->digest, get_digest_size()) != 0)
  263     return -1;
  264 
  265   return 0;
  266 }
  267 
  268 /* Compares the samples of two files, retrieving them if neccessary.
  269  */
  270 static int compare_file_samples(File* first, File* second)
  271 {
  272   if (get_file_sample(first) != 0)
  273     return -1;
  274 
  275   if (get_file_sample(second) != 0)
  276     return -1;
  277 
  278   size_t size = SAMPLE_SIZE;
  279   if (size > first->size)
  280     size = first->size;
  281 
  282   if (memcmp(first->sample, second->sample, size) != 0)
  283     return -1;
  284 
  285   return 0;
  286 }
  287 
  288 /* Performs byte-by-byte comparison of the contents of two files.
  289  * This is the action we most want to avoid ever having to do.
  290  * It is also completely un-optmimised.  Enjoy.
  291  * NOTE: This function assumes that the files are of equal size, as
  292  * there's little point in calling it otherwise.
  293  * TODO: Use a read buffer.
  294  */
  295 static int compare_file_contents(File* first, File* second)
  296 {
  297   int fc, sc;
  298   off_t count = 0;
  299   FILE* first_stream;
  300   FILE* second_stream;
  301 
  302   first_stream = fopen(first->path, "rb");
  303   if (!first_stream)
  304   {
  305     if (!quiet_flag)
  306       warning("%s: %s", first->path, strerror(errno));
  307 
  308     first->status = INVALID;
  309     return -1;
  310   }
  311 
  312   second_stream = fopen(second->path, "rb");
  313   if (!second_stream)
  314   {
  315     if (!quiet_flag)
  316       warning("%s: %s", second->path, strerror(errno));
  317 
  318     fclose(first_stream);
  319 
  320     second->status = INVALID;
  321     return -1;
  322   }
  323 
  324   for (;;)
  325   {
  326     fc = fgetc(first_stream);
  327     sc = fgetc(second_stream);
  328 
  329     if (fc != sc || fc == EOF)
  330       break;
  331 
  332     count++;
  333   }
  334 
  335   if (ferror(first_stream))
  336   {
  337     if (!quiet_flag)
  338       warning("%s: %s", first->path, strerror(errno));
  339 
  340     first->status = INVALID;
  341   }
  342 
  343   if (ferror(second_stream))
  344   {
  345     if (!quiet_flag)
  346       warning("%s: %s", second->path, strerror(errno));
  347 
  348     second->status = INVALID;
  349   }
  350 
  351   fclose(first_stream);
  352   fclose(second_stream);
  353 
  354   if (count != first->size)
  355     return -1;
  356 
  357   return 0;
  358 }
  359