"Fossies" - the Fresh Open Source Software Archive 
Member "duff-0.5.2/src/dufffile.c" (28 Jan 2012, 7537 Bytes) of package /linux/privat/old/duff-0.5.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /*
2 * duff - Duplicate file finder
3 * Copyright (c) 2005 Camilla Berglund <elmindreda@elmindreda.org>
4 *
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any
7 * damages arising from the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any
10 * purpose, including commercial applications, and to alter it and
11 * redistribute it freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you
14 * must not claim that you wrote the original software. If you use
15 * this software in a product, an acknowledgment in the product
16 * documentation would be appreciated but is not required.
17 *
18 * 2. Altered source versions must be plainly marked as such, and
19 * must not be misrepresented as being the original software.
20 *
21 * 3. This notice may not be removed or altered from any source
22 * distribution.
23 */
24
25 #if HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #if HAVE_SYS_TYPES_H
30 #include <sys/types.h>
31 #endif
32
33 #if HAVE_SYS_STAT_H
34 #include <sys/stat.h>
35 #endif
36
37 #if HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40
41 #if HAVE_UNISTD_H
42 #include <unistd.h>
43 #endif
44
45 #if HAVE_STDIO_H
46 #include <stdio.h>
47 #endif
48
49 #if HAVE_STRING_H
50 #include <string.h>
51 #endif
52
53 #if HAVE_STDLIB_H
54 #include <stdlib.h>
55 #endif
56
57 #if HAVE_INTTYPES_H
58 #include <inttypes.h>
59 #elif HAVE_STDINT_H
60 #include <stdint.h>
61 #endif
62
63 #include "duff.h"
64
65 /* These flags are defined and documented in duff.c.
66 */
67 extern int quiet_flag;
68 extern int thorough_flag;
69 extern off_t sample_limit;
70
71 /* These functions are documented below, where they are defined.
72 */
73 static int get_file_sample(File* file);
74 static int get_file_digest(File* file);
75 static int compare_file_digests(File* first, File* second);
76 static int compare_file_samples(File* first, File* second);
77 static int compare_file_contents(File* first, File* second);
78
79 /* Initialises the specified file.
80 */
81 void init_file(File* file, const char* path, const struct stat* sb)
82 {
83 file->path = strdup(path);
84 file->size = sb->st_size;
85 file->device = sb->st_dev;
86 file->inode = sb->st_ino;
87 file->status = UNTOUCHED;
88 file->digest = NULL;
89 file->sample = NULL;
90 }
91
92 /* Frees any memory allocated for the specified file.
93 */
94 void free_file(File* file)
95 {
96 free(file->digest);
97 free(file->sample);
98 free(file->path);
99 }
100
101 /* This function defines the high-level comparison algorithm, using
102 * lower level primitives. This is the place to change or add
103 * calls to comparison modes. The general idea is to find proof of
104 * equality or un-equality as early and as quickly as possible.
105 */
106 int compare_files(File* first, File* second)
107 {
108 if (first->size != second->size)
109 return -1;
110
111 if (first->size == 0)
112 return 0;
113
114 if (first->device == second->device && first->inode == second->inode)
115 return 0;
116
117 if (first->size >= sample_limit)
118 {
119 if (compare_file_samples(first, second) != 0)
120 return -1;
121
122 if (first->size <= SAMPLE_SIZE)
123 return 0;
124 }
125
126 if (thorough_flag)
127 {
128 if (compare_file_contents(first, second) != 0)
129 return -1;
130 }
131 else
132 {
133 /* NOTE: Skip calculating digests if potential cluster only has two files?
134 * NOTE: Requires knowledge from higher level */
135 if (compare_file_digests(first, second) != 0)
136 return -1;
137 }
138
139 return 0;
140 }
141
142 /* Generates the digest for the specified file if it's not already present.
143 */
144 void generate_file_digest(File* file)
145 {
146 get_file_digest(file);
147 }
148
149 /* Retrieves sample from a file, if needed.
150 */
151 static int get_file_sample(File* file)
152 {
153 FILE* stream;
154 size_t size;
155 uint8_t* sample;
156
157 if (file->status == SAMPLED || file->status == HASHED)
158 return 0;
159
160 stream = fopen(file->path, "rb");
161 if (!stream)
162 {
163 if (!quiet_flag)
164 warning("%s: %s", file->path, strerror(errno));
165
166 file->status = INVALID;
167 return -1;
168 }
169
170 size = SAMPLE_SIZE;
171 if (size > file->size)
172 size = file->size;
173
174 sample = (uint8_t*) malloc(size);
175
176 if (fread(sample, size, 1, stream) < 1)
177 {
178 if (!quiet_flag)
179 warning("%s: %s", file->path, strerror(errno));
180
181 free(sample);
182 fclose(stream);
183
184 file->status = INVALID;
185 return -1;
186 }
187
188 fclose(stream);
189
190 file->sample = sample;
191 file->status = SAMPLED;
192 return 0;
193 }
194
195 /* Calculates the digest of a file, if needed.
196 */
197 static int get_file_digest(File* file)
198 {
199 FILE* stream;
200 size_t size;
201 char buffer[BUFFER_SIZE];
202
203 if (file->status == HASHED)
204 return 0;
205
206 digest_init();
207
208 if (file->status == SAMPLED && file->size <= SAMPLE_SIZE)
209 digest_update(file->sample, file->size);
210 else if (file->size > 0)
211 {
212 stream = fopen(file->path, "rb");
213 if (!stream)
214 {
215 if (!quiet_flag)
216 warning("%s: %s", file->path, strerror(errno));
217
218 file->status = INVALID;
219 return -1;
220 }
221
222 for (;;)
223 {
224 size = fread(buffer, 1, sizeof(buffer), stream);
225 if (ferror(stream))
226 {
227 if (!quiet_flag)
228 warning("%s: %s", file->path, strerror(errno));
229
230 fclose(stream);
231
232 file->status = INVALID;
233 return -1;
234 }
235
236 if (size == 0)
237 break;
238
239 digest_update(buffer, size);
240 }
241
242 fclose(stream);
243 }
244
245 file->digest = (uint8_t*) malloc(get_digest_size());
246 digest_finish(file->digest);
247
248 file->status = HASHED;
249 return 0;
250 }
251
252 /* Compares the digests of two files, calculating them if neccessary.
253 */
254 static int compare_file_digests(File* first, File* second)
255 {
256 if (get_file_digest(first) != 0)
257 return -1;
258
259 if (get_file_digest(second) != 0)
260 return -1;
261
262 if (memcmp(first->digest, second->digest, get_digest_size()) != 0)
263 return -1;
264
265 return 0;
266 }
267
268 /* Compares the samples of two files, retrieving them if neccessary.
269 */
270 static int compare_file_samples(File* first, File* second)
271 {
272 if (get_file_sample(first) != 0)
273 return -1;
274
275 if (get_file_sample(second) != 0)
276 return -1;
277
278 size_t size = SAMPLE_SIZE;
279 if (size > first->size)
280 size = first->size;
281
282 if (memcmp(first->sample, second->sample, size) != 0)
283 return -1;
284
285 return 0;
286 }
287
288 /* Performs byte-by-byte comparison of the contents of two files.
289 * This is the action we most want to avoid ever having to do.
290 * It is also completely un-optmimised. Enjoy.
291 * NOTE: This function assumes that the files are of equal size, as
292 * there's little point in calling it otherwise.
293 * TODO: Use a read buffer.
294 */
295 static int compare_file_contents(File* first, File* second)
296 {
297 int fc, sc;
298 off_t count = 0;
299 FILE* first_stream;
300 FILE* second_stream;
301
302 first_stream = fopen(first->path, "rb");
303 if (!first_stream)
304 {
305 if (!quiet_flag)
306 warning("%s: %s", first->path, strerror(errno));
307
308 first->status = INVALID;
309 return -1;
310 }
311
312 second_stream = fopen(second->path, "rb");
313 if (!second_stream)
314 {
315 if (!quiet_flag)
316 warning("%s: %s", second->path, strerror(errno));
317
318 fclose(first_stream);
319
320 second->status = INVALID;
321 return -1;
322 }
323
324 for (;;)
325 {
326 fc = fgetc(first_stream);
327 sc = fgetc(second_stream);
328
329 if (fc != sc || fc == EOF)
330 break;
331
332 count++;
333 }
334
335 if (ferror(first_stream))
336 {
337 if (!quiet_flag)
338 warning("%s: %s", first->path, strerror(errno));
339
340 first->status = INVALID;
341 }
342
343 if (ferror(second_stream))
344 {
345 if (!quiet_flag)
346 warning("%s: %s", second->path, strerror(errno));
347
348 second->status = INVALID;
349 }
350
351 fclose(first_stream);
352 fclose(second_stream);
353
354 if (count != first->size)
355 return -1;
356
357 return 0;
358 }
359