"Fossies" - the Fresh Open Source Software Archive 
Member "duff-0.5.2/src/duffdriver.c" (28 Jan 2012, 11224 Bytes) of package /linux/privat/old/duff-0.5.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /*
2 * duff - Duplicate file finder
3 * Copyright (c) 2005 Camilla Berglund <elmindreda@elmindreda.org>
4 *
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any
7 * damages arising from the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any
10 * purpose, including commercial applications, and to alter it and
11 * redistribute it freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you
14 * must not claim that you wrote the original software. If you use
15 * this software in a product, an acknowledgment in the product
16 * documentation would be appreciated but is not required.
17 *
18 * 2. Altered source versions must be plainly marked as such, and
19 * must not be misrepresented as being the original software.
20 *
21 * 3. This notice may not be removed or altered from any source
22 * distribution.
23 */
24
25 #if HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #if HAVE_SYS_TYPES_H
30 #include <sys/types.h>
31 #endif
32
33 #if HAVE_SYS_STAT_H
34 #include <sys/stat.h>
35 #endif
36
37 #if HAVE_INTTYPES_H
38 #include <inttypes.h>
39 #elif HAVE_STDINT_H
40 #include <stdint.h>
41 #endif
42
43 #if HAVE_ERRNO_H
44 #include <errno.h>
45 #endif
46
47 #if HAVE_UNISTD_H
48 #include <unistd.h>
49 #endif
50
51 #if HAVE_STDIO_H
52 #include <stdio.h>
53 #endif
54
55 #if HAVE_STRING_H
56 #include <string.h>
57 #endif
58
59 #if HAVE_STDARG_H
60 #include <stdarg.h>
61 #endif
62
63 #if HAVE_STDLIB_H
64 #include <stdlib.h>
65 #endif
66
67 #if HAVE_DIRENT_H
68 #include <dirent.h>
69 #define NAMLEN(dirent) strlen((dirent)->d_name)
70 #else
71 #define dirent direct
72 #define NAMLEN(dirent) (dirent)->d_namlen
73 #if HAVE_SYS_NDIR_H
74 #include <sys/ndir.h>
75 #endif
76 #if HAVE_SYS_DIR_H
77 #include <sys/dir.h>
78 #endif
79 #if HAVE_NDIR_H
80 #include <ndir.h>
81 #endif
82 #endif
83
84 #include "duffstring.h"
85 #include "duff.h"
86
87 /* The number of buckets to use for files.
88 */
89 #define BUCKET_COUNT (1 << HASH_BITS)
90
91 /* Calculates the bucket index corresponding to the specified file size.
92 */
93 #define BUCKET_INDEX(size) ((size) & (BUCKET_COUNT - 1))
94
95 /* These flags are defined and documented in duff.c.
96 */
97 extern int follow_links_mode;
98 extern int all_files_flag;
99 extern int null_terminate_flag;
100 extern int recursive_flag;
101 extern int ignore_empty_flag;
102 extern int quiet_flag;
103 extern int physical_flag;
104 extern int excess_flag;
105 extern const char* header_format;
106 extern int header_uses_digest;
107
108 /* Typedefs for the structs below.
109 */
110 typedef struct Dir Dir;
111 typedef struct DirList DirList;
112
113 /* Represents a single physical directory.
114 */
115 struct Dir
116 {
117 dev_t device;
118 ino_t inode;
119 };
120
121 /* Represents a list of physical directories.
122 */
123 struct DirList
124 {
125 Dir* dirs;
126 size_t allocated;
127 size_t available;
128 };
129
130 /* List of traversed physical directories, used to avoid loops.
131 */
132 static DirList recorded_dirs;
133
134 /* Buckets of lists of collected files.
135 */
136 static FileList buckets[BUCKET_COUNT];
137
138 /* These functions are documented below, where they are defined.
139 */
140 static int stat_path(const char* path, struct stat* sb, int depth);
141 static int has_recorded_directory(dev_t device, ino_t inode);
142 static void record_directory(dev_t device, ino_t inode);
143 static void process_directory(const char* path,
144 const struct stat* sb,
145 int depth);
146 static void process_file(const char* path, struct stat* sb);
147 static void process_path(const char* path, int depth);
148 static void report_cluster(const FileList* cluster, unsigned int index);
149 static void process_clusters(void);
150
151 /* Initializes the driver, processes the specified arguments and reports the
152 * clusters found.
153 */
154 void process_args(int argc, char** argv)
155 {
156 size_t i;
157
158 memset(&recorded_dirs, 0, sizeof(DirList));
159
160 for (i = 0; i < BUCKET_COUNT; i++)
161 init_file_list(&buckets[i]);
162
163 if (argc)
164 {
165 /* Read file names from command line */
166 for (i = 0; i < argc; i++)
167 {
168 kill_trailing_slashes(argv[i]);
169 process_path(argv[i], 0);
170 }
171 }
172 else
173 {
174 char* path;
175
176 /* Read file names from stdin */
177 while ((path = read_path(stdin)))
178 {
179 kill_trailing_slashes(path);
180 process_path(path, 0);
181 free(path);
182 }
183 }
184
185 process_clusters();
186
187 for (i = 0; i < BUCKET_COUNT; i++)
188 free_file_list(&buckets[i]);
189
190 free(recorded_dirs.dirs);
191 memset(&recorded_dirs, 0, sizeof(DirList));
192 }
193
194 /* Stat:s a file according to the specified options.
195 */
196 static int stat_path(const char* path, struct stat* sb, int depth)
197 {
198 if (*path == '\0')
199 return -1;
200
201 if (lstat(path, sb) != 0)
202 {
203 if (!quiet_flag)
204 warning("%s: %s", path, strerror(errno));
205
206 return -1;
207 }
208
209 if (S_ISLNK(sb->st_mode))
210 {
211 if (follow_links_mode == ALL_SYMLINKS ||
212 (depth == 0 && follow_links_mode == ARG_SYMLINKS))
213 {
214 if (stat(path, sb) != 0)
215 {
216 if (!quiet_flag)
217 warning("%s: %s", path, strerror(errno));
218
219 return -1;
220 }
221
222 if (S_ISDIR(sb->st_mode))
223 return -1;
224 }
225 else
226 return -1;
227 }
228
229 return 0;
230 }
231
232 /* Returns true if the directory has already been recorded.
233 * TODO: Implement a more efficient data structure.
234 */
235 static int has_recorded_directory(dev_t device, ino_t inode)
236 {
237 size_t i;
238 const Dir* dirs = recorded_dirs.dirs;
239
240 for (i = 0; i < recorded_dirs.allocated; i++)
241 {
242 if (dirs[i].device == device && dirs[i].inode == inode)
243 return 1;
244 }
245
246 return 0;
247 }
248
249 /* Records the specified directory.
250 * TODO: Implement a more efficient data structure.
251 */
252 static void record_directory(dev_t device, ino_t inode)
253 {
254 if (recorded_dirs.allocated == recorded_dirs.available)
255 {
256 size_t count;
257
258 if (recorded_dirs.available)
259 count = recorded_dirs.available * 2;
260 else
261 count = 1024;
262
263 recorded_dirs.dirs = realloc(recorded_dirs.dirs, count * sizeof(Dir));
264 if (recorded_dirs.dirs == NULL)
265 error(_("Out of memory"));
266
267 recorded_dirs.available = count;
268 }
269
270 recorded_dirs.dirs[recorded_dirs.allocated].device = device;
271 recorded_dirs.dirs[recorded_dirs.allocated].inode = inode;
272 recorded_dirs.allocated++;
273 }
274
275 /* Recurses into a directory, collecting all or all non-hidden files,
276 * according to the specified options.
277 */
278 static void process_directory(const char* path,
279 const struct stat* sb,
280 int depth)
281 {
282 DIR* dir;
283 struct dirent* dir_entry;
284 char* child_path;
285 const char* name;
286
287 if (has_recorded_directory(sb->st_dev, sb->st_ino))
288 return;
289
290 record_directory(sb->st_dev, sb->st_ino);
291
292 dir = opendir(path);
293 if (!dir)
294 {
295 if (!quiet_flag)
296 warning("%s: %s", path, strerror(errno));
297
298 return;
299 }
300
301 while ((dir_entry = readdir(dir)))
302 {
303 name = dir_entry->d_name;
304 if (name[0] == '.')
305 {
306 if (!all_files_flag)
307 continue;
308
309 if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
310 continue;
311 }
312
313 if (asprintf(&child_path, "%s/%s", path, name) > 0)
314 {
315 process_path(child_path, depth);
316 free(child_path);
317 }
318 }
319
320 closedir(dir);
321 }
322
323 /* Processes a single file.
324 */
325 static void process_file(const char* path, struct stat* sb)
326 {
327 if (sb->st_size == 0)
328 {
329 if (ignore_empty_flag)
330 return;
331 }
332
333 /* NOTE: Check for duplicate arguments? */
334
335 if (physical_flag)
336 {
337 /* TODO: Make this less pessimal */
338
339 size_t i, bucket = BUCKET_INDEX(sb->st_size);
340
341 for (i = 0; i < buckets[bucket].allocated; i++)
342 {
343 if (buckets[bucket].files[i].device == sb->st_dev &&
344 buckets[bucket].files[i].inode == sb->st_ino)
345 {
346 return;
347 }
348 }
349 }
350
351 init_file(alloc_file(&buckets[BUCKET_INDEX(sb->st_size)]), path, sb);
352 }
353
354 /* Processes a path name according to its type, whether from the command line or
355 * from directory recursion.
356 *
357 * This function calls process_file and process_directory as needed.
358 */
359 static void process_path(const char* path, int depth)
360 {
361 mode_t mode;
362 struct stat sb;
363
364 if (stat_path(path, &sb, depth) != 0)
365 return;
366
367 mode = sb.st_mode & S_IFMT;
368 switch (mode)
369 {
370 case S_IFREG:
371 {
372 process_file(path, &sb);
373 break;
374 }
375
376 case S_IFDIR:
377 {
378 if (recursive_flag)
379 {
380 process_directory(path, &sb, depth + 1);
381 break;
382 }
383
384 /* FALLTHROUGH */
385 }
386
387 default:
388 {
389 if (!quiet_flag)
390 {
391 switch (mode)
392 {
393 case S_IFLNK:
394 warning(_("%s is a symbolic link; skipping"), path);
395 break;
396 case S_IFIFO:
397 warning(_("%s is a named pipe; skipping"), path);
398 break;
399 case S_IFBLK:
400 warning(_("%s is a block device; skipping"), path);
401 break;
402 case S_IFCHR:
403 warning(_("%s is a character device; skipping"), path);
404 break;
405 case S_IFDIR:
406 warning(_("%s is a directory; skipping"), path);
407 break;
408 case S_IFSOCK:
409 warning(_("%s is a socket; skipping"), path);
410 break;
411 default:
412 error(_("This cannot happen"));
413 }
414 }
415 }
416 }
417 }
418
419 /* Reports a cluster to stdout, according to the specified options.
420 */
421 static void report_cluster(const FileList* cluster, unsigned int index)
422 {
423 size_t i;
424 File* files = cluster->files;
425
426 if (excess_flag)
427 {
428 /* Report all but the first file in the cluster */
429 for (i = 1; i < cluster->allocated; i++)
430 {
431 printf("%s", files[i].path);
432 putchar(get_field_terminator());
433 }
434 }
435 else
436 {
437 /* Print header and report all files in the cluster */
438
439 if (*header_format != '\0')
440 {
441 if (header_uses_digest)
442 generate_file_digest(files);
443
444 print_cluster_header(header_format,
445 cluster->allocated,
446 index,
447 files->size,
448 files->digest);
449
450 putchar(get_field_terminator());
451 }
452
453 for (i = 0; i < cluster->allocated; i++)
454 {
455 printf("%s", files[i].path);
456 putchar(get_field_terminator());
457 }
458 }
459 }
460
461 /* Finds and reports all duplicate clusters in each bucket of collected files.
462 */
463 static void process_clusters(void)
464 {
465 size_t i, j, first, second, index = 1;
466 FileList duplicates;
467
468 init_file_list(&duplicates);
469
470 for (i = 0; i < BUCKET_COUNT; i++)
471 {
472 File* files = buckets[i].files;
473
474 for (first = 0; first < buckets[i].allocated; first++)
475 {
476 if (files[first].status == INVALID ||
477 files[first].status == REPORTED)
478 {
479 continue;
480 }
481
482 for (second = first + 1; second < buckets[i].allocated; second++)
483 {
484 if (files[second].status == INVALID ||
485 files[second].status == REPORTED)
486 {
487 continue;
488 }
489
490 if (compare_files(&files[first], &files[second]) == 0)
491 {
492 if (duplicates.allocated == 0)
493 {
494 *alloc_file(&duplicates) = files[first];
495 files[first].status = REPORTED;
496 }
497
498 *alloc_file(&duplicates) = files[second];
499 files[second].status = REPORTED;
500 }
501 else
502 {
503 if (files[first].status == INVALID)
504 break;
505 }
506 }
507
508 if (duplicates.allocated > 0)
509 {
510 report_cluster(&duplicates, index);
511 empty_file_list(&duplicates);
512
513 index++;
514 }
515 }
516
517 for (j = 0; j < buckets[i].allocated; j++)
518 free_file(&files[j]);
519 }
520
521 free_file_list(&duplicates);
522 }
523