"Fossies" - the Fresh Open Source Software Archive 
Member "fslint-2.46/fslint/supprt/rmlint/merge_hardlinks" (2 Feb 2017, 3434 Bytes) of package /linux/privat/fslint-2.46.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively you can here
view or
download the uninterpreted source code file.
See also the latest
Fossies "Diffs" side-by-side code changes report for "merge_hardlinks":
2.44_vs_2.46.
1 #!/usr/bin/env python2
2
3 # This is a support script for the findup utility which:
4
5 # Filters unique file sizes.
6 # Note this can be done also with `uniq -3 -D` in findup.
7
8 # Also filters groups of files with the same size
9 # that _all_ have the same inode (hardlinks).
10 # This optimization is the reason for this separate script.
11 # Previously we merged hardlinks to only one, but
12 # this wasn't correct for the case where there were
13 # multiple independent hardlinks to duplicate files.
14
15 # There is commented out code below which tries
16 # to filter out as many hardlinks as possible.
17 # However that is only usable in a more integrated script
18 # which can select the particular inode it wants to keep.
19 # Note this would save disk access as well as CPU
20 # for files that are too large for cache.
21
22 import os
23 import sys
24
25 #class counter:
26 # def __init__(self):
27 # self.dict = {}
28 # def add(self,item):
29 # count = self.dict.get(item,0)
30 # self.dict[item] = count + 1
31 # def counts(self,descending=False):
32 # """Returns list of keys, sorted by values."""
33 # result = zip(self.dict.values(),self.dict.keys())
34 # result.sort()
35 # if descending: result.reverse()
36 # return result
37
# Module-level state shared by the initial merging loop and write_out_group().

# Size field of the line that started the current group (0 before any input).
last_size=0
# (dev, inode) pair of the line that started the current group.
last_inode=(0,0)
# Raw input lines that share the current size.
group_to_check=[]
# Becomes 1 once a line in the current group has a (dev, inode) pair that
# differs from last_inode, i.e. the group is not purely hardlinks to one file.
write_group=0
42 #group_inodes_count=counter()
43
def write_out_group():
    """Write each line of the pending group to standard output, in order."""
    for entry in group_to_check:
        sys.stdout.write(entry)
46
47 #Keep only one of most numerous inode
48 #inode_to_keep = group_inodes_count.counts(descending=True)[0][1]
49 #inode_to_keep_not_written = 1
50 #for path, dev, inode, size in group_to_check:
51 # if inode == inode_to_keep:
52 # if inode_to_keep_not_written:
53 # inode_to_keep_not_written = 0
54 # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
55 # else:
56 # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
57
if len(sys.argv) == 2 and sys.argv[1] == '--non-gui':
    # Filter groups of paths (groups are delimited by empty lines): a group
    # is passed through only if its paths refer to more than one distinct
    # file, i.e. it is not merely a set of hardlinks to a single file.
    dups = []
    #inodes required to correctly report disk usage of
    #duplicate files with separate inode groups.
    inodes = set()
    for line in sys.stdin:
        # Strip only the line terminator so that a final line lacking one
        # does not lose the last character of its path.
        path = line.rstrip('\n')
        if path == '':
            # An empty line ends the current group.
            if len(inodes) > 1:
                sys.stdout.writelines(dups)
            # The separator line is kept as the first entry of the next group.
            dups = [line]
            inodes = set()
        else:
            try:
                info = os.stat(path)
            except EnvironmentError:
                #file may have been deleted, changed permissions, ...
                sys.stderr.write(str(sys.exc_info()[1])+'\n')
            else:
                dups.append(line)
                # Inode numbers are only unique within one device, so the
                # device must be part of the key; otherwise duplicates on
                # different filesystems could be mistaken for hardlinks.
                inodes.add((info.st_dev, info.st_ino))
    #output last group if required
    if len(inodes) > 1:
        sys.stdout.writelines(dups)
else: # Initial merging done for both gui and non gui
    # Each input line is "path dev inode size". Consecutive lines with the
    # same size form a group; a group is written out only if it contains at
    # least two different (dev, inode) pairs. This drops unique sizes as well
    # as groups consisting solely of hardlinks to one file.
    for line in sys.stdin:
        path, dev, inode, size = line.rstrip().split(' ')
        if last_size and size == last_size:
            if (dev, inode) != last_inode:
                write_group = 1
            group_to_check.append(line)
            #group_to_check.append((path,dev,inode,size))
            #group_inodes_count.add(inode)
        else:
            # Size changed: flush the previous group, then start a new one.
            if write_group:
                write_out_group()
            #group_to_check=[(path,dev,inode,size)]
            #group_inodes_count.add(inode)
            group_to_check = [line]
            last_inode = (dev, inode)
            last_size = size
            write_group = 0
    #output last group if required
    if write_group:
        write_out_group()