merge_hardlinks (fslint-2.44) | : | merge_hardlinks (fslint-2.46) | ||
---|---|---|---|---|
#!/usr/bin/env python | #!/usr/bin/env python2 | |||
# This is a support script for the findup utility which: | # This is a support script for the findup utility which: | |||
# Filters unique file sizes. | # Filters unique file sizes. | |||
# Note this can be done also with `uniq -3 -D` in findup. | # Note this can be done also with `uniq -3 -D` in findup. | |||
# Also filters groups of files with the same size | # Also filters groups of files with the same size | |||
# that _all_ have the same inode (hardlinks). | # that _all_ have the same inode (hardlinks). | |||
# This optimization is the reason for this separate script. | # This optimization is the reason for this separate script. | |||
# Previously we merged hardlinks to only one, but | # Previously we merged hardlinks to only one, but | |||
# this wasn't correct for the case where there were | # this wasn't correct for the case where there were | |||
# multiple independent hardlinks to duplicate files. | # multiple independent hardlinks to duplicate files. | |||
# There is commented out code below which tries | # There is commented out code below which tries | |||
# to filter out as many hardlinks as possible. | # to filter out as many hardlinks as possible. | |||
# However that is only usable in a more integrated script | # However that is only usable in a more integrated script | |||
# which can select the particular inode it wants to keep. | # which can select the particular inode it wants to keep. | |||
# Note this would save disk access as well as CPU | # Note this would save disk access as well as CPU | |||
# for files that are too large for cache. | # for files that are too large for cache. | |||
import os | ||||
import sys | import sys | |||
#class counter: | #class counter: | |||
# def __init__(self): | # def __init__(self): | |||
# self.dict = {} | # self.dict = {} | |||
# def add(self,item): | # def add(self,item): | |||
# count = self.dict.get(item,0) | # count = self.dict.get(item,0) | |||
# self.dict[item] = count + 1 | # self.dict[item] = count + 1 | |||
# def counts(self,descending=False): | # def counts(self,descending=False): | |||
# """Returns list of keys, sorted by values.""" | # """Returns list of keys, sorted by values.""" | |||
skipping to change at line 57 | skipping to change at line 58 | |||
#inode_to_keep = group_inodes_count.counts(descending=True)[0][1] | #inode_to_keep = group_inodes_count.counts(descending=True)[0][1] | |||
#inode_to_keep_not_written = 1 | #inode_to_keep_not_written = 1 | |||
#for path, dev, inode, size in group_to_check: | #for path, dev, inode, size in group_to_check: | |||
# if inode == inode_to_keep: | # if inode == inode_to_keep: | |||
# if inode_to_keep_not_written: | # if inode_to_keep_not_written: | |||
# inode_to_keep_not_written = 0 | # inode_to_keep_not_written = 0 | |||
# sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) | # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) | |||
# else: | # else: | |||
# sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) | # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) | |||
for line in sys.stdin.xreadlines(): | if len(sys.argv) == 2 and sys.argv[1] == '--non-gui': | |||
path, dev, inode, size = line.rstrip().split(' ') | dups = [] | |||
if last_size and size == last_size: | inodes = {} | |||
if (dev,inode) != last_inode: | #inodes required to correctly report disk usage of | |||
write_group=1 | #duplicate files with separate inode groups. | |||
group_to_check.append(line) | for line in sys.stdin.xreadlines(): | |||
#group_to_check.append((path,dev,inode,size)) | path = line[:-1] | |||
#group_inodes_count.add(inode) | if path == '': | |||
if len(inodes)>1: | ||||
sys.stdout.writelines(dups) | ||||
dups = [line] | ||||
inodes = {} | ||||
else: | ||||
try: | ||||
inode = os.stat(path).st_ino | ||||
dups.append(line) | ||||
inodes[inode] = True | ||||
except EnvironmentError: | ||||
#file may have been deleted, changed permissions, ... | ||||
sys.stderr.write(str(sys.exc_info()[1])+'\n') | ||||
else: | else: | |||
if len(inodes)>1: | ||||
sys.stdout.writelines(dups) | ||||
else: # Initial merging done for both gui and non gui | ||||
for line in sys.stdin.xreadlines(): | ||||
path, dev, inode, size = line.rstrip().split(' ') | ||||
if last_size and size == last_size: | ||||
if (dev,inode) != last_inode: | ||||
write_group=1 | ||||
group_to_check.append(line) | ||||
#group_to_check.append((path,dev,inode,size)) | ||||
#group_inodes_count.add(inode) | ||||
else: | ||||
if write_group: write_out_group() | ||||
#group_to_check=[(path,dev,inode,size)] | ||||
#group_inodes_count.add(inode) | ||||
group_to_check=[line] | ||||
last_inode = (dev,inode) | ||||
last_size = size | ||||
write_group = 0 | ||||
else: #output last group if required | ||||
if write_group: write_out_group() | if write_group: write_out_group() | |||
#group_to_check=[(path,dev,inode,size)] | ||||
#group_inodes_count.add(inode) | ||||
group_to_check=[line] | ||||
last_inode = (dev,inode) | ||||
last_size = size | ||||
write_group = 0 | ||||
else: #output last group if required | ||||
if write_group: write_out_group() | ||||
End of changes. 5 change blocks. | ||||
9 lines changed or deleted | 42 lines changed or added |