"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "fslint/supprt/rmlint/merge_hardlinks" between
fslint-2.44.tar.gz and fslint-2.46.tar.gz

About: fslint is a toolkit to find and clean various forms of lint on a filesystem (e.g. duplicate files).

merge_hardlinks  (fslint-2.44):merge_hardlinks  (fslint-2.46)
#!/usr/bin/env python #!/usr/bin/env python2
# This is a support script for the findup utility which: # This is a support script for the findup utility which:
# Filters unique file sizes. # Filters unique file sizes.
# Note this can be done also with `uniq -3 -D` in findup. # Note this can be done also with `uniq -3 -D` in findup.
# Also filters groups of files with the same size # Also filters groups of files with the same size
# that _all_ have the same inode (hardlinks). # that _all_ have the same inode (hardlinks).
# This optimization is the reason for this separate script. # This optimization is the reason for this separate script.
# Previously we merged hardlinks to only one, but # Previously we merged hardlinks to only one, but
# this wasn't correct for the case where there were # this wasn't correct for the case where there were
# multiple independent hardlinks to duplicate files. # multiple independent hardlinks to duplicate files.
# There is commented out code below which tries # There is commented out code below which tries
# to filter out as many hardlinks as possible. # to filter out as many hardlinks as possible.
# However that is only usable in a more integrated script # However that is only usable in a more integrated script
# which can select the particular inode it wants to keep. # which can select the particular inode it wants to keep.
# Note this would save disk access as well as CPU # Note this would save disk access as well as CPU
# for files that are too large for cache. # for files that are too large for cache.
import os
import sys import sys
#class counter: #class counter:
# def __init__(self): # def __init__(self):
# self.dict = {} # self.dict = {}
# def add(self,item): # def add(self,item):
# count = self.dict.get(item,0) # count = self.dict.get(item,0)
# self.dict[item] = count + 1 # self.dict[item] = count + 1
# def counts(self,descending=False): # def counts(self,descending=False):
# """Returns list of keys, sorted by values.""" # """Returns list of keys, sorted by values."""
skipping to change at line 57 skipping to change at line 58
#inode_to_keep = group_inodes_count.counts(descending=True)[0][1] #inode_to_keep = group_inodes_count.counts(descending=True)[0][1]
#inode_to_keep_not_written = 1 #inode_to_keep_not_written = 1
#for path, dev, inode, size in group_to_check: #for path, dev, inode, size in group_to_check:
# if inode == inode_to_keep: # if inode == inode_to_keep:
# if inode_to_keep_not_written: # if inode_to_keep_not_written:
# inode_to_keep_not_written = 0 # inode_to_keep_not_written = 0
# sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
# else: # else:
# sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size)) # sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
for line in sys.stdin.xreadlines(): if len(sys.argv) == 2 and sys.argv[1] == '--non-gui':
path, dev, inode, size = line.rstrip().split(' ') dups = []
if last_size and size == last_size: inodes = {}
if (dev,inode) != last_inode: #inodes required to correctly report disk usage of
write_group=1 #duplicate files with separate inode groups.
group_to_check.append(line) for line in sys.stdin.xreadlines():
#group_to_check.append((path,dev,inode,size)) path = line[:-1]
#group_inodes_count.add(inode) if path == '':
if len(inodes)>1:
sys.stdout.writelines(dups)
dups = [line]
inodes = {}
else:
try:
inode = os.stat(path).st_ino
dups.append(line)
inodes[inode] = True
except EnvironmentError:
#file may have been deleted, changed permissions, ...
sys.stderr.write(str(sys.exc_info()[1])+'\n')
else: else:
if len(inodes)>1:
sys.stdout.writelines(dups)
else: # Initial merging done for both gui and non gui
for line in sys.stdin.xreadlines():
path, dev, inode, size = line.rstrip().split(' ')
if last_size and size == last_size:
if (dev,inode) != last_inode:
write_group=1
group_to_check.append(line)
#group_to_check.append((path,dev,inode,size))
#group_inodes_count.add(inode)
else:
if write_group: write_out_group()
#group_to_check=[(path,dev,inode,size)]
#group_inodes_count.add(inode)
group_to_check=[line]
last_inode = (dev,inode)
last_size = size
write_group = 0
else: #output last group if required
if write_group: write_out_group() if write_group: write_out_group()
#group_to_check=[(path,dev,inode,size)]
#group_inodes_count.add(inode)
group_to_check=[line]
last_inode = (dev,inode)
last_size = size
write_group = 0
else: #output last group if required
if write_group: write_out_group()
 End of changes. 5 change blocks. 
9 lines changed or deleted 42 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS