"Fossies" - the Fresh Open Source Software Archive

Member "fslint-2.46/fslint/supprt/rmlint/merge_hardlinks" (2 Feb 2017, 3434 Bytes) of package /linux/privat/fslint-2.46.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "merge_hardlinks": 2.44_vs_2.46.

    1 #!/usr/bin/env python2
    2 
    3 # This is a support script for the findup utility which:
    4 
    5 # Filters unique file sizes.
    6 # Note this can be done also with `uniq -3 -D` in findup.
    7 
    8 # Also filters groups of files with the same size
    9 # that _all_ have the same inode (hardlinks).
   10 # This optimization is the reason for this separate script.
   11 # Previously we merged hardlinks to only one, but
   12 # this wasn't correct for the case where there were
   13 # multiple independent hardlinks to duplicate files.
   14 
   15 # There is commented out code below which tries
   16 # to filter out as many hardlinks as possible.
   17 # However that is only usable in a more integrated script
   18 # which can select the particular inode it wants to keep.
   19 # Note this would save disk access as well as CPU
   20 # for files that are too large for cache.
   21 
   22 import os
   23 import sys
   24 
   25 #class counter:
   26 #    def __init__(self):
   27 #        self.dict = {}
   28 #    def add(self,item):
   29 #        count = self.dict.get(item,0)
   30 #        self.dict[item] = count + 1
   31 #    def counts(self,descending=False):
   32 #        """Returns list of keys, sorted by values."""
   33 #        result = zip(self.dict.values(),self.dict.keys())
   34 #        result.sort()
   35 #        if descending: result.reverse()
   36 #        return result
   37 
   38 last_size=0
   39 last_inode=(0,0)
   40 group_to_check=[]
   41 write_group=0
   42 #group_inodes_count=counter()
   43 
   44 def write_out_group():
   45     sys.stdout.writelines(group_to_check)
   46 
   47     #Keep only one of most numerous inode
   48     #inode_to_keep = group_inodes_count.counts(descending=True)[0][1]
   49     #inode_to_keep_not_written = 1
   50     #for path, dev, inode, size in group_to_check:
   51     #    if inode == inode_to_keep:
   52     #        if inode_to_keep_not_written:
   53     #            inode_to_keep_not_written = 0
   54     #            sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
   55     #    else:
   56     #        sys.stdout.write("%s %s %s %s\n" % (path, dev, inode, size))
   57 
   58 if len(sys.argv) == 2 and sys.argv[1] == '--non-gui':
   59     dups = []
   60     inodes = {}
   61     #inodes required to correctly report disk usage of
   62     #duplicate files with seperate inode groups.
   63     for line in sys.stdin.xreadlines():
   64         path = line[:-1]
   65         if path == '':
   66             if len(inodes)>1:
   67                 sys.stdout.writelines(dups)
   68             dups = [line]
   69             inodes = {}
   70         else:
   71             try:
   72                 inode = os.stat(path).st_ino
   73                 dups.append(line)
   74                 inodes[inode] = True
   75             except EnvironmentError:
   76                 #file may have been deleted, changed permissions, ...
   77                 sys.stderr.write(str(sys.exc_info()[1])+'\n')
   78     else:
   79         if len(inodes)>1:
   80             sys.stdout.writelines(dups)
   81 else: # Initial merging done for both gui and non gui
   82     for line in sys.stdin.xreadlines():
   83         path, dev, inode, size = line.rstrip().split(' ')
   84         if last_size and size == last_size:
   85             if (dev,inode) != last_inode:
   86                 write_group=1
   87             group_to_check.append(line)
   88             #group_to_check.append((path,dev,inode,size))
   89             #group_inodes_count.add(inode)
   90         else:
   91             if write_group: write_out_group()
   92             #group_to_check=[(path,dev,inode,size)]
   93             #group_inodes_count.add(inode)
   94             group_to_check=[line]
   95             last_inode = (dev,inode)
   96             last_size = size
   97             write_group = 0
   98     else: #output last group if required
   99         if write_group: write_out_group()