"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "repeats/repeats.in" between
littleutils-1.2.3.tar.lz and littleutils-1.2.4.tar.lz

About: littleutils are a collection of small and simple utilities (rename files, search for duplicate files, ...).

repeats.in  (littleutils-1.2.3.tar.lz):repeats.in  (littleutils-1.2.4.tar.lz)
#! PROGDASH #! PROGBASH
# set vim: syntax=sh: # set vim: syntax=bash:
# repeats: Searches for duplicate files in the specified directories # repeats: Searches for duplicate files in the specified directories
# Copyright (C) 2004-2020 by Brian Lindholm. This file is part of the # Copyright (C) 2004-2021 by Brian Lindholm. This file is part of the
# littleutils utility set. # littleutils utility set.
# #
# The repeats utility is free software; you can redistribute it and/or modify # The repeats utility is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the Free # it under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later version. # Software Foundation; either version 3, or (at your option) any later version.
# #
# The repeats utility is distributed in the hope that it will be useful, but # The repeats utility is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details. # more details.
# #
# You should have received a copy of the GNU General Public License along with # You should have received a copy of the GNU General Public License along with
# the littleutils. If not, see <https://www.gnu.org/licenses/>. # the littleutils. If not, see <https://www.gnu.org/licenses/>.
# get a valid temporary directory and set up traps
TMPWILD=`tempname -w repeats_$$` || exit 99
trap 'rm -f ${TMPWILD} ; exit 1' 1 2 3 13 15
trap 'rm -f ${TMPWILD} ; exit 0' 0
# get command-line options # get command-line options
ALGORITHM='8' declare -i ALGORITHM=8
HARDLINKS='n' HARDLINKS='n'
MIDSIZE='65536' declare -i MIDSIZE=65536
PARANOID='n' PARANOID='n'
VERBOSE='n' VERBOSE='n'
ZEROS='n' ZEROS='n'
while getopts a:hlm:pvz opts while getopts a:hlm:pvz opts
do do
case $opts in case $opts in
a) ALGORITHM=${OPTARG} ;; a) ALGORITHM=$OPTARG ;;
h) echo 'repeats LU_VERSION' h) echo 'repeats LU_VERSION'
echo 'usage: repeats [-a hash_algorithm] [-h(elp)] [-l(inks_hard)]' echo 'usage: repeats [-a hash_algorithm] [-h(elp)] [-l(inks_hard)]'
echo ' [-m(idsize) bytecount] [-p(aranoid)] [-v(erbose)] [-z(eros)]' echo ' [-m(idsize) bytecount] [-p(aranoid)] [-v(erbose)] [-z(eros)]'
echo ' [directory ...]' echo ' [directory ...]'
echo 'algorithms: 1 = MD5, 2 = SHA1, 3 = SHA224, 4 = SHA256, 5 = SHA384, ' echo 'algorithms: 1 = MD5, 2 = SHA1, 3 = SHA224, 4 = SHA256, 5 = SHA384, '
echo ' 6 = SHA512, 7 = BLAKE2B-256, 8 = BLAKE2B-512 (default) ' echo ' 6 = SHA512, 7 = BLAKE2B-256, 8 = BLAKE2B-512 (default) '
exit 0 ;; exit 0 ;;
l) HARDLINKS='y' ;; l) HARDLINKS='y' ;;
m) MIDSIZE=${OPTARG} ;; m) MIDSIZE=$OPTARG ;;
p) PARANOID='y' ;; p) PARANOID='y' ;;
v) VERBOSE='y' ;; v) VERBOSE='y' ;;
z) ZEROS='y' ;; z) ZEROS='y' ;;
*) echo 'repeats LU_VERSION' *) echo 'repeats LU_VERSION'
echo 'usage: repeats [-a hash_algorithm] [-h(elp)] [-l(inks_hard)]' echo 'usage: repeats [-a hash_algorithm] [-h(elp)] [-l(inks_hard)]'
echo ' [-m(idsize) bytecount] [-p(aranoid)] [-v(erbose)] [-z(eros)]' echo ' [-m(idsize) bytecount] [-p(aranoid)] [-v(erbose)] [-z(eros)]'
echo ' [directory ...]' echo ' [directory ...]'
echo 'algorithms: 1 = MD5, 2 = SHA1, 3 = SHA224, 4 = SHA256, 5 = SHA384, ' echo 'algorithms: 1 = MD5, 2 = SHA1, 3 = SHA224, 4 = SHA256, 5 = SHA384, '
echo ' 6 = SHA512, 7 = BLAKE2B-256, 8 = BLAKE2B-512 (default) ' echo ' 6 = SHA512, 7 = BLAKE2B-256, 8 = BLAKE2B-512 (default) '
exit 1 ;; exit 1 ;;
esac esac
done done
shift `expr ${OPTIND} - 1` shift $((${OPTIND}-1))
# set up traps
trap 'rm -f $TMPFILE0 $TMPFILE1 $TMPFILE2 $TMPFILE3 $TMPFILE4 $TMPFILE5 ; exit 1' 1 2 3 13 15
# generate the initial list of files # generate the initial list of files
TMPFILE0=`tempname repeats_$$_1` || exit 99 TMPFILE0=$(tempname repeats_$$_0) || exit 99
if [ $# -eq 0 ]; then if [ $# -eq 0 ]; then
find . -type f -readable -print | sed -e 's/^\.\///' > ${TMPFILE0} find . -type f -readable -print | sed -e 's/^\.\///' > $TMPFILE0
elif [ $# -eq 1 ]; then elif [ $# -eq 1 ]; then
if [ -d "$1" -a -r "$1" -a -x "$1" ]; then if [ -d "$1" -a -r "$1" -a -x "$1" ]; then
find "$1" -type f -readable -print > ${TMPFILE0} find "$1" -type f -readable -print > $TMPFILE0
else else
echo "repeats error: $1 is not a readable directory" echo "repeats error: $1 is not a readable directory"
rm -f ${TMPWILD} rm -f $TMPFILE0
exit 1 exit 1
fi fi
else else
while [ $# -gt 0 ]; do while [ $# -gt 0 ]; do
if [ -d "$1" -a -r "$1" -a -x "$1" ]; then if [ -d "$1" -a -r "$1" -a -x "$1" ]; then
find "$1" -type f -readable -print >> ${TMPFILE0} find "$1" -type f -readable -print >> $TMPFILE0
else else
echo "repeats error: $1 is not a readable directory" echo "repeats error: $1 is not a readable directory"
rm -f ${TMPWILD} rm -f $TMPFILE0
exit 1 exit 1
fi fi
shift shift
done done
fi fi
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 0: total number of files = `wc -l ${TMPFILE0}`" | sed -e "s: ${TMPFILE0}::" 1>&2 echo "repeats stage 0: total number of files = $(wc -l $TMPFILE0)" | sed -e "s: ${TMPFILE0}::" 1>&2
fi fi
# grab filesizes and eliminated zero-length files if requested # grab filesizes and eliminated zero-length files if requested
TMPFILE1=`tempname repeats_$$_1` || exit 99 TMPFILE1=$(tempname repeats_$$_1) || exit 99
if [ "$ZEROS" = 'n' ]; then if [ "$ZEROS" = 'n' ]; then
sort -u ${TMPFILE0} | filesize -p | grep -v ' 0$' | sort -t ' ' -k2n,2n | REPSIZE > ${TMPFILE1} sort -u $TMPFILE0 | filesize -p | grep -v ' 0$' | sort -t ' ' -k2n,2n | REPSIZE > $TMPFILE1
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 1: num files with non-unique and non-zero filesize = `wc -l ${TMPFILE1}`" | sed -e "s: ${TMPFILE1}::" 1>&2 echo "repeats stage 1: num files with non-unique and non-zero filesize = $(wc -l $TMPFILE1)" | sed -e "s: ${TMPFILE1}::" 1>&2
fi fi
else else
sort -u ${TMPFILE0} | filesize -p | sort -t ' ' -k2n,2n | REPSIZE > ${TMPFILE1} sort -u $TMPFILE0 | filesize -p | sort -t ' ' -k2n,2n | REPSIZE > $TMPFILE1
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 1: num files with non-unique filesize = `wc -l ${TMPFILE1}`" | sed -e "s: ${TMPFILE1}::" 1>&2 echo "repeats stage 1: num files with non-unique filesize = $(wc -l $TMPFILE1)" | sed -e "s: ${TMPFILE1}::" 1>&2
fi fi
fi fi
# search for duplicates based on node numbers (eliminate hardlinks) # search for duplicates based on node numbers (eliminate hardlinks)
TMPFILE2=`tempname repeats_$$_2` || exit 99 TMPFILE2=$(tempname repeats_$$_2) || exit 99
if [ "$HARDLINKS" = 'n' ]; then if [ "$HARDLINKS" = 'n' ]; then
sort ${TMPFILE1} | REPHARD -p | sort -t ' ' -k4n,4n -k3n,3n -k2n,2n -k1,1 | REPNODE > ${TMPFILE2} sort $TMPFILE1 | REPHARD -p | sort -t ' ' -k4n,4n -k3n,3n -k2n,2n -k1,1 | REPNODE > $TMPFILE2
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 2: num files excluding hardlinks = `wc -l ${TMPFILE2}`" | sed -e "s: ${TMPFILE2}::" 1>&2 echo "repeats stage 2: num files excluding hardlinks = $(wc -l $TMPFILE2)" | sed -e "s: ${TMPFILE2}::" 1>&2
fi fi
else else
mv ${TMPFILE1} ${TMPFILE2} mv $TMPFILE1 $TMPFILE2
fi fi
# search for duplicates based on a partial filehash # search for duplicates based on a partial filehash
TMPFILE3=`tempname repeats_$$_3` || exit 99 TMPFILE3=$(tempname repeats_$$_3) || exit 99
sort ${TMPFILE2} | filehash -v -s -${ALGORITHM} -p -n ${MIDSIZE} | sort -t ' ' -k2n,2n -k3,3 -k1,1 | REPHASH > ${TMPFILE3} sort $TMPFILE2 | filehash -v -s -$ALGORITHM -p -n $MIDSIZE | sort -t ' ' -k2n,2n -k3,3 -k1,1 | REPHASH > $TMPFILE3
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 3: num file pairs with matching digest after ${MIDSIZE} bytes = `wc -l ${TMPFILE3}`" | sed -e "s: ${TMPFILE3}::" 1>&2 echo "repeats stage 3: num file pairs with matching digest after $MIDSIZE bytes = $(wc -l $TMPFILE3)" | sed -e "s: ${TMPFILE3}::" 1>&2
fi fi
# search for duplicates based on a complete filehash # search for duplicates based on a complete filehash
TMPFILE4=`tempname repeats_$$_4` || exit 99 TMPFILE4=$(tempname repeats_$$_4) || exit 99
sed -e 's/\t/\n/' ${TMPFILE3} | sort -u | filehash -v -s -${ALGORITHM} -p | sort -t ' ' -k2n,2n -k3,3 -k1,1 | REPHASH > ${TMPFILE4} sed -e 's/\t/\n/' $TMPFILE3 | sort -u | filehash -v -s -$ALGORITHM -p | sort -t ' ' -k2n,2n -k3,3 -k1,1 | REPHASH > $TMPFILE4
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 4: num file pairs with matching complete digest = `wc -l ${TMPFILE4}`" | sed -e "s: ${TMPFILE4}::" 1>&2 echo "repeats stage 4: num file pairs with matching complete digest = $(wc -l $TMPFILE4)" | sed -e "s: ${TMPFILE4}::" 1>&2
fi fi
# do final paranoia check if requested # do final paranoia check if requested
if [ "$PARANOID" = 'n' ]; then if [ "$PARANOID" = 'n' ]; then
# make it final: print results # make it final: print results
sort ${TMPFILE4} sort $TMPFILE4
else else
TMPFILE5=`tempname repeats_$$_5` || exit 99 TMPFILE5=$(tempname repeats_$$_5) || exit 99
sort ${TMPFILE4} | REPCMP > ${TMPFILE5} sort $TMPFILE4 | REPCMP > $TMPFILE5
if [ "$VERBOSE" = 'y' ]; then if [ "$VERBOSE" = 'y' ]; then
echo "repeats stage 5: num file pairs based on cmp results = `wc -l ${TMPFILE5}`" | sed -e "s: ${TMPFILE5}::" 1>&2 echo "repeats stage 5: num file pairs based on cmp results = $(wc -l $TMPFILE5)" | sed -e "s: ${TMPFILE5}::" 1>&2
fi fi
# make it final: print results # make it final: print results
cat ${TMPFILE5} cat $TMPFILE5
fi fi
# clean up temp files # clean up temp files
rm -f ${TMPWILD} rm -f $TMPFILE0 $TMPFILE1 $TMPFILE2 $TMPFILE3 $TMPFILE4 $TMPFILE5
 End of changes. 33 change blocks. 
42 lines changed or deleted 41 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)