"Fossies" - the Fresh Open Source Software Archive

Member "Tardis-1.2.1/tools/checkDB2.py" (9 Jun 2021, 9950 Bytes) of package /linux/privat/Tardis-1.2.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "checkDB2.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.1.5_vs_1.2.1.

    1 #! /usr/bin/env python3
    2 # vim: set et sw=4 sts=4 fileencoding=utf-8:
    3 #
    4 # Tardis: A Backup System
    5 # Copyright 2013-2020, Eric Koldinger, All Rights Reserved.
    6 # kolding@washington.edu
    7 #
    8 # Redistribution and use in source and binary forms, with or without
    9 # modification, are permitted provided that the following conditions are met:
   10 #
   11 #     * Redistributions of source code must retain the above copyright
   12 #       notice, this list of conditions and the following disclaimer.
   13 #     * Redistributions in binary form must reproduce the above copyright
   14 #       notice, this list of conditions and the following disclaimer in the
   15 #       documentation and/or other materials provided with the distribution.
   16 #     * Neither the name of the copyright holder nor the
   17 #       names of its contributors may be used to endorse or promote products
   18 #       derived from this software without specific prior written permission.
   19 #
   20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30 # POSSIBILITY OF SUCH DAMAGE.
   31 
   32 import time
   33 import sys
   34 import argparse
   35 import magic
   36 import os
   37 import io
   38 import json
   39 
   40 import Tardis
   41 from Tardis import Util
   42 from Tardis import TardisCrypto
   43 from Tardis import Config
   44 from Tardis import CacheDir
   45 from Tardis import CompressedBuffer
   46 
   47 args = None
   48 
   49 #tardis.log:      text/plain
   50 #tardis.log.br:   application/octet-stream
   51 #tardis.log.bz2:  application/x-bzip2
   52 #tardis.log.gz:   application/gzip
   53 #tardis.log.lzma: application/x-lzma
   54 #tardis.log.xz:   application/x-xz
   55 #tardis.log.zst:  application/x-zstd
   56  
   57 def processArgs():
   58     parser = argparse.ArgumentParser(description='Check contents of the DB against the file system', fromfile_prefix_chars='@', formatter_class=Util.HelpFormatter, add_help=False)
   59 
   60     (_, remaining) = Config.parseConfigOptions(parser)
   61 
   62     Config.addCommonOptions(parser)
   63     Config.addPasswordOptions(parser)
   64 
   65     #parser.add_argument('--output', '-o',   default=None, dest='output', required=True,           help='Output file')
   66 
   67     parser.add_argument('--authenticate', '-a', default='none', nargs='?', const='size',
   68                         dest='authenticate', choices=['none', 'size', 'all'], help='Authenticate files with incorrect sizes')
   69     parser.add_argument('--output', '-o', default=None, dest='output', type=argparse.FileType('w'), help='Output data')
   70 
   71     parser.add_argument('--verbose', '-v',  action='count', default=0, dest='verbose',                  help='Increase the verbosity')
   72     parser.add_argument('--version',        action='version', version='%(prog)s ' + Tardis.__versionstring__,    help='Show the version')
   73     parser.add_argument('--help', '-h',     action='help')
   74 
   75     Util.addGenCompletions(parser)
   76 
   77     return parser.parse_args(remaining)
   78 
   79 def listChecksums(tardis, chunksize=10000):
   80     rs = tardis.conn.execute("SELECT Checksum, DiskSize, Basis, Compressed, Encrypted, Added FROM Checksums WHERE isFile = 1 ORDER BY Checksum")
   81     data = rs.fetchmany(chunksize)
   82     while data:
   83         for row in data:
   84             yield(row[0], row[1], row[2], row[3], row[4], row[5])
   85         data = rs.fetchmany(chunksize)
   86 
   87 def decryptHeader(crypt, infile):
   88    # Get the IV, if it's not specified.
   89     infile.seek(0, os.SEEK_SET)
   90     iv = infile.read(crypt.ivLength)
   91 
   92     # Create the cipher
   93     encryptor = crypt.getContentEncryptor(iv)
   94 
   95     ct = infile.read(64 * 1024)
   96     pt = encryptor.decrypt(ct, False)
   97     outstream = io.BytesIO(pt)
   98     return outstream
   99 
  100 def authenticateFile(infile, size, crypt):
  101     # Get the IV, if it's not specified.
  102     infile.seek(0, os.SEEK_SET)
  103     iv = infile.read(crypt.ivLength)
  104 
  105     #logger.debug("Got IV: %d %s", len(iv), binascii.hexlify(iv))
  106 
  107     # Create the cipher
  108     encryptor = crypt.getContentEncryptor(iv)
  109 
  110     contentSize = size - crypt.ivLength - encryptor.getDigestSize()
  111     #self.logger.info("Computed Size: %d.  Specified size: %d.  Diff: %d", ctSize, size, (ctSize - size))
  112 
  113     try:
  114         rem = contentSize
  115         blocksize = 64 * 1024
  116         last = False
  117         while rem > 0:
  118             readsize = blocksize if rem > blocksize else rem
  119             if rem <= blocksize:
  120                 last = True
  121             ct = infile.read(readsize)
  122             pt = encryptor.decrypt(ct, last)
  123             if last:
  124                 # ie, we're the last block
  125                 digest = infile.read(encryptor.getDigestSize())
  126                 #logger.debug("Got HMAC Digest: %d %s", len(digest), binascii.hexlify(digest))
  127                 readsize += len(digest)
  128                 try:
  129                     encryptor.verify(digest)
  130                 except:
  131                     #logger.debug("HMAC's:  File: %-128s Computed: %-128s", binascii.hexlify(digest), binascii.hexlify(encryptor.digest()))
  132                     print("HMAC's:  File: %-128s Computed: %-128s", binascii.hexlify(digest), binascii.hexlify(encryptor.digest()))
  133                     return False
  134             rem -= readsize
  135         return True
  136     except:
  137         return False
  138 
  139 #tardis.log:      text/plain
  140 #tardis.log.br:   application/octet-stream
  141 #tardis.log.bz2:  application/x-bzip2
  142 #tardis.log.gz:   application/gzip
  143 #tardis.log.lzma: application/x-lzma
  144 #tardis.log.xz:   application/x-xz
  145 #tardis.log.zst:  application/x-zstd
  146 
  147 def checkCompression(mimetype, compresstype):
  148     if (compresstype == 'zlib' or compresstype == True or compresstype == 1):
  149         if mimetype != 'application/zlib':
  150             return False
  151     elif compresstype == 'zstd':
  152         if mimetype != 'application/x-std':
  153             return False
  154     elif compresstype == 'bzip':
  155         if mimetype != 'application/x-bzip2':
  156             return False
  157     elif compresstype == 'lzma': 
  158         if mimetype != 'application/x-lzma' and mimetype != 'application/x-xz':
  159             return False
  160     elif not (compresstype == 'none' or compresstype == False):
  161         print(f"Unknown compression type: {compresstype}")
  162     return True
  163 
  164 missing = []
  165 zero = []
  166 mismatch = []
  167 notdelta = []
  168 notauth = []
  169 badcomp = []
  170 sizes = {}
  171 
  172 def checkFile(cache, crypt, checksum, size, basis, compressed, encrypted, added, authCond):
  173     fsize = cache.size(checksum)
  174     if not cache.exists(checksum):
  175         #print(f"{checksum}: does not exist")
  176         missing.append(checksum)
  177     elif fsize == 0:
  178         print(f"{checksum} is empty")
  179         zero.append(checksum)
  180     else:
  181         authenticate = (authCond == 'all')
  182         if fsize != size:
  183             print(f"{checksum}: size mismatch Expected: {size}, found {fsize} ({fsize - size})-- {added} -- {basis is not None} ")
  184             mismatch.append((checksum, size, fsize, compressed))
  185             sizes.setdefault((fsize - size), []).append(checksum)
  186             if authCond != 'none':
  187                 authenticate = True
  188 
  189             try:
  190                 instream = decryptHeader(crypt, cache.open(checksum, "rb"))
  191                 uc = CompressedBuffer.UncompressedBufferedReader(instream, compressor="none")
  192                 data = uc.read(256)
  193                 mimetype = magic.from_buffer(data, mime=True)
  194                 if not checkCompression(mimetype, compressed):
  195                     print(f"{checksum} has wrong compression type.   Expected {compressed}.  Found {mimetype}")
  196                     badcomp.append((checksum, compressed, mimetype))
  197             except Exception as e:
  198                 print(f"Caught exception: {compressed} {e}")
  199                 badcomp.append((checksum, compressed, "error"))
  200 
  201         elif basis:
  202             #print(f"{checksum} -- {compressed} {encrypted}", flush=True)
  203             instream = decryptHeader(crypt, cache.open(checksum, "rb"))
  204             uc = CompressedBuffer.UncompressedBufferedReader(instream, compressor=compressed)
  205             data = uc.read(256)
  206             kind = magic.from_buffer(data)
  207             if kind != 'rdiff network-delta data':
  208                 print(f"{checksum}: Not a delta: {kind}")
  209                 notdelta.append((checksum, kind))
  210 
  211         if authenticate:
  212             with cache.open(checksum, "rb") as f:
  213                 if not authenticateFile(f, fsize, crypt):
  214                     print(f"{checksum} did not authenticate")
  215                     notauth.append(checksum)
  216 
  217 def main():
  218     global args
  219     args = processArgs()
  220 
  221     password = Util.getPassword(args.password, args.passwordfile, args.passwordprog, prompt="Password for %s: " % (args.client))
  222     (tardis, cache, crypt) = Util.setupDataConnection(args.database, args.client, password, args.keys, args.dbname, args.dbdir)
  223 
  224     count = 0
  225     for (checksum, size, basis, compressed, encrypted, added) in listChecksums(tardis):
  226         count += 1
  227         checkFile(cache, crypt, checksum, size, basis, compressed, encrypted, added, args.authenticate)
  228 
  229     print(f"Files: {count} Missing Files: {len(missing)} Empty: {len(zero)} Size mismatch: {len(mismatch)} Not Delta: {len(notdelta)} Wrong Comp: {len(badcomp)}")
  230     #for i in sizes:
  231     #    print(f"   Size: {i}: Count {len(sizes[i])}")
  232 
  233     if args.output:
  234         out = {
  235             "missing": missing,
  236             "empty": zero,
  237             "size": mismatch,
  238             "notauth": notauth,
  239             "notdelta": notdelta,
  240             "badcomp":  badcomp
  241             }
  242         json.dump(out, args.output, indent=2)
  243 
  244 
  245     return 0
  246 
  247 if __name__ == "__main__":
  248     sys.exit(main())