"Fossies" - the Fresh Open Source Software Archive

Member "Tardis-1.2.1/tools/checkDB.py" (9 Jun 2021, 4550 Bytes) of package /linux/privat/Tardis-1.2.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "checkDB.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.1.5_vs_1.2.1.

    1 #! /usr/bin/env python3
    2 # vim: set et sw=4 sts=4 fileencoding=utf-8:
    3 #
    4 # Tardis: A Backup System
    5 # Copyright 2013-2020, Eric Koldinger, All Rights Reserved.
    6 # kolding@washington.edu
    7 #
    8 # Redistribution and use in source and binary forms, with or without
    9 # modification, are permitted provided that the following conditions are met:
   10 #
   11 #     * Redistributions of source code must retain the above copyright
   12 #       notice, this list of conditions and the following disclaimer.
   13 #     * Redistributions in binary form must reproduce the above copyright
   14 #       notice, this list of conditions and the following disclaimer in the
   15 #       documentation and/or other materials provided with the distribution.
   16 #     * Neither the name of the copyright holder nor the
   17 #       names of its contributors may be used to endorse or promote products
   18 #       derived from this software without specific prior written permission.
   19 #
   20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30 # POSSIBILITY OF SUCH DAMAGE.
   31 
   32 import os, os.path
   33 import sys
   34 import sqlite3
   35 import gettext
   36 import glob
   37 
   38 from Tardis import CacheDir
   39 from Tardis import Util
   40 
   41 def hexcount(lower, upper, digits):
   42     fmt = "%0" + str(digits) + "x"
   43     for i in range(lower, upper):
   44         out = fmt % (i)
   45         yield out
   46 
   47 def getdbfiles(conn, prefix):
   48     prefix += "%"
   49     ret = set()
   50     cur = conn.execute('SELECT Checksum FROM Checksums WHERE Checksum LIKE :prefix AND IsFile = 1', {"prefix": prefix})
   51     while True:
   52         batch = cur.fetchmany()
   53         if not batch:
   54             break
   55         ret.update([i[0] for i in batch])
   56     return ret
   57 
   58 def hasExt(x):
   59     (_, e) = os.path.splitext(x)
   60     return (e is not '' and e is not None)
   61 
   62 def main():
   63     d = sys.argv[1]
   64     cd = CacheDir.CacheDir(d, create=False)
   65 
   66 
   67     db = os.path.join(d, "tardis.db")
   68     print("Opening DB: " + db)
   69     conn = sqlite3.connect(db)
   70     print("Connected")
   71 
   72     missingData = set()
   73     unreferenced = set()
   74 
   75     for i in hexcount(0, 16 ** int(cd.partsize), int(cd.partsize)):
   76         print(f"Starting: {i}  ", end='')
   77         # Get all the files which start with i
   78         dbfiles = getdbfiles(conn, i)
   79         alldatafiles = set()
   80         # Grab each subdirectory, 
   81 
   82         path = os.path.join(d, i)
   83         try:
   84             if os.path.isdir(path):
   85                 pattern = ('?' * int(cd.partsize) + os.sep) * (int(cd.parts)-1) + "*"
   86                 pattern = os.path.join(d, i, pattern)
   87                 #print(pattern)
   88 
   89                 # contents = os.listdir(path)
   90                 contents = list(map(lambda x: os.path.basename(x), glob.glob(pattern, recursive=True)))
   91                 # print(contents)
   92                 print(f"{len(contents)} files")
   93                 metafiles = set(filter(hasExt, contents))
   94                 datafiles = set([x for x in contents if not hasExt(x)])
   95 
   96                 alldatafiles.update(datafiles)
   97 
   98                 #print path, " :: ", len(contents), len(metafiles), len(datafiles), " :: ", len(dbfiles)
   99                 # Process the signature files
  100                 for f in metafiles:
  101                     (data, _) = os.path.splitext(f)
  102                     if not data in datafiles:
  103                         print("{} without matching data file".format(f))
  104             else:
  105                 print()
  106         except Exception as e:
  107             print("Caught exception proecssing directory {}: {}".format(path), e)
  108 
  109         # Find missing data files
  110         missing = dbfiles.difference(alldatafiles)
  111         missingData.update(missing)
  112         for i in missing:
  113             print("Missing data file {}".format(i))
  114 
  115         # Find files which aren't in the DB
  116         unref = alldatafiles.difference(dbfiles)
  117         unreferenced.update(unref)
  118         for i in unref:
  119             print("Unreferenced data file: {}".format(i))
  120 
  121     conn.close()
  122 
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()