"Fossies" - the Fresh Open Source Software Archive

Member "zim-0.71.1/zim/notebook/index/files.py" (25 Apr 2019, 12457 Bytes) of package /linux/privat/zim-0.71.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "files.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.69.1_vs_0.70.

    1 
    2 
    3 import os
    4 import logging
    5 
    6 logger = logging.getLogger('zim.notebook.index')
    7 
    8 
    9 # In addition allow indexing a page directly - file sync to happen later
   10 # Allow on_move, on_delete etc.
   11 # Allow force re-index
   12 
   13 # Files are indexed relative to the notebook folder, which allows the
   14 # absolute path to change, e.g. when the notebook is on a USB stick
   15 
   16 # Priority sorted, higher number overrules lower number
   17 STATUS_UPTODATE = 0
   18 STATUS_CHECK = 1
   19 STATUS_NEED_UPDATE = 2
   20 STATUS_NEED_DELETION = 3
   21 
   22 TYPE_FOLDER = 1
   23 TYPE_FILE = 2
   24 
   25 from zim.newfs import File, Folder, SEP
   26 from zim.signals import SignalEmitter
   27 
   28 
   29 class FilesIndexer(SignalEmitter):
   30     '''Class that will update the "files" table in the index based on
   31     changes seen on the file system.
   32 
   33     @signal: C{file-row-inserted (row, file)}: on new file found
   34     @signal: C{file-row-changed (row, file)}: on file content changed
   35     @signal: C{file-row-deleted (row)}: on file deleted
   36 
   37     '''
   38 
   39     # Note that there are no methods for new files or folders,
   40     # only methods for updating.
   41     # Logic is that we always start with update of a parent folder.
   42     # This means root folder always needs to be present in the table.
   43     #
   44     # Exception is a callback to let explicitly add a new file from
   45     # page save in notebook
   46 
   47     __signals__ = {
   48         'file-row-inserted': (None, None, (object,)),
   49         'file-row-changed': (None, None, (object,)),
   50         'file-row-deleted': (None, None, (object,)),
   51     }
   52 
   53     def __init__(self, db, folder):
   54         self.db = db
   55         self.folder = folder
   56 
   57         self.db.executescript('''
   58         CREATE TABLE IF NOT EXISTS files(
   59             id INTEGER PRIMARY KEY,
   60             parent INTEGER REFERENCES files(id),
   61 
   62             path TEXT UNIQUE NOT NULL,
   63             node_type INTEGER NOT NULL,
   64             mtime TIMESTAMP,
   65 
   66             index_status INTEGER DEFAULT 3
   67 
   68             CONSTRAINT no_self_ref CHECK (parent <> id)
   69         );
   70         ''')
   71         row = self.db.execute('SELECT * FROM files WHERE id == 1').fetchone()
   72         if row is None:
   73             c = self.db.execute(
   74                 'INSERT INTO files(parent, path, node_type, index_status)'
   75                 ' VALUES (?, ? , ?, ?)',
   76                 (0, '.', TYPE_FOLDER, STATUS_NEED_UPDATE)
   77             )
   78             assert c.lastrowid == 1 # ensure we start empty
   79 
   80     def is_uptodate(self):
   81         row = self.db.execute(
   82             'SELECT * FROM files WHERE index_status=?',
   83             (STATUS_NEED_UPDATE,)
   84         ).fetchone()
   85         return row is None
   86 
   87     def update_iter(self):
   88         '''Generator function for the actual update'''
   89         for i in self._update_iter_inner():
   90             yield
   91 
   92     def _update_iter_inner(self, prefix=''):
   93         # sort folders before files: first index structure, then contents
   94         # this makes e.g. index links more efficient and robust
   95         # sort by id to ensure parents are found before children
   96         while True:
   97             row = self.db.execute(
   98                 'SELECT id, path, node_type FROM files'
   99                 ' WHERE index_status = ? AND path LIKE ?'
  100                 ' ORDER BY node_type, id',
  101                 (STATUS_NEED_UPDATE, prefix + '%')
  102             ).fetchone()
  103 
  104             if row:
  105                 node_id, path, node_type = row
  106             else:
  107                 break
  108 
  109             try:
  110                 if node_type == TYPE_FOLDER:
  111                     folder = self.folder.folder(path)
  112                     if folder.exists():
  113                         self.update_folder(node_id, folder)
  114                     else:
  115                         self.delete_folder(node_id)
  116                 else:
  117                     file = self.folder.file(path)
  118                     if file.exists():
  119                         self.update_file(node_id, file)
  120                     else:
  121                         self.delete_file(node_id)
  122             except:
  123                 logger.exception('Error while indexing: %s', path)
  124                 self.db.execute( # avoid looping
  125                     'UPDATE files SET index_status = ? WHERE id = ?',
  126                     (STATUS_UPTODATE, node_id)
  127                 )
  128 
  129             self.db.commit()
  130             yield
  131 
  132     def interactive_add_file(self, file):
  133         assert isinstance(file, File) and file.exists()
  134         parent_id = self._add_parent(file.parent())
  135         path = file.relpath(self.folder)
  136         self.db.execute(
  137             'INSERT INTO files(path, node_type, index_status, parent)'
  138             ' VALUES (?, ?, ?, ?)',
  139             (path, TYPE_FILE, STATUS_NEED_UPDATE, parent_id),
  140         )
  141         row = self.db.execute(
  142             'SELECT * FROM files WHERE path=?', (path,)
  143         ).fetchone()
  144 
  145         self.emit('file-row-inserted', row)
  146 
  147         self.update_file(row['id'], file)
  148 
  149     def interactive_add_folder(self, folder):
  150         assert isinstance(folder, Folder) and folder.exists()
  151         parent_id = self._add_parent(folder.parent())
  152         path = folder.relpath(self.folder)
  153         self.db.execute(
  154             'INSERT INTO files(path, node_type, index_status, parent)'
  155             ' VALUES (?, ?, ?, ?)',
  156             (path, TYPE_FOLDER, STATUS_NEED_UPDATE, parent_id),
  157         )
  158         row = self.db.execute(
  159             'SELECT * FROM files WHERE path=?', (path,)
  160         ).fetchone()
  161 
  162         self.emit('file-row-inserted', row)
  163 
  164         self.update_folder(row['id'], folder)
  165         for i in self._update_iter_inner(prefix=path):
  166             pass
  167 
  168     def _add_parent(self, folder):
  169         if folder == self.folder:
  170             return 1
  171 
  172         path = folder.relpath(self.folder)
  173         r = self.db.execute(
  174             'SELECT id FROM files WHERE path=?', (path,)
  175         ).fetchone()
  176         if r is None:
  177             parent_id = self._add_parent(folder.parent()) # recurs
  178             self.db.execute(
  179                 'INSERT INTO files(path, node_type, index_status, parent) '
  180                 'VALUES (?, ?, ?, ?)',
  181                 (path, TYPE_FOLDER, STATUS_CHECK, parent_id)
  182                 # We set status to check because we assume the file being
  183                 # added is the only child, but makes sense to verify later on
  184             )
  185             r = self.db.execute(
  186                 'SELECT id FROM files WHERE path=?', (path,)
  187             ).fetchone()
  188             return r[0]
  189         else:
  190             return r[0]
  191 
  192     def update_folder(self, node_id, folder):
  193         # First invalidate all, so any children that are not found in
  194         # update will be left with this status
  195         logger.debug('Index folder: %s', folder)
  196 
  197         children = {}
  198         for childpath, child_id, mtime, index_status in self.db.execute(
  199             'SELECT path, id, mtime, index_status FROM files WHERE parent = ?',
  200             (node_id,)
  201         ):
  202             children[childpath] = (child_id, mtime, index_status)
  203 
  204         self.db.execute(
  205             'UPDATE files SET index_status = ? WHERE parent = ?',
  206             (STATUS_NEED_DELETION, node_id)
  207         )
  208 
  209         mtime = folder.mtime() # get mtime before getting contents
  210         for child in folder:
  211             path = child.relpath(self.folder)
  212             if path in children:
  213                 child_id, child_mtime, index_status = children[path]
  214                 if index_status == STATUS_NEED_UPDATE or child.mtime() != child_mtime:
  215                     # If the status was "need update" already, don't overrule it
  216                     # here with mtime check - else we break flag_reindex()
  217                     self.db.execute(
  218                         'UPDATE files SET index_status = ? WHERE id = ?',
  219                         (STATUS_NEED_UPDATE, child_id)
  220                     )
  221                 else:
  222                     self.set_node_uptodate(child_id, child_mtime)
  223             else:
  224                 # new child
  225                 node_type = TYPE_FILE if isinstance(child, File) else TYPE_FOLDER
  226                 if node_type == TYPE_FILE:
  227                     self.db.execute(
  228                         'INSERT INTO files(path, node_type, index_status, parent)'
  229                         ' VALUES (?, ?, ?, ?)',
  230                         (path, node_type, STATUS_NEED_UPDATE, node_id),
  231                     )
  232                     row = self.db.execute(
  233                         'SELECT * FROM files WHERE path=?', (path,)
  234                     ).fetchone()
  235                     self.emit('file-row-inserted', row)
  236                 else:
  237                     self.db.execute(
  238                         'INSERT INTO files(path, node_type, index_status, parent)'
  239                         ' VALUES (?, ?, ?, ?)',
  240                         (path, node_type, STATUS_NEED_UPDATE, node_id),
  241                     )
  242 
  243         # Clean up nodes not found in listing
  244         for child_id, child_type in self.db.execute(
  245             'SELECT id, node_type FROM files WHERE parent=? AND index_status=?',
  246             (node_id, STATUS_NEED_DELETION)
  247         ):
  248             if child_type == TYPE_FOLDER:
  249                 self.delete_folder(child_id)
  250             else:
  251                 self.delete_file(child_id)
  252 
  253         self.set_node_uptodate(node_id, mtime)
  254 
  255     def update_file(self, node_id, file):
  256         logger.debug('Index file: %s', file)
  257         # get mtime before contents /signal
  258         self.set_node_uptodate(node_id, file.mtime())
  259         row = self.db.execute('SELECT * FROM files WHERE id=?', (node_id,)).fetchone()
  260         assert row is not None, 'No row matching id: %r' % node_id
  261         self.emit('file-row-changed', row)
  262 
  263     def set_node_uptodate(self, node_id, mtime):
  264         self.db.execute(
  265             'UPDATE files SET index_status = ?, mtime = ? WHERE id = ?',
  266             (STATUS_UPTODATE, mtime, node_id)
  267         )
  268 
  269     def delete_file(self, node_id):
  270         row = self.db.execute('SELECT * FROM files WHERE id=?', (node_id,)).fetchone()
  271         logger.debug('Drop file: %s', row['path'])
  272         self.emit('file-row-deleted', row)
  273         self.db.execute('DELETE FROM files WHERE id == ?', (node_id,))
  274 
  275     def delete_folder(self, node_id):
  276         assert node_id != 1, 'BUG: notebook folder went missing ?'
  277         for child_id, child_type in self.db.execute(
  278             'SELECT id, node_type FROM files WHERE parent == ?',
  279             (node_id,)
  280         ):
  281             if child_type == TYPE_FOLDER:
  282                 self.delete_folder(child_id) # recurs
  283             else:
  284                 self.delete_file(child_id)
  285 
  286         row = self.db.execute('SELECT * FROM files WHERE id=?', (node_id,)).fetchone()
  287         logger.debug('Drop folder: %s', row['path'])
  288         self.db.execute('DELETE FROM files WHERE id == ?', (node_id,))
  289 
  290 
  291 class FilesIndexChecker(object):
  292 
  293     def __init__(self, db, folder):
  294         self.db = db
  295         self.folder = folder
  296 
  297     def queue_check(self, file=None, recursive=True):
  298         if file is None:
  299             file = self.folder
  300         elif not (file == self.folder or file.ischild(self.folder)):
  301             raise ValueError('file must be child of %s' % self.folder)
  302 
  303         # If path is not indexed, find parent that is
  304         while not file == self.folder:
  305             row = self.db.execute(
  306                 'SELECT * FROM files WHERE path = ?',
  307                 (file.relpath(self.folder), )
  308             ).fetchone()
  309             if row is None:
  310                 file = file.parent()
  311             else:
  312                 break # continue with this file or folder
  313 
  314         # Queue check
  315         if recursive and file == self.folder:
  316             self.db.execute(
  317                 'UPDATE files SET index_status = ? WHERE index_status < ?',
  318                 (STATUS_CHECK, STATUS_CHECK)
  319             )
  320         else:
  321             path = '.' if file == self.folder else file.relpath(self.folder)
  322             self.db.execute(
  323                 'UPDATE files SET index_status = ? WHERE path = ? and index_status < ?',
  324                 (STATUS_CHECK, path, STATUS_CHECK)
  325             )
  326             if recursive and isinstance(file, Folder):
  327                 self.db.execute(
  328                     'UPDATE files SET index_status = ? WHERE path LIKE ? and index_status < ?',
  329                     (STATUS_CHECK, path + SEP + '%', STATUS_CHECK)
  330                 )
  331             self.db.commit()
  332 
  333     def check_iter(self):
  334         '''Generator function that walks existing records and flags
  335         records that are not longer valid. Yields in between checks
  336         to allow embedding in a loop.
  337         @returns: Yields C{True} when an out of
  338         date record is found.
  339         '''
  340         # Check for pending updates first
  341         row = self.db.execute(
  342             'SELECT id FROM files WHERE index_status=?',
  343             (STATUS_NEED_UPDATE,)
  344         ).fetchone()
  345         if row is not None:
  346             yield True
  347 
  348         # sort folders before files: first index structure, then contents
  349         # this makes e.g. index links more efficient and robust
  350         # sort by id to ensure parents are found before children
  351 
  352         while True:
  353             row = self.db.execute(
  354                 'SELECT id, path, node_type, mtime, index_status FROM files'
  355                 ' WHERE index_status > ? '
  356                 ' ORDER BY node_type, id',
  357                 (STATUS_UPTODATE,)
  358             ).fetchone()
  359 
  360             if row:
  361                 #~ logger.debug('Check %s', row['path'])
  362                 node_id, path, node_type, mtime, check = row
  363             else:
  364                 break # done
  365 
  366             if check == STATUS_NEED_UPDATE:
  367                 yield True
  368                 continue # let updater handle this first
  369 
  370             try:
  371                 if node_type == TYPE_FOLDER:
  372                     obj = self.folder.folder(path)
  373                 else:
  374                     obj = self.folder.file(path)
  375 
  376                 if not obj.exists():
  377                     check = STATUS_CHECK # update will drop children, no need to recurs anymore
  378                     new_status = STATUS_NEED_UPDATE
  379 
  380                 else:
  381                     if mtime == obj.mtime():
  382                         new_status = STATUS_UPTODATE
  383                     else:
  384                         new_status = STATUS_NEED_UPDATE
  385 
  386                 self.db.execute(
  387                     'UPDATE files SET index_status = ?'
  388                     ' WHERE id = ?',
  389                     (new_status, node_id)
  390                 )
  391                 self.db.commit()
  392 
  393             except:
  394                 logger.exception('Error while indexing: %s', path)
  395                 self.db.execute( # avoid looping
  396                     'UPDATE files SET index_status = ? WHERE id = ?',
  397                     (STATUS_NEED_UPDATE, node_id)
  398                 )
  399                 self.db.commit()
  400                 new_status = STATUS_NEED_UPDATE
  401 
  402             yield new_status == STATUS_NEED_UPDATE
  403 
  404 
  405 
  406 class TestFilesDBTable(object):
  407     # Mixin for test cases, defined here to have all SQL in one place
  408 
  409     def assertFilesDBConsistent(self, db):
  410         for row in db.execute('SELECT * FROM files'):
  411             if row['id'] > 1:
  412                 parent = db.execute(
  413                     'SELECT * FROM files WHERE id=?',
  414                     (row['id'],)
  415                 ).fetchone()
  416                 self.assertIsNotNone(parent,
  417                     'Missing parent for %s' % row['path'])
  418 
  419 
  420     def assertFilesDBEquals(self, db, paths):
  421         import os
  422         rows = db.execute('SELECT * FROM files WHERE id>1').fetchall()
  423 
  424         in_db = dict((r['path'], r['node_type']) for r in rows)
  425         wanted = dict(
  426             (p.strip(SEP), TYPE_FOLDER if p.endswith(SEP) else TYPE_FILE)
  427                 for p in paths
  428         )
  429 
  430         self.assertEqual(in_db, wanted)