"Fossies" - the Fresh Open Source Software Archive

Member "ncdc-1.22.1/src/fl_local.c" (6 Apr 2019, 36427 Bytes) of package /linux/privat/ncdc-1.22.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "fl_local.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.21_vs_1.22.

    1 /* ncdc - NCurses Direct Connect client
    2 
    3   Copyright (c) 2011-2019 Yoran Heling
    4 
    5   Permission is hereby granted, free of charge, to any person obtaining
    6   a copy of this software and associated documentation files (the
    7   "Software"), to deal in the Software without restriction, including
    8   without limitation the rights to use, copy, modify, merge, publish,
    9   distribute, sublicense, and/or sell copies of the Software, and to
   10   permit persons to whom the Software is furnished to do so, subject to
   11   the following conditions:
   12 
   13   The above copyright notice and this permission notice shall be included
   14   in all copies or substantial portions of the Software.
   15 
   16   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   17   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   18   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   19   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   20   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   21   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   22   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   23 
   24 */
   25 
   26 
   27 #include "ncdc.h"
   28 #include "fl_local.h"
   29 
   30 
   31 char           *fl_local_list_file;
   32 fl_list_t      *fl_local_list  = NULL;
   33 GQueue         *fl_refresh_queue = NULL;
   34 time_t          fl_refresh_last = 0; // time when the last full file list refresh has been queued
   35 static gboolean fl_needflush = FALSE;
   36 // Index of the files in fl_local_list. Key = TTH root, value = GSList of files.
   37 static GHashTable *fl_hash_index;
   38 guint64         fl_local_list_size;   // total share size, minus duplicate files
   39 int             fl_local_list_length; // total number of unique files in the share
   40 
   41 static GThreadPool *fl_scan_pool;
   42 static GThreadPool *fl_hash_pool;
   43 
   44 GHashTable       *fl_hash_queue = NULL; // set of files-to-hash
   45 guint64           fl_hash_queue_size = 0;
   46 static fl_list_t *fl_hash_cur = NULL;   // most recent file initiated for hashing
   47 ratecalc_t        fl_hash_rate;
   48 static guint64    fl_hash_reset = 0; // increased when fl_hash_cur is removed from the queue (to stop current hash operation)
   49 static GMutex    *fl_hash_resetlock;
   50 static GCond     *fl_hash_resetcond;
   51 
   52 #define TTH_BUFSIZE (512*1024)
   53 
   54 
   55 // Utility functions
   56 
   57 // Get full path to an item in our list. Result should be free'd. This function
   58 // isn't particularly fast.
   59 char *fl_local_path(fl_list_t *fl) {
   60   if(!fl->parent->parent)
   61     return g_strdup(db_share_path(fl->name));
   62   char *tmp, *path = g_strdup(fl->name);
   63   fl_list_t *cur = fl->parent;
   64   while(cur->parent && cur->parent->parent) {
   65     tmp = path;
   66     path = g_build_filename(cur->name, path, NULL);
   67     g_free(tmp);
   68     cur = cur->parent;
   69   }
   70   tmp = g_build_filename(db_share_path(cur->name), path, NULL);
   71   g_free(path);
   72   return tmp;
   73 }
   74 
   75 
   76 // Similar to fl_list_from_path, except this function starts from the root of
   77 // the local filelist, and also accepts filesystem paths. In the latter case,
   78 // the path must be absolute and "real" (i.e., what realpath() would return),
   79 // since that is the path that is stored in the config file. This function
   80 // makes no attempt to convert the given path into a real path.
   81 fl_list_t *fl_local_from_path(const char *path) {
   82   fl_list_t *n = fl_list_from_path(fl_local_list, path);
   83   if(!n && path[0] == '/') {
   84     db_share_item_t *l = db_share_list();
   85     for(; l->name; l++)
   86       if(strncmp(path, l->path, strlen(l->path)) == 0)
   87         break;
   88     if(l->name) {
   89       char *npath = g_strconcat(l->name, path+strlen(l->path), NULL);
   90       n = fl_list_from_path(fl_local_list, npath);
   91       g_free(npath);
   92     }
   93   }
   94   return n;
   95 }
   96 
   97 
   98 // Auto-complete for fl_local_from_path()
   99 void fl_local_suggest(char *path, char **sug) {
  100   fl_list_suggest(fl_local_list, path, sug);
  101   if(!sug[0])
  102     path_suggest(path, sug);
  103 }
  104 
  105 
  106 // get files with the (raw) TTH. Result does not have to be freed.
  107 GSList *fl_local_from_tth(const char *root) {
  108   return g_hash_table_lookup(fl_hash_index, root);
  109 }
  110 
  111 
  112 // Fill a bloom filter with all local hashes
  113 void fl_local_bloom(bloom_t *b) {
  114   GHashTableIter iter;
  115   const char *tth;
  116   g_hash_table_iter_init(&iter, fl_hash_index);
  117   while(g_hash_table_iter_next(&iter, (gpointer *)&tth, NULL))
  118     bloom_add(b, tth);
  119 }
  120 
  121 
  122 // should be run from a timer. periodically flushes all unsaved data to disk.
  123 gboolean fl_flush(gpointer dat) {
  124   if(fl_needflush) {
  125     // save our file list
  126     GError *err = NULL;
  127     if(!fl_save(fl_local_list, var_get(0, VAR_cid), 0, FALSE, NULL, fl_local_list_file, &err)) {
  128       // this is a pretty fatal error... oh well, better luck next time
  129       ui_mf(uit_main_tab, UIP_MED, "Error saving file list: %s", err->message);
  130       g_error_free(err);
  131     }
  132   }
  133   fl_needflush = FALSE;
  134   return TRUE;
  135 }
  136 
  137 // are we currently refreshing the share?
  138 gboolean fl_is_refreshing(void) {
  139   return fl_refresh_queue && fl_refresh_queue->head;
  140 }
  141 
  142 // Hash index interface. These operate on fl_hash_index and make sure
  143 // fl_local_list_size and _length stay correct.
  144 
  145 // Add to the hash index
  146 static void fl_hashindex_insert(fl_list_t *fl) {
  147   GSList *cur = g_hash_table_lookup(fl_hash_index, fl->tth);
  148   if(cur) {
  149     g_return_if_fail(cur == g_slist_insert(cur, fl, 1)); // insert item without modifying the pointer
  150   } else {
  151     cur = g_slist_prepend(cur, fl);
  152     g_hash_table_insert(fl_hash_index, fl->tth, cur);
  153     fl_local_list_size += fl->size;
  154   }
  155   fl_local_list_length = g_hash_table_size(fl_hash_index);
  156 }
  157 
  158 
  159 // ...and remove a file. This is done when a file is actually removed from the
  160 // share, or when its TTH information has been invalidated.
  161 static void fl_hashindex_del(fl_list_t *fl) {
  162   if(!fl->hastth)
  163     return;
  164   GSList *cur = g_hash_table_lookup(fl_hash_index, fl->tth);
  165   fl->hastth = FALSE;
  166 
  167   cur = g_slist_remove(cur, fl);
  168   if(!cur) {
  169     g_hash_table_remove(fl_hash_index, fl->tth);
  170     fl_local_list_size -= fl->size;
  171   // there's another file with the same TTH.
  172   } else
  173     g_hash_table_replace(fl_hash_index, ((fl_list_t *)cur->data)->tth, cur);
  174   fl_local_list_length = g_hash_table_size(fl_hash_index);
  175 }
  176 
  177 
  178 
  179 
  180 
  181 // Scanning directories
  182 
  183 // Note: The `file' structure points to a (sub-)item in fl_local_list, and will
  184 // be accessed from both the scan thread and the main thread. It is therefore
  185 // important that no changes are made to the local file list while the scan
  186 // thread is active.
  187 typedef struct fl_scan_t {
  188   fl_list_t **file, **res;
  189   char **path;
  190   GRegex *excl_regex;
  191   gboolean emptydirs;
  192   gboolean inc_hidden;
  193   gboolean symlink;
  194   gboolean (*donefun)(gpointer);
  195 } fl_scan_t;
  196 
  197 
  198 // Removes duplicate files (that is, files with the same name in a
  199 // case-insensitive context) from a dirtectory.
  200 static void fl_scan_rmdupes(fl_list_t *fl, const char *vpath) {
  201   int i = 1;
  202   while(i<fl->sub->len) {
  203     fl_list_t *a = g_ptr_array_index(fl->sub, i-1);
  204     fl_list_t *b = g_ptr_array_index(fl->sub, i);
  205     if(fl_list_cmp_strict(a, b) == 0) {
  206       char *tmp = g_build_filename(vpath, b->name, NULL);
  207       ui_mf(uit_main_tab, UIP_MED, "Not sharing \"%s\": Other file with same name (but different case) already shared.", tmp);
  208       fl_list_remove(b);
  209       g_free(tmp);
  210     } else
  211       i++;
  212   }
  213 }
  214 
  215 
  216 // Queue an id to be removed from the hash files. Files are removed in a batch
  217 // of DELETE FROM queries in a single transaction. This is significantly faster
  218 // than using a separate transaction for each DELETE.
  219 static void fl_scan_invalidate(gint64 id, gboolean force_flush) {
  220   static gint64 rmids[50];
  221   static int i = 0;
  222 
  223   if(id)
  224     rmids[i++] = id;
  225 
  226   if(i >= 50 || (force_flush && i > 0)) {
  227     db_fl_rmfiles(rmids, i);
  228     i = 0;
  229   }
  230 }
  231 
  232 
  233 // Fetches TTH information either from the database or from *oldpar, and
  234 // invalidates this data if the file has changed.
  235 static void fl_scan_check(fl_list_t *oldpar, fl_list_t *new, const char *real) {
  236   time_t oldlastmod;
  237   guint64 oldsize;
  238   char oldhash[24];
  239   gint64 oldid = 0;
  240 
  241   fl_list_t *old = oldpar && oldpar->sub ? fl_list_file_strict(oldpar, new) : NULL;
  242   // Get from the previous in-memory structure
  243   if(old && fl_list_getlocal(old).id) {
  244     oldid = fl_list_getlocal(old).id;
  245     oldlastmod = fl_list_getlocal(old).lastmod;
  246     oldsize = old->size;
  247     memcpy(oldhash, old->tth, 24);
  248   // Otherwise, do a database lookup on the file path
  249   } else
  250     oldid = db_fl_getfile(real, &oldlastmod, &oldsize, oldhash);
  251 
  252   // Check for file change
  253   if(oldid && (oldlastmod < fl_list_getlocal(new).lastmod || oldsize != new->size)) {
  254     g_debug("fl: Dropping hash information for `%s': file has changed.", real);
  255     fl_scan_invalidate(oldid, FALSE);
  256   // Otherwise, update *new
  257   } else if(oldid) {
  258     new->hastth = TRUE;
  259     memcpy(new->tth, oldhash, 24);
  260     fl_list_getlocal(new).lastmod = oldlastmod;
  261     fl_list_getlocal(new).id = oldid;
  262   }
  263 }
  264 
  265 
  266 // *name is in filesystem encoding. For *path and *vpath see fl_scan_dir().
  267 static fl_list_t *fl_scan_item(fl_list_t *old, const char *path, const char *vpath, const char *name, fl_scan_t *opts) {
  268   char *uname = NULL;  // name-to-UTF8
  269   char *vcpath = NULL; // vpath + uname
  270   char *ename = NULL;  // uname-to-filesystem
  271   char *cpath = NULL;  // path + ename
  272   char *real = NULL;   // path_expand(cpath)-to-UTF8
  273   fl_list_t *node = NULL;
  274 
  275   // Try to get a UTF-8 filename
  276   uname = g_filename_to_utf8(name, -1, NULL, NULL, NULL);
  277   if(!uname)
  278     uname = g_filename_display_name(name);
  279 
  280   // Check for share_exclude as soon as we have the confname
  281   if(opts->excl_regex && g_regex_match(opts->excl_regex, uname, 0, NULL))
  282     goto done;
  283 
  284   // Get the virtual path (for reporting purposes)
  285   vcpath = g_build_filename(vpath, uname, NULL);
  286 
  287   // Check that the UTF-8 filename can be converted back to something we can
  288   // access on the filesystem. If it can't be converted back, we won't share
  289   // the file at all. Keeping track of a raw-to-UTF-8 filename lookup table
  290   // isn't worth the effort.
  291   ename = g_filename_from_utf8(uname, -1, NULL, NULL, NULL);
  292   if(!ename) {
  293     ui_mf(uit_main_tab, UIP_MED, "Error reading directory entry in \"%s\": Invalid encoding.", vcpath);
  294     goto done;
  295   }
  296 
  297   // Get cpath and try to stat() the file
  298   cpath = g_build_filename(path, ename, NULL);
  299   struct stat dat;
  300   int r = opts->symlink ? stat(cpath, &dat) : lstat(cpath, &dat);
  301   if(r < 0 || S_ISLNK(dat.st_mode) || !(S_ISREG(dat.st_mode) || S_ISDIR(dat.st_mode))) {
  302     if(r < 0)
  303       ui_mf(uit_main_tab, UIP_MED, "Error stat'ing \"%s\": %s", vcpath, g_strerror(errno));
  304     else if(!S_ISLNK(dat.st_mode))
  305       ui_mf(uit_main_tab, UIP_MED, "Not sharing \"%s\": Neither file nor directory.", vcpath);
  306     goto done;
  307   }
  308 
  309   // Get the path_expand() (for lookup in the database)
  310   // TODO: this path is only used if fl_scan_check() can't find the item in the
  311   // old fl_list structure. It may be more efficient to only try to execute
  312   // this code when this is the case.
  313   char *tmp = path_expand(cpath);
  314   if(!tmp) {
  315     ui_mf(uit_main_tab, UIP_MED, "Error getting file path for \"%s\": %s", vcpath, g_strerror(errno));
  316     goto done;
  317   }
  318   real = g_filename_to_utf8(tmp, -1, NULL, NULL, NULL);
  319   g_free(tmp);
  320   if(!real) {
  321     ui_mf(uit_main_tab, UIP_MED, "Error getting file path for \"%s\": %s", vcpath, "Encoding error.");
  322     goto done;
  323   }
  324 
  325   // create the node
  326   node = fl_list_create(uname, S_ISREG(dat.st_mode) ? TRUE : FALSE);
  327   if(S_ISREG(dat.st_mode)) {
  328     node->isfile = TRUE;
  329     node->size = dat.st_size;
  330     fl_list_getlocal(node).lastmod = dat.st_mtime;
  331   }
  332 
  333   // Fetch id, tth, and hashtth fields.
  334   if(node->isfile)
  335     fl_scan_check(old, node, real);
  336 
  337 done:
  338   g_free(uname);
  339   g_free(vcpath);
  340   g_free(cpath);
  341   g_free(ename);
  342   g_free(real);
  343   return node;
  344 }
  345 
  346 
  347 // recursive
  348 // Doesn't handle paths longer than PATH_MAX, but I don't think it matters all that much.
  349 // *path is the filesystem path in filename encoding, vpath is the virtual path in UTF-8.
  350 static void fl_scan_dir(fl_list_t *parent, fl_list_t *old, const char *path, const char *vpath, fl_scan_t *opts) {
  351   GError *err = NULL;
  352   GDir *dir = g_dir_open(path, 0, &err);
  353   if(!dir) {
  354     ui_mf(uit_main_tab, UIP_MED, "Error reading directory \"%s\": %s", vpath, g_strerror(errno));
  355     g_error_free(err);
  356     return;
  357   }
  358   const char *name;
  359   while((name = g_dir_read_name(dir))) {
  360     if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
  361       continue;
  362     if(!opts->inc_hidden && name[0] == '.')
  363       continue;
  364     // check with *excl, stat and create
  365     fl_list_t *item = fl_scan_item(old, path, vpath, name, opts);
  366     // and add it
  367     if(item)
  368       fl_list_add(parent, item, -1);
  369   }
  370   g_dir_close(dir);
  371 
  372   // Sort
  373   fl_list_sort(parent);
  374   fl_scan_rmdupes(parent, vpath);
  375 
  376   // check for directories (outside of the above loop, to avoid having too many
  377   // directories opened at the same time. Costs some extra CPU cycles, though...)
  378   int i;
  379   for(i=0; i<parent->sub->len; i++) {
  380     fl_list_t *cur = g_ptr_array_index(parent->sub, i);
  381     if(!cur->isfile) {
  382       char *enc = g_filename_from_utf8(cur->name, -1, NULL, NULL, NULL);
  383       char *cpath = g_build_filename(path, enc, NULL);
  384       char *virtpath = g_build_filename(vpath, cur->name, NULL);
  385       cur->sub = g_ptr_array_new_with_free_func(fl_list_free);
  386       fl_scan_dir(cur, old && old->sub ? fl_list_file_strict(old, cur) : NULL, cpath, virtpath, opts);
  387       g_free(virtpath);
  388       g_free(cpath);
  389       g_free(enc);
  390     }
  391     // Removes this item if it is an empty directory and if !opts->emptydirs.
  392     if(!opts->emptydirs && !cur->isfile && !cur->sub->len) {
  393       fl_list_remove(cur);
  394       i--; // Make sure that the index doesn't change with the next iteration
  395     }
  396   }
  397 }
  398 
  399 
  400 // Must be called in a separate thread.
  401 static void fl_scan_thread(gpointer data, gpointer udata) {
  402   fl_scan_t *args = data;
  403 
  404   int i, len = g_strv_length(args->path);
  405   for(i=0; i<len; i++) {
  406     fl_list_t *cur = fl_list_create("", FALSE);
  407     char *tmp = g_filename_from_utf8(args->path[i], -1, NULL, NULL, NULL);
  408     cur->sub = g_ptr_array_new_with_free_func(fl_list_free);
  409     fl_scan_dir(cur, args->file[i], tmp, args->path[i], args);
  410     g_free(tmp);
  411     args->res[i] = cur;
  412   }
  413 
  414   fl_scan_invalidate(0, TRUE);
  415   g_idle_add_full(G_PRIORITY_HIGH_IDLE, args->donefun, args, NULL);
  416 }
  417 
  418 
  419 
  420 
  421 
  422 
  423 // Hashing files
  424 
  425 
  426 // This struct is passed from the main thread to the hasher and back with modifications.
  427 typedef struct fl_hash_t {
  428   fl_list_t *file; // only accessed from main thread
  429   char *path;        // owned by main thread, read from hash thread
  430   guint64 filesize;  // set by main thread
  431   char root[24];     // set by hash thread
  432   GError *err;       // set by hash thread
  433   time_t lastmod;    // set by hash thread
  434   gint64 id;         // set by hash thread
  435   gdouble time;      // set by hash thread
  436   guint64 lastreset; // last value of fl_hash_reset when starting this thread
  437 } fl_hash_t;
  438 
  439 // Maximum number of levels, including root (level 0).  The ADC docs specify
  440 // that this should be at least 7, and everyone else uses 10. Since keeping 10
  441 // levels of hash data does eat up more space than I'd be willing to spend on
  442 // it, let's use 8.
  443 #if INTERFACE
  444 #define fl_hash_keep_level 8
  445 #endif
  446 /* Space requirements per file for different levels (in bytes, min - max) and
  447  * the block size of a 1GiB file.
  448  *
  449  * 10  6144 - 12288    2 MiB
  450  *  9  3072 -  6144    4 MiB
  451  *  8  1536 -  3072    8 MiB
  452  *  7   768 -  1536   16 MiB
  453  *  6   384 -   768   32 MiB
  454  */
  455 
  456 // there's no need for better granularity than this
  457 #define fl_hash_max_granularity G_GUINT64_CONSTANT(64 * 1024)
  458 
  459 
  460 
  461 // adding/removing items from the files-to-be-hashed queue
  462 // _append() assumes that fl->hastth is false.
  463 #define fl_hash_queue_append(fl) do {\
  464     g_warn_if_fail(!fl->hastth);\
  465     gboolean start = !g_hash_table_size(fl_hash_queue);\
  466     if(start || !g_hash_table_lookup(fl_hash_queue, fl))\
  467       fl_hash_queue_size += fl->size;\
  468     g_hash_table_insert(fl_hash_queue, fl, (void *)1);\
  469     if(start)\
  470       fl_hash_process();\
  471   } while(0)
  472 
  473 
  474 #define fl_hash_queue_del(fl) do {\
  475     if((fl)->isfile && g_hash_table_lookup(fl_hash_queue, fl)) {\
  476       fl_hash_queue_size -= fl->size;\
  477       g_hash_table_remove(fl_hash_queue, fl);\
  478       if((fl) == fl_hash_cur) {\
  479         g_mutex_lock(fl_hash_resetlock);\
  480         fl_hash_reset++;\
  481         g_cond_signal(fl_hash_resetcond);\
  482         g_mutex_unlock(fl_hash_resetlock);\
  483       }\
  484     }\
  485   } while(0)
  486 
  487 
  488 
  489 // Recursively deletes a fl_list structure from the hash queue
  490 static void fl_hash_queue_delrec(fl_list_t *f) {
  491   if(f->isfile)
  492     fl_hash_queue_del(f);
  493   else {
  494     int i;
  495     for(i=0; i<f->sub->len; i++)
  496       fl_hash_queue_delrec(g_ptr_array_index(f->sub, i));
  497   }
  498 }
  499 
  500 
  501 // Checks whether this hashing operation has been cancelled and waits until the
  502 // hash ratecalc object has enough burst to allow us to continue hashing again.
  503 // Returns the allowed burst, or 0 on cancellation.
  504 static int fl_hash_burst(guint64 lastreset) {
  505   int b = 0;
  506   g_mutex_lock(fl_hash_resetlock);
  507   while(fl_hash_reset == lastreset && (b = ratecalc_burst(&fl_hash_rate)) <= 0) {
  508     GTimeVal end;
  509     g_get_current_time(&end);
  510     g_time_val_add(&end, 100*1000); // Wake up every 100ms.
  511     g_cond_timed_wait(fl_hash_resetcond, fl_hash_resetlock, &end);
  512   }
  513   g_mutex_unlock(fl_hash_resetlock);
  514   return b;
  515 }
  516 
  517 
  518 static gboolean fl_hash_done(gpointer dat);
  519 
  520 static void fl_hash_thread(gpointer data, gpointer udata) {
  521   fl_hash_t *args = data;
  522   // static, since only one hash thread is allowed and this saves stack space
  523   tth_ctx_t tth;
  524   char *buf = g_malloc(TTH_BUFSIZE);
  525   char *blocks = NULL;
  526   int f = -1;
  527   char *real = NULL;
  528 
  529   time(&args->lastmod);
  530   GTimer *tm = g_timer_new();
  531 
  532   char *tmp = path_expand(args->path);
  533   if(!tmp) {
  534     g_set_error(&args->err, 1, 0, "Error getting file path: %s", g_strerror(errno));
  535     goto finish;
  536   }
  537   real = g_filename_to_utf8(tmp, -1, NULL, NULL, NULL);
  538   g_free(tmp);
  539   g_return_if_fail(real); // really shouldn't happen, we fetched this from a UTF-8 string after all.
  540 
  541   f = open(args->path, O_RDONLY);
  542   if(f < 0) {
  543     g_set_error(&args->err, 1, 0, "Error reading file: %s", g_strerror(errno));
  544     goto finish;
  545   }
  546 
  547   // Initialize some stuff
  548   guint64 blocksize = tth_blocksize(args->filesize, 1<<(fl_hash_keep_level-1));
  549   blocksize = MAX(blocksize, fl_hash_max_granularity);
  550   int blocks_num = tth_num_blocks(args->filesize, blocksize);
  551   blocks = g_malloc(24*blocks_num);
  552   tth_init(&tth);
  553 
  554   fadv_t adv;
  555   fadv_init(&adv, f, 0, VAR_FFC_HASH);
  556 
  557   int r, nr;
  558   guint64 rd = 0;
  559   int block_cur = 0;
  560   guint64 block_len = 0;
  561 
  562   if((nr = fl_hash_burst(args->lastreset)) <= 0)
  563     goto finish;
  564   while((r = read(f, buf, MIN(nr, TTH_BUFSIZE))) > 0) {
  565     rd += r;
  566     fadv_purge(&adv, r);
  567     // file has been modified. time to back out
  568     if(rd > args->filesize) {
  569       g_set_error_literal(&args->err, 1, 0, "File has been modified.");
  570       goto finish;
  571     }
  572     ratecalc_add(&fl_hash_rate, r);
  573     // and hash
  574     char *b = buf;
  575     while(r > 0) {
  576       int w = MIN(r, blocksize-block_len);
  577       tth_update(&tth, b, w);
  578       block_len += w;
  579       b += w;
  580       r -= w;
  581       if(block_len >= blocksize) {
  582         tth_final(&tth, blocks+(block_cur*24));
  583         tth_init(&tth);
  584         block_cur++;
  585         block_len = 0;
  586       }
  587     }
  588     if((nr = fl_hash_burst(args->lastreset)) <= 0)
  589       goto finish;
  590   }
  591   if(r < 0) {
  592     g_set_error(&args->err, 1, 0, "Error reading file: %s", g_strerror(errno));
  593     goto finish;
  594   }
  595   if(rd != args->filesize) {
  596     g_set_error_literal(&args->err, 1, 0, "File has been modified.");
  597     goto finish;
  598   }
  599   // Calculate last block
  600   if(!args->filesize || block_len) {
  601     tth_final(&tth, blocks+(block_cur*24));
  602     block_cur++;
  603   }
  604   g_return_if_fail(block_cur == blocks_num);
  605   // Calculate root hash
  606   tth_root(blocks, blocks_num, args->root);
  607 
  608   // Add to database
  609   args->id = db_fl_addhash(real, args->filesize, args->lastmod, args->root, blocks, 24*blocks_num);
  610   if(!args->id)
  611     g_set_error_literal(&args->err, 1, 0, "Error saving hash data to the database.");
  612 
  613 finish:
  614   if(f > 0) {
  615     fadv_close(&adv);
  616     close(f);
  617   }
  618   g_free(buf);
  619   g_free(real);
  620   g_free(blocks);
  621   args->time = g_timer_elapsed(tm, NULL);
  622   g_timer_destroy(tm);
  623   g_idle_add_full(G_PRIORITY_HIGH_IDLE, fl_hash_done, args, NULL);
  624 }
  625 
  626 
  627 static void fl_hash_process() {
  628   if(!g_hash_table_size(fl_hash_queue)) {
  629     ratecalc_unregister(&fl_hash_rate);
  630     ratecalc_reset(&fl_hash_rate);
  631     var_set_bool(0, VAR_fl_done, TRUE);
  632     return;
  633   }
  634   var_set_bool(0, VAR_fl_done, FALSE);
  635   ratecalc_register(&fl_hash_rate, RCC_HASH);
  636 
  637   // get one item from fl_hash_queue
  638   GHashTableIter iter;
  639   fl_list_t *file;
  640   g_hash_table_iter_init(&iter, fl_hash_queue);
  641   g_hash_table_iter_next(&iter, (gpointer *)&file, NULL);
  642   fl_hash_cur = file;
  643 
  644   // pass stuff to the hash thread
  645   fl_hash_t *args = g_new0(fl_hash_t, 1);
  646   args->file = file;
  647   char *tmp = fl_local_path(file);
  648   args->path = g_filename_from_utf8(tmp, -1, NULL, NULL, NULL);
  649   g_free(tmp);
  650   args->filesize = file->size;
  651   args->lastreset = fl_hash_reset;
  652   g_message("Start hashing %s", args->path);
  653   g_thread_pool_push(fl_hash_pool, args, NULL);
  654 }
  655 
  656 
  657 static gboolean fl_hash_done(gpointer dat) {
  658   fl_hash_t *args = dat;
  659   fl_list_t *fl = args->file;
  660 
  661   // remove file from queue, ignore this hash if the file was already removed
  662   // by some other process.
  663   if(!g_hash_table_remove(fl_hash_queue, fl))
  664     goto fl_hash_done_f;
  665 
  666   fl_hash_queue_size -= fl->size;
  667 
  668   if(args->err) {
  669     ui_mf(uit_main_tab, UIP_MED, "Error hashing \"%s\": %s", args->path, args->err->message);
  670     goto fl_hash_done_f;
  671   }
  672   g_message("Completed hashing %s in %.2fs", args->path, args->time);
  673 
  674   // update file and hash info
  675   memcpy(fl->tth, args->root, 24);
  676   fl->hastth = 1;
  677   fl_list_getlocal(fl).lastmod = args->lastmod;
  678   fl_list_getlocal(fl).id = args->id;
  679   fl_hashindex_insert(fl);
  680   fl_needflush = TRUE;
  681 
  682 fl_hash_done_f:
  683   if(args->err)
  684     g_error_free(args->err);
  685   g_free(args->path);
  686   g_free(args);
  687   // Hash next file in the queue
  688   fl_hash_process();
  689   return FALSE;
  690 }
  691 
  692 
  693 
  694 
  695 
  696 // Refresh filelist & (un)share directories
  697 
  698 
  699 // get or create a root directory
  700 static fl_list_t *fl_refresh_getroot(const char *name) {
  701   // no root? create!
  702   if(!fl_local_list) {
  703     fl_local_list = fl_list_create("", FALSE);
  704     fl_local_list->sub = g_ptr_array_new_with_free_func(fl_list_free);
  705   }
  706   fl_list_t *cur = fl_list_file(fl_local_list, name);
  707   // dir not present yet? create a stub
  708   if(!cur) {
  709     cur = fl_list_create(name, FALSE);
  710     cur->sub = g_ptr_array_new_with_free_func(fl_list_free);
  711     fl_list_add(fl_local_list, cur, -1);
  712     fl_list_sort(fl_local_list);
  713   }
  714   return cur;
  715 }
  716 
  717 
  718 // Recursively adds the files to either the hash index or the hash queue.
  719 static void fl_refresh_addhash(fl_list_t *cur) {
  720   if(cur->isfile) {
  721     if(cur->hastth)
  722       fl_hashindex_insert(cur);
  723     else
  724       fl_hash_queue_append(cur);
  725   } else {
  726     int i;
  727     for(i=0; i<cur->sub->len; i++)
  728       fl_refresh_addhash(g_ptr_array_index(cur->sub, i));
  729   }
  730 }
  731 
  732 
  733 // Recursively removes the files from the hash index. Unlike _addhash(), this
  734 // doesn't touch the hash queue. The files should have been removed from the
  735 // hash queue before doing the refresh.
  736 static void fl_refresh_delhash(fl_list_t *cur) {
  737   if(cur->isfile && cur->hastth)
  738     fl_hashindex_del(cur);
  739   else if(!cur->isfile) {
  740     int i;
  741     for(i=0; i<cur->sub->len; i++)
  742       fl_refresh_delhash(g_ptr_array_index(cur->sub, i));
  743   }
  744 }
  745 
  746 
  747 static void fl_refresh_compare(fl_list_t *old, fl_list_t *new) {
  748   int oldi = 0;
  749   int newi = 0;
  750   while(oldi < old->sub->len || newi < new->sub->len) {
  751     fl_list_t *oldl = oldi >= old->sub->len ? NULL : g_ptr_array_index(old->sub, oldi);
  752     fl_list_t *newl = newi >= new->sub->len ? NULL : g_ptr_array_index(new->sub, newi);
  753     // Don't use fl_list_cmp() here, since that one doesn't return 0
  754     int cmp = !oldl ? 1 : !newl ? -1 : fl_list_cmp_strict(oldl, newl);
  755     gboolean check = FALSE, remove = FALSE, insert = FALSE;
  756 
  757     // special case #1: old == new, but one is a directory and the other is a file
  758     // special case #2: old == new, but the file names have different case
  759     // In both situations we just delete our information and overwrite it with new.
  760     if(cmp == 0 && (!!oldl->isfile != !!newl->isfile || strcmp(oldl->name, newl->name) != 0))
  761       remove = insert = TRUE;
  762     else if(cmp == 0) // old == new, just check
  763       check = TRUE;
  764     else if(cmp > 0)  // old > new: insert new
  765       insert = TRUE;
  766     else              // old < new: remove
  767       remove = TRUE;
  768 
  769     // check
  770     if(check) {
  771       // File, update information
  772       if(oldl->isfile) {
  773         // Remove old file from the hash index if it was in there
  774         if(oldl->hastth)
  775           fl_hashindex_del(oldl);
  776         // Update old with info from new
  777         oldl->hastth = newl->hastth;
  778         oldl->size = newl->size; // TODO: Size of parent dir isn't updated
  779         memcpy(oldl->tth, newl->tth, 24);
  780         fl_list_getlocal(oldl).id = fl_list_getlocal(newl).id;
  781         fl_list_getlocal(oldl).lastmod = fl_list_getlocal(newl).lastmod;
  782         // Add updated file to either the hash queue or index
  783         fl_refresh_addhash(oldl);
  784       // Directory, recurse into it
  785       } else
  786         fl_refresh_compare(oldl, newl);
  787       oldi++;
  788       newi++;
  789     }
  790 
  791     // remove
  792     if(remove) {
  793       fl_refresh_delhash(oldl);
  794       fl_list_remove(oldl);
  795       // don't modify oldi, after deletion it will automatically point to the next item in the list
  796     }
  797 
  798     // insert
  799     if(insert) {
  800       fl_list_t *tmp = fl_list_copy(newl);
  801       fl_list_add(old, tmp, oldi);
  802       fl_refresh_addhash(tmp);
  803       oldi++; // after fl_list_add(), oldi points to the new item. But we don't have to check that one again, so increase.
  804       newi++;
  805     }
  806   }
  807 }
  808 
  809 /* A visual explanation of the above algorithm:
  810  * old new
  811  *  a  a  same (new == old; new++, old++)
  812  *  b  b  same
  813  *  d  c  insert c (!old || new < old; new++, old stays)
  814  *  d  d  same
  815  *  e  f  delete e (!new || new > old; new stays, old++)
  816  *  f  f  same
  817  *
  818  * More advanced example:
  819  *  a  c  delete a
  820  *  b  c  delete b
  821  *  e  c  insert c
  822  *  e  d  insert d
  823  *  e  e  same
  824  *  f  -  delete f
  825  */
  826 
  827 static gboolean fl_refresh_scanned(gpointer dat);
  828 
  829 static void fl_refresh_process() {
  830   if(!fl_refresh_queue->head)
  831     return;
  832 
  833   // construct the list of to-be-scanned directories
  834   fl_list_t *dir = fl_refresh_queue->head->data;
  835   fl_scan_t *args = g_slice_new0(fl_scan_t);
  836   args->donefun = fl_refresh_scanned;
  837   args->emptydirs = var_get_bool(0, VAR_share_emptydirs);
  838   args->inc_hidden = var_get_bool(0, VAR_share_hidden);
  839   args->symlink = var_get_bool(0, VAR_share_symlinks);
  840 
  841   char *excl = var_get(0, VAR_share_exclude);
  842   if(excl)
  843     args->excl_regex = g_regex_new(excl, G_REGEX_OPTIMIZE, 0, NULL);
  844 
  845   // Don't allow files in the scanned directory to be hashed while refreshing.
  846   // Since the refresh thread will create a completely new fl_list structure,
  847   // any changes to the old one will be lost.
  848   fl_hash_queue_delrec(dir);
  849 
  850   // one dir, the simple case
  851   if(dir != fl_local_list) {
  852     args->file = g_new0(fl_list_t *, 2);
  853     args->res = g_new0(fl_list_t *, 2);
  854     args->path = g_new0(char *, 2);
  855     args->file[0] = dir;
  856     args->path[0] = fl_local_path(dir);
  857 
  858   // refresh the entire share, which consists of multiple dirs.
  859   } else {
  860     time(&fl_refresh_last);
  861     db_share_item_t *l;
  862     int i, len = 0;
  863     for(l=db_share_list(); l->name; l++)
  864       len++;
  865     args->file = g_new0(fl_list_t *, len+1);
  866     args->res  = g_new0(fl_list_t *, len+1);
  867     args->path = g_new0(char *, len+1);
  868     for(i=0,l=db_share_list(); l->name; i++,l++) {
  869       args->file[i] = fl_refresh_getroot(l->name);
  870       args->path[i] = g_strdup(l->path);
  871     }
  872   }
  873 
  874   // scan the requested directories in the background
  875   g_thread_pool_push(fl_scan_pool, args, NULL);
  876 }
  877 
  878 
  879 static gboolean fl_refresh_scanned(gpointer dat) {
  880   fl_scan_t *args = dat;
  881 
  882   int i, len = g_strv_length(args->path);
  883   for(i=0; i<len; i++) {
  884     fl_refresh_compare(args->file[i], args->res[i]);
  885     fl_list_free(args->res[i]);
  886   }
  887 
  888   // If the hash queue is empty after calling fl_refresh_compare() then it
  889   // means the file list is completely hashed.
  890   if(!g_hash_table_size(fl_hash_queue))
  891     var_set_bool(0, VAR_fl_done, TRUE);
  892 
  893   fl_needflush = TRUE;
  894   g_strfreev(args->path);
  895   if(args->excl_regex)
  896     g_regex_unref(args->excl_regex);
  897   g_free(args->file);
  898   g_free(args->res);
  899   g_slice_free(fl_scan_t, args);
  900 
  901   g_queue_pop_head(fl_refresh_queue);
  902   if(fl_refresh_queue->head)
  903     fl_refresh_process();
  904   else // force a flush when all queued refreshes have been processed
  905     fl_flush(NULL);
  906   return FALSE;
  907 }
  908 
  909 
  910 void fl_refresh(fl_list_t *dir) {
  911   if(!dir)
  912     dir = fl_local_list;
  913   GList *n;
  914   for(n=fl_refresh_queue->head; n; n=n->next) {
  915     fl_list_t *c = n->data;
  916     // if current dir is part of listed dir then it's already queued
  917     if(dir == c || fl_list_is_child(c, dir))
  918       return;
  919     // if listed dir is part of current dir, then we can remove that item (provided it's not being refreshed currently)
  920     if(n->prev && (dir == fl_local_list || fl_list_is_child(c, dir))) {
  921       n = n->prev;
  922       g_queue_delete_link(fl_refresh_queue, n->next);
  923     }
  924   }
  925 
  926   // add to queue and process
  927   g_queue_push_tail(fl_refresh_queue, dir);
  928   if(fl_refresh_queue->head == fl_refresh_queue->tail)
  929     fl_refresh_process();
  930 }
  931 
  932 
  933 // Adds a directory to the file list and initiates a refresh on it (Assumes the
  934 // directory has already been added to the config file).
  935 void fl_share(const char *dir) {
  936   fl_refresh(fl_refresh_getroot(dir));
  937 }
  938 
  939 
  940 // Only affects the filelist and hash data, does not modify the config file.
  941 // This function is far more efficient than removing the dir from the config
  942 // and doing a /refresh. (Which may also result in some errors being displayed
  943 // when a currently-being-hashed file is removed due to the directory not being
  944 // present in the config file anymore).
  945 void fl_unshare(const char *dir) {
  946   if(dir) {
  947     fl_list_t *fl = fl_list_file(fl_local_list, dir);
  948     g_return_if_fail(fl);
  949     fl_hash_queue_delrec(fl);
  950     fl_refresh_delhash(fl);
  951     fl_list_remove(fl);
  952   } else if(fl_local_list) {
  953     fl_hash_queue_delrec(fl_local_list);
  954     fl_refresh_delhash(fl_local_list);
  955     fl_list_free(fl_local_list);
  956     fl_local_list = fl_list_create("", FALSE);
  957     fl_local_list->sub = g_ptr_array_new_with_free_func(fl_list_free);
  958   }
  959   // force a refresh, people may be in a hurry with removing stuff
  960   fl_needflush = TRUE;
  961   fl_flush(NULL);
  962 }
  963 
  964 
  965 
  966 
  967 
  968 
  969 // Initialize local filelist
  970 
  971 
  972 // Walks through the file list and inserts everything into the fl_hashindex.
  973 static void fl_init_list(fl_list_t *fl) {
  974   int i;
  975   for(i=0; i<fl->sub->len; i++) {
  976     fl_list_t *c = g_ptr_array_index(fl->sub, i);
  977     if(c->isfile && c->hastth)
  978       fl_hashindex_insert(c);
  979     else if(!c->isfile)
  980       fl_init_list(c);
  981   }
  982 }
  983 
  984 
  985 static gboolean fl_init_autorefresh(gpointer dat) {
  986   int r = var_get_int(0, VAR_autorefresh);
  987   time_t t = time(NULL);
  988   if(r && fl_refresh_last+r < t && !fl_is_refreshing() && fl_hash_queue_size == 0)
  989     fl_refresh(NULL);
  990   return TRUE;
  991 }
  992 
  993 
  994 void fl_init() {
  995   GError *err = NULL;
  996   gboolean dorefresh = FALSE;
  997 
  998   // init stuff
  999   fl_local_list = NULL;
 1000   fl_local_list_file = g_build_filename(db_dir, "files.xml.bz2", NULL);
 1001   fl_refresh_queue = g_queue_new();
 1002   fl_scan_pool = g_thread_pool_new(fl_scan_thread, NULL, 1, FALSE, NULL);
 1003   fl_hash_pool = g_thread_pool_new(fl_hash_thread, NULL, 1, FALSE, NULL);
 1004   fl_hash_resetlock = g_mutex_new();
 1005   fl_hash_resetcond = g_cond_new();
 1006   fl_hash_queue = g_hash_table_new(g_direct_hash, g_direct_equal);
 1007   // Even though the keys are the tth roots, we can just use g_int_hash. The
 1008   // first four bytes provide enough unique data anyway.
 1009   fl_hash_index = g_hash_table_new(g_int_hash, tiger_hash_equal);
 1010   ratecalc_init(&fl_hash_rate);
 1011 
 1012   // flush unsaved data to disk every 60 seconds
 1013   g_timeout_add_seconds_full(G_PRIORITY_LOW, 60, fl_flush, NULL, NULL);
 1014   // Check every 60 seconds whether we need to refresh. This automatically
 1015   // adapts itself to changes to the autorefresh config variable. Unlike using
 1016   // the configured interval as a timeout, in which case we need to manually
 1017   // adjust the timer on every change.
 1018   g_timeout_add_seconds_full(G_PRIORITY_LOW, 60, fl_init_autorefresh, NULL, NULL);
 1019 
 1020   // The following code may take a while on large shares, so indicate to the UI
 1021   // that we're busy.
 1022   ui_m(NULL, UIM_NOLOG|UIM_DIRECT, "Loading file list...");
 1023   ui_draw();
 1024 
 1025   // check whether something is shared
 1026   gboolean sharing = db_share_list()->name ? TRUE : FALSE;
 1027 
 1028   // load our files.xml.bz2
 1029   fl_local_list = sharing ? fl_load(fl_local_list_file, &err, TRUE) : NULL;
 1030   if(sharing && !fl_local_list) {
 1031     ui_mf(uit_main_tab, UIP_MED, "Error loading local filelist: %s. Re-building list.", err->message);
 1032     g_error_free(err);
 1033     dorefresh = TRUE;
 1034   } else if(!sharing)
 1035     // Force a refresh when we're not sharing anything. This makes sure that we
 1036     // at least have a files.xml.bz2
 1037     dorefresh = TRUE;
 1038   ui_m(NULL, UIM_NOLOG, NULL);
 1039 
 1040   // Always make sure we at least have an fl_local_list
 1041   if(!fl_local_list) {
 1042     fl_local_list = fl_list_create("", FALSE);
 1043     fl_local_list->sub = g_ptr_array_new_with_free_func(fl_list_free);
 1044   }
 1045 
 1046   // If ncdc was previously closed while hashing, make sure to force a refresh
 1047   // this time to continue the hash progress.
 1048   if(sharing && !var_get_bool(0, VAR_fl_done)) {
 1049     dorefresh = TRUE;
 1050     ui_m(uit_main_tab, UIM_NOTIFY, "File list incomplete, refreshing...");
 1051   }
 1052 
 1053   // Initialize the fl_hash_index
 1054   if(fl_local_list)
 1055     fl_init_list(fl_local_list);
 1056 
 1057   // reset loading indicator
 1058   if(!fl_local_list || !dorefresh)
 1059     ui_m(NULL, UIM_NOLOG|UIM_DIRECT, NULL);
 1060 
 1061   if(dorefresh || var_get_int(0, VAR_autorefresh))
 1062     fl_refresh(NULL);
 1063 }
 1064 
 1065 
 1066 
 1067 
 1068 // Garbage-collect. This will remove unused entries from the hashfiles table.
 1069 // The algorithm works as follows:
 1070 // - Create array of `active' ids.
 1071 //   (`active' = there is an fl_list entry in memory with that id)
 1072 // - Sort the array.
 1073 // - Walk through all ids from the hashfiles table in ascending order
 1074 //   - If an id is present in the hashfiles table but not in the sorted array
 1075 //     created earlier, add the id to a delete-these-ids array.
 1076 // - Delete all rows indicated with the delete-these-ids array
 1077 // This algorithm is intended to have low memory requirements, while still
 1078 // being quite fast and efficient.
 1079 
 1080 static GArray *fl_gc_active = NULL;
 1081 static int fl_gc_last = 0;
 1082 static GArray *fl_gc_remove = NULL;
 1083 
 1084 
 1085 static gint fl_gc_idcmp(gconstpointer a, gconstpointer b) {
 1086   const gint64 *na = a;
 1087   const gint64 *nb = b;
 1088   return *na > *nb ? 1 : *na == *nb ? 0 : -1;
 1089 }
 1090 
 1091 
 1092 // Called from db_fl_getids() with a new id. It is assumed that this id is
 1093 // larger than or equal to the given id in any previous calls.
 1094 static void fl_gc_id(gint64 id) {
 1095   while(fl_gc_last < fl_gc_active->len && g_array_index(fl_gc_active, gint64, fl_gc_last) < id)
 1096     fl_gc_last++;
 1097   if(fl_gc_last >= fl_gc_active->len || g_array_index(fl_gc_active, gint64, fl_gc_last) > id)
 1098     g_array_append_val(fl_gc_remove, id);
 1099 }
 1100 
 1101 
 1102 // Returns TRUE when it has done garbage collection, FALSE if it's not possible
 1103 // to create a list of `active' ids because no full file refresh has been
 1104 // performed yet.
 1105 gboolean fl_gc() {
 1106   if(!fl_refresh_last || fl_refresh_queue->head)
 1107     return FALSE;
 1108 
 1109   // Init data
 1110   fl_gc_active = g_array_sized_new(FALSE, FALSE, 8, g_hash_table_size(fl_hash_index));
 1111   fl_gc_remove = g_array_new(FALSE, FALSE, 8);
 1112   fl_gc_last = 0;
 1113 
 1114   // Fill fl_active array.  It is possible that two identical ids are added to
 1115   // the array, but this isn't a problem.
 1116   GSList *l;
 1117   GHashTableIter iter;
 1118   g_hash_table_iter_init(&iter, fl_hash_index);
 1119   while(g_hash_table_iter_next(&iter, NULL, (gpointer *)&l))
 1120     for(; l; l=l->next)
 1121       g_array_append_val(fl_gc_active, fl_list_getlocal((fl_list_t *)l->data).id);
 1122   g_array_sort(fl_gc_active, fl_gc_idcmp);
 1123 
 1124   // walk through hashfiles table and fill fl_gc_remove
 1125   db_fl_getids(fl_gc_id);
 1126 
 1127   // Delete rows and clean up
 1128   g_debug("fl-gc: Removing %d entries from hashrows.", fl_gc_remove->len);
 1129   db_fl_rmfiles((gint64 *)fl_gc_remove->data, fl_gc_remove->len);
 1130   g_array_unref(fl_gc_active);
 1131   g_array_unref(fl_gc_remove);
 1132   return TRUE;
 1133 }