"Fossies" - the Fresh Open Source Software Archive

Member "glusterfs-8.2/xlators/performance/io-cache/src/io-cache.c" (16 Sep 2020, 58592 Bytes) of package /linux/misc/glusterfs-8.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "io-cache.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2   Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
    3   This file is part of GlusterFS.
    4 
    5   This file is licensed to you under your choice of the GNU Lesser
    6   General Public License, version 3 or any later version (LGPLv3 or
    7   later), or the GNU General Public License, version 2 (GPLv2), in all
    8   cases as published by the Free Software Foundation.
    9 */
   10 
   11 #include <math.h>
   12 #include <glusterfs/glusterfs.h>
   13 #include <glusterfs/logging.h>
   14 #include <glusterfs/dict.h>
   15 #include <glusterfs/xlator.h>
   16 #include "io-cache.h"
   17 #include "ioc-mem-types.h"
   18 #include <glusterfs/statedump.h>
   19 #include <assert.h>
   20 #include <sys/time.h>
   21 #include "io-cache-messages.h"
   22 int ioc_log2_page_size;
   23 
   24 uint32_t
   25 ioc_get_priority(ioc_table_t *table, const char *path);
   26 
   27 struct volume_options options[];
   28 
   29 static uint32_t
   30 ioc_hashfn(void *data, int len)
   31 {
   32     off_t offset;
   33 
   34     offset = *(off_t *)data;
   35 
   36     return (offset >> ioc_log2_page_size);
   37 }
   38 
   39 /* TODO: This function is not used, uncomment when we find a
   40          usage for this function.
   41 
   42 static ioc_inode_t *
   43 ioc_inode_reupdate (ioc_inode_t *ioc_inode)
   44 {
   45         ioc_table_t *table = NULL;
   46 
   47         table = ioc_inode->table;
   48 
   49         list_add_tail (&ioc_inode->inode_lru,
   50                        &table->inode_lru[ioc_inode->weight]);
   51 
   52         return ioc_inode;
   53 }
   54 
   55 
   56 static ioc_inode_t *
   57 ioc_get_inode (dict_t *dict, char *name)
   58 {
   59         ioc_inode_t *ioc_inode      = NULL;
   60         data_t      *ioc_inode_data = NULL;
   61         ioc_table_t *table          = NULL;
   62 
   63         ioc_inode_data = dict_get (dict, name);
   64         if (ioc_inode_data) {
   65                 ioc_inode = data_to_ptr (ioc_inode_data);
   66                 table = ioc_inode->table;
   67 
   68                 ioc_table_lock (table);
   69                 {
   70                         if (list_empty (&ioc_inode->inode_lru)) {
   71                                 ioc_inode = ioc_inode_reupdate (ioc_inode);
   72                         }
   73                 }
   74                 ioc_table_unlock (table);
   75         }
   76 
   77         return ioc_inode;
   78 }
   79 */
   80 
   81 int
   82 ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
   83                  struct iovec *vector, int32_t count, int op_ret, off_t offset)
   84 {
   85     size_t size = 0;
   86     off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,
   87           write_offset = 0;
   88     off_t page_offset = 0, page_end = 0;
   89     ioc_page_t *trav = NULL;
   90 
   91     size = iov_length(vector, count);
   92     size = min(size, op_ret);
   93 
   94     rounded_offset = gf_floor(offset, ioc_inode->table->page_size);
   95     rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);
   96 
   97     trav_offset = rounded_offset;
   98     ioc_inode_lock(ioc_inode);
   99     {
  100         while (trav_offset < rounded_end) {
  101             trav = __ioc_page_get(ioc_inode, trav_offset);
  102             if (trav && trav->ready) {
  103                 if (trav_offset == rounded_offset)
  104                     page_offset = offset - rounded_offset;
  105                 else
  106                     page_offset = 0;
  107 
  108                 if ((trav_offset + ioc_inode->table->page_size) >=
  109                     rounded_end) {
  110                     page_end = trav->size - (rounded_end - (offset + size));
  111                 } else {
  112                     page_end = trav->size;
  113                 }
  114 
  115                 iov_range_copy(trav->vector, trav->count, page_offset, vector,
  116                                count, write_offset, page_end - page_offset);
  117             } else if (trav) {
  118                 if (!trav->waitq)
  119                     ioc_inode->table->cache_used -= __ioc_page_destroy(trav);
  120             }
  121 
  122             if (trav_offset == rounded_offset)
  123                 write_offset += (ioc_inode->table->page_size -
  124                                  (offset - rounded_offset));
  125             else
  126                 write_offset += ioc_inode->table->page_size;
  127 
  128             trav_offset += ioc_inode->table->page_size;
  129         }
  130     }
  131     ioc_inode_unlock(ioc_inode);
  132 
  133     return 0;
  134 }
  135 
  136 int32_t
  137 ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
  138 {
  139     int8_t need_revalidate = 0;
  140     struct timeval tv = {
  141         0,
  142     };
  143     ioc_table_t *table = NULL;
  144 
  145     table = ioc_inode->table;
  146 
  147     gettimeofday(&tv, NULL);
  148 
  149     if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
  150         need_revalidate = 1;
  151 
  152     return need_revalidate;
  153 }
  154 
  155 /*
  156  * __ioc_inode_flush - flush all the cached pages of the given inode
  157  *
  158  * @ioc_inode:
  159  *
  160  * assumes lock is held
  161  */
  162 int64_t
  163 __ioc_inode_flush(ioc_inode_t *ioc_inode)
  164 {
  165     ioc_page_t *curr = NULL, *next = NULL;
  166     int64_t destroy_size = 0;
  167     int64_t ret = 0;
  168 
  169     list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru)
  170     {
  171         ret = __ioc_page_destroy(curr);
  172 
  173         if (ret != -1)
  174             destroy_size += ret;
  175     }
  176 
  177     return destroy_size;
  178 }
  179 
  180 void
  181 ioc_inode_flush(ioc_inode_t *ioc_inode)
  182 {
  183     int64_t destroy_size = 0;
  184 
  185     ioc_inode_lock(ioc_inode);
  186     {
  187         destroy_size = __ioc_inode_flush(ioc_inode);
  188     }
  189     ioc_inode_unlock(ioc_inode);
  190 
  191     if (destroy_size) {
  192         ioc_table_lock(ioc_inode->table);
  193         {
  194             ioc_inode->table->cache_used -= destroy_size;
  195         }
  196         ioc_table_unlock(ioc_inode->table);
  197     }
  198 
  199     return;
  200 }
  201 
  202 int32_t
  203 ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  204                 int32_t op_ret, int32_t op_errno, struct iatt *preop,
  205                 struct iatt *postop, dict_t *xdata)
  206 {
  207     STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);
  208     return 0;
  209 }
  210 
  211 int32_t
  212 ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
  213             int32_t valid, dict_t *xdata)
  214 {
  215     uint64_t ioc_inode = 0;
  216 
  217     inode_ctx_get(loc->inode, this, &ioc_inode);
  218 
  219     if (ioc_inode &&
  220         ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME)))
  221         ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
  222 
  223     STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this),
  224                FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
  225 
  226     return 0;
  227 }
  228 
  229 int32_t
  230 ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf)
  231 {
  232     ioc_table_t *table = NULL;
  233     uint64_t tmp_ioc_inode = 0;
  234     ioc_inode_t *ioc_inode = NULL;
  235     uint32_t weight = 0xffffffff;
  236     gf_boolean_t cache_still_valid = _gf_false;
  237 
  238     if (!this || !inode)
  239         goto out;
  240 
  241     table = this->private;
  242 
  243     LOCK(&inode->lock);
  244     {
  245         (void)__inode_ctx_get(inode, this, &tmp_ioc_inode);
  246         ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
  247 
  248         if (!ioc_inode) {
  249             weight = ioc_get_priority(table, path);
  250 
  251             ioc_inode = ioc_inode_create(table, inode, weight);
  252 
  253             (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
  254         }
  255     }
  256     UNLOCK(&inode->lock);
  257 
  258     ioc_inode_lock(ioc_inode);
  259     {
  260         if (ioc_inode->cache.mtime == 0) {
  261             ioc_inode->cache.mtime = iabuf->ia_mtime;
  262             ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec;
  263         }
  264 
  265         ioc_inode->ia_size = iabuf->ia_size;
  266     }
  267     ioc_inode_unlock(ioc_inode);
  268 
  269     cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf);
  270 
  271     if (!cache_still_valid) {
  272         ioc_inode_flush(ioc_inode);
  273     }
  274 
  275     ioc_table_lock(ioc_inode->table);
  276     {
  277         list_move_tail(&ioc_inode->inode_lru,
  278                        &table->inode_lru[ioc_inode->weight]);
  279     }
  280     ioc_table_unlock(ioc_inode->table);
  281 
  282 out:
  283     return 0;
  284 }
  285 
  286 int32_t
  287 ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  288                int32_t op_ret, int32_t op_errno, inode_t *inode,
  289                struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
  290 {
  291     ioc_local_t *local = NULL;
  292 
  293     if (op_ret != 0)
  294         goto out;
  295 
  296     local = frame->local;
  297     if (local == NULL) {
  298         op_ret = -1;
  299         op_errno = EINVAL;
  300         goto out;
  301     }
  302 
  303     if (!this || !this->private) {
  304         op_ret = -1;
  305         op_errno = EINVAL;
  306         goto out;
  307     }
  308 
  309     ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);
  310 
  311 out:
  312     if (frame->local != NULL) {
  313         local = frame->local;
  314         loc_wipe(&local->file_loc);
  315     }
  316 
  317     STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
  318                         postparent);
  319     return 0;
  320 }
  321 
  322 int32_t
  323 ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
  324 {
  325     ioc_local_t *local = NULL;
  326     int32_t op_errno = -1, ret = -1;
  327 
  328     local = mem_get0(this->local_pool);
  329     if (local == NULL) {
  330         op_errno = ENOMEM;
  331         gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
  332         goto unwind;
  333     }
  334 
  335     ret = loc_copy(&local->file_loc, loc);
  336     if (ret != 0) {
  337         op_errno = ENOMEM;
  338         gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
  339         goto unwind;
  340     }
  341 
  342     frame->local = local;
  343 
  344     STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),
  345                FIRST_CHILD(this)->fops->lookup, loc, xdata);
  346 
  347     return 0;
  348 
  349 unwind:
  350     if (local != NULL) {
  351         loc_wipe(&local->file_loc);
  352         mem_put(local);
  353     }
  354 
  355     STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
  356 
  357     return 0;
  358 }
  359 
  360 /*
  361  * ioc_forget -
  362  *
  363  * @frame:
  364  * @this:
  365  * @inode:
  366  *
  367  */
  368 int32_t
  369 ioc_forget(xlator_t *this, inode_t *inode)
  370 {
  371     uint64_t ioc_inode = 0;
  372 
  373     inode_ctx_get(inode, this, &ioc_inode);
  374 
  375     if (ioc_inode)
  376         ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);
  377 
  378     return 0;
  379 }
  380 
  381 static int32_t
  382 ioc_invalidate(xlator_t *this, inode_t *inode)
  383 {
  384     uint64_t ioc_inode = 0;
  385 
  386     inode_ctx_get(inode, this, &ioc_inode);
  387 
  388     if (ioc_inode)
  389         ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);
  390 
  391     return 0;
  392 }
  393 
  394 /*
  395  * ioc_cache_validate_cbk -
  396  *
  397  * @frame:
  398  * @cookie:
  399  * @this:
  400  * @op_ret:
  401  * @op_errno:
  402  * @buf
  403  *
  404  */
  405 int32_t
  406 ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  407                        int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
  408                        dict_t *xdata)
  409 {
  410     ioc_local_t *local = NULL;
  411     ioc_inode_t *ioc_inode = NULL;
  412     size_t destroy_size = 0;
  413     struct iatt *local_stbuf = NULL;
  414     struct timeval tv = {
  415         0,
  416     };
  417 
  418     local = frame->local;
  419     ioc_inode = local->inode;
  420     local_stbuf = stbuf;
  421 
  422     if ((op_ret == -1) ||
  423         ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
  424         gf_msg_debug(ioc_inode->table->xl->name, 0,
  425                      "cache for inode(%p) is invalid. flushing all pages",
  426                      ioc_inode);
  427         /* NOTE: only pages with no waiting frames are flushed by
  428          * ioc_inode_flush. page_fault will be generated for all
  429          * the pages which have waiting frames by ioc_inode_wakeup()
  430          */
  431         ioc_inode_lock(ioc_inode);
  432         {
  433             destroy_size = __ioc_inode_flush(ioc_inode);
  434             if (op_ret >= 0) {
  435                 ioc_inode->cache.mtime = stbuf->ia_mtime;
  436                 ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
  437             }
  438         }
  439         ioc_inode_unlock(ioc_inode);
  440         local_stbuf = NULL;
  441     }
  442 
  443     if (destroy_size) {
  444         ioc_table_lock(ioc_inode->table);
  445         {
  446             ioc_inode->table->cache_used -= destroy_size;
  447         }
  448         ioc_table_unlock(ioc_inode->table);
  449     }
  450 
  451     if (op_ret < 0)
  452         local_stbuf = NULL;
  453 
  454     gettimeofday(&tv, NULL);
  455     ioc_inode_lock(ioc_inode);
  456     {
  457         memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
  458     }
  459     ioc_inode_unlock(ioc_inode);
  460 
  461     ioc_inode_wakeup(frame, ioc_inode, local_stbuf);
  462 
  463     /* any page-fault initiated by ioc_inode_wakeup() will have its own
  464      * fd_ref on fd, safe to unref validate frame's private copy
  465      */
  466     fd_unref(local->fd);
  467     dict_unref(local->xattr_req);
  468 
  469     STACK_DESTROY(frame->root);
  470 
  471     return 0;
  472 }
  473 
  474 int32_t
  475 ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)
  476 {
  477     ioc_waitq_t *waiter = NULL, *trav = NULL;
  478     uint32_t page_found = 0;
  479     int32_t ret = 0;
  480 
  481     trav = ioc_inode->waitq;
  482 
  483     while (trav) {
  484         if (trav->data == page) {
  485             page_found = 1;
  486             break;
  487         }
  488         trav = trav->next;
  489     }
  490 
  491     if (!page_found) {
  492         waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);
  493         if (waiter == NULL) {
  494             gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,
  495                     IO_CACHE_MSG_NO_MEMORY, NULL);
  496             ret = -ENOMEM;
  497             goto out;
  498         }
  499 
  500         waiter->data = page;
  501         waiter->next = ioc_inode->waitq;
  502         ioc_inode->waitq = waiter;
  503     }
  504 
  505 out:
  506     return ret;
  507 }
  508 
  509 /*
  510  * ioc_cache_validate -
  511  *
  512  * @frame:
  513  * @ioc_inode:
  514  * @fd:
  515  *
  516  */
  517 int32_t
  518 ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
  519                    ioc_page_t *page)
  520 {
  521     call_frame_t *validate_frame = NULL;
  522     ioc_local_t *validate_local = NULL;
  523     ioc_local_t *local = NULL;
  524     int32_t ret = 0;
  525 
  526     local = frame->local;
  527     validate_local = mem_get0(THIS->local_pool);
  528     if (validate_local == NULL) {
  529         ret = -1;
  530         local->op_ret = -1;
  531         local->op_errno = ENOMEM;
  532         gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
  533                 IO_CACHE_MSG_NO_MEMORY, NULL);
  534         goto out;
  535     }
  536 
  537     validate_frame = copy_frame(frame);
  538     if (validate_frame == NULL) {
  539         ret = -1;
  540         local->op_ret = -1;
  541         local->op_errno = ENOMEM;
  542         mem_put(validate_local);
  543         gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
  544                 IO_CACHE_MSG_NO_MEMORY, NULL);
  545         goto out;
  546     }
  547 
  548     validate_local->fd = fd_ref(fd);
  549     validate_local->inode = ioc_inode;
  550     if (local && local->xattr_req)
  551         validate_local->xattr_req = dict_ref(local->xattr_req);
  552     validate_frame->local = validate_local;
  553 
  554     STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this),
  555                FIRST_CHILD(frame->this)->fops->fstat, fd,
  556                validate_local->xattr_req);
  557 
  558 out:
  559     return ret;
  560 }
  561 
  /*
   * is_match - check a path against a shell wildcard pattern
   *
   * @path: string to test
   * @pattern: fnmatch() pattern; FNM_NOESCAPE is passed to fnmatch()
   *
   * Returns 1 when fnmatch() reports a match, 0 otherwise.
   */
  static uint32_t
  is_match(const char *path, const char *pattern)
  {
      if (fnmatch(pattern, path, FNM_NOESCAPE) == 0)
          return 1;

      return 0;
  }
  571 
  572 uint32_t
  573 ioc_get_priority(ioc_table_t *table, const char *path)
  574 {
  575     uint32_t priority = 1;
  576     struct ioc_priority *curr = NULL;
  577 
  578     if (list_empty(&table->priority_list) || !path)
  579         return priority;
  580 
  581     priority = 0;
  582     list_for_each_entry(curr, &table->priority_list, list)
  583     {
  584         if (is_match(path, curr->pattern))
  585             priority = curr->priority;
  586     }
  587 
  588     return priority;
  589 }
  590 
  591 /*
  592  * ioc_open_cbk - open callback for io cache
  593  *
  594  * @frame: call frame
  595  * @cookie:
  596  * @this:
  597  * @op_ret:
  598  * @op_errno:
  599  * @fd:
  600  *
  601  */
  602 int32_t
  603 ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
  604              int32_t op_errno, fd_t *fd, dict_t *xdata)
  605 {
  606     uint64_t tmp_ioc_inode = 0;
  607     ioc_local_t *local = NULL;
  608     ioc_table_t *table = NULL;
  609     ioc_inode_t *ioc_inode = NULL;
  610 
  611     local = frame->local;
  612     if (!this || !this->private) {
  613         op_ret = -1;
  614         op_errno = EINVAL;
  615         goto out;
  616     }
  617 
  618     table = this->private;
  619 
  620     if (op_ret != -1) {
  621         inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
  622         ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
  623 
  624         // TODO: see why inode context is NULL and handle it.
  625         if (!ioc_inode) {
  626             gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
  627                     IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s",
  628                     uuid_utoa(fd->inode->gfid), NULL);
  629             goto out;
  630         }
  631 
  632         ioc_table_lock(ioc_inode->table);
  633         {
  634             list_move_tail(&ioc_inode->inode_lru,
  635                            &table->inode_lru[ioc_inode->weight]);
  636         }
  637         ioc_table_unlock(ioc_inode->table);
  638 
  639         ioc_inode_lock(ioc_inode);
  640         {
  641             if ((table->min_file_size > ioc_inode->ia_size) ||
  642                 ((table->max_file_size > 0) &&
  643                  (table->max_file_size < ioc_inode->ia_size))) {
  644                 fd_ctx_set(fd, this, 1);
  645             }
  646         }
  647         ioc_inode_unlock(ioc_inode);
  648 
  649         /* If O_DIRECT open, we disable caching on it */
  650         if ((local->flags & O_DIRECT)) {
  651             /* O_DIRECT is only for one fd, not the inode
  652              * as a whole
  653              */
  654             fd_ctx_set(fd, this, 1);
  655         }
  656     }
  657 
  658 out:
  659     mem_put(local);
  660     frame->local = NULL;
  661 
  662     STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
  663 
  664     return 0;
  665 }
  666 
  667 /*
  668  * ioc_create_cbk - create callback for io cache
  669  *
  670  * @frame: call frame
  671  * @cookie:
  672  * @this:
  673  * @op_ret:
  674  * @op_errno:
  675  * @fd:
  676  * @inode:
  677  * @buf:
  678  *
  679  */
  680 int32_t
  681 ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  682                int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
  683                struct iatt *buf, struct iatt *preparent,
  684                struct iatt *postparent, dict_t *xdata)
  685 {
  686     ioc_local_t *local = NULL;
  687     ioc_table_t *table = NULL;
  688     ioc_inode_t *ioc_inode = NULL;
  689     uint32_t weight = 0xffffffff;
  690     const char *path = NULL;
  691     int ret = -1;
  692 
  693     local = frame->local;
  694     if (!this || !this->private) {
  695         op_ret = -1;
  696         op_errno = EINVAL;
  697         goto out;
  698     }
  699 
  700     table = this->private;
  701     path = local->file_loc.path;
  702 
  703     if (op_ret != -1) {
  704         /* assign weight */
  705         weight = ioc_get_priority(table, path);
  706 
  707         ioc_inode = ioc_inode_create(table, inode, weight);
  708 
  709         ioc_inode_lock(ioc_inode);
  710         {
  711             ioc_inode->cache.mtime = buf->ia_mtime;
  712             ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
  713             ioc_inode->ia_size = buf->ia_size;
  714 
  715             if ((table->min_file_size > ioc_inode->ia_size) ||
  716                 ((table->max_file_size > 0) &&
  717                  (table->max_file_size < ioc_inode->ia_size))) {
  718                 ret = fd_ctx_set(fd, this, 1);
  719                 if (ret)
  720                     gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
  721                             IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
  722                             local->file_loc.path, NULL);
  723             }
  724         }
  725         ioc_inode_unlock(ioc_inode);
  726 
  727         inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode);
  728 
  729         /* If O_DIRECT open, we disable caching on it */
  730         if (local->flags & O_DIRECT) {
  731             /*
  732              * O_DIRECT is only for one fd, not the inode
  733              * as a whole */
  734             ret = fd_ctx_set(fd, this, 1);
  735             if (ret)
  736                 gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
  737                         IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
  738                         local->file_loc.path, NULL);
  739         }
  740 
  741         /* if weight == 0, we disable caching on it */
  742         if (!weight) {
  743             /* we allow a pattern-matched cache disable this way */
  744             ret = fd_ctx_set(fd, this, 1);
  745             if (ret)
  746                 gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
  747                         IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
  748                         local->file_loc.path, NULL);
  749         }
  750     }
  751 
  752 out:
  753     frame->local = NULL;
  754     mem_put(local);
  755 
  756     STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
  757                         preparent, postparent, xdata);
  758 
  759     return 0;
  760 }
  761 
  762 int32_t
  763 ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
  764               int32_t op_errno, inode_t *inode, struct iatt *buf,
  765               struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
  766 {
  767     ioc_local_t *local = NULL;
  768     ioc_table_t *table = NULL;
  769     ioc_inode_t *ioc_inode = NULL;
  770     uint32_t weight = 0xffffffff;
  771     const char *path = NULL;
  772 
  773     local = frame->local;
  774     if (!this || !this->private) {
  775         op_ret = -1;
  776         op_errno = EINVAL;
  777         goto out;
  778     }
  779 
  780     table = this->private;
  781     path = local->file_loc.path;
  782 
  783     if (op_ret != -1) {
  784         /* assign weight */
  785         weight = ioc_get_priority(table, path);
  786 
  787         ioc_inode = ioc_inode_create(table, inode, weight);
  788 
  789         ioc_inode_lock(ioc_inode);
  790         {
  791             ioc_inode->cache.mtime = buf->ia_mtime;
  792             ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
  793             ioc_inode->ia_size = buf->ia_size;
  794         }
  795         ioc_inode_unlock(ioc_inode);
  796 
  797         inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
  798     }
  799 
  800 out:
  801     frame->local = NULL;
  802 
  803     loc_wipe(&local->file_loc);
  804     mem_put(local);
  805 
  806     STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
  807                         postparent, xdata);
  808     return 0;
  809 }
  810 
  811 int
  812 ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
  813           dev_t rdev, mode_t umask, dict_t *xdata)
  814 {
  815     ioc_local_t *local = NULL;
  816     int32_t op_errno = -1, ret = -1;
  817 
  818     local = mem_get0(this->local_pool);
  819     if (local == NULL) {
  820         op_errno = ENOMEM;
  821         gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
  822         goto unwind;
  823     }
  824 
  825     ret = loc_copy(&local->file_loc, loc);
  826     if (ret != 0) {
  827         op_errno = ENOMEM;
  828         gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
  829         goto unwind;
  830     }
  831 
  832     frame->local = local;
  833 
  834     STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),
  835                FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
  836     return 0;
  837 
  838 unwind:
  839     if (local != NULL) {
  840         loc_wipe(&local->file_loc);
  841         mem_put(local);
  842     }
  843 
  844     STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
  845                         NULL);
  846 
  847     return 0;
  848 }
  849 
  850 /*
  851  * ioc_open - open fop for io cache
  852  * @frame:
  853  * @this:
  854  * @loc:
  855  * @flags:
  856  *
  857  */
  858 int32_t
  859 ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
  860          fd_t *fd, dict_t *xdata)
  861 {
  862     ioc_local_t *local = NULL;
  863 
  864     local = mem_get0(this->local_pool);
  865     if (local == NULL) {
  866         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
  867         STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
  868         return 0;
  869     }
  870 
  871     local->flags = flags;
  872     local->file_loc.path = loc->path;
  873     local->file_loc.inode = loc->inode;
  874 
  875     frame->local = local;
  876 
  877     STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),
  878                FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
  879 
  880     return 0;
  881 }
  882 
  883 /*
  884  * ioc_create - create fop for io cache
  885  *
  886  * @frame:
  887  * @this:
  888  * @pathname:
  889  * @flags:
  890  * @mode:
  891  *
  892  */
  893 int32_t
  894 ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
  895            mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
  896 {
  897     ioc_local_t *local = NULL;
  898 
  899     local = mem_get0(this->local_pool);
  900     if (local == NULL) {
  901         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
  902         STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
  903                             NULL, NULL);
  904         return 0;
  905     }
  906 
  907     local->flags = flags;
  908     local->file_loc.path = loc->path;
  909     frame->local = local;
  910 
  911     STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),
  912                FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
  913                xdata);
  914 
  915     return 0;
  916 }
  917 
  918 /*
  919  * ioc_release - release fop for io cache
  920  *
  921  * @frame:
  922  * @this:
  923  * @fd:
  924  *
  925  */
  926 int32_t
  927 ioc_release(xlator_t *this, fd_t *fd)
  928 {
  929     return 0;
  930 }
  931 
  932 int32_t
  933 ioc_need_prune(ioc_table_t *table)
  934 {
  935     int64_t cache_difference = 0;
  936 
  937     ioc_table_lock(table);
  938     {
  939         cache_difference = table->cache_used - table->cache_size;
  940     }
  941     ioc_table_unlock(table);
  942 
  943     if (cache_difference > 0)
  944         return 1;
  945     else
  946         return 0;
  947 }
  948 
  949 /*
  950  * ioc_dispatch_requests -
  951  *
  952  * @frame:
  953  * @inode:
  954  *
  955  *
  956  */
  957 void
  958 ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
  959                       off_t offset, size_t size)
  960 {
  961     ioc_local_t *local = NULL;
  962     ioc_table_t *table = NULL;
  963     ioc_page_t *trav = NULL;
  964     ioc_waitq_t *waitq = NULL;
  965     off_t rounded_offset = 0;
  966     off_t rounded_end = 0;
  967     off_t trav_offset = 0;
  968     int32_t fault = 0;
  969     size_t trav_size = 0;
  970     off_t local_offset = 0;
  971     int32_t ret = -1;
  972     int8_t need_validate = 0;
  973     int8_t might_need_validate = 0; /*
  974                                      * if a page exists, do we need
  975                                      * to validate it?
  976                                      */
  977     local = frame->local;
  978     table = ioc_inode->table;
  979 
  980     rounded_offset = gf_floor(offset, table->page_size);
  981     rounded_end = gf_roof(offset + size, table->page_size);
  982     trav_offset = rounded_offset;
  983 
  984     /* once a frame does read, it should be waiting on something */
  985     local->wait_count++;
  986 
  987     /* Requested region can fall in three different pages,
  988      * 1. Ready - region is already in cache, we just have to serve it.
  989      * 2. In-transit - page fault has been generated on this page, we need
  990      *    to wait till the page is ready
  991      * 3. Fault - page is not in cache, we have to generate a page fault
  992      */
  993 
  994     might_need_validate = ioc_inode_need_revalidate(ioc_inode);
  995 
  996     while (trav_offset < rounded_end) {
  997         ioc_inode_lock(ioc_inode);
  998         {
  999             /* look for requested region in the cache */
 1000             trav = __ioc_page_get(ioc_inode, trav_offset);
 1001 
 1002             local_offset = max(trav_offset, offset);
 1003             trav_size = min(((offset + size) - local_offset), table->page_size);
 1004 
 1005             if (!trav) {
 1006                 /* page not in cache, we need to generate page
 1007                  * fault
 1008                  */
 1009                 trav = __ioc_page_create(ioc_inode, trav_offset);
 1010                 fault = 1;
 1011                 if (!trav) {
 1012                     gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
 1013                             IO_CACHE_MSG_NO_MEMORY, NULL);
 1014                     local->op_ret = -1;
 1015                     local->op_errno = ENOMEM;
 1016                     ioc_inode_unlock(ioc_inode);
 1017                     goto out;
 1018                 }
 1019             }
 1020 
 1021             __ioc_wait_on_page(trav, frame, local_offset, trav_size);
 1022 
 1023             if (trav->ready) {
 1024                 /* page found in cache */
 1025                 if (!might_need_validate && !ioc_inode->waitq) {
 1026                     /* fresh enough */
 1027                     gf_msg_trace(frame->this->name, 0,
 1028                                  "cache hit for "
 1029                                  "trav_offset=%" PRId64
 1030                                  "/local_"
 1031                                  "offset=%" PRId64 "",
 1032                                  trav_offset, local_offset);
 1033                     waitq = __ioc_page_wakeup(trav, trav->op_errno);
 1034                 } else {
 1035                     /* if waitq already exists, fstat
 1036                      * revalidate is
 1037                      * already on the way
 1038                      */
 1039                     if (!ioc_inode->waitq) {
 1040                         need_validate = 1;
 1041                     }
 1042 
 1043                     ret = ioc_wait_on_inode(ioc_inode, trav);
 1044                     if (ret < 0) {
 1045                         local->op_ret = -1;
 1046                         local->op_errno = -ret;
 1047                         need_validate = 0;
 1048 
 1049                         waitq = __ioc_page_wakeup(trav, trav->op_errno);
 1050                         ioc_inode_unlock(ioc_inode);
 1051 
 1052                         ioc_waitq_return(waitq);
 1053                         waitq = NULL;
 1054                         goto out;
 1055                     }
 1056                 }
 1057             }
 1058         }
 1059         ioc_inode_unlock(ioc_inode);
 1060 
 1061         ioc_waitq_return(waitq);
 1062         waitq = NULL;
 1063 
 1064         if (fault) {
 1065             fault = 0;
 1066             /* new page created, increase the table->cache_used */
 1067             ioc_page_fault(ioc_inode, frame, fd, trav_offset);
 1068         }
 1069 
 1070         if (need_validate) {
 1071             need_validate = 0;
 1072             gf_msg_trace(frame->this->name, 0,
 1073                          "sending validate request for "
 1074                          "inode(%s) at offset=%" PRId64 "",
 1075                          uuid_utoa(fd->inode->gfid), trav_offset);
 1076             ret = ioc_cache_validate(frame, ioc_inode, fd, trav);
 1077             if (ret == -1) {
 1078                 ioc_inode_lock(ioc_inode);
 1079                 {
 1080                     waitq = __ioc_page_wakeup(trav, trav->op_errno);
 1081                 }
 1082                 ioc_inode_unlock(ioc_inode);
 1083 
 1084                 ioc_waitq_return(waitq);
 1085                 waitq = NULL;
 1086                 goto out;
 1087             }
 1088         }
 1089 
 1090         trav_offset += table->page_size;
 1091     }
 1092 
 1093 out:
 1094     ioc_frame_return(frame);
 1095 
 1096     if (ioc_need_prune(ioc_inode->table)) {
 1097         ioc_prune(ioc_inode->table);
 1098     }
 1099 
 1100     return;
 1101 }
 1102 
 1103 /*
 1104  * ioc_readv -
 1105  *
 1106  * @frame:
 1107  * @this:
 1108  * @fd:
 1109  * @size:
 1110  * @offset:
 1111  *
 1112  */
 1113 int32_t
 1114 ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
 1115           off_t offset, uint32_t flags, dict_t *xdata)
 1116 {
 1117     uint64_t tmp_ioc_inode = 0;
 1118     ioc_inode_t *ioc_inode = NULL;
 1119     ioc_local_t *local = NULL;
 1120     uint32_t weight = 0;
 1121     ioc_table_t *table = NULL;
 1122     int32_t op_errno = EINVAL;
 1123 
 1124     if (!this) {
 1125         goto out;
 1126     }
 1127 
 1128     inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
 1129     ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
 1130     if (!ioc_inode) {
 1131         /* caching disabled, go ahead with normal readv */
 1132         STACK_WIND_TAIL(frame, FIRST_CHILD(this),
 1133                         FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
 1134                         xdata);
 1135         return 0;
 1136     }
 1137 
 1138     if (flags & O_DIRECT) {
 1139         /* disable caching for this fd, if O_DIRECT is used */
 1140         STACK_WIND_TAIL(frame, FIRST_CHILD(this),
 1141                         FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
 1142                         xdata);
 1143         return 0;
 1144     }
 1145 
 1146     table = this->private;
 1147 
 1148     if (!table) {
 1149         gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,
 1150                 NULL);
 1151         op_errno = EINVAL;
 1152         goto out;
 1153     }
 1154 
 1155     ioc_inode_lock(ioc_inode);
 1156     {
 1157         if (!ioc_inode->cache.page_table) {
 1158             ioc_inode->cache.page_table = rbthash_table_init(
 1159                 this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,
 1160                 table->mem_pool);
 1161 
 1162             if (ioc_inode->cache.page_table == NULL) {
 1163                 op_errno = ENOMEM;
 1164                 ioc_inode_unlock(ioc_inode);
 1165                 goto out;
 1166             }
 1167         }
 1168     }
 1169     ioc_inode_unlock(ioc_inode);
 1170 
 1171     if (!fd_ctx_get(fd, this, NULL)) {
 1172         /* disable caching for this fd, go ahead with normal readv */
 1173         STACK_WIND_TAIL(frame, FIRST_CHILD(this),
 1174                         FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
 1175                         xdata);
 1176         return 0;
 1177     }
 1178 
 1179     local = mem_get0(this->local_pool);
 1180     if (local == NULL) {
 1181         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
 1182         op_errno = ENOMEM;
 1183         goto out;
 1184     }
 1185 
 1186     INIT_LIST_HEAD(&local->fill_list);
 1187 
 1188     frame->local = local;
 1189     local->pending_offset = offset;
 1190     local->pending_size = size;
 1191     local->offset = offset;
 1192     local->size = size;
 1193     local->inode = ioc_inode;
 1194     local->xattr_req = dict_ref(xdata);
 1195 
 1196     gf_msg_trace(this->name, 0,
 1197                  "NEW REQ (%p) offset "
 1198                  "= %" PRId64 " && size = %" GF_PRI_SIZET "",
 1199                  frame, offset, size);
 1200 
 1201     weight = ioc_inode->weight;
 1202 
 1203     ioc_table_lock(ioc_inode->table);
 1204     {
 1205         list_move_tail(&ioc_inode->inode_lru,
 1206                        &ioc_inode->table->inode_lru[weight]);
 1207     }
 1208     ioc_table_unlock(ioc_inode->table);
 1209 
 1210     ioc_dispatch_requests(frame, ioc_inode, fd, offset, size);
 1211     return 0;
 1212 
 1213 out:
 1214     STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
 1215     return 0;
 1216 }
 1217 
 1218 /*
 1219  * ioc_writev_cbk -
 1220  *
 1221  * @frame:
 1222  * @cookie:
 1223  * @this:
 1224  * @op_ret:
 1225  * @op_errno:
 1226  *
 1227  */
 1228 int32_t
 1229 ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1230                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
 1231                struct iatt *postbuf, dict_t *xdata)
 1232 {
 1233     ioc_local_t *local = NULL;
 1234     uint64_t ioc_inode = 0;
 1235 
 1236     local = frame->local;
 1237     frame->local = NULL;
 1238     inode_ctx_get(local->fd->inode, this, &ioc_inode);
 1239 
 1240     if (op_ret >= 0) {
 1241         ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector,
 1242                          local->op_ret, op_ret, local->offset);
 1243     }
 1244 
 1245     STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
 1246                         xdata);
 1247     if (local->iobref) {
 1248         iobref_unref(local->iobref);
 1249         GF_FREE(local->vector);
 1250     }
 1251 
 1252     mem_put(local);
 1253     return 0;
 1254 }
 1255 
 1256 /*
 1257  * ioc_writev
 1258  *
 1259  * @frame:
 1260  * @this:
 1261  * @fd:
 1262  * @vector:
 1263  * @count:
 1264  * @offset:
 1265  *
 1266  */
 1267 int32_t
 1268 ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
 1269            int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
 1270            dict_t *xdata)
 1271 {
 1272     ioc_local_t *local = NULL;
 1273     uint64_t ioc_inode = 0;
 1274 
 1275     local = mem_get0(this->local_pool);
 1276     if (local == NULL) {
 1277         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
 1278 
 1279         STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
 1280         return 0;
 1281     }
 1282 
 1283     /* TODO: why is it not fd_ref'ed */
 1284     local->fd = fd;
 1285     frame->local = local;
 1286 
 1287     inode_ctx_get(fd->inode, this, &ioc_inode);
 1288     if (ioc_inode) {
 1289         local->iobref = iobref_ref(iobref);
 1290         local->vector = iov_dup(vector, count);
 1291         local->op_ret = count;
 1292         local->offset = offset;
 1293     }
 1294 
 1295     STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this),
 1296                FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
 1297                flags, iobref, xdata);
 1298 
 1299     return 0;
 1300 }
 1301 
 1302 /*
 1303  * ioc_truncate_cbk -
 1304  *
 1305  * @frame:
 1306  * @cookie:
 1307  * @this:
 1308  * @op_ret:
 1309  * @op_errno:
 1310  * @buf:
 1311  *
 1312  */
 1313 int32_t
 1314 ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1315                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
 1316                  struct iatt *postbuf, dict_t *xdata)
 1317 {
 1318     STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
 1319                         xdata);
 1320     return 0;
 1321 }
 1322 
 1323 /*
 1324  * ioc_ftruncate_cbk -
 1325  *
 1326  * @frame:
 1327  * @cookie:
 1328  * @this:
 1329  * @op_ret:
 1330  * @op_errno:
 1331  * @buf:
 1332  *
 1333  */
 1334 int32_t
 1335 ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1336                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
 1337                   struct iatt *postbuf, dict_t *xdata)
 1338 {
 1339     STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
 1340                         xdata);
 1341     return 0;
 1342 }
 1343 
 1344 /*
 1345  * ioc_truncate -
 1346  *
 1347  * @frame:
 1348  * @this:
 1349  * @loc:
 1350  * @offset:
 1351  *
 1352  */
 1353 int32_t
 1354 ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
 1355              dict_t *xdata)
 1356 {
 1357     uint64_t ioc_inode = 0;
 1358 
 1359     inode_ctx_get(loc->inode, this, &ioc_inode);
 1360 
 1361     if (ioc_inode)
 1362         ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
 1363 
 1364     STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this),
 1365                FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
 1366     return 0;
 1367 }
 1368 
 1369 /*
 1370  * ioc_ftruncate -
 1371  *
 1372  * @frame:
 1373  * @this:
 1374  * @fd:
 1375  * @offset:
 1376  *
 1377  */
 1378 int32_t
 1379 ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 1380               dict_t *xdata)
 1381 {
 1382     uint64_t ioc_inode = 0;
 1383 
 1384     inode_ctx_get(fd->inode, this, &ioc_inode);
 1385 
 1386     if (ioc_inode)
 1387         ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
 1388 
 1389     STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
 1390                FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
 1391     return 0;
 1392 }
 1393 
 1394 int32_t
 1395 ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
 1396            int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
 1397 {
 1398     STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
 1399     return 0;
 1400 }
 1401 
 1402 int32_t
 1403 ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
 1404        struct gf_flock *lock, dict_t *xdata)
 1405 {
 1406     ioc_inode_t *ioc_inode = NULL;
 1407     uint64_t tmp_inode = 0;
 1408     struct timeval tv = {
 1409         0,
 1410     };
 1411 
 1412     inode_ctx_get(fd->inode, this, &tmp_inode);
 1413     ioc_inode = (ioc_inode_t *)(long)tmp_inode;
 1414     if (!ioc_inode) {
 1415         gf_msg_debug(this->name, EBADFD,
 1416                      "inode context is NULL: returning EBADFD");
 1417         STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL);
 1418         return 0;
 1419     }
 1420 
 1421     gettimeofday(&tv, NULL);
 1422     ioc_inode_lock(ioc_inode);
 1423     {
 1424         memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
 1425     }
 1426     ioc_inode_unlock(ioc_inode);
 1427 
 1428     STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this),
 1429                FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
 1430 
 1431     return 0;
 1432 }
 1433 
 1434 int
 1435 ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
 1436                  int op_errno, gf_dirent_t *entries, dict_t *xdata)
 1437 {
 1438     gf_dirent_t *entry = NULL;
 1439     char *path = NULL;
 1440     fd_t *fd = NULL;
 1441 
 1442     fd = frame->local;
 1443     frame->local = NULL;
 1444 
 1445     if (op_ret <= 0)
 1446         goto unwind;
 1447 
 1448     list_for_each_entry(entry, &entries->list, list)
 1449     {
 1450         inode_path(fd->inode, entry->d_name, &path);
 1451         ioc_inode_update(this, entry->inode, path, &entry->d_stat);
 1452         GF_FREE(path);
 1453         path = NULL;
 1454     }
 1455 
 1456 unwind:
 1457     STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
 1458 
 1459     return 0;
 1460 }
 1461 
 1462 int
 1463 ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
 1464              off_t offset, dict_t *dict)
 1465 {
 1466     frame->local = fd;
 1467 
 1468     STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this),
 1469                FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
 1470 
 1471     return 0;
 1472 }
 1473 
 1474 static int32_t
 1475 ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1476                 int32_t op_ret, int32_t op_errno, struct iatt *pre,
 1477                 struct iatt *post, dict_t *xdata)
 1478 {
 1479     STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
 1480     return 0;
 1481 }
 1482 
 1483 static int32_t
 1484 ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 1485             size_t len, dict_t *xdata)
 1486 {
 1487     uint64_t ioc_inode = 0;
 1488 
 1489     inode_ctx_get(fd->inode, this, &ioc_inode);
 1490 
 1491     if (ioc_inode)
 1492         ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
 1493 
 1494     STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
 1495                FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
 1496     return 0;
 1497 }
 1498 
 1499 static int32_t
 1500 ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1501                  int32_t op_ret, int32_t op_errno, struct iatt *pre,
 1502                  struct iatt *post, dict_t *xdata)
 1503 {
 1504     STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
 1505     return 0;
 1506 }
 1507 
 1508 static int32_t
 1509 ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 1510              off_t len, dict_t *xdata)
 1511 {
 1512     uint64_t ioc_inode = 0;
 1513 
 1514     inode_ctx_get(fd->inode, this, &ioc_inode);
 1515 
 1516     if (ioc_inode)
 1517         ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
 1518 
 1519     STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
 1520                FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
 1521     return 0;
 1522 }
 1523 
 1524 int32_t
 1525 ioc_get_priority_list(const char *opt_str, struct list_head *first)
 1526 {
 1527     int32_t max_pri = 1;
 1528     char *tmp_str = NULL;
 1529     char *tmp_str1 = NULL;
 1530     char *tmp_str2 = NULL;
 1531     char *dup_str = NULL;
 1532     char *stripe_str = NULL;
 1533     char *pattern = NULL;
 1534     char *priority = NULL;
 1535     char *string = NULL;
 1536     struct ioc_priority *curr = NULL, *tmp = NULL;
 1537 
 1538     string = gf_strdup(opt_str);
 1539     if (string == NULL) {
 1540         max_pri = -1;
 1541         goto out;
 1542     }
 1543 
 1544     /* Get the pattern for cache priority.
 1545      * "option priority *.jpg:1,abc*:2" etc
 1546      */
 1547     /* TODO: inode_lru in table is statically hard-coded to 5,
 1548      * should be changed to run-time configuration
 1549      */
 1550     stripe_str = strtok_r(string, ",", &tmp_str);
 1551     while (stripe_str) {
 1552         curr = GF_CALLOC(1, sizeof(struct ioc_priority),
 1553                          gf_ioc_mt_ioc_priority);
 1554         if (curr == NULL) {
 1555             max_pri = -1;
 1556             goto out;
 1557         }
 1558 
 1559         list_add_tail(&curr->list, first);
 1560 
 1561         dup_str = gf_strdup(stripe_str);
 1562         if (dup_str == NULL) {
 1563             max_pri = -1;
 1564             goto out;
 1565         }
 1566 
 1567         pattern = strtok_r(dup_str, ":", &tmp_str1);
 1568         if (!pattern) {
 1569             max_pri = -1;
 1570             goto out;
 1571         }
 1572 
 1573         priority = strtok_r(NULL, ":", &tmp_str1);
 1574         if (!priority) {
 1575             max_pri = -1;
 1576             goto out;
 1577         }
 1578 
 1579         gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",
 1580                      pattern, priority);
 1581 
 1582         curr->pattern = gf_strdup(pattern);
 1583         if (curr->pattern == NULL) {
 1584             max_pri = -1;
 1585             goto out;
 1586         }
 1587 
 1588         curr->priority = strtol(priority, &tmp_str2, 0);
 1589         if (tmp_str2 && (*tmp_str2)) {
 1590             max_pri = -1;
 1591             goto out;
 1592         } else {
 1593             max_pri = max(max_pri, curr->priority);
 1594         }
 1595 
 1596         GF_FREE(dup_str);
 1597         dup_str = NULL;
 1598 
 1599         stripe_str = strtok_r(NULL, ",", &tmp_str);
 1600     }
 1601 out:
 1602     GF_FREE(string);
 1603 
 1604     GF_FREE(dup_str);
 1605 
 1606     if (max_pri == -1) {
 1607         list_for_each_entry_safe(curr, tmp, first, list)
 1608         {
 1609             list_del_init(&curr->list);
 1610             GF_FREE(curr->pattern);
 1611             GF_FREE(curr);
 1612         }
 1613     }
 1614 
 1615     return max_pri;
 1616 }
 1617 
 1618 int32_t
 1619 mem_acct_init(xlator_t *this)
 1620 {
 1621     int ret = -1;
 1622 
 1623     if (!this)
 1624         return ret;
 1625 
 1626     ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1);
 1627 
 1628     if (ret != 0) {
 1629         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
 1630                 IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);
 1631         return ret;
 1632     }
 1633 
 1634     return ret;
 1635 }
 1636 
 1637 static gf_boolean_t
 1638 check_cache_size_ok(xlator_t *this, uint64_t cache_size)
 1639 {
 1640     gf_boolean_t ret = _gf_true;
 1641     uint64_t total_mem = 0;
 1642     uint64_t max_cache_size = 0;
 1643     volume_option_t *opt = NULL;
 1644 
 1645     GF_ASSERT(this);
 1646     opt = xlator_volume_option_get(this, "cache-size");
 1647     if (!opt) {
 1648         ret = _gf_false;
 1649         gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
 1650                 IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);
 1651         goto out;
 1652     }
 1653 
 1654     total_mem = get_mem_size();
 1655     if (-1 == total_mem)
 1656         max_cache_size = opt->max;
 1657     else
 1658         max_cache_size = total_mem;
 1659 
 1660     gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
 1661 
 1662     if (cache_size > max_cache_size) {
 1663         ret = _gf_false;
 1664         gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
 1665                 "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64,
 1666                 max_cache_size, NULL);
 1667         goto out;
 1668     }
 1669 out:
 1670     return ret;
 1671 }
 1672 
 1673 int
 1674 reconfigure(xlator_t *this, dict_t *options)
 1675 {
 1676     data_t *data = NULL;
 1677     ioc_table_t *table = NULL;
 1678     int ret = -1;
 1679     uint64_t cache_size_new = 0;
 1680     if (!this || !this->private)
 1681         goto out;
 1682 
 1683     table = this->private;
 1684 
 1685     ioc_table_lock(table);
 1686     {
 1687         GF_OPTION_RECONF("pass-through", this->pass_through, options, bool,
 1688                          unlock);
 1689 
 1690         GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, int32,
 1691                          unlock);
 1692 
 1693         data = dict_get(options, "priority");
 1694         if (data) {
 1695             char *option_list = data_to_str(data);
 1696 
 1697             gf_msg_trace(this->name, 0, "option path %s", option_list);
 1698             /* parse the list of pattern:priority */
 1699             table->max_pri = ioc_get_priority_list(option_list,
 1700                                                    &table->priority_list);
 1701 
 1702             if (table->max_pri == -1) {
 1703                 goto unlock;
 1704             }
 1705             table->max_pri++;
 1706         }
 1707 
 1708         GF_OPTION_RECONF("max-file-size", table->max_file_size, options,
 1709                          size_uint64, unlock);
 1710 
 1711         GF_OPTION_RECONF("min-file-size", table->min_file_size, options,
 1712                          size_uint64, unlock);
 1713 
 1714         if ((table->max_file_size <= UINT64_MAX) &&
 1715             (table->min_file_size > table->max_file_size)) {
 1716             gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD,
 1717                     "minimum-size=%" PRIu64, table->min_file_size,
 1718                     "maximum-size=%" PRIu64, table->max_file_size, NULL);
 1719             goto unlock;
 1720         }
 1721 
 1722         GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64,
 1723                          unlock);
 1724         if (!check_cache_size_ok(this, cache_size_new)) {
 1725             ret = -1;
 1726             gf_smsg(this->name, GF_LOG_ERROR, 0,
 1727                     IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL);
 1728             goto unlock;
 1729         }
 1730         table->cache_size = cache_size_new;
 1731 
 1732         ret = 0;
 1733     }
 1734 unlock:
 1735     ioc_table_unlock(table);
 1736 out:
 1737     return ret;
 1738 }
 1739 
 1740 /*
 1741  * init -
 1742  * @this:
 1743  *
 1744  */
 1745 int32_t
 1746 init(xlator_t *this)
 1747 {
 1748     ioc_table_t *table = NULL;
 1749     dict_t *xl_options = NULL;
 1750     uint32_t index = 0;
 1751     int32_t ret = -1;
 1752     glusterfs_ctx_t *ctx = NULL;
 1753     data_t *data = 0;
 1754     uint32_t num_pages = 0;
 1755 
 1756     xl_options = this->options;
 1757 
 1758     if (!this->children || this->children->next) {
 1759         gf_smsg(this->name, GF_LOG_ERROR, 0,
 1760                 IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
 1761         goto out;
 1762     }
 1763 
 1764     if (!this->parents) {
 1765         gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED,
 1766                 NULL);
 1767     }
 1768 
 1769     table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t);
 1770     if (table == NULL) {
 1771         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
 1772         goto out;
 1773     }
 1774 
 1775     table->xl = this;
 1776     table->page_size = this->ctx->page_size;
 1777 
 1778     GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
 1779 
 1780     GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out);
 1781 
 1782     GF_OPTION_INIT("cache-timeout", table->cache_timeout, int32, out);
 1783 
 1784     GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out);
 1785 
 1786     GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out);
 1787 
 1788     if (!check_cache_size_ok(this, table->cache_size)) {
 1789         ret = -1;
 1790         goto out;
 1791     }
 1792 
 1793     INIT_LIST_HEAD(&table->priority_list);
 1794     table->max_pri = 1;
 1795     data = dict_get(xl_options, "priority");
 1796     if (data) {
 1797         char *option_list = data_to_str(data);
 1798         gf_msg_trace(this->name, 0, "option path %s", option_list);
 1799         /* parse the list of pattern:priority */
 1800         table->max_pri = ioc_get_priority_list(option_list,
 1801                                                &table->priority_list);
 1802 
 1803         if (table->max_pri == -1) {
 1804             goto out;
 1805         }
 1806     }
 1807     table->max_pri++;
 1808 
 1809     INIT_LIST_HEAD(&table->inodes);
 1810 
 1811     if ((table->max_file_size <= UINT64_MAX) &&
 1812         (table->min_file_size > table->max_file_size)) {
 1813         gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
 1814                 "minimum-size=%" PRIu64, table->min_file_size,
 1815                 "maximum-size=%" PRIu64, table->max_file_size, NULL);
 1816         goto out;
 1817     }
 1818 
 1819     table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head),
 1820                                  gf_ioc_mt_list_head);
 1821     if (table->inode_lru == NULL) {
 1822         goto out;
 1823     }
 1824 
 1825     for (index = 0; index < (table->max_pri); index++)
 1826         INIT_LIST_HEAD(&table->inode_lru[index]);
 1827 
 1828     this->local_pool = mem_pool_new(ioc_local_t, 64);
 1829     if (!this->local_pool) {
 1830         ret = -1;
 1831         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
 1832                 IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL);
 1833         goto out;
 1834     }
 1835 
 1836     pthread_mutex_init(&table->table_lock, NULL);
 1837     this->private = table;
 1838 
 1839     num_pages = (table->cache_size / table->page_size) +
 1840                 ((table->cache_size % table->page_size) ? 1 : 0);
 1841 
 1842     table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages);
 1843     if (!table->mem_pool) {
 1844         gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
 1845                 IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL);
 1846         goto out;
 1847     }
 1848 
 1849     ret = 0;
 1850 
 1851     ctx = this->ctx;
 1852     ioc_log2_page_size = log_base2(ctx->page_size);
 1853 
 1854 out:
 1855     if (ret == -1) {
 1856         if (table != NULL) {
 1857             GF_FREE(table->inode_lru);
 1858             GF_FREE(table);
 1859         }
 1860     }
 1861 
 1862     return ret;
 1863 }
 1864 
 1865 void
 1866 ioc_page_waitq_dump(ioc_page_t *page, char *prefix)
 1867 {
 1868     ioc_waitq_t *trav = NULL;
 1869     call_frame_t *frame = NULL;
 1870     int32_t i = 0;
 1871     char key[GF_DUMP_MAX_BUF_LEN] = {
 1872         0,
 1873     };
 1874 
 1875     trav = page->waitq;
 1876 
 1877     while (trav) {
 1878         frame = trav->data;
 1879         sprintf(key, "waitq.frame[%d]", i++);
 1880         gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
 1881 
 1882         trav = trav->next;
 1883     }
 1884 }
 1885 
 1886 void
 1887 __ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix)
 1888 {
 1889     ioc_waitq_t *trav = NULL;
 1890     ioc_page_t *page = NULL;
 1891     int32_t i = 0;
 1892     char key[GF_DUMP_MAX_BUF_LEN] = {
 1893         0,
 1894     };
 1895 
 1896     trav = ioc_inode->waitq;
 1897 
 1898     while (trav) {
 1899         page = trav->data;
 1900 
 1901         sprintf(key, "cache-validation-waitq.page[%d].offset", i++);
 1902         gf_proc_dump_write(key, "%" PRId64, page->offset);
 1903 
 1904         trav = trav->next;
 1905     }
 1906 }
 1907 
 1908 void
 1909 __ioc_page_dump(ioc_page_t *page, char *prefix)
 1910 {
 1911     int ret = -1;
 1912 
 1913     if (!page)
 1914         return;
 1915     /* ioc_page_lock can be used to hold the mutex. But in statedump
 1916      * its better to use trylock to avoid deadlocks.
 1917      */
 1918     ret = pthread_mutex_trylock(&page->page_lock);
 1919     if (ret)
 1920         goto out;
 1921     {
 1922         gf_proc_dump_write("offset", "%" PRId64, page->offset);
 1923         gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
 1924         gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
 1925         gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
 1926         ioc_page_waitq_dump(page, prefix);
 1927     }
 1928     pthread_mutex_unlock(&page->page_lock);
 1929 
 1930 out:
 1931     if (ret && page)
 1932         gf_proc_dump_write("Unable to dump the page information",
 1933                            "(Lock acquisition failed) %p", page);
 1934 
 1935     return;
 1936 }
 1937 
 1938 void
 1939 __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
 1940 {
 1941     off_t offset = 0;
 1942     ioc_table_t *table = NULL;
 1943     ioc_page_t *page = NULL;
 1944     int i = 0;
 1945     char key[GF_DUMP_MAX_BUF_LEN] = {
 1946         0,
 1947     };
 1948     char timestr[256] = {
 1949         0,
 1950     };
 1951 
 1952     if ((ioc_inode == NULL) || (prefix == NULL)) {
 1953         goto out;
 1954     }
 1955 
 1956     table = ioc_inode->table;
 1957 
 1958     if (ioc_inode->cache.tv.tv_sec) {
 1959         gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec,
 1960                     gf_timefmt_FT);
 1961         snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
 1962                  ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
 1963 
 1964         gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
 1965     }
 1966 
 1967     for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) {
 1968         page = __ioc_page_get(ioc_inode, offset);
 1969         if (page == NULL) {
 1970             continue;
 1971         }
 1972 
 1973         sprintf(key, "inode.cache.page[%d]", i++);
 1974         __ioc_page_dump(page, key);
 1975     }
 1976 out:
 1977     return;
 1978 }
 1979 
 1980 int
 1981 ioc_inode_dump(xlator_t *this, inode_t *inode)
 1982 {
 1983     char *path = NULL;
 1984     int ret = -1;
 1985     char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
 1986         0,
 1987     };
 1988     uint64_t tmp_ioc_inode = 0;
 1989     ioc_inode_t *ioc_inode = NULL;
 1990     gf_boolean_t section_added = _gf_false;
 1991     char uuid_str[64] = {
 1992         0,
 1993     };
 1994 
 1995     if (this == NULL || inode == NULL)
 1996         goto out;
 1997 
 1998     gf_proc_dump_build_key(key_prefix, "io-cache", "inode");
 1999 
 2000     inode_ctx_get(inode, this, &tmp_ioc_inode);
 2001     ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
 2002     if (ioc_inode == NULL)
 2003         goto out;
 2004 
 2005     /* Similar to ioc_page_dump function its better to use
 2006      * pthread_mutex_trylock and not to use gf_log in statedump
 2007      * to avoid deadlocks.
 2008      */
 2009     ret = pthread_mutex_trylock(&ioc_inode->inode_lock);
 2010     if (ret)
 2011         goto out;
 2012 
 2013     {
 2014         if (gf_uuid_is_null(ioc_inode->inode->gfid))
 2015             goto unlock;
 2016 
 2017         gf_proc_dump_add_section("%s", key_prefix);
 2018         section_added = _gf_true;
 2019 
 2020         __inode_path(ioc_inode->inode, NULL, &path);
 2021 
 2022         gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight);
 2023 
 2024         if (path) {
 2025             gf_proc_dump_write("path", "%s", path);
 2026             GF_FREE(path);
 2027         }
 2028 
 2029         gf_proc_dump_write("uuid", "%s",
 2030                            uuid_utoa_r(ioc_inode->inode->gfid, uuid_str));
 2031         __ioc_cache_dump(ioc_inode, key_prefix);
 2032         __ioc_inode_waitq_dump(ioc_inode, key_prefix);
 2033     }
 2034 unlock:
 2035     pthread_mutex_unlock(&ioc_inode->inode_lock);
 2036 
 2037 out:
 2038     if (ret && ioc_inode) {
 2039         if (section_added == _gf_false)
 2040             gf_proc_dump_add_section("%s", key_prefix);
 2041         gf_proc_dump_write("Unable to print the status of ioc_inode",
 2042                            "(Lock acquisition failed) %s",
 2043                            uuid_utoa(inode->gfid));
 2044     }
 2045     return ret;
 2046 }
 2047 
 2048 int
 2049 ioc_priv_dump(xlator_t *this)
 2050 {
 2051     ioc_table_t *priv = NULL;
 2052     char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
 2053         0,
 2054     };
 2055     int ret = -1;
 2056     gf_boolean_t add_section = _gf_false;
 2057 
 2058     if (!this || !this->private)
 2059         goto out;
 2060 
 2061     priv = this->private;
 2062 
 2063     gf_proc_dump_build_key(key_prefix, "io-cache", "priv");
 2064     gf_proc_dump_add_section("%s", key_prefix);
 2065     add_section = _gf_true;
 2066 
 2067     ret = pthread_mutex_trylock(&priv->table_lock);
 2068     if (ret)
 2069         goto out;
 2070     {
 2071         gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size);
 2072         gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size);
 2073         gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used);
 2074         gf_proc_dump_write("inode_count", "%u", priv->inode_count);
 2075         gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout);
 2076         gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size);
 2077         gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size);
 2078     }
 2079     pthread_mutex_unlock(&priv->table_lock);
 2080 out:
 2081     if (ret && priv) {
 2082         if (!add_section) {
 2083             gf_proc_dump_build_key(key_prefix,
 2084                                    "xlator."
 2085                                    "performance.io-cache",
 2086                                    "priv");
 2087             gf_proc_dump_add_section("%s", key_prefix);
 2088         }
 2089         gf_proc_dump_write(
 2090             "Unable to dump the state of private "
 2091             "structure of io-cache xlator",
 2092             "(Lock "
 2093             "acquisition failed) %s",
 2094             this->name);
 2095     }
 2096 
 2097     return 0;
 2098 }
 2099 
 2100 /*
 2101  * fini -
 2102  *
 2103  * @this:
 2104  *
 2105  */
 2106 void
 2107 fini(xlator_t *this)
 2108 {
 2109     ioc_table_t *table = NULL;
 2110     struct ioc_priority *curr = NULL, *tmp = NULL;
 2111 
 2112     table = this->private;
 2113 
 2114     if (table == NULL)
 2115         return;
 2116 
 2117     this->private = NULL;
 2118 
 2119     if (table->mem_pool != NULL) {
 2120         mem_pool_destroy(table->mem_pool);
 2121         table->mem_pool = NULL;
 2122     }
 2123 
 2124     list_for_each_entry_safe(curr, tmp, &table->priority_list, list)
 2125     {
 2126         list_del_init(&curr->list);
 2127         GF_FREE(curr->pattern);
 2128         GF_FREE(curr);
 2129     }
 2130 
 2131     /* inode_lru and inodes list can be empty in case fini() is
 2132      * called soon after init()? Hence commenting the below asserts.
 2133      */
 2134     /*for (i = 0; i < table->max_pri; i++) {
 2135             GF_ASSERT (list_empty (&table->inode_lru[i]));
 2136     }
 2137 
 2138     GF_ASSERT (list_empty (&table->inodes));
 2139     */
 2140     pthread_mutex_destroy(&table->table_lock);
 2141     GF_FREE(table);
 2142 
 2143     this->private = NULL;
 2144     return;
 2145 }
 2146 
 2147 struct xlator_fops fops = {
 2148     .open = ioc_open,
 2149     .create = ioc_create,
 2150     .readv = ioc_readv,
 2151     .writev = ioc_writev,
 2152     .truncate = ioc_truncate,
 2153     .ftruncate = ioc_ftruncate,
 2154     .lookup = ioc_lookup,
 2155     .lk = ioc_lk,
 2156     .setattr = ioc_setattr,
 2157     .mknod = ioc_mknod,
 2158 
 2159     .readdirp = ioc_readdirp,
 2160     .discard = ioc_discard,
 2161     .zerofill = ioc_zerofill,
 2162 };
 2163 
 2164 struct xlator_dumpops dumpops = {
 2165     .priv = ioc_priv_dump,
 2166     .inodectx = ioc_inode_dump,
 2167 };
 2168 
 2169 struct xlator_cbks cbks = {
 2170     .forget = ioc_forget,
 2171     .release = ioc_release,
 2172     .invalidate = ioc_invalidate,
 2173 };
 2174 
 2175 struct volume_options options[] = {
 2176     {
 2177         .key = {"io-cache"},
 2178         .type = GF_OPTION_TYPE_BOOL,
 2179         .default_value = "off",
 2180         .description = "enable/disable io-cache",
 2181         .op_version = {GD_OP_VERSION_6_0},
 2182         .flags = OPT_FLAG_SETTABLE,
 2183     },
 2184     {.key = {"priority"},
 2185      .type = GF_OPTION_TYPE_PRIORITY_LIST,
 2186      .default_value = "",
 2187      .description = "Assigns priority to filenames with specific "
 2188                     "patterns so that when a page needs to be ejected "
 2189                     "out of the cache, the page of a file whose "
 2190                     "priority is the lowest will be ejected earlier",
 2191      .op_version = {1},
 2192      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
 2193     {.key = {"cache-timeout", "force-revalidate-timeout"},
 2194      .type = GF_OPTION_TYPE_INT,
 2195      .min = 0,
 2196      .max = 60,
 2197      .default_value = "1",
 2198      .description = "The cached data for a file will be retained for "
 2199                     "'cache-refresh-timeout' seconds, after which data "
 2200                     "re-validation is performed.",
 2201      .op_version = {1},
 2202      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
 2203     {.key = {"cache-size"},
 2204      .type = GF_OPTION_TYPE_SIZET,
 2205      .min = 4 * GF_UNIT_MB,
 2206      .max = INFINITY,
 2207      .default_value = "32MB",
 2208      .description = "Size of the read cache.",
 2209      .op_version = {1},
 2210      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
 2211     {.key = {"min-file-size"},
 2212      .type = GF_OPTION_TYPE_SIZET,
 2213      .default_value = "0",
 2214      .description = "Minimum file size which would be cached by the "
 2215                     "io-cache translator.",
 2216      .op_version = {1},
 2217      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
 2218     {.key = {"max-file-size"},
 2219      .type = GF_OPTION_TYPE_SIZET,
 2220      .default_value = "0",
 2221      .description = "Maximum file size which would be cached by the "
 2222                     "io-cache translator.",
 2223      .op_version = {1},
 2224      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
 2225     {.key = {"pass-through"},
 2226      .type = GF_OPTION_TYPE_BOOL,
 2227      .default_value = "false",
 2228      .op_version = {GD_OP_VERSION_4_1_0},
 2229      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
 2230      .tags = {"io-cache"},
 2231      .description = "Enable/Disable io cache translator"},
 2232     {.key = {NULL}},
 2233 };
 2234 
 2235 xlator_api_t xlator_api = {
 2236     .init = init,
 2237     .fini = fini,
 2238     .reconfigure = reconfigure,
 2239     .mem_acct_init = mem_acct_init,
 2240     .op_version = {1}, /* Present from the initial version */
 2241     .dumpops = &dumpops,
 2242     .fops = &fops,
 2243     .cbks = &cbks,
 2244     .options = options,
 2245     .identifier = "io-cache",
 2246     .category = GF_MAINTAINED,
 2247 };