"Fossies" - the Fresh Open Source Software Archive

Member "glusterfs-8.2/xlators/features/changelog/src/changelog-helpers.c" (16 Sep 2020, 57843 Bytes) of package /linux/misc/glusterfs-8.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "changelog-helpers.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2    Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
    3    This file is part of GlusterFS.
    4 
    5    This file is licensed to you under your choice of the GNU Lesser
    6    General Public License, version 3 or any later version (LGPLv3 or
    7    later), or the GNU General Public License, version 2 (GPLv2), in all
    8    cases as published by the Free Software Foundation.
    9 */
   10 
   11 #include <glusterfs/xlator.h>
   12 #include <glusterfs/defaults.h>
   13 #include <glusterfs/logging.h>
   14 #include <glusterfs/iobuf.h>
   15 #include <glusterfs/syscall.h>
   16 
   17 #include "changelog-helpers.h"
   18 #include "changelog-encoders.h"
   19 #include "changelog-mem-types.h"
   20 #include "changelog-messages.h"
   21 
   22 #include "changelog-encoders.h"
   23 #include "changelog-rpc-common.h"
   24 #include <pthread.h>
   25 #include <time.h>
   26 
   27 static void
   28 changelog_cleanup_free_mutex(void *arg_mutex)
   29 {
   30     pthread_mutex_t *p_mutex = (pthread_mutex_t *)arg_mutex;
   31 
   32     if (p_mutex)
   33         pthread_mutex_unlock(p_mutex);
   34 }
   35 
   36 int
   37 changelog_thread_cleanup(xlator_t *this, pthread_t thr_id)
   38 {
   39     int ret = 0;
   40     void *retval = NULL;
   41 
   42     /* send a cancel request to the thread */
   43     ret = pthread_cancel(thr_id);
   44     if (ret != 0) {
   45         gf_smsg(this->name, GF_LOG_ERROR, errno,
   46                 CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
   47         goto out;
   48     }
   49 
   50     ret = pthread_join(thr_id, &retval);
   51     if ((ret != 0) || (retval != PTHREAD_CANCELED)) {
   52         gf_smsg(this->name, GF_LOG_ERROR, errno,
   53                 CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
   54     }
   55 
   56 out:
   57     return ret;
   58 }
   59 
   60 void *
   61 changelog_get_usable_buffer(changelog_local_t *local)
   62 {
   63     changelog_log_data_t *cld = NULL;
   64 
   65     if (!local)
   66         return NULL;
   67 
   68     cld = &local->cld;
   69     if (!cld->cld_iobuf)
   70         return NULL;
   71 
   72     return cld->cld_iobuf->ptr;
   73 }
   74 
   75 static int
   76 changelog_selector_index(unsigned int selector)
   77 {
   78     return (ffs(selector) - 1);
   79 }
   80 
   81 int
   82 changelog_ev_selected(xlator_t *this, changelog_ev_selector_t *selection,
   83                       unsigned int selector)
   84 {
   85     int idx = 0;
   86 
   87     idx = changelog_selector_index(selector);
   88     gf_msg_debug(this->name, 0, "selector ref count for %d (idx: %d): %d",
   89                  selector, idx, selection->ref[idx]);
   90     /* this can be lockless */
   91     return (idx < CHANGELOG_EV_SELECTION_RANGE && (selection->ref[idx] > 0));
   92 }
   93 
   94 void
   95 changelog_select_event(xlator_t *this, changelog_ev_selector_t *selection,
   96                        unsigned int selector)
   97 {
   98     int idx = 0;
   99 
  100     LOCK(&selection->reflock);
  101     {
  102         while (selector) {
  103             idx = changelog_selector_index(selector);
  104             if (idx < CHANGELOG_EV_SELECTION_RANGE) {
  105                 selection->ref[idx]++;
  106                 gf_msg_debug(this->name, 0, "selecting event %d", idx);
  107             }
  108             selector &= ~(1 << idx);
  109         }
  110     }
  111     UNLOCK(&selection->reflock);
  112 }
  113 
  114 void
  115 changelog_deselect_event(xlator_t *this, changelog_ev_selector_t *selection,
  116                          unsigned int selector)
  117 {
  118     int idx = 0;
  119 
  120     LOCK(&selection->reflock);
  121     {
  122         while (selector) {
  123             idx = changelog_selector_index(selector);
  124             if (idx < CHANGELOG_EV_SELECTION_RANGE) {
  125                 selection->ref[idx]--;
  126                 gf_msg_debug(this->name, 0, "de-selecting event %d", idx);
  127             }
  128             selector &= ~(1 << idx);
  129         }
  130     }
  131     UNLOCK(&selection->reflock);
  132 }
  133 
  134 int
  135 changelog_init_event_selection(xlator_t *this,
  136                                changelog_ev_selector_t *selection)
  137 {
  138     int ret = 0;
  139     int j = CHANGELOG_EV_SELECTION_RANGE;
  140 
  141     ret = LOCK_INIT(&selection->reflock);
  142     if (ret != 0)
  143         return -1;
  144 
  145     LOCK(&selection->reflock);
  146     {
  147         while (j--) {
  148             selection->ref[j] = 0;
  149         }
  150     }
  151     UNLOCK(&selection->reflock);
  152 
  153     return 0;
  154 }
  155 
  156 static void
  157 changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem,
  158                            size_t size)
  159 {
  160     char *buf = NULL;
  161     void *opaque = NULL;
  162 
  163     buf = rbuf_reserve_write_area(priv->rbuf, size, &opaque);
  164     if (!buf) {
  165         gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
  166                          CHANGELOG_MSG_DISPATCH_EVENT_FAILED,
  167                          "failed to dispatch event");
  168         return;
  169     }
  170 
  171     memcpy(buf, mem, size);
  172     rbuf_write_complete(opaque);
  173 }
  174 
  175 void
  176 changelog_dispatch_event(xlator_t *this, changelog_priv_t *priv,
  177                          changelog_event_t *ev)
  178 {
  179     changelog_ev_selector_t *selection = NULL;
  180 
  181     selection = &priv->ev_selection;
  182     if (changelog_ev_selected(this, selection, ev->ev_type)) {
  183         changelog_perform_dispatch(this, priv, ev, CHANGELOG_EV_SIZE);
  184     }
  185 }
  186 
  187 void
  188 changelog_set_usable_record_and_length(changelog_local_t *local, size_t len,
  189                                        int xr)
  190 {
  191     changelog_log_data_t *cld = NULL;
  192 
  193     cld = &local->cld;
  194 
  195     cld->cld_ptr_len = len;
  196     cld->cld_xtra_records = xr;
  197 }
  198 
  199 void
  200 changelog_local_cleanup(xlator_t *xl, changelog_local_t *local)
  201 {
  202     int i = 0;
  203     changelog_opt_t *co = NULL;
  204     changelog_log_data_t *cld = NULL;
  205 
  206     if (!local)
  207         return;
  208 
  209     cld = &local->cld;
  210 
  211     /* cleanup dynamic allocation for extra records */
  212     if (cld->cld_xtra_records) {
  213         co = (changelog_opt_t *)cld->cld_ptr;
  214         for (; i < cld->cld_xtra_records; i++, co++)
  215             if (co->co_free)
  216                 co->co_free(co);
  217     }
  218 
  219     CHANGELOG_IOBUF_UNREF(cld->cld_iobuf);
  220 
  221     if (local->inode)
  222         inode_unref(local->inode);
  223 
  224     mem_put(local);
  225 }
  226 
  227 int
  228 changelog_write(int fd, char *buffer, size_t len)
  229 {
  230     ssize_t size = 0;
  231     size_t written = 0;
  232 
  233     while (written < len) {
  234         size = sys_write(fd, buffer + written, len - written);
  235         if (size <= 0)
  236             break;
  237 
  238         written += size;
  239     }
  240 
  241     return (written != len);
  242 }
  243 
  244 int
  245 htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
  246              char *buffer)
  247 {
  248     char changelog_path[PATH_MAX + 1] = {
  249         0,
  250     };
  251     int len = -1;
  252     char x_value[25] = {
  253         0,
  254     };
  255     /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */
  256     int ret = 0;
  257 
  258     if (priv->htime_fd == -1) {
  259         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
  260                 "reason=fd not available", NULL);
  261         ret = -1;
  262         goto out;
  263     }
  264     len = snprintf(changelog_path, PATH_MAX, "%s", buffer);
  265     if (len >= PATH_MAX) {
  266         ret = -1;
  267         goto out;
  268     }
  269     if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) {
  270         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
  271                 "reason=write failed", NULL);
  272         ret = -1;
  273         goto out;
  274     }
  275 
  276     len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
  277                    priv->rollover_count);
  278     if (len >= sizeof(x_value)) {
  279         ret = -1;
  280         goto out;
  281     }
  282 
  283     if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) {
  284         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
  285                 "reason=xattr updation failed", "XATTR_REPLACE=true",
  286                 "changelog=%s", changelog_path, NULL);
  287 
  288         if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) {
  289             gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
  290                     "reason=xattr updation failed", "changelog=%s",
  291                     changelog_path, NULL);
  292             ret = -1;
  293             goto out;
  294         }
  295     }
  296 
  297     priv->rollover_count += 1;
  298 
  299 out:
  300     return ret;
  301 }
  302 
  303 /*
  304  * Description: Check if the changelog to rollover is empty or not.
  305  * It is assumed that fd passed is already verified.
  306  *
  307  * Returns:
  308  * 1 : If found empty, changed path from "CHANGELOG.<TS>" to "changelog.<TS>"
  309  * 0 : If NOT empty, proceed usual.
  310  */
  311 int
  312 cl_is_empty(xlator_t *this, int fd)
  313 {
  314     int ret = -1;
  315     size_t elen = 0;
  316     int encoding = -1;
  317     char buffer[1024] = {
  318         0,
  319     };
  320     struct stat stbuf = {
  321         0,
  322     };
  323     int major_version = -1;
  324     int minor_version = -1;
  325 
  326     ret = sys_fstat(fd, &stbuf);
  327     if (ret) {
  328         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
  329                 NULL);
  330         goto out;
  331     }
  332 
  333     ret = sys_lseek(fd, 0, SEEK_SET);
  334     if (ret == -1) {
  335         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
  336                 NULL);
  337         goto out;
  338     }
  339 
  340     CHANGELOG_GET_HEADER_INFO(fd, buffer, sizeof(buffer), encoding,
  341                               major_version, minor_version, elen);
  342 
  343     if (elen == stbuf.st_size) {
  344         ret = 1;
  345     } else {
  346         ret = 0;
  347     }
  348 
  349 out:
  350     return ret;
  351 }
  352 
  353 /*
  354  * Description: Updates "CHANGELOG" to "changelog" for writing changelog path
  355  * to htime file.
  356  *
  357  * Returns:
  358  * 0  : Success
  359  * -1 : Error
  360  */
  361 int
  362 update_path(xlator_t *this, char *cl_path)
  363 {
  364     const char low_cl[] = "changelog";
  365     const char up_cl[] = "CHANGELOG";
  366     char *found = NULL;
  367     int ret = -1;
  368 
  369     found = strstr(cl_path, up_cl);
  370 
  371     if (found == NULL) {
  372         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND,
  373                 NULL);
  374         goto out;
  375     } else {
  376         memcpy(found, low_cl, sizeof(low_cl) - 1);
  377     }
  378 
  379     ret = 0;
  380 out:
  381     return ret;
  382 }
  383 
  384 static int
  385 changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
  386                              unsigned long ts)
  387 {
  388     int ret = -1;
  389     int notify = 0;
  390     int cl_empty_flag = 0;
  391     struct tm *gmt;
  392     char yyyymmdd[40];
  393     char ofile[PATH_MAX] = {
  394         0,
  395     };
  396     char nfile[PATH_MAX] = {
  397         0,
  398     };
  399     char nfile_dir[PATH_MAX] = {
  400         0,
  401     };
  402     changelog_event_t ev = {
  403         0,
  404     };
  405 
  406     if (priv->changelog_fd != -1) {
  407         ret = sys_fsync(priv->changelog_fd);
  408         if (ret < 0) {
  409             gf_smsg(this->name, GF_LOG_ERROR, errno,
  410                     CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
  411         }
  412         ret = cl_is_empty(this, priv->changelog_fd);
  413         if (ret == 1) {
  414             cl_empty_flag = 1;
  415         } else if (ret == -1) {
  416             /* Log error but proceed as usual */
  417             gf_smsg(this->name, GF_LOG_WARNING, 0,
  418                     CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL);
  419         }
  420         sys_close(priv->changelog_fd);
  421         priv->changelog_fd = -1;
  422     }
  423 
  424     time_t time = (time_t)ts;
  425 
  426     /* Get GMT time */
  427     gmt = gmtime(&time);
  428 
  429     strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
  430 
  431     (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
  432                    priv->changelog_dir);
  433     (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu",
  434                    priv->changelog_dir, yyyymmdd, ts);
  435     (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
  436 
  437     if (cl_empty_flag == 1) {
  438         ret = sys_unlink(ofile);
  439         if (ret) {
  440             gf_smsg(this->name, GF_LOG_ERROR, errno,
  441                     CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL);
  442             ret = 0; /* Error in unlinking empty changelog should
  443                         not break further changelog operation, so
  444                         reset return value to 0*/
  445         }
  446     } else {
  447         ret = sys_rename(ofile, nfile);
  448 
  449         /* Changelog file rename gets ENOENT when parent dir doesn't exist */
  450         if (errno == ENOENT) {
  451             ret = mkdir_p(nfile_dir, 0600, _gf_true);
  452 
  453             if ((ret == -1) && (EEXIST != errno)) {
  454                 gf_smsg(this->name, GF_LOG_ERROR, errno,
  455                         CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL);
  456                 goto out;
  457             }
  458 
  459             ret = sys_rename(ofile, nfile);
  460         }
  461 
  462         if (ret && (errno == ENOENT)) {
  463             ret = 0;
  464             goto out;
  465         }
  466         if (ret) {
  467             gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR,
  468                     "from=%s", ofile, "to=%s", nfile, NULL);
  469         }
  470     }
  471 
  472     if (!ret && (cl_empty_flag == 0)) {
  473         notify = 1;
  474     }
  475 
  476     if (!ret) {
  477         if (cl_empty_flag) {
  478             update_path(this, nfile);
  479         }
  480         ret = htime_update(this, priv, ts, nfile);
  481         if (ret == -1) {
  482             gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
  483                     NULL);
  484             goto out;
  485         }
  486     }
  487 
  488     if (notify) {
  489         ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL;
  490         memcpy(ev.u.journal.path, nfile, strlen(nfile) + 1);
  491         changelog_dispatch_event(this, priv, &ev);
  492     }
  493 out:
  494     /* If this is explicit rollover initiated by snapshot,
  495      * wakeup reconfigure thread waiting for changelog to
  496      * rollover. This should happen even in failure cases as
  497      * well otherwise snapshot will timeout and fail. Hence
  498      * moved under out.
  499      */
  500     if (priv->explicit_rollover) {
  501         priv->explicit_rollover = _gf_false;
  502 
  503         pthread_mutex_lock(&priv->bn.bnotify_mutex);
  504         {
  505             if (ret) {
  506                 priv->bn.bnotify_error = _gf_true;
  507                 gf_smsg(this->name, GF_LOG_ERROR, 0,
  508                         CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL);
  509             } else {
  510                 gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
  511                         "changelog=%s", nfile, NULL);
  512             }
  513             priv->bn.bnotify = _gf_false;
  514             pthread_cond_signal(&priv->bn.bnotify_cond);
  515         }
  516         pthread_mutex_unlock(&priv->bn.bnotify_mutex);
  517     }
  518     return ret;
  519 }
  520 
  521 int
  522 filter_cur_par_dirs(const struct dirent *entry)
  523 {
  524     if (entry == NULL)
  525         return 0;
  526 
  527     if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0))
  528         return 0;
  529     else
  530         return 1;
  531 }
  532 
  533 /*
  534  * find_current_htime:
  535  *       It finds the latest htime file and sets the HTIME_CURRENT
  536  *       xattr.
  537  *       RETURN VALUE:
  538  *           -1 : Error
  539  *           ret: Number of directory entries;
  540  */
  541 
  542 int
  543 find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
  544 {
  545     struct dirent **namelist = NULL;
  546     int ret = 0;
  547     int cnt = 0;
  548     int i = 0;
  549     xlator_t *this = NULL;
  550 
  551     this = THIS;
  552     GF_ASSERT(this);
  553     GF_ASSERT(ht_dir_path);
  554 
  555     cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort);
  556     if (cnt < 0) {
  557         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
  558                 NULL);
  559     } else if (cnt > 0) {
  560         if (snprintf(ht_file_bname, NAME_MAX, "%s",
  561                      namelist[cnt - 1]->d_name) >= NAME_MAX) {
  562             ret = -1;
  563             goto out;
  564         }
  565         if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
  566                           strlen(ht_file_bname), 0)) {
  567             gf_smsg(this->name, GF_LOG_ERROR, errno,
  568                     CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL);
  569             ret = -1;
  570             goto out;
  571         }
  572 
  573         if (sys_fsync(ht_dir_fd) < 0) {
  574             gf_smsg(this->name, GF_LOG_ERROR, errno,
  575                     CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
  576             ret = -1;
  577             goto out;
  578         }
  579     }
  580 
  581 out:
  582     for (i = 0; i < cnt; i++)
  583         free(namelist[i]);
  584     free(namelist);
  585 
  586     if (ret)
  587         cnt = ret;
  588 
  589     return cnt;
  590 }
  591 
  592 /* Returns 0 on successful open of htime file
  593  * returns -1 on failure or error
  594  */
  595 int
  596 htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
  597 {
  598     int ht_file_fd = -1;
  599     int ht_dir_fd = -1;
  600     int ret = 0;
  601     int cnt = 0;
  602     char ht_dir_path[PATH_MAX] = {
  603         0,
  604     };
  605     char ht_file_path[PATH_MAX] = {
  606         0,
  607     };
  608     char ht_file_bname[NAME_MAX] = {
  609         0,
  610     };
  611     char x_value[NAME_MAX] = {
  612         0,
  613     };
  614     int flags = 0;
  615     unsigned long min_ts = 0;
  616     unsigned long max_ts = 0;
  617     unsigned long total = 0;
  618     unsigned long total1 = 0;
  619     ssize_t size = 0;
  620     struct stat stat_buf = {
  621         0,
  622     };
  623     unsigned long record_len = 0;
  624     int32_t len = 0;
  625 
  626     CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
  627 
  628     /* Open htime directory to get HTIME_CURRENT */
  629     ht_dir_fd = open(ht_dir_path, O_RDONLY);
  630     if (ht_dir_fd == -1) {
  631         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  632                 "path=%s", ht_dir_path, NULL);
  633         ret = -1;
  634         goto out;
  635     }
  636 
  637     size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
  638                          sizeof(ht_file_bname));
  639     if (size < 0) {
  640         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
  641                 "name=HTIME_CURRENT", NULL);
  642 
  643         /* If upgrade scenario, find the latest HTIME.TSTAMP file
  644          * and use the same. If error, create a new HTIME.TSTAMP
  645          * file.
  646          */
  647         cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname);
  648         if (cnt <= 0) {
  649             gf_smsg(this->name, GF_LOG_INFO, errno,
  650                     CHANGELOG_MSG_NO_HTIME_CURRENT, NULL);
  651             sys_close(ht_dir_fd);
  652             return htime_create(this, priv, ts);
  653         }
  654 
  655         gf_smsg(this->name, GF_LOG_ERROR, errno,
  656                 CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL);
  657     }
  658 
  659     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s",
  660             ht_file_bname, NULL);
  661     len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname);
  662     if ((len < 0) || (len >= PATH_MAX)) {
  663         ret = -1;
  664         goto out;
  665     }
  666 
  667     /* Open in append mode as existing htime file is used */
  668     flags |= (O_RDWR | O_SYNC | O_APPEND);
  669     ht_file_fd = open(ht_file_path, flags,
  670                       S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
  671     if (ht_file_fd < 0) {
  672         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  673                 "path=%s", ht_file_path, NULL);
  674         ret = -1;
  675         goto out;
  676     }
  677 
  678     /* save this htime_fd in priv->htime_fd */
  679     priv->htime_fd = ht_file_fd;
  680 
  681     ret = sys_fstat(ht_file_fd, &stat_buf);
  682     if (ret < 0) {
  683         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR,
  684                 "path=%s", ht_file_path, NULL);
  685         ret = -1;
  686         goto out;
  687     }
  688 
  689     /* Initialize rollover-number in priv to current number */
  690     size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value));
  691     if (size < 0) {
  692         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
  693                 "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL);
  694         ret = -1;
  695         goto out;
  696     }
  697 
  698     sscanf(x_value, "%lu:%lu", &max_ts, &total);
  699 
  700     /* 22 = 1(/) + 20(CHANGELOG.TIMESTAMP) + 1(\x00) */
  701     record_len = strlen(priv->changelog_dir) + 22;
  702     total1 = stat_buf.st_size / record_len;
  703     if (total != total1) {
  704         gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
  705                 "xattr_total=%lu", total, "size_total=%lu", total1, NULL);
  706     }
  707 
  708     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu",
  709             min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL);
  710 
  711     if (total < total1)
  712         priv->rollover_count = total1 + 1;
  713     else
  714         priv->rollover_count = total + 1;
  715 
  716 out:
  717     if (ht_dir_fd != -1)
  718         sys_close(ht_dir_fd);
  719     return ret;
  720 }
  721 
  722 /* Returns 0 on successful creation of htime file
  723  * returns -1 on failure or error
  724  */
  725 int
  726 htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
  727 {
  728     int ht_file_fd = -1;
  729     int ht_dir_fd = -1;
  730     int ret = 0;
  731     char ht_dir_path[PATH_MAX] = {
  732         0,
  733     };
  734     char ht_file_path[PATH_MAX] = {
  735         0,
  736     };
  737     char ht_file_bname[NAME_MAX + 1] = {
  738         0,
  739     };
  740     int flags = 0;
  741     int32_t len = 0;
  742 
  743     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
  744             "name=%lu", ts, NULL);
  745 
  746     CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
  747 
  748     /* get the htime file name in ht_file_path */
  749     len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
  750                    HTIME_FILE_NAME, ts);
  751     if ((len < 0) || (len >= PATH_MAX)) {
  752         ret = -1;
  753         goto out;
  754     }
  755 
  756     flags |= (O_CREAT | O_RDWR | O_SYNC);
  757     ht_file_fd = open(ht_file_path, flags,
  758                       S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
  759     if (ht_file_fd < 0) {
  760         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  761                 "path=%s", ht_file_path, NULL);
  762         ret = -1;
  763         goto out;
  764     }
  765 
  766     if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE,
  767                       sizeof(HTIME_INITIAL_VALUE) - 1, 0)) {
  768         gf_smsg(this->name, GF_LOG_ERROR, errno,
  769                 CHANGELOG_MSG_XATTR_INIT_FAILED, NULL);
  770         ret = -1;
  771         goto out;
  772     }
  773 
  774     ret = sys_fsync(ht_file_fd);
  775     if (ret < 0) {
  776         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
  777                 NULL);
  778         goto out;
  779     }
  780 
  781     /* save this htime_fd in priv->htime_fd */
  782     priv->htime_fd = ht_file_fd;
  783 
  784     ht_file_fd = -1;
  785 
  786     /* Set xattr HTIME_CURRENT on htime directory to htime filename */
  787     ht_dir_fd = open(ht_dir_path, O_RDONLY);
  788     if (ht_dir_fd == -1) {
  789         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  790                 "path=%s", ht_dir_path, NULL);
  791         ret = -1;
  792         goto out;
  793     }
  794 
  795     (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
  796                    HTIME_FILE_NAME, ts);
  797     if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
  798                       strlen(ht_file_bname), 0)) {
  799         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
  800                 " HTIME_CURRENT", NULL);
  801         ret = -1;
  802         goto out;
  803     }
  804 
  805     ret = sys_fsync(ht_dir_fd);
  806     if (ret < 0) {
  807         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
  808                 NULL);
  809         goto out;
  810     }
  811 
  812     /* initialize rollover-number in priv to 1 */
  813     priv->rollover_count = 1;
  814 
  815 out:
  816     if (ht_dir_fd != -1)
  817         sys_close(ht_dir_fd);
  818     if (ht_file_fd != -1)
  819         sys_close(ht_file_fd);
  820     return ret;
  821 }
  822 
  823 /* Description:
  824  *      Opens the snap changelog to log call path fops in it.
  825  *      This changelos name is "CHANGELOG.SNAP", stored in
  826  *      path ".glusterfs/changelogs/csnap".
  827  * Returns:
  828  *       0  : On success.
  829  *      -1  : On failure.
  830  */
  831 int
  832 changelog_snap_open(xlator_t *this, changelog_priv_t *priv)
  833 {
  834     int fd = -1;
  835     int ret = 0;
  836     int flags = 0;
  837     char buffer[1024] = {
  838         0,
  839     };
  840     char c_snap_path[PATH_MAX] = {
  841         0,
  842     };
  843     char csnap_dir_path[PATH_MAX] = {
  844         0,
  845     };
  846     int32_t len = 0;
  847 
  848     CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir_path);
  849 
  850     len = snprintf(c_snap_path, PATH_MAX, "%s/" CSNAP_FILE_NAME,
  851                    csnap_dir_path);
  852     if ((len < 0) || (len >= PATH_MAX)) {
  853         ret = -1;
  854         goto out;
  855     }
  856 
  857     flags |= (O_CREAT | O_RDWR | O_TRUNC);
  858 
  859     fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
  860     if (fd < 0) {
  861         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  862                 "path=%s", c_snap_path, NULL);
  863         ret = -1;
  864         goto out;
  865     }
  866     priv->c_snap_fd = fd;
  867 
  868     (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR,
  869                    CHANGELOG_VERSION_MINOR, priv->ce->encoder);
  870     ret = changelog_snap_write_change(priv, buffer, strlen(buffer));
  871     if (ret < 0) {
  872         sys_close(priv->c_snap_fd);
  873         priv->c_snap_fd = -1;
  874         goto out;
  875     }
  876 
  877 out:
  878     return ret;
  879 }
  880 
  881 /*
  882  * Description:
  883  *      Starts logging fop details in CSNAP journal.
  884  * Returns:
  885  *       0 : On success.
  886  *      -1 : On Failure.
  887  */
  888 int
  889 changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv)
  890 {
  891     int ret = 0;
  892 
  893     ret = changelog_snap_open(this, priv);
  894     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting",
  895             NULL);
  896 
  897     return ret;
  898 }
  899 
  900 /*
  901  * Description:
  902  *      Stops logging fop details in CSNAP journal.
  903  * Returns:
  904  *       0 : On success.
  905  *      -1 : On Failure.
  906  */
  907 int
  908 changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv)
  909 {
  910     int ret = 0;
  911 
  912     sys_close(priv->c_snap_fd);
  913     priv->c_snap_fd = -1;
  914 
  915     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped",
  916             NULL);
  917 
  918     return ret;
  919 }
  920 
  921 int
  922 changelog_open_journal(xlator_t *this, changelog_priv_t *priv)
  923 {
  924     int fd = 0;
  925     int ret = -1;
  926     int flags = 0;
  927     char buffer[1024] = {
  928         0,
  929     };
  930     char changelog_path[PATH_MAX] = {
  931         0,
  932     };
  933 
  934     (void)snprintf(changelog_path, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
  935                    priv->changelog_dir);
  936 
  937     flags |= (O_CREAT | O_RDWR);
  938     if (priv->fsync_interval == 0)
  939         flags |= O_SYNC;
  940 
  941     fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
  942     if (fd < 0) {
  943         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
  944                 "path=%s", changelog_path, NULL);
  945         goto out;
  946     }
  947 
  948     priv->changelog_fd = fd;
  949 
  950     (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR,
  951                    CHANGELOG_VERSION_MINOR, priv->ce->encoder);
  952     ret = changelog_write_change(priv, buffer, strlen(buffer));
  953     if (ret) {
  954         sys_close(priv->changelog_fd);
  955         priv->changelog_fd = -1;
  956         goto out;
  957     }
  958 
  959     ret = 0;
  960 
  961 out:
  962     return ret;
  963 }
  964 
  965 int
  966 changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
  967                             unsigned long ts, gf_boolean_t finale)
  968 {
  969     int ret = -1;
  970 
  971     ret = changelog_rollover_changelog(this, priv, ts);
  972 
  973     if (!ret && !finale)
  974         ret = changelog_open_journal(this, priv);
  975 
  976     return ret;
  977 }
  978 
  979 /**
  980  * return the length of entry
  981  */
  982 size_t
  983 changelog_entry_length()
  984 {
  985     return sizeof(changelog_log_data_t);
  986 }
  987 
  988 int
  989 changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
  990 {
  991     struct timeval tv = {
  992         0,
  993     };
  994 
  995     cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
  996 
  997     if (gettimeofday(&tv, NULL))
  998         return -1;
  999 
 1000     cld->cld_roll_time = (unsigned long)tv.tv_sec;
 1001     cld->cld_finale = is_last;
 1002     return 0;
 1003 }
 1004 
 1005 int
 1006 changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len)
 1007 {
 1008     return changelog_write(priv->c_snap_fd, buffer, len);
 1009 }
 1010 
 1011 int
 1012 changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len)
 1013 {
 1014     return changelog_write(priv->changelog_fd, buffer, len);
 1015 }
 1016 
 1017 /*
 1018  * Descriptions:
 1019  *      Writes fop details in ascii format to CSNAP.
 1020  * Issues:
 1021  *      Not Encoding agnostic.
 1022  * Returns:
 1023  *      0 : On Success.
 1024  *     -1 : On Failure.
 1025  */
 1026 int
 1027 changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld)
 1028 {
 1029     size_t off = 0;
 1030     size_t gfid_len = 0;
 1031     char *gfid_str = NULL;
 1032     char *buffer = NULL;
 1033     changelog_priv_t *priv = NULL;
 1034     int ret = 0;
 1035 
 1036     if (this == NULL) {
 1037         ret = -1;
 1038         goto out;
 1039     }
 1040 
 1041     priv = this->private;
 1042 
 1043     if (priv == NULL) {
 1044         ret = -1;
 1045         goto out;
 1046     }
 1047 
 1048     gfid_str = uuid_utoa(cld->cld_gfid);
 1049     gfid_len = strlen(gfid_str);
 1050 
 1051     /*  extra bytes for decorations */
 1052     buffer = alloca(gfid_len + cld->cld_ptr_len + 10);
 1053     CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld);
 1054 
 1055     CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1);
 1056 
 1057     ret = changelog_snap_write_change(priv, buffer, off);
 1058 
 1059     if (ret < 0) {
 1060         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
 1061                 "csnap", NULL);
 1062     }
 1063     gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL);
 1064     ret = 0;
 1065 out:
 1066     return ret;
 1067 }
 1068 
 1069 int
 1070 changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
 1071                         changelog_log_data_t *cld)
 1072 {
 1073     int ret = 0;
 1074 
 1075     if (CHANGELOG_TYPE_IS_ROLLOVER(cld->cld_type)) {
 1076         changelog_encode_change(priv);
 1077         ret = changelog_start_next_change(this, priv, cld->cld_roll_time,
 1078                                           cld->cld_finale);
 1079         if (ret)
 1080             gf_smsg(this->name, GF_LOG_ERROR, 0,
 1081                     CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL);
 1082         goto out;
 1083     }
 1084 
 1085     /**
 1086      * case when there is reconfigure done (disabling changelog) and there
 1087      * are still fops that have updates in prgress.
 1088      */
 1089     if (priv->changelog_fd == -1)
 1090         return 0;
 1091 
 1092     if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) {
 1093         ret = sys_fsync(priv->changelog_fd);
 1094         if (ret < 0) {
 1095             gf_smsg(this->name, GF_LOG_ERROR, errno,
 1096                     CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
 1097         }
 1098         goto out;
 1099     }
 1100 
 1101     ret = priv->ce->encode(this, cld);
 1102     if (ret) {
 1103         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
 1104                 "changelog", NULL);
 1105     }
 1106 
 1107 out:
 1108     return ret;
 1109 }
 1110 
 1111 changelog_local_t *
 1112 changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
 1113                      int xtra_records, gf_boolean_t update_flag)
 1114 {
 1115     changelog_local_t *local = NULL;
 1116     struct iobuf *iobuf = NULL;
 1117 
 1118     /**
 1119      * We relax the presence of inode if @update_flag is true.
 1120      * The caller (implementation of the fop) needs to be careful to
 1121      * not blindly use local->inode.
 1122      */
 1123     if (!update_flag && !inode) {
 1124         gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
 1125                          CHANGELOG_MSG_INODE_NOT_FOUND,
 1126                          "inode needed for version checking !!!");
 1127 
 1128         goto out;
 1129     }
 1130 
 1131     if (xtra_records) {
 1132         iobuf = iobuf_get2(this->ctx->iobuf_pool,
 1133                            xtra_records * CHANGELOG_OPT_RECORD_LEN);
 1134         if (!iobuf)
 1135             goto out;
 1136     }
 1137 
 1138     local = mem_get0(this->local_pool);
 1139     if (!local) {
 1140         CHANGELOG_IOBUF_UNREF(iobuf);
 1141         goto out;
 1142     }
 1143 
 1144     local->update_no_check = update_flag;
 1145 
 1146     gf_uuid_copy(local->cld.cld_gfid, gfid);
 1147 
 1148     local->cld.cld_iobuf = iobuf;
 1149     local->cld.cld_xtra_records = 0; /* set by the caller */
 1150 
 1151     if (inode)
 1152         local->inode = inode_ref(inode);
 1153 
 1154 out:
 1155     return local;
 1156 }
 1157 
 1158 int
 1159 changelog_forget(xlator_t *this, inode_t *inode)
 1160 {
 1161     uint64_t ctx_addr = 0;
 1162     changelog_inode_ctx_t *ctx = NULL;
 1163 
 1164     inode_ctx_del(inode, this, &ctx_addr);
 1165     if (!ctx_addr)
 1166         return 0;
 1167 
 1168     ctx = (changelog_inode_ctx_t *)(long)ctx_addr;
 1169     GF_FREE(ctx);
 1170 
 1171     return 0;
 1172 }
 1173 
 1174 int
 1175 changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
 1176                               changelog_log_data_t *cld)
 1177 {
 1178     return priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld, NULL);
 1179 }
 1180 
 1181 /* Wait till all the black fops are drained */
 1182 void
 1183 changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
 1184 {
 1185     int ret = 0;
 1186 
 1187     /* clean up framework of pthread_mutex is required here as
 1188      * 'reconfigure' terminates the changelog_rollover thread
 1189      * on graph change.
 1190      */
 1191     pthread_cleanup_push(changelog_cleanup_free_mutex,
 1192                          &priv->dm.drain_black_mutex);
 1193     ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
 1194     if (ret)
 1195         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
 1196                 "error=%d", ret, NULL);
 1197     while (priv->dm.black_fop_cnt > 0) {
 1198         gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld",
 1199                      priv->dm.black_fop_cnt);
 1200         priv->dm.drain_wait_black = _gf_true;
 1201         ret = pthread_cond_wait(&priv->dm.drain_black_cond,
 1202                                 &priv->dm.drain_black_mutex);
 1203         if (ret)
 1204             gf_smsg(this->name, GF_LOG_ERROR, errno,
 1205                     CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
 1206                     NULL);
 1207     }
 1208     priv->dm.drain_wait_black = _gf_false;
 1209     ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
 1210     if (ret)
 1211         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
 1212                 "error=%d", ret, NULL);
 1213     pthread_cleanup_pop(0);
 1214     gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops");
 1215 }
 1216 
 1217 /* Wait till all the white  fops are drained */
 1218 void
 1219 changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
 1220 {
 1221     int ret = 0;
 1222 
 1223     /* clean up framework of pthread_mutex is required here as
 1224      * 'reconfigure' terminates the changelog_rollover thread
 1225      * on graph change.
 1226      */
 1227     pthread_cleanup_push(changelog_cleanup_free_mutex,
 1228                          &priv->dm.drain_white_mutex);
 1229     ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
 1230     if (ret)
 1231         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
 1232                 "error=%d", ret, NULL);
 1233     while (priv->dm.white_fop_cnt > 0) {
 1234         gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld",
 1235                      priv->dm.white_fop_cnt);
 1236         priv->dm.drain_wait_white = _gf_true;
 1237         ret = pthread_cond_wait(&priv->dm.drain_white_cond,
 1238                                 &priv->dm.drain_white_mutex);
 1239         if (ret)
 1240             gf_smsg(this->name, GF_LOG_ERROR, errno,
 1241                     CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
 1242                     NULL);
 1243     }
 1244     priv->dm.drain_wait_white = _gf_false;
 1245     ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
 1246     if (ret)
 1247         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
 1248                 "error=%d", ret, NULL);
 1249     pthread_cleanup_pop(0);
 1250     gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops");
 1251 }
 1252 
 1253 /**
 1254  * TODO: these threads have many thing in common (wake up after
 1255  * a certain time etc..). move them into separate routine.
 1256  */
 1257 void *
 1258 changelog_rollover(void *data)
 1259 {
 1260     int ret = 0;
 1261     xlator_t *this = NULL;
 1262     struct timespec tv = {
 1263         0,
 1264     };
 1265     changelog_log_data_t cld = {
 1266         0,
 1267     };
 1268     changelog_time_slice_t *slice = NULL;
 1269     changelog_priv_t *priv = data;
 1270 
 1271     this = priv->cr.this;
 1272     slice = &priv->slice;
 1273 
 1274     while (1) {
 1275         (void)pthread_testcancel();
 1276 
 1277         tv.tv_sec = time(NULL) + priv->rollover_time;
 1278         tv.tv_nsec = 0;
 1279         ret = 0; /* Reset ret to zero */
 1280 
 1281         /* The race between actual rollover and explicit rollover is
 1282          * handled. If actual rollover is being done and the
 1283          * explicit rollover event comes, the event is not missed.
 1284          * Since explicit rollover sets 'cr.notify' to true, this
 1285          * thread doesn't wait on 'pthread_cond_timedwait'.
 1286          */
 1287         pthread_cleanup_push(changelog_cleanup_free_mutex, &priv->cr.lock);
 1288         pthread_mutex_lock(&priv->cr.lock);
 1289         {
 1290             while (ret == 0 && !priv->cr.notify)
 1291                 ret = pthread_cond_timedwait(&priv->cr.cond, &priv->cr.lock,
 1292                                              &tv);
 1293             if (ret == 0)
 1294                 priv->cr.notify = _gf_false;
 1295         }
 1296         pthread_mutex_unlock(&priv->cr.lock);
 1297         pthread_cleanup_pop(0);
 1298 
 1299         if (ret == 0) {
 1300             gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
 1301                     NULL);
 1302             priv->explicit_rollover = _gf_true;
 1303         } else if (ret && ret != ETIMEDOUT) {
 1304             gf_smsg(this->name, GF_LOG_ERROR, errno,
 1305                     CHANGELOG_MSG_SELECT_FAILED, NULL);
 1306             continue;
 1307         } else if (ret && ret == ETIMEDOUT) {
 1308             gf_msg_debug(this->name, 0, "Wokeup on timeout");
 1309         }
 1310 
 1311         /* Reading curent_color without lock is fine here
 1312          * as it is only modified here and is next to reading.
 1313          */
 1314         if (priv->current_color == FOP_COLOR_BLACK) {
 1315             LOCK(&priv->lock);
 1316             priv->current_color = FOP_COLOR_WHITE;
 1317             UNLOCK(&priv->lock);
 1318             gf_msg_debug(this->name, 0,
 1319                          "Black fops"
 1320                          " to be drained:%ld",
 1321                          priv->dm.black_fop_cnt);
 1322             changelog_drain_black_fops(this, priv);
 1323         } else {
 1324             LOCK(&priv->lock);
 1325             priv->current_color = FOP_COLOR_BLACK;
 1326             UNLOCK(&priv->lock);
 1327             gf_msg_debug(this->name, 0,
 1328                          "White fops"
 1329                          " to be drained:%ld",
 1330                          priv->dm.white_fop_cnt);
 1331             changelog_drain_white_fops(this, priv);
 1332         }
 1333 
 1334         /* Adding delay of 1 second only during explicit rollover:
 1335          *
 1336          * Changelog rollover can happen either due to actual
 1337          * or the explicit rollover during snapshot. Actual
 1338          * rollover is controlled by tuneable called 'rollover-time'.
 1339          * The minimum granularity for rollover-time is 1 second.
 1340          * Explicit rollover is asynchronous in nature and happens
 1341          * during snapshot.
 1342          *
 1343          * Basically, rollover renames the current CHANGELOG file
 1344          * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1',
 1345          * actual and explicit rollover raced against  each
 1346          * other and actual rollover won the race renaming the
 1347          * CHANGELOG file to CHANGELOG.t1 and opens a new
 1348          * CHANGELOG file. There is high chance that, an immediate
 1349          * explicit rollover at time 't1' can happen with in the same
 1350          * second to rename CHANGELOG file to CHANGELOG.t1 resulting in
 1351          * purging the earlier CHANGELOG.t1 file created by actual
 1352          * rollover. So adding a delay of 1 second guarantees unique
 1353          * CHANGELOG.TIMESTAMP during  explicit rollover.
 1354          */
 1355         if (priv->explicit_rollover == _gf_true)
 1356             sleep(1);
 1357 
 1358         ret = changelog_fill_rollover_data(&cld, _gf_false);
 1359         if (ret) {
 1360             gf_smsg(this->name, GF_LOG_ERROR, 0,
 1361                     CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL);
 1362             continue;
 1363         }
 1364 
 1365         _mask_cancellation();
 1366 
 1367         LOCK(&priv->lock);
 1368         {
 1369             ret = changelog_inject_single_event(this, priv, &cld);
 1370             if (!ret)
 1371                 SLICE_VERSION_UPDATE(slice);
 1372         }
 1373         UNLOCK(&priv->lock);
 1374 
 1375         _unmask_cancellation();
 1376     }
 1377 
 1378     return NULL;
 1379 }
 1380 
 1381 void *
 1382 changelog_fsync_thread(void *data)
 1383 {
 1384     int ret = 0;
 1385     xlator_t *this = NULL;
 1386     struct timeval tv = {
 1387         0,
 1388     };
 1389     changelog_log_data_t cld = {
 1390         0,
 1391     };
 1392     changelog_priv_t *priv = data;
 1393 
 1394     this = priv->cf.this;
 1395     cld.cld_type = CHANGELOG_TYPE_FSYNC;
 1396 
 1397     while (1) {
 1398         (void)pthread_testcancel();
 1399 
 1400         tv.tv_sec = priv->fsync_interval;
 1401         tv.tv_usec = 0;
 1402 
 1403         ret = select(0, NULL, NULL, NULL, &tv);
 1404         if (ret)
 1405             continue;
 1406 
 1407         _mask_cancellation();
 1408 
 1409         ret = changelog_inject_single_event(this, priv, &cld);
 1410         if (ret)
 1411             gf_smsg(this->name, GF_LOG_ERROR, 0,
 1412                     CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL);
 1413 
 1414         _unmask_cancellation();
 1415     }
 1416 
 1417     return NULL;
 1418 }
 1419 
 1420 /* macros for inode/changelog version checks */
 1421 
 1422 #define INODE_VERSION_UPDATE(priv, inode, iver, slice, type)                   \
 1423     do {                                                                       \
 1424         LOCK(&inode->lock);                                                    \
 1425         {                                                                      \
 1426             LOCK(&priv->lock);                                                 \
 1427             {                                                                  \
 1428                 *iver = slice->changelog_version[type];                        \
 1429             }                                                                  \
 1430             UNLOCK(&priv->lock);                                               \
 1431         }                                                                      \
 1432         UNLOCK(&inode->lock);                                                  \
 1433     } while (0)
 1434 
 1435 #define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd)                \
 1436     do {                                                                       \
 1437         LOCK(&priv->lock);                                                     \
 1438         {                                                                      \
 1439             upd = (ver == slice->changelog_version[type]) ? _gf_false          \
 1440                                                           : _gf_true;          \
 1441         }                                                                      \
 1442         UNLOCK(&priv->lock);                                                   \
 1443     } while (0)
 1444 
 1445 static int
 1446 __changelog_inode_ctx_set(xlator_t *this, inode_t *inode,
 1447                           changelog_inode_ctx_t *ctx)
 1448 {
 1449     uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx;
 1450     return __inode_ctx_set(inode, this, &ctx_addr);
 1451 }
 1452 
 1453 /**
 1454  * one shot routine to get the address and the value of a inode version
 1455  * for a particular type.
 1456  */
 1457 changelog_inode_ctx_t *
 1458 __changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver,
 1459                           unsigned long *version, changelog_log_type type)
 1460 {
 1461     int ret = 0;
 1462     uint64_t ctx_addr = 0;
 1463     changelog_inode_ctx_t *ctx = NULL;
 1464 
 1465     ret = __inode_ctx_get(inode, this, &ctx_addr);
 1466     if (ret < 0)
 1467         ctx_addr = 0;
 1468     if (ctx_addr != 0) {
 1469         ctx = (changelog_inode_ctx_t *)(long)ctx_addr;
 1470         goto out;
 1471     }
 1472 
 1473     ctx = GF_CALLOC(1, sizeof(*ctx), gf_changelog_mt_inode_ctx_t);
 1474     if (!ctx)
 1475         goto out;
 1476 
 1477     ret = __changelog_inode_ctx_set(this, inode, ctx);
 1478     if (ret) {
 1479         GF_FREE(ctx);
 1480         ctx = NULL;
 1481     }
 1482 
 1483 out:
 1484     if (ctx && iver && version) {
 1485         *iver = CHANGELOG_INODE_VERSION_TYPE(ctx, type);
 1486         *version = **iver;
 1487     }
 1488 
 1489     return ctx;
 1490 }
 1491 
 1492 static changelog_inode_ctx_t *
 1493 changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver,
 1494                         unsigned long *version, changelog_log_type type)
 1495 {
 1496     changelog_inode_ctx_t *ctx = NULL;
 1497 
 1498     LOCK(&inode->lock);
 1499     {
 1500         ctx = __changelog_inode_ctx_get(this, inode, iver, version, type);
 1501     }
 1502     UNLOCK(&inode->lock);
 1503 
 1504     return ctx;
 1505 }
 1506 
 1507 /**
 1508  * This is the main update routine. Locking has been made granular so as to
 1509  * maximize parallelism of fops - I'll try to explain it below using execution
 1510  * timelines.
 1511  *
 1512  * Basically, the contention is between multiple execution threads of this
 1513  * routine and the roll-over thread. So, instead of having a big lock, we hold
 1514  * granular locks: inode->lock and priv->lock. Now I'll explain what happens
 1515  * when there is an update and a roll-over at just about the same time.
 1516  * NOTE:
 *  - the dispatcher itself synchronizes updates via its own lock
 *  - the slice version is incremented by the roll-over thread
 1519  *
 1520  * Case 1: When the rollover thread wins before the inode version can be
 1521  * compared with the slice version.
 1522  *
 1523  *          [updater]                 |             [rollover]
 1524  *                                    |
 1525  *                                    |           <SLICE: 1, 1, 1>
 1526  * <changelog_update>                 |
 1527  *   <changelog_inode_ctx_get>        |
 1528  *      <CTX: 1, 1, 1>                |
 1529  *                                    |         <dispatch-rollover-event>
 1530  *                                    |         LOCK (&priv->lock)
 1531  *                                    |            <SLICE_VERSION_UPDATE>
 1532  *                                    |              <SLICE: 2, 2, 2>
 1533  *                                    |         UNLOCK (&priv->lock)
 1534  *                                    |
 1535  * LOCK (&priv->lock)                 |
 1536  *   <INODE_VERSION_EQUALS_SLICE>     |
 1537  *    I: 1 <-> S: 2                   |
 1538  *    update: true                    |
 1539  * UNLOCK (&priv->lock)               |
 1540  *                                    |
 1541  * <if update == true>                |
 1542  *  <dispath-update-event>            |
 1543  *  <INODE_VERSION_UPDATE>            |
 1544  *   LOCK (&inode->lock)              |
 1545  *    LOCK (&priv->lock)              |
 1546  *     <CTX: 2, 1, 1>                 |
 1547  *    UNLOCK (&priv->lock)            |
 1548  *   UNLOCK (&inode->lock)            |
 1549  *
 1550  * Therefore, the change gets recorded in the next change (no lost change). If
 1551  * the slice version was ahead of the inode version (say I:1, S: 2), then
 1552  * anyway the comparison would result in a update (I: 1, S: 3).
 1553  *
 * If the rollover time is too short, there is another contention when the
 * updater tries to bring up inode version to the slice version (this is also
 * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE).
 1557  *
 1558  *   <CTX: 1, 1, 1>                   |       <SLICE: 2, 2, 2>
 1559  *                                    |
 1560  *                                    |
 1561  * <dispath-update-event>             |
 1562  * <INODE_VERSION_UPDATE>             |
 1563  *  LOCK (&inode->lock)               |
 1564  *   LOCK (&priv->lock)               |
 1565  *    <CTX: 2, 1, 1>                  |
 1566  *   UNLOCK (&priv->lock)             |
 1567  *  UNLOCK (&inode->lock)             |
 1568  *                                    |         <dispatch-rollover-event>
 1569  *                                    |         LOCK (&priv->lock)
 1570  *                                    |            <SLICE_VERSION_UPDATE>
 1571  *                                    |              <SLICE: 3, 3, 3>
 1572  *                                    |         UNLOCK (&priv->lock)
 1573  *
 1574  *
 1575  * Case 2: When the fop thread wins
 1576  *
 1577  *          [updater]                 |             [rollover]
 1578  *                                    |
 1579  *                                    |           <SLICE: 1, 1, 1>
 1580  * <changelog_update>                 |
 1581  *   <changelog_inode_ctx_get>        |
 1582  *      <CTX: 0, 0, 0>                |
 1583  *                                    |
 1584  * LOCK (&priv->lock)                 |
 1585  *   <INODE_VERSION_EQUALS_SLICE>     |
 1586  *    I: 0 <-> S: 1                   |
 1587  *    update: true                    |
 1588  * UNLOCK (&priv->lock)               |
 1589  *                                    |         <dispatch-rollover-event>
 1590  *                                    |         LOCK (&priv->lock)
 1591  *                                    |            <SLICE_VERSION_UPDATE>
 1592  *                                    |              <SLICE: 2, 2, 2>
 1593  *                                    |         UNLOCK (&priv->lock)
 1594  * <if update == true>                |
 1595  *  <dispath-update-event>            |
 1596  *  <INODE_VERSION_UPDATE>            |
 1597  *   LOCK (&inode->lock)              |
 1598  *    LOCK (&priv->lock)              |
 1599  *     <CTX: 2, 0, 0>                 |
 1600  *    UNLOCK (&priv->lock)            |
 1601  *   UNLOCK (&inode->lock)            |
 1602  *
 1603  * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
 1604  * then there is no need to record an update (as the equality of the two version
 1605  * signifies an update was recorded in the current time slice).
 1606  */
 1607 void
 1608 changelog_update(xlator_t *this, changelog_priv_t *priv,
 1609                  changelog_local_t *local, changelog_log_type type)
 1610 {
 1611     int ret = 0;
 1612     unsigned long *iver = NULL;
 1613     unsigned long version = 0;
 1614     inode_t *inode = NULL;
 1615     changelog_time_slice_t *slice = NULL;
 1616     changelog_inode_ctx_t *ctx = NULL;
 1617     changelog_log_data_t *cld_0 = NULL;
 1618     changelog_log_data_t *cld_1 = NULL;
 1619     changelog_local_t *next_local = NULL;
 1620     gf_boolean_t need_upd = _gf_true;
 1621 
 1622     slice = &priv->slice;
 1623 
 1624     /**
 1625      * for fops that do not require inode version checking
 1626      */
 1627     if (local->update_no_check)
 1628         goto update;
 1629 
 1630     inode = local->inode;
 1631 
 1632     ctx = changelog_inode_ctx_get(this, inode, &iver, &version, type);
 1633     if (!ctx)
 1634         goto update;
 1635 
 1636     INODE_VERSION_EQUALS_SLICE(priv, version, slice, type, need_upd);
 1637 
 1638 update:
 1639     if (need_upd) {
 1640         cld_0 = &local->cld;
 1641         cld_0->cld_type = type;
 1642 
 1643         if ((next_local = local->prev_entry) != NULL) {
 1644             cld_1 = &next_local->cld;
 1645             cld_1->cld_type = type;
 1646         }
 1647 
 1648         ret = priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld_0, cld_1);
 1649 
 1650         /**
 1651          * update after the dispatcher has successfully done
 1652          * it's job.
 1653          */
 1654         if (!local->update_no_check && iver && !ret)
 1655             INODE_VERSION_UPDATE(priv, inode, iver, slice, type);
 1656     }
 1657 
 1658     return;
 1659 }
 1660 
 1661 /* Begin: Geo-rep snapshot dependency changes */
 1662 
 1663 /* changelog_color_fop_and_inc_cnt: Assign color and inc fop cnt.
 1664  *
 1665  * Assigning color and increment of corresponding fop count should happen
 1666  * in a lock (i.e., there should be no window between them). If it does not,
 1667  * we might miss draining those fops which are colored but not yet incremented
 1668  * the count. Let's assume black fops are draining. If the black fop count
 1669  * reaches zero, we say draining is completed but we miss black fops which are
 1670  * not incremented fop count but color is assigned black.
 1671  */
 1672 
 1673 void
 1674 changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv,
 1675                                 changelog_local_t *local)
 1676 {
 1677     if (!priv || !local)
 1678         return;
 1679 
 1680     LOCK(&priv->lock);
 1681     {
 1682         local->color = priv->current_color;
 1683         changelog_inc_fop_cnt(this, priv, local);
 1684     }
 1685     UNLOCK(&priv->lock);
 1686 }
 1687 
 1688 /* Increments the respective fop counter based on the fop color */
 1689 void
 1690 changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv,
 1691                       changelog_local_t *local)
 1692 {
 1693     int ret = 0;
 1694 
 1695     if (local) {
 1696         if (local->color == FOP_COLOR_BLACK) {
 1697             ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
 1698             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1699             {
 1700                 priv->dm.black_fop_cnt++;
 1701             }
 1702             ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
 1703             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1704         } else {
 1705             ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
 1706             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1707             {
 1708                 priv->dm.white_fop_cnt++;
 1709             }
 1710             ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
 1711             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1712         }
 1713     }
 1714 out:
 1715     return;
 1716 }
 1717 
 1718 /* Decrements the respective fop counter based on the fop color */
 1719 void
 1720 changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv,
 1721                       changelog_local_t *local)
 1722 {
 1723     int ret = 0;
 1724 
 1725     if (local) {
 1726         if (local->color == FOP_COLOR_BLACK) {
 1727             ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
 1728             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1729             {
 1730                 priv->dm.black_fop_cnt--;
 1731                 if (priv->dm.black_fop_cnt == 0 &&
 1732                     priv->dm.drain_wait_black == _gf_true) {
 1733                     ret = pthread_cond_signal(&priv->dm.drain_black_cond);
 1734                     CHANGELOG_PTHREAD_ERROR_HANDLE_2(
 1735                         ret, out, priv->dm.drain_black_mutex);
 1736                     gf_msg_debug(this->name, 0,
 1737                                  "Signalled "
 1738                                  "draining of black");
 1739                 }
 1740             }
 1741             ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
 1742             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1743         } else {
 1744             ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
 1745             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1746             {
 1747                 priv->dm.white_fop_cnt--;
 1748                 if (priv->dm.white_fop_cnt == 0 &&
 1749                     priv->dm.drain_wait_white == _gf_true) {
 1750                     ret = pthread_cond_signal(&priv->dm.drain_white_cond);
 1751                     CHANGELOG_PTHREAD_ERROR_HANDLE_2(
 1752                         ret, out, priv->dm.drain_white_mutex);
 1753                     gf_msg_debug(this->name, 0,
 1754                                  "Signalled "
 1755                                  "draining of white");
 1756                 }
 1757             }
 1758             ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
 1759             CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1760         }
 1761     }
 1762 out:
 1763     return;
 1764 }
 1765 
 1766 /* Write to a pipe setup between changelog main thread and changelog
 1767  * rollover thread to initiate explicit rollover of changelog journal.
 1768  */
 1769 int
 1770 changelog_barrier_notify(changelog_priv_t *priv, char *buf)
 1771 {
 1772     int ret = 0;
 1773 
 1774     pthread_mutex_lock(&priv->cr.lock);
 1775     {
 1776         ret = pthread_cond_signal(&priv->cr.cond);
 1777         priv->cr.notify = _gf_true;
 1778     }
 1779     pthread_mutex_unlock(&priv->cr.lock);
 1780     return ret;
 1781 }
 1782 
 1783 /* Clean up flags set on barrier notification */
 1784 void
 1785 changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv,
 1786                           struct list_head *queue)
 1787 {
 1788     int ret = 0;
 1789 
 1790     LOCK(&priv->bflags.lock);
 1791     priv->bflags.barrier_ext = _gf_false;
 1792     UNLOCK(&priv->bflags.lock);
 1793 
 1794     ret = pthread_mutex_lock(&priv->bn.bnotify_mutex);
 1795     CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1796     {
 1797         priv->bn.bnotify = _gf_false;
 1798     }
 1799     ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
 1800     CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
 1801 
 1802     /* Disable changelog barrier and dequeue fops */
 1803     LOCK(&priv->lock);
 1804     {
 1805         if (priv->barrier_enabled == _gf_true)
 1806             __chlog_barrier_disable(this, queue);
 1807         else
 1808             ret = -1;
 1809     }
 1810     UNLOCK(&priv->lock);
 1811     if (ret == 0)
 1812         chlog_barrier_dequeue_all(this, queue);
 1813 
 1814 out:
 1815     return;
 1816 }
 1817 /* End: Geo-Rep snapshot dependency changes */
 1818 
 1819 int32_t
 1820 changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc,
 1821                          changelog_local_t **local)
 1822 {
 1823     changelog_opt_t *co = NULL;
 1824     size_t xtra_len = 0;
 1825     char *dup_path = NULL;
 1826     char *bname = NULL;
 1827     inode_t *parent = NULL;
 1828 
 1829     GF_ASSERT(this);
 1830 
 1831     parent = inode_parent(loc->inode, 0, 0);
 1832     if (!parent) {
 1833         gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND,
 1834                 "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
 1835         goto err;
 1836     }
 1837 
 1838     CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5);
 1839     if (!(*local)) {
 1840         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
 1841                 NULL);
 1842         goto err;
 1843     }
 1844 
 1845     co = changelog_get_usable_buffer(*local);
 1846     if (!co) {
 1847         gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED,
 1848                 NULL);
 1849         goto err;
 1850     }
 1851 
 1852     if (loc->inode->ia_type == IA_IFDIR) {
 1853         CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_MKDIR, fop_fn, xtra_len);
 1854         co++;
 1855         CHANGELOG_FILL_UINT32(co, S_IFDIR | 0755, number_fn, xtra_len);
 1856         co++;
 1857     } else {
 1858         CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_CREATE, fop_fn, xtra_len);
 1859         co++;
 1860         CHANGELOG_FILL_UINT32(co, S_IFREG | 0644, number_fn, xtra_len);
 1861         co++;
 1862     }
 1863 
 1864     CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len);
 1865     co++;
 1866 
 1867     CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len);
 1868     co++;
 1869 
 1870     dup_path = gf_strdup(loc->path);
 1871     bname = basename(dup_path);
 1872 
 1873     CHANGELOG_FILL_ENTRY(co, parent->gfid, bname, entry_fn, entry_free_fn,
 1874                          xtra_len, err);
 1875     changelog_set_usable_record_and_length(*local, xtra_len, 5);
 1876 
 1877     if (dup_path)
 1878         GF_FREE(dup_path);
 1879     if (parent)
 1880         inode_unref(parent);
 1881     return 0;
 1882 
 1883 err:
 1884     if (dup_path)
 1885         GF_FREE(dup_path);
 1886     if (parent)
 1887         inode_unref(parent);
 1888     return -1;
 1889 }
 1890 
 1891 /*
 1892  * resolve_pargfid_to_path:
 1893  *      It converts given pargfid to path by doing recursive readlinks at the
 1894  * backend. If bname is given, it suffixes bname to pargfid to form the
 1895  * complete path else it doesn't. It allocates memory for the path and is
 1896  * caller's responsibility to free the same. If bname is NULL and pargfid
 1897  * is ROOT, then it returns "."
 1898  */
 1899 
 1900 int
 1901 resolve_pargfid_to_path(xlator_t *this, const uuid_t pgfid, char **path,
 1902                         char *bname)
 1903 {
 1904     char *linkname = NULL;
 1905     char *dir_handle = NULL;
 1906     char *pgfidstr = NULL;
 1907     char *saveptr = NULL;
 1908     ssize_t len = 0;
 1909     int ret = 0;
 1910     uuid_t tmp_gfid = {
 1911         0,
 1912     };
 1913     uuid_t pargfid = {
 1914         0,
 1915     };
 1916     changelog_priv_t *priv = NULL;
 1917     char gpath[PATH_MAX] = {
 1918         0,
 1919     };
 1920     char result[PATH_MAX] = {
 1921         0,
 1922     };
 1923     char *dir_name = NULL;
 1924     char pre_dir_name[PATH_MAX] = {
 1925         0,
 1926     };
 1927 
 1928     GF_ASSERT(this);
 1929     priv = this->private;
 1930     GF_ASSERT(priv);
 1931 
 1932     gf_uuid_copy(pargfid, pgfid);
 1933     if (!path || gf_uuid_is_null(pargfid)) {
 1934         ret = -1;
 1935         goto out;
 1936     }
 1937 
 1938     if (__is_root_gfid(pargfid)) {
 1939         if (bname)
 1940             *path = gf_strdup(bname);
 1941         else
 1942             *path = gf_strdup(".");
 1943         return ret;
 1944     }
 1945 
 1946     dir_handle = alloca(PATH_MAX);
 1947     linkname = alloca(PATH_MAX);
 1948     (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", priv->changelog_brick);
 1949 
 1950     while (!(__is_root_gfid(pargfid))) {
 1951         len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath,
 1952                        pargfid[0], pargfid[1], uuid_utoa(pargfid));
 1953         if ((len < 0) || (len >= PATH_MAX)) {
 1954             ret = -1;
 1955             goto out;
 1956         }
 1957 
 1958         len = sys_readlink(dir_handle, linkname, PATH_MAX);
 1959         if (len < 0) {
 1960             gf_smsg(this->name, GF_LOG_ERROR, errno,
 1961                     CHANGELOG_MSG_READLINK_OP_FAILED,
 1962                     "could not read the "
 1963                     "link from the gfid handle",
 1964                     "handle=%s", dir_handle, NULL);
 1965             ret = -1;
 1966             goto out;
 1967         }
 1968 
 1969         linkname[len] = '\0';
 1970 
 1971         pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr);
 1972         dir_name = strtok_r(NULL, "/", &saveptr);
 1973 
 1974         len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name);
 1975         if ((len < 0) || (len >= PATH_MAX)) {
 1976             ret = -1;
 1977             goto out;
 1978         }
 1979         if (snprintf(pre_dir_name, len + 1, "%s", result) >= len + 1) {
 1980             ret = -1;
 1981             goto out;
 1982         }
 1983 
 1984         gf_uuid_parse(pgfidstr, tmp_gfid);
 1985         gf_uuid_copy(pargfid, tmp_gfid);
 1986     }
 1987 
 1988     if (bname)
 1989         strncat(result, bname, strlen(bname) + 1);
 1990 
 1991     *path = gf_strdup(result);
 1992 
 1993 out:
 1994     return ret;
 1995 }