"Fossies" - the Fresh Open Source Software Archive

Member "glusterfs-6.9/xlators/cluster/afr/src/afr.c" (23 Apr 2020, 43660 Bytes) of package /linux/misc/glusterfs-6.9.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "afr.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 7.1_vs_7.2.

    1 /*
    2   Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
    3   This file is part of GlusterFS.
    4 
    5   This file is licensed to you under your choice of the GNU Lesser
    6   General Public License, version 3 or any later version (LGPLv3 or
    7   later), or the GNU General Public License, version 2 (GPLv2), in all
    8   cases as published by the Free Software Foundation.
    9 */
   10 
   11 #include <libgen.h>
   12 #include <unistd.h>
   13 #include <fnmatch.h>
   14 #include <sys/time.h>
   15 #include <stdlib.h>
   16 #include <signal.h>
   17 
   18 #include "afr-common.c"
   19 #include "afr-messages.h"
   20 
   21 struct volume_options options[];
   22 
   23 static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
   24     [AFR_FAV_CHILD_NONE] = "none",
   25     [AFR_FAV_CHILD_BY_SIZE] = "size",
   26     [AFR_FAV_CHILD_BY_CTIME] = "ctime",
   27     [AFR_FAV_CHILD_BY_MTIME] = "mtime",
   28     [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
   29     [AFR_FAV_CHILD_POLICY_MAX] = NULL,
   30 };
   31 
   32 int32_t
   33 notify(xlator_t *this, int32_t event, void *data, ...)
   34 {
   35     int ret = -1;
   36     va_list ap;
   37     void *data2 = NULL;
   38 
   39     va_start(ap, data);
   40     data2 = va_arg(ap, dict_t *);
   41     va_end(ap);
   42     ret = afr_notify(this, event, data, data2);
   43 
   44     return ret;
   45 }
   46 
   47 int32_t
   48 mem_acct_init(xlator_t *this)
   49 {
   50     int ret = -1;
   51 
   52     if (!this)
   53         return ret;
   54 
   55     ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1);
   56 
   57     if (ret != 0) {
   58         return ret;
   59     }
   60 
   61     return ret;
   62 }
   63 
   64 int
   65 xlator_subvolume_index(xlator_t *this, xlator_t *subvol)
   66 {
   67     int index = -1;
   68     int i = 0;
   69     xlator_list_t *list = NULL;
   70 
   71     list = this->children;
   72 
   73     while (list) {
   74         if (subvol == list->xlator ||
   75             strcmp(subvol->name, list->xlator->name) == 0) {
   76             index = i;
   77             break;
   78         }
   79         list = list->next;
   80         i++;
   81     }
   82 
   83     return index;
   84 }
   85 
   86 static void
   87 fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
   88                    dict_t *options)
   89 {
   90     if (dict_get_sizen(options, "quorum-type") == NULL) {
   91         /* If user doesn't configure anything enable auto-quorum if the
   92          * replica has more than two subvolumes */
   93         if (priv->child_count > 2)
   94             qtype = "auto";
   95     }
   96 
   97     if (priv->quorum_count && strcmp(qtype, "fixed")) {
   98         gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
   99                "quorum-type %s overriding quorum-count %u", qtype,
  100                priv->quorum_count);
  101     }
  102 
  103     if (!strcmp(qtype, "none")) {
  104         priv->quorum_count = 0;
  105     } else if (!strcmp(qtype, "auto")) {
  106         priv->quorum_count = AFR_QUORUM_AUTO;
  107     }
  108 }
  109 
  110 int
  111 afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
  112 {
  113     int index = -1;
  114 
  115     index = gf_get_index_by_elem(afr_favorite_child_policies, policy);
  116     if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
  117         return -1;
  118 
  119     priv->fav_child_policy = index;
  120 
  121     return 0;
  122 }
  123 
  124 static void
  125 set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
  126 {
  127     if (!algo) {
  128         priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
  129     } else if (strcmp(algo, "full") == 0) {
  130         priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
  131     } else if (strcmp(algo, "diff") == 0) {
  132         priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
  133     } else {
  134         priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
  135     }
  136 }
  137 
  138 int
  139 reconfigure(xlator_t *this, dict_t *options)
  140 {
  141     afr_private_t *priv = NULL;
  142     xlator_t *read_subvol = NULL;
  143     int read_subvol_index = -1;
  144     int timeout_old = 0;
  145     int ret = -1;
  146     int index = -1;
  147     char *qtype = NULL;
  148     char *fav_child_policy = NULL;
  149     char *data_self_heal = NULL;
  150     char *data_self_heal_algorithm = NULL;
  151     char *locking_scheme = NULL;
  152     gf_boolean_t consistent_io = _gf_false;
  153     gf_boolean_t choose_local_old = _gf_false;
  154     gf_boolean_t enabled_old = _gf_false;
  155 
  156     priv = this->private;
  157 
  158     GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
  159                      priv->metadata_splitbrain_forced_heal, options, bool, out);
  160 
  161     GF_OPTION_RECONF("background-self-heal-count",
  162                      priv->background_self_heal_count, options, uint32, out);
  163 
  164     GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options,
  165                      uint32, out);
  166 
  167     GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
  168                      bool, out);
  169 
  170     GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
  171     gf_string2boolean(data_self_heal, &priv->data_self_heal);
  172 
  173     GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
  174                      out);
  175 
  176     GF_OPTION_RECONF("data-self-heal-window-size",
  177                      priv->data_self_heal_window_size, options, uint32, out);
  178 
  179     GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
  180                      options, str, out);
  181     set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
  182 
  183     GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
  184 
  185     GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
  186                      options, uint32, out);
  187 
  188     GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
  189                      options, uint32, out);
  190 
  191     GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options,
  192                      uint32, out);
  193 
  194     GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options,
  195                      uint32, out);
  196 
  197     GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options,
  198                      uint32, out);
  199 
  200     GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out);
  201 
  202     choose_local_old = priv->choose_local;
  203     GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out);
  204 
  205     if (choose_local_old != priv->choose_local) {
  206         priv->read_child = -1;
  207         if (choose_local_old == _gf_false)
  208             priv->did_discovery = _gf_false;
  209     }
  210 
  211     GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out);
  212 
  213     if (read_subvol) {
  214         index = xlator_subvolume_index(this, read_subvol);
  215         if (index == -1) {
  216             gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
  217                    "%s not a subvolume", read_subvol->name);
  218             goto out;
  219         }
  220         priv->read_child = index;
  221     }
  222 
  223     GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32,
  224                      out);
  225 
  226     if (read_subvol_index > -1) {
  227         index = read_subvol_index;
  228         if (index >= priv->child_count) {
  229             gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
  230                    "%d not a subvolume-index", index);
  231             goto out;
  232         }
  233         priv->read_child = index;
  234     }
  235 
  236     GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
  237     GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
  238     priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
  239     GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
  240     GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
  241                      out);
  242 
  243     GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
  244     GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
  245     GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
  246     fix_quorum_options(this, priv, qtype, options);
  247     if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
  248         gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
  249                "Client-quorum is not met");
  250 
  251     GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options,
  252                      uint32, out);
  253 
  254     GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options,
  255                      size_uint64, out);
  256     /* Reset this so we re-discover in case the topology changed.  */
  257     GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
  258                      bool, out);
  259 
  260     enabled_old = priv->shd.enabled;
  261     GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
  262 
  263     GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
  264                      out);
  265 
  266     timeout_old = priv->shd.timeout;
  267     GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
  268 
  269     GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
  270                      bool, out);
  271 
  272     GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32,
  273                      out);
  274 
  275     GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options,
  276                      uint32, out);
  277 
  278     GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str,
  279                      out);
  280     if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
  281         goto out;
  282 
  283     priv->did_discovery = _gf_false;
  284 
  285     GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out);
  286     if (priv->quorum_count != 0)
  287         consistent_io = _gf_false;
  288     priv->consistent_io = consistent_io;
  289 
  290     if (priv->shd.enabled) {
  291         if ((priv->shd.enabled != enabled_old) ||
  292             (timeout_old != priv->shd.timeout))
  293             afr_selfheal_childup(this, priv);
  294     }
  295 
  296     ret = 0;
  297 out:
  298     return ret;
  299 }
  300 
  301 static int
  302 afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this)
  303 {
  304     int ret = -1;
  305     int i = 0;
  306     char *ptr = NULL;
  307     char *ptr1 = NULL;
  308     char *xattrs_list = NULL;
  309     xlator_list_t *trav = NULL;
  310     int child_count = -1;
  311 
  312     trav = this->children;
  313     child_count = priv->child_count;
  314     if (priv->thin_arbiter_count) {
  315         /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
  316          * name of the thin arbiter file for persistence across add/
  317          * removal of DHT subvols.*/
  318         child_count++;
  319     }
  320 
  321     GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out);
  322     priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count,
  323                                   gf_afr_mt_char);
  324     if (!priv->pending_key) {
  325         ret = -ENOMEM;
  326         goto out;
  327     }
  328     if (!xattrs_list) {
  329         gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
  330                "Unable to fetch afr-pending-xattr option from volfile."
  331                " Falling back to using client translator names. ");
  332 
  333         while (i < child_count) {
  334             ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
  335                               trav->xlator->name);
  336             if (ret == -1) {
  337                 ret = -ENOMEM;
  338                 goto out;
  339             }
  340             trav = trav->next;
  341             i++;
  342         }
  343         ret = 0;
  344         goto out;
  345     }
  346 
  347     ptr = ptr1 = gf_strdup(xattrs_list);
  348     if (!ptr) {
  349         ret = -ENOMEM;
  350         goto out;
  351     }
  352     for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) {
  353         ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
  354                           ptr);
  355         if (ret == -1) {
  356             ret = -ENOMEM;
  357             goto out;
  358         }
  359         i++;
  360     }
  361     ret = 0;
  362 
  363 out:
  364     GF_FREE(ptr1);
  365     return ret;
  366 }
  367 
  368 void
  369 afr_ta_init(afr_private_t *priv)
  370 {
  371     priv->thin_arbiter_count = 1;
  372     priv->child_count--;
  373     priv->ta_child_up = 0;
  374     priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
  375     priv->ta_notify_dom_lock_offset = 0;
  376     priv->ta_in_mem_txn_count = 0;
  377     priv->ta_on_wire_txn_count = 0;
  378     priv->release_ta_notify_dom_lock = _gf_false;
  379     INIT_LIST_HEAD(&priv->ta_waitq);
  380     INIT_LIST_HEAD(&priv->ta_onwireq);
  381     gf_uuid_clear(priv->ta_gfid);
  382 }
  383 
  384 int32_t
  385 init(xlator_t *this)
  386 {
  387     afr_private_t *priv = NULL;
  388     int child_count = 0;
  389     xlator_list_t *trav = NULL;
  390     int i = 0;
  391     int ret = -1;
  392     GF_UNUSED int op_errno = 0;
  393     xlator_t *read_subvol = NULL;
  394     int read_subvol_index = -1;
  395     char *qtype = NULL;
  396     char *fav_child_policy = NULL;
  397     char *thin_arbiter = NULL;
  398     char *data_self_heal = NULL;
  399     char *locking_scheme = NULL;
  400     char *data_self_heal_algorithm = NULL;
  401 
  402     if (!this->children) {
  403         gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
  404                "replicate translator needs more than one "
  405                "subvolume defined.");
  406         return -1;
  407     }
  408 
  409     if (!this->parents) {
  410         gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED,
  411                "Volume is dangling.");
  412     }
  413 
  414     this->private = GF_CALLOC(1, sizeof(afr_private_t),
  415                               gf_afr_mt_afr_private_t);
  416     if (!this->private)
  417         goto out;
  418 
  419     priv = this->private;
  420     LOCK_INIT(&priv->lock);
  421 
  422     child_count = xlator_subvolume_count(this);
  423 
  424     priv->child_count = child_count;
  425 
  426     priv->read_child = -1;
  427 
  428     GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
  429     GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
  430     if (thin_arbiter && strlen(thin_arbiter) > 0) {
  431         afr_ta_init(priv);
  432     }
  433     INIT_LIST_HEAD(&priv->healing);
  434     INIT_LIST_HEAD(&priv->heal_waiting);
  435 
  436     priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
  437 
  438     GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out);
  439 
  440     GF_OPTION_INIT("metadata-splitbrain-forced-heal",
  441                    priv->metadata_splitbrain_forced_heal, bool, out);
  442 
  443     GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out);
  444     if (read_subvol) {
  445         priv->read_child = xlator_subvolume_index(this, read_subvol);
  446         if (priv->read_child == -1) {
  447             gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
  448                    "%s not a subvolume", read_subvol->name);
  449             goto out;
  450         }
  451     }
  452     GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out);
  453     if (read_subvol_index > -1) {
  454         if (read_subvol_index >= priv->child_count) {
  455             gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
  456                    "%d not a subvolume-index", read_subvol_index);
  457             goto out;
  458         }
  459         priv->read_child = read_subvol_index;
  460     }
  461     GF_OPTION_INIT("choose-local", priv->choose_local, bool, out);
  462 
  463     priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads),
  464                                     priv->child_count, gf_afr_mt_atomic_t);
  465 
  466     GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out);
  467 
  468     priv->favorite_child = -1;
  469 
  470     GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out);
  471     if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
  472         goto out;
  473 
  474     GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out);
  475 
  476     GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out);
  477 
  478     GF_OPTION_INIT("background-self-heal-count",
  479                    priv->background_self_heal_count, uint32, out);
  480 
  481     GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
  482 
  483     GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
  484     gf_string2boolean(data_self_heal, &priv->data_self_heal);
  485 
  486     GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
  487                    out);
  488     set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
  489 
  490     GF_OPTION_INIT("data-self-heal-window-size",
  491                    priv->data_self_heal_window_size, uint32, out);
  492 
  493     GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out);
  494 
  495     GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out);
  496 
  497     GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
  498                    uint32, out);
  499 
  500     GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32,
  501                    out);
  502     GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out);
  503     GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out);
  504 
  505     GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out);
  506 
  507     GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
  508                    uint32, out);
  509 
  510     GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
  511 
  512     GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool,
  513                    out);
  514 
  515     GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
  516     GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
  517     priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
  518     GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
  519     GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
  520 
  521     GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out);
  522     GF_OPTION_INIT("quorum-type", qtype, str, out);
  523     GF_OPTION_INIT("quorum-count", priv->quorum_count, uint32, out);
  524     GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
  525                    out);
  526     fix_quorum_options(this, priv, qtype, this->options);
  527 
  528     GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
  529     GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out);
  530 
  531     GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out);
  532 
  533     GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
  534     GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out);
  535 
  536     GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
  537     GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
  538 
  539     if (priv->quorum_count != 0)
  540         priv->consistent_io = _gf_false;
  541 
  542     priv->wait_count = 1;
  543 
  544     priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char);
  545     if (!priv->local) {
  546         ret = -ENOMEM;
  547         goto out;
  548     }
  549 
  550     priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
  551                                gf_afr_mt_char);
  552 
  553     priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
  554                                     gf_afr_mt_child_latency_t);
  555 
  556     if (!priv->child_up || !priv->child_latency) {
  557         ret = -ENOMEM;
  558         goto out;
  559     }
  560     /*Initialize to -ve ping timeout so that they are not considered
  561      * in child-up events until ping-event comes*/
  562     for (i = 0; i < child_count; i++)
  563         priv->child_latency[i] = -1;
  564 
  565     priv->children = GF_CALLOC(sizeof(xlator_t *), child_count,
  566                                gf_afr_mt_xlator_t);
  567     if (!priv->children) {
  568         ret = -ENOMEM;
  569         goto out;
  570     }
  571 
  572     ret = afr_pending_xattrs_init(priv, this);
  573     if (ret)
  574         goto out;
  575 
  576     trav = this->children;
  577     i = 0;
  578     while (i < child_count) {
  579         priv->children[i] = trav->xlator;
  580         trav = trav->next;
  581         i++;
  582     }
  583 
  584     ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name);
  585     if (-1 == ret) {
  586         ret = -ENOMEM;
  587         goto out;
  588     }
  589 
  590     priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event),
  591                                  gf_afr_mt_int32_t);
  592     if (!priv->last_event) {
  593         ret = -ENOMEM;
  594         goto out;
  595     }
  596 
  597     this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
  598     if (!this->itable) {
  599         ret = -ENOMEM;
  600         goto out;
  601     }
  602 
  603     if (priv->shd.iamshd) {
  604         ret = afr_selfheal_daemon_init(this);
  605         if (ret) {
  606             ret = -ENOMEM;
  607             goto out;
  608         }
  609     }
  610 
  611     /* keep more local here as we may need them for self-heal etc */
  612     this->local_pool = mem_pool_new(afr_local_t, 512);
  613     if (!this->local_pool) {
  614         ret = -1;
  615         goto out;
  616     }
  617 
  618     priv->root_inode = NULL;
  619 
  620     ret = 0;
  621 out:
  622     return ret;
  623 }
  624 
  625 void
  626 fini(xlator_t *this)
  627 {
  628     afr_private_t *priv = NULL;
  629 
  630     priv = this->private;
  631     LOCK(&priv->lock);
  632     if (priv->timer != NULL) {
  633         gf_timer_call_cancel(this->ctx, priv->timer);
  634         priv->timer = NULL;
  635     }
  636     UNLOCK(&priv->lock);
  637     this->private = NULL;
  638     afr_priv_destroy(priv);
  639     if (this->itable) {
  640         inode_table_destroy(this->itable);
  641         this->itable = NULL;
  642     }
  643 
  644     return;
  645 }
  646 
  647 struct xlator_fops fops = {
  648     .lookup = afr_lookup,
  649     .lk = afr_lk,
  650     .flush = afr_flush,
  651     .statfs = afr_statfs,
  652     .fsyncdir = afr_fsyncdir,
  653     .inodelk = afr_inodelk,
  654     .finodelk = afr_finodelk,
  655     .entrylk = afr_entrylk,
  656     .fentrylk = afr_fentrylk,
  657     .ipc = afr_ipc,
  658     .lease = afr_lease,
  659 
  660     /* inode read */
  661     .access = afr_access,
  662     .stat = afr_stat,
  663     .fstat = afr_fstat,
  664     .readlink = afr_readlink,
  665     .getxattr = afr_getxattr,
  666     .fgetxattr = afr_fgetxattr,
  667     .readv = afr_readv,
  668 
  669     /* inode write */
  670     .writev = afr_writev,
  671     .truncate = afr_truncate,
  672     .ftruncate = afr_ftruncate,
  673     .setxattr = afr_setxattr,
  674     .fsetxattr = afr_fsetxattr,
  675     .setattr = afr_setattr,
  676     .fsetattr = afr_fsetattr,
  677     .removexattr = afr_removexattr,
  678     .fremovexattr = afr_fremovexattr,
  679     .fallocate = afr_fallocate,
  680     .discard = afr_discard,
  681     .zerofill = afr_zerofill,
  682     .xattrop = afr_xattrop,
  683     .fxattrop = afr_fxattrop,
  684     .fsync = afr_fsync,
  685 
  686     /*inode open*/
  687     .opendir = afr_opendir,
  688     .open = afr_open,
  689 
  690     /* dir read */
  691     .readdir = afr_readdir,
  692     .readdirp = afr_readdirp,
  693 
  694     /* dir write */
  695     .create = afr_create,
  696     .mknod = afr_mknod,
  697     .mkdir = afr_mkdir,
  698     .unlink = afr_unlink,
  699     .rmdir = afr_rmdir,
  700     .link = afr_link,
  701     .symlink = afr_symlink,
  702     .rename = afr_rename,
  703 };
  704 
  705 struct xlator_dumpops dumpops = {
  706     .priv = afr_priv_dump,
  707 };
  708 
  709 struct xlator_cbks cbks = {
  710     .release = afr_release,
  711     .releasedir = afr_releasedir,
  712     .forget = afr_forget,
  713 };
  714 
  715 struct volume_options options[] = {
  716     {.key = {"read-subvolume"},
  717      .type = GF_OPTION_TYPE_XLATOR,
  718      .op_version = {1},
  719      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  720      .tags = {"replicate"},
  721      .description = "inode-read fops happen only on one of the bricks in "
  722                     "replicate. Afr will prefer the one specified using "
  723                     "this option if it is not stale. Option value must be "
  724                     "one of the xlator names of the children. "
  725                     "Ex: <volname>-client-0 till "
  726                     "<volname>-client-<number-of-bricks - 1>"},
  727     {.key = {"read-subvolume-index"},
  728      .type = GF_OPTION_TYPE_INT,
  729      .default_value = "-1",
  730      .op_version = {2},
  731      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  732      .tags = {"replicate"},
  733      .description = "inode-read fops happen only on one of the bricks in "
  734                     "replicate. AFR will prefer the one specified using "
  735                     "this option if it is not stale. allowed options"
  736                     " include -1 till replica-count - 1"},
  737     {.key = {"read-hash-mode"},
  738      .type = GF_OPTION_TYPE_INT,
  739      .min = 0,
  740      .max = 3,
  741      .default_value = "1",
  742      .op_version = {2},
  743      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  744      .tags = {"replicate"},
  745      .description =
  746          "inode-read fops happen only on one of the bricks in "
  747          "replicate. AFR will prefer the one computed using "
  748          "the method specified using this option.\n"
  749          "0 = first readable child of AFR, starting from 1st child.\n"
  750          "1 = hash by GFID of file (all clients use "
  751          "same subvolume).\n"
  752          "2 = hash by GFID of file and client PID.\n"
  753          "3 = brick having the least outstanding read requests."},
  754     {
  755         .key = {"choose-local"},
  756         .type = GF_OPTION_TYPE_BOOL,
  757         .default_value = "true",
  758         .op_version = {2},
  759         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  760         .tags = {"replicate"},
  761         .description = "Choose a local subvolume (i.e. Brick) to read from"
  762                        " if read-subvolume is not explicitly set.",
  763     },
  764     {.key = {"background-self-heal-count"},
  765      .type = GF_OPTION_TYPE_INT,
  766      .min = 0,
  767      .max = 256,
  768      .default_value = "8",
  769      .validate = GF_OPT_VALIDATE_MIN,
  770      .op_version = {1},
  771      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  772      .tags = {"replicate"},
  773      .description = "This specifies the number of per client self-heal "
  774                     "jobs that can perform parallel heals in the "
  775                     "background."},
  776     {.key = {"halo-shd-max-latency"},
  777      .type = GF_OPTION_TYPE_INT,
  778      .min = 1,
  779      .max = 99999,
  780      .default_value = "99999",
  781      .op_version = {GD_OP_VERSION_3_11_0},
  782      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  783      .tags = {"replicate", "halo"},
  784      .description = "Maximum latency for shd halo replication in msec."},
  785     {.key = {"halo-enabled"},
  786      .type = GF_OPTION_TYPE_BOOL,
  787      .default_value = "False",
  788      .op_version = {GD_OP_VERSION_3_11_0},
  789      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  790      .tags = {"replicate", "halo"},
  791      .description = "Enable Halo (geo) replication mode."},
  792     {.key = {"halo-nfsd-max-latency"},
  793      .type = GF_OPTION_TYPE_INT,
  794      .min = 1,
  795      .max = 99999,
  796      .default_value = "5",
  797      .op_version = {GD_OP_VERSION_3_11_0},
  798      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  799      .tags = {"replicate", "halo"},
  800      .description = "Maximum latency for nfsd halo replication in msec."},
  801     {.key = {"halo-max-latency"},
  802      .type = GF_OPTION_TYPE_INT,
  803      .min = 1,
  804      .max = AFR_HALO_MAX_LATENCY,
  805      .default_value = "5",
  806      .op_version = {GD_OP_VERSION_3_11_0},
  807      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  808      .tags = {"replicate", "halo"},
  809      .description = "Maximum latency for halo replication in msec."},
  810     {.key = {"halo-max-replicas"},
  811      .type = GF_OPTION_TYPE_INT,
  812      .min = 1,
  813      .max = 99999,
  814      .default_value = "99999",
  815      .op_version = {GD_OP_VERSION_3_11_0},
  816      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  817      .tags = {"replicate", "halo"},
  818      .description = "The maximum number of halo replicas; replicas"
  819                     " beyond this value will be written asynchronously"
  820                     "via the SHD."},
  821     {.key = {"halo-min-replicas"},
  822      .type = GF_OPTION_TYPE_INT,
  823      .min = 1,
  824      .max = 99999,
  825      .default_value = "2",
  826      .op_version = {GD_OP_VERSION_3_11_0},
  827      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  828      .tags = {"replicate", "halo"},
  829      .description = "The minimmum number of halo replicas, before adding "
  830                     "out of region replicas."},
  831     {.key = {"heal-wait-queue-length"},
  832      .type = GF_OPTION_TYPE_INT,
  833      .min = 0,
  834      .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
  835      .default_value = "128",
  836      .validate = GF_OPT_VALIDATE_MIN,
  837      .op_version = {GD_OP_VERSION_3_7_10},
  838      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  839      .tags = {"replicate"},
  840      .description = "This specifies the number of heals that can be queued"
  841                     " for the parallel background self heal jobs."},
  842     {.key = {"data-self-heal"},
  843      .type = GF_OPTION_TYPE_STR,
  844      .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
  845                "disable", "open"},
  846      .default_value = "off",
  847      .op_version = {1},
  848      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  849      .tags = {"replicate"},
  850      .description = "Using this option we can enable/disable data "
  851                     "self-heal on the file. \"open\" means data "
  852                     "self-heal action will only be triggered by file "
  853                     "open operations."},
  854     {.key = {"data-self-heal-algorithm"},
  855      .type = GF_OPTION_TYPE_STR,
  856      .op_version = {1},
  857      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  858      .tags = {"replicate"},
  859      .description = "Select between \"full\", \"diff\". The "
  860                     "\"full\" algorithm copies the entire file from "
  861                     "source to sink. The \"diff\" algorithm copies to "
  862                     "sink only those blocks whose checksums don't match "
  863                     "with those of source. If no option is configured "
  864                     "the option is chosen dynamically as follows: "
  865                     "If the file does not exist on one of the sinks "
  866                     "or empty file exists or if the source file size is "
  867                     "about the same as page size the entire file will "
  868                     "be read and written i.e \"full\" algo, "
  869                     "otherwise \"diff\" algo is chosen.",
  870      .value = {"diff", "full"}},
  871     {.key = {"data-self-heal-window-size"},
  872      .type = GF_OPTION_TYPE_INT,
  873      .min = 1,
  874      .max = 1024,
  875      .default_value = "1",
  876      .op_version = {1},
  877      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  878      .tags = {"replicate"},
  879      .description = "Maximum number blocks per file for which self-heal "
  880                     "process would be applied simultaneously."},
  881     {.key = {"metadata-self-heal"},
  882      .type = GF_OPTION_TYPE_BOOL,
  883      .default_value = "off",
  884      .op_version = {1},
  885      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  886      .tags = {"replicate"},
  887      /*.validate_fn = validate_replica*/
  888      .description = "Using this option we can enable/disable metadata "
  889                     "i.e. Permissions, ownerships, xattrs self-heal on "
  890                     "the file/directory."},
  891     {.key = {"entry-self-heal"},
  892      .type = GF_OPTION_TYPE_BOOL,
  893      .default_value = "off",
  894      .op_version = {1},
  895      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  896      .tags = {"replicate"},
  897      /*.validate_fn = validate_replica*/
  898      .description = "Using this option we can enable/disable entry "
  899                     "self-heal on the directory."},
  900     {.key = {"data-change-log"},
  901      .type = GF_OPTION_TYPE_BOOL,
  902      .default_value = "on",
  903      .op_version = {1},
  904      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  905      .tags = {"replicate"},
  906      .description = "This option exists only for backward compatibility "
  907                     "and configuring it doesn't have any effect"},
  908     {.key = {"metadata-change-log"},
  909      .type = GF_OPTION_TYPE_BOOL,
  910      .default_value = "on",
  911      .op_version = {1},
  912      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  913      .tags = {"replicate"},
  914      .description = "This option exists only for backward compatibility "
  915                     "and configuring it doesn't have any effect"},
  916     {.key = {"entry-change-log"},
  917      .type = GF_OPTION_TYPE_BOOL,
  918      .default_value = "on",
  919      .op_version = {1},
  920      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  921      .tags = {"replicate"},
  922      .description = "This option exists only for backward compatibility "
  923                     "and configuring it doesn't have any effect"},
  924     {.key = {"optimistic-change-log"},
  925      .type = GF_OPTION_TYPE_BOOL,
  926      .default_value = "on",
  927      .description = "Entry/Metadata fops will not perform "
  928                     "pre fop changelog operations in afr transaction "
  929                     "if this option is enabled."},
  930     {.key = {"inodelk-trace"},
  931      .type = GF_OPTION_TYPE_BOOL,
  932      .default_value = "off",
  933      .description = "Enabling this option logs inode lock/unlocks"},
  934     {.key = {"entrylk-trace"},
  935      .type = GF_OPTION_TYPE_BOOL,
  936      .default_value = "off",
  937      .description = "Enabling this option logs entry lock/unlocks"},
  938     {.key = {"pre-op-compat"},
  939      .type = GF_OPTION_TYPE_BOOL,
  940      .default_value = "on",
  941      .description = "Use separate pre-op xattrop() FOP rather than "
  942                     "overloading xdata of the OP"},
  943     {.key = {"eager-lock"},
  944      .type = GF_OPTION_TYPE_BOOL,
  945      .default_value = "on",
  946      .op_version = {1},
  947      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
  948      .tags = {"replicate"},
  949      .description =
  950          "Enable/Disable eager lock for replica volume. "
  951          "Lock phase of a transaction has two sub-phases. "
  952          "First is an attempt to acquire locks in parallel by "
  953          "broadcasting non-blocking lock requests. If lock "
  954          "acquisition fails on any server, then the held locks "
  955          "are unlocked and we revert to a blocking locks mode "
  956          "sequentially on one server after another.  If this "
  957          "option is enabled the initial broadcasting lock "
  958          "request attempts to acquire a full lock on the entire file. "
  959          "If this fails, we revert back to the sequential "
  960          "\"regional\" blocking locks as before. In the case "
  961          "where such an \"eager\" lock is granted in the "
  962          "non-blocking phase, it gives rise to an opportunity "
  963          "for optimization. i.e, if the next write transaction "
  964          "on the same FD arrives before the unlock phase of "
  965          "the first transaction, it \"takes over\" the full "
  966          "file lock. Similarly if yet another data transaction "
  967          "arrives before the unlock phase of the \"optimized\" "
  968          "transaction, that in turn \"takes over\" the lock as "
  969          "well. The actual unlock now happens at the end of "
  970          "the last \"optimized\" transaction."
  971 
  972     },
  973     {.key = {"self-heal-daemon"},
  974      .type = GF_OPTION_TYPE_BOOL,
  975      .default_value = "on",
  976      .op_version = {1},
  977      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
  978      .tags = {"replicate"},
  979      /*.validate_fn   = validate_replica_heal_enable_disable*/
  980      .description = "This option applies to only self-heal-daemon. "
  981                     "Index directory crawl and automatic healing of files "
  982                     "will not be performed if this option is turned off."},
  983     {.key = {"iam-self-heal-daemon"},
  984      .type = GF_OPTION_TYPE_BOOL,
  985      .default_value = "off",
  986      .description = "This option differentiates if the replicate "
  987                     "translator is running as part of self-heal-daemon "
  988                     "or not."},
  989     {.key = {"iam-nfs-daemon"},
  990      .type = GF_OPTION_TYPE_BOOL,
  991      .default_value = "off",
  992      .description = "This option differentiates if the replicate "
  993                     "translator is running as part of an NFS daemon "
  994                     "or not."},
  995     {
  996         .key = {"quorum-type"},
  997         .type = GF_OPTION_TYPE_STR,
  998         .value = {"none", "auto", "fixed"},
  999         .default_value = "none",
 1000         .op_version = {1},
 1001         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1002         .tags = {"replicate"},
 1003         /*.option = quorum-type*/
 1004         .description = "If value is \"fixed\" only allow writes if "
 1005                        "quorum-count bricks are present.  If value is "
 1006                        "\"auto\" only allow writes if more than half of "
 1007                        "bricks, or exactly half including the first, are "
 1008                        "present.",
 1009     },
 1010     {
 1011         .key = {"quorum-count"},
 1012         .type = GF_OPTION_TYPE_INT,
 1013         .min = 1,
 1014         .max = INT_MAX,
 1015         .default_value = 0,
 1016         .op_version = {1},
 1017         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1018         .tags = {"replicate"},
 1019         /*.option = quorum-count*/
 1020         /*.validate_fn = validate_quorum_count*/
 1021         .description = "If quorum-type is \"fixed\" only allow writes if "
 1022                        "this many bricks are present.  Other quorum types "
 1023                        "will OVERWRITE this value.",
 1024     },
 1025     {
 1026         .key = {"quorum-reads"},
 1027         .type = GF_OPTION_TYPE_BOOL,
 1028         .default_value = "no",
 1029         .op_version = {GD_OP_VERSION_3_7_0},
 1030         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1031         .tags = {"replicate"},
 1032         .description = "This option has been removed. Reads are not allowed "
 1033                        "if quorum is not met.",
 1034     },
 1035     {
 1036         .key = {"node-uuid"},
 1037         .type = GF_OPTION_TYPE_STR,
 1038         .description = "Local glusterd uuid string, used in starting "
 1039                        "self-heal-daemon so that it can crawl only on "
 1040                        "local index directories.",
 1041     },
 1042     {
 1043         .key = {"post-op-delay-secs"},
 1044         .type = GF_OPTION_TYPE_INT,
 1045         .min = 0,
 1046         .max = INT_MAX,
 1047         .default_value = "1",
 1048         .op_version = {2},
 1049         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1050         .tags = {"replicate"},
 1051         .description = "Time interval induced artificially before "
 1052                        "post-operation phase of the transaction to "
 1053                        "enhance overlap of adjacent write operations.",
 1054     },
 1055     {
 1056         .key = {AFR_SH_READDIR_SIZE_KEY},
 1057         .type = GF_OPTION_TYPE_SIZET,
 1058         .description = "readdirp size for performing entry self-heal",
 1059         .min = 1024,
 1060         .max = 131072,
 1061         .op_version = {2},
 1062         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
 1063         .tags = {"replicate"},
 1064         .default_value = "1KB",
 1065     },
 1066     {
 1067         .key = {"ensure-durability"},
 1068         .type = GF_OPTION_TYPE_BOOL,
 1069         .op_version = {3},
 1070         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1071         .tags = {"replicate"},
 1072         .description = "Afr performs fsyncs for transactions if this "
 1073                        "option is on to make sure the changelogs/data is "
 1074                        "written to the disk",
 1075         .default_value = "on",
 1076     },
 1077     {
 1078         .key = {"afr-dirty-xattr"},
 1079         .type = GF_OPTION_TYPE_STR,
 1080         .default_value = AFR_DIRTY_DEFAULT,
 1081     },
 1082     {.key = {"afr-pending-xattr"},
 1083      .type = GF_OPTION_TYPE_STR,
 1084      .description = "Comma separated list of xattrs that are used to  "
 1085                     "capture information on pending heals."},
 1086     {
 1087         .key = {"metadata-splitbrain-forced-heal"},
 1088         .type = GF_OPTION_TYPE_BOOL,
 1089         .default_value = "off",
 1090     },
 1091     {.key = {"heal-timeout"},
 1092      .type = GF_OPTION_TYPE_INT,
 1093      .min = 5,
 1094      .max = INT_MAX,
 1095      .default_value = "600",
 1096      .op_version = {2},
 1097      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1098      .tags = {"replicate"},
 1099      .description = "time interval for checking the need to self-heal "
 1100                     "in self-heal-daemon"},
 1101     {
 1102         .key = {"consistent-metadata"},
 1103         .type = GF_OPTION_TYPE_BOOL,
 1104         .default_value = "no",
 1105         .op_version = {GD_OP_VERSION_3_7_0},
 1106         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1107         .tags = {"replicate"},
 1108         .description = "If this option is enabled, readdirp will force "
 1109                        "lookups on those entries read whose read child is "
 1110                        "not the same as that of the parent. This will "
 1111                        "guarantee that all read operations on a file serve "
 1112                        "attributes from the same subvol as long as it holds "
 1113                        " a good copy of the file/dir.",
 1114     },
 1115     {.key = {"arbiter-count"},
 1116      .type = GF_OPTION_TYPE_INT,
 1117      .description = "subset of child_count. Has to be 0 or 1."},
 1118     {
 1119         .key = {"thin-arbiter"},
 1120         .type = GF_OPTION_TYPE_STR,
 1121         .op_version = {GD_OP_VERSION_4_1_0},
 1122         .flags = OPT_FLAG_SETTABLE,
 1123         .tags = {"replicate"},
 1124         .description = "contains host:path of thin abriter brick",
 1125     },
 1126     {.key = {"shd-max-threads"},
 1127      .type = GF_OPTION_TYPE_INT,
 1128      .min = 1,
 1129      .max = 64,
 1130      .default_value = "1",
 1131      .op_version = {GD_OP_VERSION_3_7_12},
 1132      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1133      .tags = {"replicate"},
 1134      .description = "Maximum number of parallel heals SHD can do per "
 1135                     "local brick. This can substantially lower heal times"
 1136                     ", but can also crush your bricks if you don't have "
 1137                     "the storage hardware to support this."},
 1138     {
 1139         .key = {"shd-wait-qlength"},
 1140         .type = GF_OPTION_TYPE_INT,
 1141         .min = 1,
 1142         .max = 655536,
 1143         .default_value = "1024",
 1144         .op_version = {GD_OP_VERSION_3_7_12},
 1145         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1146         .tags = {"replicate"},
 1147         .description = "This option can be used to control number of heals"
 1148                        " that can wait in SHD per subvolume",
 1149     },
 1150     {
 1151         .key = {"locking-scheme"},
 1152         .type = GF_OPTION_TYPE_STR,
 1153         .value = {"full", "granular"},
 1154         .default_value = "full",
 1155         .op_version = {GD_OP_VERSION_3_7_12},
 1156         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1157         .tags = {"replicate"},
 1158         .description = "If this option is set to granular, self-heal will "
 1159                        "stop being compatible with afr-v1, which helps afr "
 1160                        "be more granular while self-healing",
 1161     },
 1162     {.key = {"full-lock"},
 1163      .type = GF_OPTION_TYPE_BOOL,
 1164      .default_value = "yes",
 1165      .op_version = {GD_OP_VERSION_3_13_2},
 1166      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
 1167      .tags = {"replicate"},
 1168      .description = "If this option is disabled, then the IOs will take "
 1169                     "range locks same as versions till 3.13.1."},
 1170     {
 1171         .key = {"granular-entry-heal"},
 1172         .type = GF_OPTION_TYPE_BOOL,
 1173         .default_value = "no",
 1174         .op_version = {GD_OP_VERSION_3_8_0},
 1175         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1176         .tags = {"replicate"},
 1177         .description = "If this option is enabled, self-heal will resort to "
 1178                        "granular way of recording changelogs and doing entry "
 1179                        "self-heal.",
 1180     },
 1181     {
 1182         .key = {"favorite-child-policy"},
 1183         .type = GF_OPTION_TYPE_STR,
 1184         .value = {"none", "size", "ctime", "mtime", "majority"},
 1185         .default_value = "none",
 1186         .op_version = {GD_OP_VERSION_3_7_12},
 1187         .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1188         .tags = {"replicate"},
 1189         .description = "This option can be used to automatically resolve "
 1190                        "split-brains using various policies without user "
 1191                        "intervention. \"size\" picks the file with the "
 1192                        "biggest size as the source. \"ctime\" and \"mtime\" "
 1193                        "pick the file with the latest ctime and mtime "
 1194                        "respectively as the source. \"majority\" picks a file"
 1195                        " with identical mtime and size in more than half the "
 1196                        "number of bricks in the replica.",
 1197     },
 1198     {
 1199         .key = {"consistent-io"},
 1200         .type = GF_OPTION_TYPE_BOOL,
 1201         .default_value = "no",
 1202         .description = "If this option is enabled, i/o will fail even if "
 1203                        "one of the bricks is down in the replicas",
 1204     },
 1205     {.key = {"use-compound-fops"},
 1206      .type = GF_OPTION_TYPE_BOOL,
 1207      .default_value = "no",
 1208      .op_version = {GD_OP_VERSION_3_8_4},
 1209      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
 1210      .tags = {"replicate"},
 1211      .description = "This option exists only for backward compatibility "
 1212                     "and configuring it doesn't have any effect"},
 1213     {.key = {NULL}},
 1214 };
 1215 
 1216 xlator_api_t xlator_api = {
          /*
           * Descriptor for the "replicate" translator: gathers this
           * translator's lifecycle hooks, operation tables and option table
           * into one xlator_api_t object.
           * NOTE(review): presumably the xlator loader looks this symbol up
           * by name -- confirm against the glusterfs core before renaming it.
           */
 1217     .init = init,
 1218     .fini = fini,
          /* notify() (top of this file) pulls one extra variadic argument and
           * forwards everything to afr_notify(). */
 1219     .notify = notify,
 1220     .reconfigure = reconfigure,
 1221     .mem_acct_init = mem_acct_init,
 1222     .op_version = {1}, /* Present from the initial version */
          /* The three tables below are referenced by address; their
           * definitions are not visible in this part of the file. */
 1223     .dumpops = &dumpops,
 1224     .fops = &fops,
 1225     .cbks = &cbks,
          /* Option table forward-declared near the top of this file. */
 1226     .options = options,
          /* Same string the option entries use in their .tags field. */
 1227     .identifier = "replicate",
 1228     .category = GF_MAINTAINED,
 1229 };