"Fossies" - the Fresh Open Source Software Archive

Member "glusterfs-6.9/xlators/cluster/afr/src/afr-self-heal-metadata.c" (23 Apr 2020, 16501 Bytes) of package /linux/misc/glusterfs-6.9.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "afr-self-heal-metadata.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 6.8_vs_6.9.

    1 /*
    2   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
    3   This file is part of GlusterFS.
    4 
    5   This file is licensed to you under your choice of the GNU Lesser
    6   General Public License, version 3 or any later version (LGPLv3 or
    7   later), or the GNU General Public License, version 2 (GPLv2), in all
    8   cases as published by the Free Software Foundation.
    9 */
   10 
   11 #include "afr.h"
   12 #include "afr-self-heal.h"
   13 #include <glusterfs/byte-order.h>
   14 #include "protocol-common.h"
   15 #include <glusterfs/events.h>
   16 
   17 #define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
   18 
   19 static gf_boolean_t
   20 _afr_ignorable_key_match(dict_t *d, char *k, data_t *val, void *mdata)
   21 {
   22     return afr_is_xattr_ignorable(k);
   23 }
   24 
   25 void
   26 afr_delete_ignorable_xattrs(dict_t *xattr)
   27 {
   28     dict_foreach_match(xattr, _afr_ignorable_key_match, NULL,
   29                        dict_remove_foreach_fn, NULL);
   30 }
   31 
   32 int
   33 __afr_selfheal_metadata_do(call_frame_t *frame, xlator_t *this, inode_t *inode,
   34                            int source, unsigned char *healed_sinks,
   35                            struct afr_reply *locked_replies)
   36 {
   37     int ret = -1;
   38     loc_t loc = {
   39         0,
   40     };
   41     dict_t *xattr = NULL;
   42     dict_t *old_xattr = NULL;
   43     afr_private_t *priv = NULL;
   44     int i = 0;
   45 
   46     priv = this->private;
   47 
   48     loc.inode = inode_ref(inode);
   49     gf_uuid_copy(loc.gfid, inode->gfid);
   50 
   51     gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
   52            "performing metadata selfheal on %s", uuid_utoa(inode->gfid));
   53 
   54     ret = syncop_getxattr(priv->children[source], &loc, &xattr, NULL, NULL,
   55                           NULL);
   56     if (ret < 0) {
   57         ret = -EIO;
   58         goto out;
   59     }
   60 
   61     afr_delete_ignorable_xattrs(xattr);
   62 
   63     for (i = 0; i < priv->child_count; i++) {
   64         if (old_xattr) {
   65             dict_unref(old_xattr);
   66             old_xattr = NULL;
   67         }
   68 
   69         if (!healed_sinks[i])
   70             continue;
   71 
   72         ret = syncop_setattr(priv->children[i], &loc,
   73                              &locked_replies[source].poststat, AFR_HEAL_ATTR,
   74                              NULL, NULL, NULL, NULL);
   75         if (ret)
   76             healed_sinks[i] = 0;
   77 
   78         ret = syncop_getxattr(priv->children[i], &loc, &old_xattr, 0, NULL,
   79                               NULL);
   80         if (old_xattr) {
   81             afr_delete_ignorable_xattrs(old_xattr);
   82             ret = syncop_removexattr(priv->children[i], &loc, "", old_xattr,
   83                                      NULL);
   84             if (ret)
   85                 healed_sinks[i] = 0;
   86         }
   87 
   88         ret = syncop_setxattr(priv->children[i], &loc, xattr, 0, NULL, NULL);
   89         if (ret)
   90             healed_sinks[i] = 0;
   91     }
   92     ret = 0;
   93 
   94 out:
   95     loc_wipe(&loc);
   96     if (xattr)
   97         dict_unref(xattr);
   98     if (old_xattr)
   99         dict_unref(old_xattr);
  100 
  101     return ret;
  102 }
  103 
  104 static uint64_t
  105 mtime_ns(struct iatt *ia)
  106 {
  107     uint64_t ret;
  108 
  109     ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) +
  110           (uint64_t)(ia->ia_mtime_nsec);
  111 
  112     return ret;
  113 }
  114 
  115 /*
  116  * When directory content is modified, [mc]time is updated. On
  117  * Linux, the filesystem does it, while at least on NetBSD, the
  118  * kernel file-system independent code does it. This means that
  119  * when entries are added while bricks are down, the kernel sends
  120  * a SETATTR [mc]time which will cause metadata split brain for
  121  * the directory. In this case, clear the split brain by finding
  122  * the source with the most recent modification date.
  123  */
  124 static int
  125 afr_dirtime_splitbrain_source(call_frame_t *frame, xlator_t *this,
  126                               struct afr_reply *replies,
  127                               unsigned char *locked_on)
  128 {
  129     afr_private_t *priv = NULL;
  130     int source = -1;
  131     struct iatt source_ia;
  132     struct iatt child_ia;
  133     uint64_t mtime = 0;
  134     int i;
  135     int ret = -1;
  136 
  137     priv = this->private;
  138 
  139     for (i = 0; i < priv->child_count; i++) {
  140         if (!locked_on[i])
  141             continue;
  142 
  143         if (!replies[i].valid)
  144             continue;
  145 
  146         if (replies[i].op_ret != 0)
  147             continue;
  148 
  149         if (mtime_ns(&replies[i].poststat) <= mtime)
  150             continue;
  151 
  152         mtime = mtime_ns(&replies[i].poststat);
  153         source = i;
  154     }
  155 
  156     if (source == -1)
  157         goto out;
  158 
  159     source_ia = replies[source].poststat;
  160     if (source_ia.ia_type != IA_IFDIR)
  161         goto out;
  162 
  163     for (i = 0; i < priv->child_count; i++) {
  164         if (i == source)
  165             continue;
  166 
  167         if (!replies[i].valid)
  168             continue;
  169 
  170         if (replies[i].op_ret != 0)
  171             continue;
  172 
  173         child_ia = replies[i].poststat;
  174 
  175         if (!IA_EQUAL(source_ia, child_ia, gfid) ||
  176             !IA_EQUAL(source_ia, child_ia, type) ||
  177             !IA_EQUAL(source_ia, child_ia, prot) ||
  178             !IA_EQUAL(source_ia, child_ia, uid) ||
  179             !IA_EQUAL(source_ia, child_ia, gid) ||
  180             !afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata))
  181             goto out;
  182     }
  183 
  184     /*
  185      * Metadata split brain is just about [amc]time
  186      * We return our source.
  187      */
  188     ret = source;
  189 out:
  190     return ret;
  191 }
  192 
  193 static int
  194 __afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
  195                                             inode_t *inode,
  196                                             struct afr_reply *replies,
  197                                             unsigned char *sources)
  198 {
  199     int ret = 0;
  200     int i = 0;
  201     int m_idx = 0;
  202     afr_private_t *priv = NULL;
  203     int raw[AFR_NUM_CHANGE_LOGS] = {0};
  204     dict_t *xattr = NULL;
  205 
  206     priv = this->private;
  207     m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
  208     raw[m_idx] = 1;
  209 
  210     xattr = dict_new();
  211     if (!xattr)
  212         return -ENOMEM;
  213 
  214     for (i = 0; i < priv->child_count; i++) {
  215         if (sources[i])
  216             continue;
  217         ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
  218                                   sizeof(int) * AFR_NUM_CHANGE_LOGS);
  219         if (ret) {
  220             ret = -1;
  221             goto out;
  222         }
  223     }
  224 
  225     for (i = 0; i < priv->child_count; i++) {
  226         if (!sources[i])
  227             continue;
  228         ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
  229         if (ret < 0) {
  230             gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
  231                    "Failed to set pending metadata xattr on child %d for %s", i,
  232                    uuid_utoa(inode->gfid));
  233             goto out;
  234         }
  235     }
  236 
  237     afr_replies_wipe(replies, priv->child_count);
  238     ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
  239 
  240 out:
  241     if (xattr)
  242         dict_unref(xattr);
  243     return ret;
  244 }
  245 
  246 /*
  247  * Look for mismatching uid/gid or mode or user xattrs even if
  248  * AFR xattrs don't say so, and pick one arbitrarily as winner. */
  249 
  250 static int
  251 __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
  252                                         inode_t *inode, unsigned char *sources,
  253                                         unsigned char *sinks,
  254                                         unsigned char *healed_sinks,
  255                                         unsigned char *undid_pending,
  256                                         unsigned char *locked_on,
  257                                         struct afr_reply *replies)
  258 {
  259     int i = 0;
  260     afr_private_t *priv = NULL;
  261     struct iatt srcstat = {
  262         0,
  263     };
  264     int source = -1;
  265     int sources_count = 0;
  266     int ret = 0;
  267 
  268     priv = this->private;
  269 
  270     sources_count = AFR_COUNT(sources, priv->child_count);
  271 
  272     if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
  273         !sources_count) {
  274         source = afr_mark_split_brain_source_sinks(
  275             frame, this, inode, sources, sinks, healed_sinks, locked_on,
  276             replies, AFR_METADATA_TRANSACTION);
  277         if (source >= 0) {
  278             _afr_fav_child_reset_sink_xattrs(
  279                 frame, this, inode, source, healed_sinks, undid_pending,
  280                 AFR_METADATA_TRANSACTION, locked_on, replies);
  281             goto out;
  282         }
  283 
  284         /* If this is a directory mtime/ctime only split brain
  285            use the most recent */
  286         source = afr_dirtime_splitbrain_source(frame, this, replies, locked_on);
  287         if (source != -1) {
  288             gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SPLIT_BRAIN,
  289                    "clear time "
  290                    "split brain on %s",
  291                    uuid_utoa(replies[source].poststat.ia_gfid));
  292             sources[source] = 1;
  293             healed_sinks[source] = 0;
  294             goto out;
  295         }
  296 
  297         if (!priv->metadata_splitbrain_forced_heal) {
  298             gf_event(EVENT_AFR_SPLIT_BRAIN,
  299                      "client-pid=%d;"
  300                      "subvol=%s;"
  301                      "type=metadata;file=%s",
  302                      this->ctx->cmd_args.client_pid, this->name,
  303                      uuid_utoa(inode->gfid));
  304             return -EIO;
  305         }
  306 
  307         /* Metadata split brain, select one subvol
  308            arbitrarily */
  309         for (i = 0; i < priv->child_count; i++) {
  310             if (locked_on[i] && healed_sinks[i]) {
  311                 sources[i] = 1;
  312                 healed_sinks[i] = 0;
  313                 break;
  314             }
  315         }
  316     }
  317 
  318     /* No split brain at this point. If we were called from
  319      * afr_heal_splitbrain_file(), abort.*/
  320     if (afr_dict_contains_heal_op(frame))
  321         return -EIO;
  322 
  323     source = afr_choose_source_by_policy(priv, sources,
  324                                          AFR_METADATA_TRANSACTION);
  325     srcstat = replies[source].poststat;
  326 
  327     for (i = 0; i < priv->child_count; i++) {
  328         if (!sources[i] || i == source)
  329             continue;
  330         if (!IA_EQUAL(srcstat, replies[i].poststat, type) ||
  331             !IA_EQUAL(srcstat, replies[i].poststat, uid) ||
  332             !IA_EQUAL(srcstat, replies[i].poststat, gid) ||
  333             !IA_EQUAL(srcstat, replies[i].poststat, prot)) {
  334             gf_msg_debug(this->name, 0,
  335                          "%s: iatt mismatch "
  336                          "for source(%d) vs (%d)",
  337                          uuid_utoa(replies[source].poststat.ia_gfid), source,
  338                          i);
  339             sources[i] = 0;
  340             healed_sinks[i] = 1;
  341         }
  342     }
  343 
  344     for (i = 0; i < priv->child_count; i++) {
  345         if (!sources[i] || i == source)
  346             continue;
  347         if (!afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) {
  348             gf_msg_debug(this->name, 0,
  349                          "%s: xattr mismatch "
  350                          "for source(%d) vs (%d)",
  351                          uuid_utoa(replies[source].poststat.ia_gfid), source,
  352                          i);
  353             sources[i] = 0;
  354             healed_sinks[i] = 1;
  355         }
  356     }
  357     if ((sources_count == priv->child_count) && (source > -1) &&
  358         (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
  359         ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
  360                                                           replies, sources);
  361         if (ret < 0)
  362             return ret;
  363     }
  364 out:
  365     afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
  366     return source;
  367 }
  368 
  369 int
  370 __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
  371                                 inode_t *inode, unsigned char *locked_on,
  372                                 unsigned char *sources, unsigned char *sinks,
  373                                 unsigned char *healed_sinks,
  374                                 unsigned char *undid_pending,
  375                                 struct afr_reply *replies, unsigned char *pflag)
  376 {
  377     int ret = -1;
  378     int source = -1;
  379     afr_private_t *priv = NULL;
  380     int i = 0;
  381     uint64_t *witness = NULL;
  382 
  383     priv = this->private;
  384 
  385     ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
  386     if (ret)
  387         return ret;
  388 
  389     witness = alloca0(sizeof(*witness) * priv->child_count);
  390     ret = afr_selfheal_find_direction(frame, this, replies,
  391                                       AFR_METADATA_TRANSACTION, locked_on,
  392                                       sources, sinks, witness, pflag);
  393     if (ret)
  394         return ret;
  395 
  396     /* Initialize the healed_sinks[] array optimistically to
  397        the intersection of to-be-healed (i.e sinks[]) and
  398        the list of servers which are up (i.e locked_on[]).
  399 
  400        As we encounter failures in the healing process, we
  401        will unmark the respective servers in the healed_sinks[]
  402        array.
  403     */
  404     AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
  405 
  406     /* If any source has witness, pick first
  407      * witness source and make everybody else sinks */
  408     for (i = 0; i < priv->child_count; i++) {
  409         if (sources[i] && witness[i]) {
  410             source = i;
  411             break;
  412         }
  413     }
  414 
  415     if (source != -1) {
  416         for (i = 0; i < priv->child_count; i++) {
  417             if (i != source && sources[i]) {
  418                 sources[i] = 0;
  419                 healed_sinks[i] = 1;
  420             }
  421         }
  422     }
  423 
  424     source = __afr_selfheal_metadata_finalize_source(
  425         frame, this, inode, sources, sinks, healed_sinks, undid_pending,
  426         locked_on, replies);
  427 
  428     if (source < 0)
  429         return -EIO;
  430 
  431     return source;
  432 }
  433 
  434 int
  435 afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
  436 {
  437     afr_private_t *priv = NULL;
  438     int ret = -1;
  439     unsigned char *sources = NULL;
  440     unsigned char *sinks = NULL;
  441     unsigned char *data_lock = NULL;
  442     unsigned char *healed_sinks = NULL;
  443     unsigned char *undid_pending = NULL;
  444     struct afr_reply *locked_replies = NULL;
  445     gf_boolean_t did_sh = _gf_true;
  446     int source = -1;
  447 
  448     priv = this->private;
  449 
  450     sources = alloca0(priv->child_count);
  451     sinks = alloca0(priv->child_count);
  452     healed_sinks = alloca0(priv->child_count);
  453     undid_pending = alloca0(priv->child_count);
  454     data_lock = alloca0(priv->child_count);
  455 
  456     locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
  457 
  458     ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
  459                                data_lock);
  460     {
  461         if (ret < priv->child_count) {
  462             ret = -ENOTCONN;
  463             goto unlock;
  464         }
  465 
  466         ret = __afr_selfheal_metadata_prepare(
  467             frame, this, inode, data_lock, sources, sinks, healed_sinks,
  468             undid_pending, locked_replies, NULL);
  469         if (ret < 0)
  470             goto unlock;
  471 
  472         source = ret;
  473 
  474         if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
  475             did_sh = _gf_false;
  476             goto unlock;
  477         }
  478 
  479         ret = __afr_selfheal_metadata_do(frame, this, inode, source,
  480                                          healed_sinks, locked_replies);
  481         if (ret)
  482             goto unlock;
  483 
  484         /* Restore atime/mtime for files that don't need data heal as
  485          * restoring timestamps happens only as a part of data-heal.
  486          */
  487         if (!IA_ISREG(locked_replies[source].poststat.ia_type))
  488             afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
  489                                       locked_replies);
  490 
  491         ret = afr_selfheal_undo_pending(
  492             frame, this, inode, sources, sinks, healed_sinks, undid_pending,
  493             AFR_METADATA_TRANSACTION, locked_replies, data_lock);
  494     }
  495 unlock:
  496     afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
  497                            data_lock);
  498 
  499     if (did_sh)
  500         afr_log_selfheal(inode->gfid, this, ret, "metadata", source, sources,
  501                          healed_sinks);
  502     else
  503         ret = 1;
  504 
  505     if (locked_replies)
  506         afr_replies_wipe(locked_replies, priv->child_count);
  507     return ret;
  508 }
  509 
  510 int
  511 afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf)
  512 {
  513     inode_t *inode = NULL;
  514     inode_t *link_inode = NULL;
  515     call_frame_t *frame = NULL;
  516     int ret = 0;
  517 
  518     if (gf_uuid_is_null(stbuf->ia_gfid)) {
  519         ret = -EINVAL;
  520         goto out;
  521     }
  522 
  523     inode = inode_new(this->itable);
  524     if (!inode) {
  525         ret = -ENOMEM;
  526         goto out;
  527     }
  528 
  529     link_inode = inode_link(inode, NULL, NULL, stbuf);
  530     if (!link_inode) {
  531         ret = -ENOMEM;
  532         goto out;
  533     }
  534 
  535     frame = afr_frame_create(this, &ret);
  536     if (!frame) {
  537         ret = -ret;
  538         goto out;
  539     }
  540 
  541     ret = afr_selfheal_metadata(frame, this, link_inode);
  542 out:
  543     if (inode)
  544         inode_unref(inode);
  545     if (link_inode)
  546         inode_unref(link_inode);
  547     if (frame)
  548         AFR_STACK_DESTROY(frame);
  549     return ret;
  550 }