"Fossies" - the Fresh Open Source Software Archive

Member "glusterfs-8.2/xlators/cluster/ec/src/ec-inode-write.c" (16 Sep 2020, 70034 Bytes) of package /linux/misc/glusterfs-8.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ec-inode-write.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2   Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
    3   This file is part of GlusterFS.
    4 
    5   This file is licensed to you under your choice of the GNU Lesser
    6   General Public License, version 3 or any later version (LGPLv3 or
    7   later), or the GNU General Public License, version 2 (GPLv2), in all
    8   cases as published by the Free Software Foundation.
    9 */
   10 
   11 #include "ec-messages.h"
   12 #include "ec-helpers.h"
   13 #include "ec-common.h"
   14 #include "ec-combine.h"
   15 #include "ec-method.h"
   16 #include "ec-fops.h"
   17 #include "ec-mem-types.h"
   18 
   19 int32_t
   20 ec_update_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
   21                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
   22                      struct iatt *postbuf, dict_t *xdata)
   23 {
   24     ec_fop_data_t *fop = cookie;
   25     ec_cbk_data_t *cbk = NULL;
   26     ec_fop_data_t *parent = fop->parent;
   27     int i = 0;
   28 
   29     ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
   30              op_ret, op_errno, ec_fop_name(parent->id));
   31 
   32     if (op_ret < 0) {
   33         ec_fop_set_error(parent, op_errno);
   34         goto out;
   35     }
   36     cbk = ec_cbk_data_allocate(parent->frame, this, parent, parent->id, 0,
   37                                op_ret, op_errno);
   38     if (!cbk) {
   39         ec_fop_set_error(parent, ENOMEM);
   40         goto out;
   41     }
   42 
   43     if (xdata)
   44         cbk->xdata = dict_ref(xdata);
   45 
   46     if (prebuf)
   47         cbk->iatt[i++] = *prebuf;
   48 
   49     if (postbuf)
   50         cbk->iatt[i++] = *postbuf;
   51 
   52     LOCK(&parent->lock);
   53     {
   54         parent->good &= fop->good;
   55 
   56         if (gf_bits_count(parent->good) < parent->minimum) {
   57             __ec_fop_set_error(parent, EIO);
   58         } else if (fop->error == 0 && parent->answer == NULL) {
   59             parent->answer = cbk;
   60         }
   61     }
   62     UNLOCK(&parent->lock);
   63 out:
   64     return 0;
   65 }
   66 
   67 static int32_t
   68 ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
   69 {
   70     struct iobref *iobref = NULL;
   71     struct iobuf *iobuf = NULL;
   72     struct iovec vector;
   73     int32_t err = -ENOMEM;
   74 
   75     iobref = iobref_new();
   76     if (iobref == NULL) {
   77         goto out;
   78     }
   79     iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
   80     if (iobuf == NULL) {
   81         goto out;
   82     }
   83     err = iobref_add(iobref, iobuf);
   84     if (err != 0) {
   85         goto out;
   86     }
   87 
   88     if (fop->locks[0].lock)
   89         ec_lock_update_good(fop->locks[0].lock, fop);
   90     vector.iov_base = iobuf->ptr;
   91     vector.iov_len = size;
   92     memset(vector.iov_base, 0, vector.iov_len);
   93 
   94     ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_update_writev_cbk,
   95               NULL, fop->fd, &vector, 1, offset, 0, iobref, NULL);
   96 
   97     err = 0;
   98 
   99 out:
  100     if (iobuf != NULL) {
  101         iobuf_unref(iobuf);
  102     }
  103     if (iobref != NULL) {
  104         iobref_unref(iobref);
  105     }
  106 
  107     return err;
  108 }
  109 
  110 int
  111 ec_inode_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie,
  112                    int op_ret, int op_errno, struct iatt *prestat,
  113                    struct iatt *poststat, dict_t *xdata)
  114 {
  115     ec_fop_data_t *fop = NULL;
  116     ec_cbk_data_t *cbk = NULL;
  117     int i = 0;
  118     int idx = 0;
  119 
  120     VALIDATE_OR_GOTO(this, out);
  121     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  122     GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
  123     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  124 
  125     fop = frame->local;
  126     idx = (int32_t)(uintptr_t)cookie;
  127 
  128     ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
  129              op_ret, op_errno);
  130 
  131     cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
  132                                op_errno);
  133     if (!cbk)
  134         goto out;
  135 
  136     if (op_ret < 0)
  137         goto out;
  138 
  139     if (xdata)
  140         cbk->xdata = dict_ref(xdata);
  141 
  142     if (prestat)
  143         cbk->iatt[i++] = *prestat;
  144 
  145     if (poststat)
  146         cbk->iatt[i++] = *poststat;
  147 
  148 out:
  149     if (cbk)
  150         ec_combine(cbk, ec_combine_write);
  151 
  152     if (fop)
  153         ec_complete(fop);
  154     return 0;
  155 }
  156 /* FOP: removexattr */
  157 
  158 int32_t
  159 ec_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  160                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
  161 {
  162     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
  163                               xdata);
  164 }
  165 
/*
 * Winds the removexattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->loc[0], fop->str[0] and fop->xdata
 * as arguments; 'idx' is passed as the cookie (cast to a pointer) and
 * ec_removexattr_cbk is registered as the reply callback.
 */
void
ec_wind_removexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_removexattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->removexattr,
                      &fop->loc[0], fop->str[0], fop->xdata);
}
  175 
  176 void
  177 ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
  178              int32_t op_errno, dict_t *xdata)
  179 {
  180     ec_fop_data_t *fop = cookie;
  181     switch (fop->id) {
  182         case GF_FOP_SETXATTR:
  183             if (fop->cbks.setxattr) {
  184                 QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
  185                            op_errno, xdata);
  186             }
  187             break;
  188         case GF_FOP_REMOVEXATTR:
  189             if (fop->cbks.removexattr) {
  190                 QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
  191                            op_ret, op_errno, xdata);
  192             }
  193             break;
  194         case GF_FOP_FSETXATTR:
  195             if (fop->cbks.fsetxattr) {
  196                 QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
  197                            op_ret, op_errno, xdata);
  198             }
  199             break;
  200         case GF_FOP_FREMOVEXATTR:
  201             if (fop->cbks.fremovexattr) {
  202                 QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
  203                            op_ret, op_errno, xdata);
  204             }
  205             break;
  206     }
  207 }
  208 
  209 int32_t
  210 ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
  211 {
  212     ec_cbk_data_t *cbk;
  213 
  214     switch (state) {
  215         case EC_STATE_INIT:
  216         case EC_STATE_LOCK:
  217             if (fop->fd == NULL) {
  218                 ec_lock_prepare_inode(fop, &fop->loc[0],
  219                                       EC_UPDATE_META | EC_QUERY_INFO, 0,
  220                                       EC_RANGE_FULL);
  221             } else {
  222                 ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
  223                                    0, EC_RANGE_FULL);
  224             }
  225             ec_lock(fop);
  226 
  227             return EC_STATE_DISPATCH;
  228 
  229         case EC_STATE_DISPATCH:
  230             ec_dispatch_all(fop);
  231 
  232             return EC_STATE_PREPARE_ANSWER;
  233 
  234         case EC_STATE_PREPARE_ANSWER:
  235             ec_fop_prepare_answer(fop, _gf_false);
  236 
  237             return EC_STATE_REPORT;
  238 
  239         case EC_STATE_REPORT:
  240             cbk = fop->answer;
  241 
  242             GF_ASSERT(cbk != NULL);
  243 
  244             ec_xattr_cbk(fop->req_frame, fop, fop->xl, cbk->op_ret,
  245                          cbk->op_errno, cbk->xdata);
  246 
  247             return EC_STATE_LOCK_REUSE;
  248 
  249         case -EC_STATE_INIT:
  250         case -EC_STATE_LOCK:
  251         case -EC_STATE_DISPATCH:
  252         case -EC_STATE_PREPARE_ANSWER:
  253         case -EC_STATE_REPORT:
  254             GF_ASSERT(fop->error != 0);
  255 
  256             ec_xattr_cbk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL);
  257 
  258             return EC_STATE_LOCK_REUSE;
  259 
  260         case -EC_STATE_LOCK_REUSE:
  261         case EC_STATE_LOCK_REUSE:
  262             ec_lock_reuse(fop);
  263 
  264             return EC_STATE_UNLOCK;
  265 
  266         case -EC_STATE_UNLOCK:
  267         case EC_STATE_UNLOCK:
  268             ec_unlock(fop);
  269 
  270             return EC_STATE_END;
  271 
  272         default:
  273             gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
  274                    "Unhandled state %d for %s", state, ec_fop_name(fop->id));
  275 
  276             return EC_STATE_END;
  277     }
  278 }
  279 
  280 void
  281 ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  282                uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
  283                loc_t *loc, const char *name, dict_t *xdata)
  284 {
  285     ec_cbk_t callback = {.removexattr = func};
  286     ec_fop_data_t *fop = NULL;
  287     int32_t error = ENOMEM;
  288 
  289     gf_msg_trace("ec", 0, "EC(REMOVEXATTR) %p", frame);
  290 
  291     VALIDATE_OR_GOTO(this, out);
  292     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  293     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  294 
  295     fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
  296                                fop_flags, ec_wind_removexattr, ec_manager_xattr,
  297                                callback, data);
  298     if (fop == NULL) {
  299         goto out;
  300     }
  301 
  302     if (loc != NULL) {
  303         if (loc_copy(&fop->loc[0], loc) != 0) {
  304             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
  305                    "Failed to copy a location.");
  306 
  307             goto out;
  308         }
  309     }
  310     if (name != NULL) {
  311         fop->str[0] = gf_strdup(name);
  312         if (fop->str[0] == NULL) {
  313             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
  314                    "Failed to duplicate a string.");
  315 
  316             goto out;
  317         }
  318     }
  319     if (xdata != NULL) {
  320         fop->xdata = dict_copy_with_ref(xdata, NULL);
  321         if (fop->xdata == NULL) {
  322             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  323                    "Failed to reference a "
  324                    "dictionary.");
  325 
  326             goto out;
  327         }
  328     }
  329 
  330     error = 0;
  331 
  332 out:
  333     if (fop != NULL) {
  334         ec_manager(fop, error);
  335     } else {
  336         func(frame, NULL, this, -1, error, NULL);
  337     }
  338 }
  339 
  340 /* FOP: fremovexattr */
  341 
  342 int32_t
  343 ec_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  344                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
  345 {
  346     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
  347                               xdata);
  348 }
  349 
/*
 * Winds the fremovexattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->fd, fop->str[0] and fop->xdata as
 * arguments; 'idx' is passed as the cookie (cast to a pointer) and
 * ec_fremovexattr_cbk is registered as the reply callback.
 */
void
ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_fremovexattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->fremovexattr,
                      fop->fd, fop->str[0], fop->xdata);
}
  359 
  360 void
  361 ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  362                 uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
  363                 fd_t *fd, const char *name, dict_t *xdata)
  364 {
  365     ec_cbk_t callback = {.fremovexattr = func};
  366     ec_fop_data_t *fop = NULL;
  367     int32_t error = ENOMEM;
  368 
  369     gf_msg_trace("ec", 0, "EC(FREMOVEXATTR) %p", frame);
  370 
  371     VALIDATE_OR_GOTO(this, out);
  372     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  373     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  374 
  375     fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
  376                                fop_flags, ec_wind_fremovexattr,
  377                                ec_manager_xattr, callback, data);
  378     if (fop == NULL) {
  379         goto out;
  380     }
  381 
  382     fop->use_fd = 1;
  383 
  384     if (fd != NULL) {
  385         fop->fd = fd_ref(fd);
  386         if (fop->fd == NULL) {
  387             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
  388                    "Failed to reference a "
  389                    "file descriptor.");
  390 
  391             goto out;
  392         }
  393     }
  394     if (name != NULL) {
  395         fop->str[0] = gf_strdup(name);
  396         if (fop->str[0] == NULL) {
  397             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
  398                    "Failed to duplicate a string.");
  399 
  400             goto out;
  401         }
  402     }
  403     if (xdata != NULL) {
  404         fop->xdata = dict_copy_with_ref(xdata, NULL);
  405         if (fop->xdata == NULL) {
  406             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  407                    "Failed to reference a "
  408                    "dictionary.");
  409 
  410             goto out;
  411         }
  412     }
  413 
  414     error = 0;
  415 
  416 out:
  417     if (fop != NULL) {
  418         ec_manager(fop, error);
  419     } else {
  420         func(frame, NULL, this, -1, error, NULL);
  421     }
  422 }
  423 
  424 /* FOP: setattr */
  425 
  426 int32_t
  427 ec_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  428                int32_t op_ret, int32_t op_errno, struct iatt *prestat,
  429                struct iatt *poststat, dict_t *xdata)
  430 {
  431     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
  432                               poststat, xdata);
  433 }
  434 
/*
 * Winds the setattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->loc[0], fop->iatt, fop->int32 and
 * fop->xdata as arguments; 'idx' is passed as the cookie (cast to a pointer)
 * and ec_setattr_cbk is registered as the reply callback.
 */
void
ec_wind_setattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_setattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->setattr,
                      &fop->loc[0], &fop->iatt, fop->int32, fop->xdata);
}
  444 
  445 int32_t
  446 ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
  447 {
  448     ec_cbk_data_t *cbk;
  449 
  450     switch (state) {
  451         case EC_STATE_INIT:
  452         case EC_STATE_LOCK:
  453             if (fop->fd == NULL) {
  454                 ec_lock_prepare_inode(fop, &fop->loc[0],
  455                                       EC_UPDATE_META | EC_QUERY_INFO, 0,
  456                                       EC_RANGE_FULL);
  457             } else {
  458                 ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
  459                                    0, EC_RANGE_FULL);
  460             }
  461             ec_lock(fop);
  462 
  463             return EC_STATE_DISPATCH;
  464 
  465         case EC_STATE_DISPATCH:
  466             ec_dispatch_all(fop);
  467 
  468             return EC_STATE_PREPARE_ANSWER;
  469 
  470         case EC_STATE_PREPARE_ANSWER:
  471             cbk = ec_fop_prepare_answer(fop, _gf_false);
  472             if (cbk != NULL) {
  473                 if (cbk->iatt[0].ia_type == IA_IFREG) {
  474                     ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
  475 
  476                     /* This shouldn't fail because we have the inode locked. */
  477                     GF_ASSERT(ec_get_inode_size(fop,
  478                                                 fop->locks[0].lock->loc.inode,
  479                                                 &cbk->iatt[0].ia_size));
  480                     cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
  481                 }
  482             }
  483 
  484             return EC_STATE_REPORT;
  485 
  486         case EC_STATE_REPORT:
  487             cbk = fop->answer;
  488 
  489             GF_ASSERT(cbk != NULL);
  490 
  491             if (fop->id == GF_FOP_SETATTR) {
  492                 if (fop->cbks.setattr != NULL) {
  493                     QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
  494                                fop->xl, cbk->op_ret, cbk->op_errno,
  495                                &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
  496                 }
  497             } else {
  498                 if (fop->cbks.fsetattr != NULL) {
  499                     QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
  500                                fop->xl, cbk->op_ret, cbk->op_errno,
  501                                &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
  502                 }
  503             }
  504 
  505             return EC_STATE_LOCK_REUSE;
  506 
  507         case -EC_STATE_INIT:
  508         case -EC_STATE_LOCK:
  509         case -EC_STATE_DISPATCH:
  510         case -EC_STATE_PREPARE_ANSWER:
  511         case -EC_STATE_REPORT:
  512             GF_ASSERT(fop->error != 0);
  513 
  514             if (fop->id == GF_FOP_SETATTR) {
  515                 if (fop->cbks.setattr != NULL) {
  516                     fop->cbks.setattr(fop->req_frame, fop, fop->xl, -1,
  517                                       fop->error, NULL, NULL, NULL);
  518                 }
  519             } else {
  520                 if (fop->cbks.fsetattr != NULL) {
  521                     fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, -1,
  522                                        fop->error, NULL, NULL, NULL);
  523                 }
  524             }
  525 
  526             return EC_STATE_LOCK_REUSE;
  527 
  528         case -EC_STATE_LOCK_REUSE:
  529         case EC_STATE_LOCK_REUSE:
  530             ec_lock_reuse(fop);
  531 
  532             return EC_STATE_UNLOCK;
  533 
  534         case -EC_STATE_UNLOCK:
  535         case EC_STATE_UNLOCK:
  536             ec_unlock(fop);
  537 
  538             return EC_STATE_END;
  539 
  540         default:
  541             gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
  542                    "Unhandled state %d for %s", state, ec_fop_name(fop->id));
  543 
  544             return EC_STATE_END;
  545     }
  546 }
  547 
  548 void
  549 ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  550            uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
  551            struct iatt *stbuf, int32_t valid, dict_t *xdata)
  552 {
  553     ec_cbk_t callback = {.setattr = func};
  554     ec_fop_data_t *fop = NULL;
  555     int32_t error = ENOMEM;
  556 
  557     gf_msg_trace("ec", 0, "EC(SETATTR) %p", frame);
  558 
  559     VALIDATE_OR_GOTO(this, out);
  560     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  561     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  562 
  563     fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
  564                                fop_flags, ec_wind_setattr, ec_manager_setattr,
  565                                callback, data);
  566     if (fop == NULL) {
  567         goto out;
  568     }
  569 
  570     fop->int32 = valid;
  571 
  572     if (loc != NULL) {
  573         if (loc_copy(&fop->loc[0], loc) != 0) {
  574             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
  575                    "Failed to copy a location.");
  576 
  577             goto out;
  578         }
  579     }
  580     if (stbuf != NULL) {
  581         fop->iatt = *stbuf;
  582     }
  583     if (xdata != NULL) {
  584         fop->xdata = dict_copy_with_ref(xdata, NULL);
  585         if (fop->xdata == NULL) {
  586             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  587                    "Failed to reference a "
  588                    "dictionary.");
  589 
  590             goto out;
  591         }
  592     }
  593 
  594     error = 0;
  595 
  596 out:
  597     if (fop != NULL) {
  598         ec_manager(fop, error);
  599     } else {
  600         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
  601     }
  602 }
  603 
  604 /* FOP: fsetattr */
  605 
  606 int32_t
  607 ec_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  608                 int32_t op_ret, int32_t op_errno, struct iatt *prestat,
  609                 struct iatt *poststat, dict_t *xdata)
  610 {
  611     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
  612                               poststat, xdata);
  613 }
  614 
/*
 * Winds the fsetattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->fd, fop->iatt, fop->int32 and
 * fop->xdata as arguments; 'idx' is passed as the cookie (cast to a pointer)
 * and ec_fsetattr_cbk is registered as the reply callback.
 */
void
ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_fsetattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->fsetattr,
                      fop->fd, &fop->iatt, fop->int32, fop->xdata);
}
  624 
  625 void
  626 ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  627             uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
  628             struct iatt *stbuf, int32_t valid, dict_t *xdata)
  629 {
  630     ec_cbk_t callback = {.fsetattr = func};
  631     ec_fop_data_t *fop = NULL;
  632     int32_t error = ENOMEM;
  633 
  634     gf_msg_trace("ec", 0, "EC(FSETATTR) %p", frame);
  635 
  636     VALIDATE_OR_GOTO(this, out);
  637     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  638     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  639 
  640     fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
  641                                fop_flags, ec_wind_fsetattr, ec_manager_setattr,
  642                                callback, data);
  643     if (fop == NULL) {
  644         goto out;
  645     }
  646 
  647     fop->use_fd = 1;
  648 
  649     fop->int32 = valid;
  650 
  651     if (fd != NULL) {
  652         fop->fd = fd_ref(fd);
  653         if (fop->fd == NULL) {
  654             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
  655                    "Failed to reference a "
  656                    "file descriptor.");
  657 
  658             goto out;
  659         }
  660     }
  661     if (stbuf != NULL) {
  662         fop->iatt = *stbuf;
  663     }
  664     if (xdata != NULL) {
  665         fop->xdata = dict_copy_with_ref(xdata, NULL);
  666         if (fop->xdata == NULL) {
  667             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  668                    "Failed to reference a "
  669                    "dictionary.");
  670 
  671             goto out;
  672         }
  673     }
  674 
  675     error = 0;
  676 
  677 out:
  678     if (fop != NULL) {
  679         ec_manager(fop, error);
  680     } else {
  681         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
  682     }
  683 }
  684 
  685 /* FOP: setxattr */
  686 
  687 int32_t
  688 ec_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  689                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
  690 {
  691     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
  692                               xdata);
  693 }
  694 
/*
 * Winds the setxattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->loc[0], fop->dict, fop->int32 and
 * fop->xdata as arguments; 'idx' is passed as the cookie (cast to a pointer)
 * and ec_setxattr_cbk is registered as the reply callback.
 */
void
ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_setxattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->setxattr,
                      &fop->loc[0], fop->dict, fop->int32, fop->xdata);
}
  704 
  705 void
  706 ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  707             uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
  708             dict_t *dict, int32_t flags, dict_t *xdata)
  709 {
  710     ec_cbk_t callback = {.setxattr = func};
  711     ec_fop_data_t *fop = NULL;
  712     int32_t error = ENOMEM;
  713 
  714     gf_msg_trace("ec", 0, "EC(SETXATTR) %p", frame);
  715 
  716     VALIDATE_OR_GOTO(this, out);
  717     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  718     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  719 
  720     fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
  721                                fop_flags, ec_wind_setxattr, ec_manager_xattr,
  722                                callback, data);
  723     if (fop == NULL) {
  724         goto out;
  725     }
  726 
  727     fop->int32 = flags;
  728 
  729     if (loc != NULL) {
  730         if (loc_copy(&fop->loc[0], loc) != 0) {
  731             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
  732                    "Failed to copy a location.");
  733 
  734             goto out;
  735         }
  736     }
  737     if (dict != NULL) {
  738         fop->dict = dict_copy_with_ref(dict, NULL);
  739         if (fop->dict == NULL) {
  740             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  741                    "Failed to reference a "
  742                    "dictionary.");
  743 
  744             goto out;
  745         }
  746     }
  747     if (xdata != NULL) {
  748         fop->xdata = dict_copy_with_ref(xdata, NULL);
  749         if (fop->xdata == NULL) {
  750             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  751                    "Failed to reference a "
  752                    "dictionary.");
  753 
  754             goto out;
  755         }
  756     }
  757 
  758     error = 0;
  759 
  760 out:
  761     if (fop != NULL) {
  762         ec_manager(fop, error);
  763     } else {
  764         func(frame, NULL, this, -1, error, NULL);
  765     }
  766 }
  767 
  768 /* FOP: fsetxattr */
  769 
  770 int32_t
  771 ec_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  772                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
  773 {
  774     ec_fop_data_t *fop = NULL;
  775     ec_cbk_data_t *cbk = NULL;
  776     int32_t idx = (int32_t)(uintptr_t)cookie;
  777 
  778     VALIDATE_OR_GOTO(this, out);
  779     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  780     GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
  781     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  782 
  783     fop = frame->local;
  784 
  785     ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
  786              op_ret, op_errno);
  787 
  788     cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSETXATTR, idx, op_ret,
  789                                op_errno);
  790     if (cbk != NULL) {
  791         if (xdata != NULL) {
  792             cbk->xdata = dict_ref(xdata);
  793             if (cbk->xdata == NULL) {
  794                 gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  795                        "Failed to reference a "
  796                        "dictionary.");
  797 
  798                 goto out;
  799             }
  800         }
  801 
  802         ec_combine(cbk, NULL);
  803     }
  804 
  805 out:
  806     if (fop != NULL) {
  807         ec_complete(fop);
  808     }
  809 
  810     return 0;
  811 }
  812 
/*
 * Winds the fsetxattr request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->fd, fop->dict, fop->int32 and
 * fop->xdata as arguments; 'idx' is passed as the cookie (cast to a pointer)
 * and ec_fsetxattr_cbk is registered as the reply callback.
 */
void
ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_fsetxattr_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->fsetxattr,
                      fop->fd, fop->dict, fop->int32, fop->xdata);
}
  822 
  823 void
  824 ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
  825              uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
  826              dict_t *dict, int32_t flags, dict_t *xdata)
  827 {
  828     ec_cbk_t callback = {.fsetxattr = func};
  829     ec_fop_data_t *fop = NULL;
  830     int32_t error = ENOMEM;
  831 
  832     gf_msg_trace("ec", 0, "EC(FSETXATTR) %p", frame);
  833 
  834     VALIDATE_OR_GOTO(this, out);
  835     GF_VALIDATE_OR_GOTO(this->name, frame, out);
  836     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
  837 
  838     fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
  839                                fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
  840                                callback, data);
  841     if (fop == NULL) {
  842         goto out;
  843     }
  844 
  845     fop->use_fd = 1;
  846 
  847     fop->int32 = flags;
  848 
  849     if (fd != NULL) {
  850         fop->fd = fd_ref(fd);
  851         if (fop->fd == NULL) {
  852             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
  853                    "Failed to reference a "
  854                    "file descriptor.");
  855 
  856             goto out;
  857         }
  858     }
  859     if (dict != NULL) {
  860         fop->dict = dict_copy_with_ref(dict, NULL);
  861         if (fop->dict == NULL) {
  862             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  863                    "Failed to reference a "
  864                    "dictionary.");
  865 
  866             goto out;
  867         }
  868     }
  869     if (xdata != NULL) {
  870         fop->xdata = dict_copy_with_ref(xdata, NULL);
  871         if (fop->xdata == NULL) {
  872             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
  873                    "Failed to reference a "
  874                    "dictionary.");
  875 
  876             goto out;
  877         }
  878     }
  879 
  880     error = 0;
  881 
  882 out:
  883     if (fop != NULL) {
  884         ec_manager(fop, error);
  885     } else {
  886         func(frame, NULL, this, -1, error, NULL);
  887     }
  888 }
  889 
  890 /*********************************************************************
  891  *
  892  * File Operation : fallocate
  893  *
  894  *********************************************************************/
  895 
  896 int32_t
  897 ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
  898                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
  899                  struct iatt *postbuf, dict_t *xdata)
  900 {
  901     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
  902                               postbuf, xdata);
  903 }
  904 
/*
 * Winds the fallocate request described by 'fop' to the xlator stored at
 * ec->xl_list[idx]. The request uses fop->fd, fop->int32, fop->offset,
 * fop->size and fop->xdata as arguments; 'idx' is passed as the cookie (cast
 * to a pointer) and ec_fallocate_cbk is registered as the reply callback.
 */
void
ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate,
                      fop->fd, fop->int32, fop->offset, fop->size, fop->xdata);
}
  914 
/*
 * State handler for the fallocate fop.
 *
 * Called with the current state of 'fop'; performs the work for that state
 * and returns the next state. Negative state values are the error variants
 * of the corresponding positive states.
 *
 * fop->int32 holds the fallocate mode, fop->offset/fop->size the requested
 * range (both are rewritten in EC_STATE_INIT) and fop->user_size the end
 * offset of the range originally requested by the caller.
 */
int32_t
ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
{
    ec_cbk_data_t *cbk = NULL;

    switch (state) {
        case EC_STATE_INIT:
            /* A zero-length request is rejected with EINVAL. */
            if (fop->size == 0) {
                ec_fop_set_error(fop, EINVAL);
                return EC_STATE_REPORT;
            }
            /* Any mode that collapses, inserts, zeroes or punches a range
             * is rejected with ENOTSUP. */
            if (fop->int32 &
                (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
                 FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) {
                ec_fop_set_error(fop, ENOTSUP);
                return EC_STATE_REPORT;
            }
            /* Remember the end of the user range before the offset and
             * size are adjusted; it is compared with the file size in
             * EC_STATE_PREPARE_ANSWER. */
            fop->user_size = fop->offset + fop->size;
            /* The value returned by ec_adjust_offset_down() is added to
             * the size, so the adjusted range still covers the bytes
             * between the new and the original offset. */
            fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
                                              _gf_true);
            fop->size += fop->head;
            ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);

            /* Fall through */

        case EC_STATE_LOCK:
            /* Lock request on the fd for the adjusted offset/size. */
            ec_lock_prepare_fd(fop, fop->fd,
                               EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
                               fop->offset, fop->size);
            ec_lock(fop);

            return EC_STATE_DISPATCH;

        case EC_STATE_DISPATCH:

            ec_dispatch_all(fop);

            return EC_STATE_PREPARE_ANSWER;

        case EC_STATE_PREPARE_ANSWER:
            cbk = ec_fop_prepare_answer(fop, _gf_false);
            if (cbk != NULL) {
                ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);

                /* This shouldn't fail because we have the inode locked. */
                LOCK(&fop->locks[0].lock->loc.inode->lock);
                {
                    /* iatt[0] (pre-operation) gets the size currently
                     * recorded for the inode. */
                    GF_ASSERT(__ec_get_inode_size(fop,
                                                  fop->locks[0].lock->loc.inode,
                                                  &cbk->iatt[0].ia_size));

                    /* With FALLOC_FL_KEEP_SIZE the reported size does not
                     * change. Otherwise the size grows to the end of the
                     * requested range only when that end lies beyond the
                     * current size. */
                    if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
                        cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
                    } else if (fop->user_size > cbk->iatt[0].ia_size) {
                        cbk->iatt[1].ia_size = fop->user_size;

                        /* This shouldn't fail because we have the inode
                         * locked. */
                        GF_ASSERT(__ec_set_inode_size(
                            fop, fop->locks[0].lock->loc.inode,
                            cbk->iatt[1].ia_size));
                    } else {
                        cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
                    }
                }
                UNLOCK(&fop->locks[0].lock->loc.inode->lock);
            }

            return EC_STATE_REPORT;

        case EC_STATE_REPORT:
            /* Success path: report the combined answer to the caller. */
            cbk = fop->answer;

            GF_ASSERT(cbk != NULL);

            if (fop->cbks.fallocate != NULL) {
                QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
                           fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
                           &cbk->iatt[1], cbk->xdata);
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_INIT:
        case -EC_STATE_LOCK:
        case -EC_STATE_DISPATCH:
        case -EC_STATE_PREPARE_ANSWER:
        case -EC_STATE_REPORT:
            /* Error path: report -1 with fop->error and no iatt/xdata. */
            GF_ASSERT(fop->error != 0);

            if (fop->cbks.fallocate != NULL) {
                fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1,
                                    fop->error, NULL, NULL, NULL);
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_LOCK_REUSE:
        case EC_STATE_LOCK_REUSE:
            ec_lock_reuse(fop);

            return EC_STATE_UNLOCK;

        case -EC_STATE_UNLOCK:
        case EC_STATE_UNLOCK:
            ec_unlock(fop);

            return EC_STATE_END;

        default:
            /* Unknown state: log it and terminate the state machine. */
            gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
                   "Unhandled state %d for %s", state, ec_fop_name(fop->id));

            return EC_STATE_END;
    }
}
 1032 
 1033 void
 1034 ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
 1035              uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
 1036              int32_t mode, off_t offset, size_t len, dict_t *xdata)
 1037 {
 1038     ec_cbk_t callback = {.fallocate = func};
 1039     ec_fop_data_t *fop = NULL;
 1040     int32_t error = ENOMEM;
 1041 
 1042     gf_msg_trace("ec", 0, "EC(FALLOCATE) %p", frame);
 1043 
 1044     VALIDATE_OR_GOTO(this, out);
 1045     GF_VALIDATE_OR_GOTO(this->name, frame, out);
 1046     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 1047 
 1048     fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
 1049                                fop_flags, ec_wind_fallocate,
 1050                                ec_manager_fallocate, callback, data);
 1051     if (fop == NULL) {
 1052         goto out;
 1053     }
 1054 
 1055     fop->use_fd = 1;
 1056     fop->int32 = mode;
 1057     fop->offset = offset;
 1058     fop->size = len;
 1059 
 1060     if (fd != NULL) {
 1061         fop->fd = fd_ref(fd);
 1062         if (fop->fd == NULL) {
 1063             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
 1064                    "Failed to reference a "
 1065                    "file descriptor.");
 1066             goto out;
 1067         }
 1068     }
 1069 
 1070     if (xdata != NULL) {
 1071         fop->xdata = dict_ref(xdata);
 1072         if (fop->xdata == NULL) {
 1073             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
 1074                    "Failed to reference a "
 1075                    "dictionary.");
 1076             goto out;
 1077         }
 1078     }
 1079 
 1080     error = 0;
 1081 
 1082 out:
 1083     if (fop != NULL) {
 1084         ec_manager(fop, error);
 1085     } else {
 1086         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
 1087     }
 1088 }
 1089 
 1090 /*********************************************************************
 1091  *
 1092  * File Operation : Discard
 1093  *
 1094  *********************************************************************/
 1095 void
 1096 ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
 1097 {
 1098     ec_t *ec = fop->xl->private;
 1099     off_t off_head = 0;
 1100     off_t off_tail = 0;
 1101     uint64_t size_head = 0;
 1102     uint64_t size_tail = 0;
 1103     int error = 0;
 1104 
 1105     off_head = fop->offset * ec->fragments - fop->int32;
 1106     if (fop->size == 0) {
 1107         error = ec_update_write(fop, mask, off_head, fop->user_size);
 1108     } else {
 1109         size_head = fop->int32;
 1110         size_tail = (off_head + fop->user_size) % ec->stripe_size;
 1111         off_tail = off_head + fop->user_size - size_tail;
 1112         if (size_head) {
 1113             error = ec_update_write(fop, mask, off_head, size_head);
 1114             if (error) {
 1115                 goto out;
 1116             }
 1117         }
 1118         if (size_tail) {
 1119             error = ec_update_write(fop, mask, off_tail, size_tail);
 1120         }
 1121     }
 1122 out:
 1123     if (error)
 1124         ec_fop_set_error(fop, -error);
 1125 }
 1126 
 1127 void
 1128 ec_discard_adjust_offset_size(ec_fop_data_t *fop)
 1129 {
 1130     ec_t *ec = fop->xl->private;
 1131 
 1132     fop->user_size = fop->size;
 1133     /* If discard length covers at least a fragment on brick, we will
 1134      * perform discard operation(when fop->size is non-zero) else we just
 1135      * write zeros.
 1136      */
 1137     fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
 1138     fop->frag_range.first = fop->offset;
 1139     if (fop->size < fop->int32) {
 1140         fop->size = 0;
 1141     } else {
 1142         fop->size -= fop->int32;
 1143         ec_adjust_size_down(ec, &fop->size, _gf_true);
 1144     }
 1145     fop->frag_range.last = fop->offset + fop->size;
 1146 }
 1147 
 1148 int32_t
 1149 ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1150                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
 1151                struct iatt *postbuf, dict_t *xdata)
 1152 {
 1153     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
 1154                               postbuf, xdata);
 1155 }
 1156 
/*
 * Winds the discard request described by 'fop' to the xlator stored at
 * ec->xl_list[idx].
 *
 * The index is passed as the wind cookie and ec_discard_cbk() is registered
 * as the callback. The request uses fop->fd, fop->offset, fop->size and
 * fop->xdata; offset and size are the values left in the fop by
 * ec_discard_adjust_offset_size(), not the ones supplied by the user.
 *
 * NOTE(review): the argument text of STACK_WIND_COOKIE is presumably used
 * for debugging output by the macro, so keep it verbatim - confirm against
 * the macro definition before refactoring.
 */
void
ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
                      fop->fd, fop->offset, fop->size, fop->xdata);
}
 1166 
/*
 * State handler for the discard fop.
 *
 * Called with the current state of 'fop'; performs the work for that state
 * and returns the next state. Negative state values are the error variants
 * of the corresponding positive states.
 *
 * NOTE(review): fl_start and fl_size are locals that are only computed in
 * EC_STATE_INIT and consumed in EC_STATE_LOCK through the fall-through. If
 * this function were ever entered directly in EC_STATE_LOCK they would
 * still hold their initial value 0.
 */
int32_t
ec_manager_discard(ec_fop_data_t *fop, int32_t state)
{
    ec_cbk_data_t *cbk = NULL;
    off_t fl_start = 0;
    uint64_t fl_size = 0;

    switch (state) {
        case EC_STATE_INIT:
            /* Empty requests and negative offsets are rejected. */
            if ((fop->size <= 0) || (fop->offset < 0)) {
                ec_fop_set_error(fop, EINVAL);
                return EC_STATE_REPORT;
            }
            /* The discard itself only covers the region left after the
             * head/tail writes, but the lock must cover the whole region
             * including those writes. The lock range (fl_start, fl_size)
             * is therefore computed separately from the user range, before
             * fop->offset and fop->size are rewritten below. */
            fl_start = fop->offset;
            fl_size = fop->size;
            fl_size += ec_adjust_offset_down(fop->xl->private, &fl_start,
                                             _gf_true);
            ec_adjust_size_up(fop->xl->private, &fl_size, _gf_true);

            ec_discard_adjust_offset_size(fop);

            /* Fall through */

        case EC_STATE_LOCK:
            ec_lock_prepare_fd(fop, fop->fd,
                               EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
                               fl_start, fl_size);
            ec_lock(fop);

            return EC_STATE_DISPATCH;

        case EC_STATE_DISPATCH:

            /* Dispatch discard fop only if we have whole fragment
             * to deallocate */
            if (fop->size) {
                ec_dispatch_all(fop);
                return EC_STATE_DELAYED_START;
            } else {
                /* Assume discard to have succeeded on all bricks */
                ec_succeed_all(fop);
            }

            /* Fall through */

        case EC_STATE_DELAYED_START:

            /* When a discard was dispatched, the head/tail writes are only
             * issued if an answer exists and it succeeded, using the mask
             * of that answer. When nothing was dispatched, the writes use
             * fop->mask. */
            if (fop->size) {
                if (fop->answer && fop->answer->op_ret == 0)
                    ec_update_discard_write(fop, fop->answer->mask);
            } else {
                ec_update_discard_write(fop, fop->mask);
            }

            return EC_STATE_PREPARE_ANSWER;

        case EC_STATE_PREPARE_ANSWER:
            cbk = ec_fop_prepare_answer(fop, _gf_false);
            if (cbk != NULL) {
                ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);

                /* This shouldn't fail because we have the inode locked. */
                GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
                                            &cbk->iatt[0].ia_size));

                /* The same size is reported before and after the
                 * operation. */
                cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
            }
            return EC_STATE_REPORT;

        case EC_STATE_REPORT:
            /* Success path: report the combined answer to the caller. */
            cbk = fop->answer;

            GF_ASSERT(cbk != NULL);

            if (fop->cbks.discard != NULL) {
                QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
                           &cbk->iatt[1], cbk->xdata);
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_INIT:
        case -EC_STATE_LOCK:
        case -EC_STATE_DISPATCH:
        case -EC_STATE_DELAYED_START:
        case -EC_STATE_PREPARE_ANSWER:
        case -EC_STATE_REPORT:
            /* Error path: report -1 with fop->error and no iatt/xdata. */
            GF_ASSERT(fop->error != 0);

            if (fop->cbks.discard != NULL) {
                fop->cbks.discard(fop->req_frame, fop, fop->xl, -1, fop->error,
                                  NULL, NULL, NULL);
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_LOCK_REUSE:
        case EC_STATE_LOCK_REUSE:
            ec_lock_reuse(fop);

            return EC_STATE_UNLOCK;

        case -EC_STATE_UNLOCK:
        case EC_STATE_UNLOCK:
            ec_unlock(fop);

            return EC_STATE_END;

        default:
            /* Unknown state: log it and terminate the state machine. */
            gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
                   "Unhandled state %d for %s", state, ec_fop_name(fop->id));

            return EC_STATE_END;
    }
}
 1286 
 1287 void
 1288 ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
 1289            uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
 1290            off_t offset, size_t len, dict_t *xdata)
 1291 {
 1292     ec_cbk_t callback = {.discard = func};
 1293     ec_fop_data_t *fop = NULL;
 1294     int32_t error = ENOMEM;
 1295 
 1296     gf_msg_trace("ec", 0, "EC(DISCARD) %p", frame);
 1297 
 1298     VALIDATE_OR_GOTO(this, out);
 1299     GF_VALIDATE_OR_GOTO(this->name, frame, out);
 1300     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 1301 
 1302     fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
 1303                                fop_flags, ec_wind_discard, ec_manager_discard,
 1304                                callback, data);
 1305     if (fop == NULL) {
 1306         goto out;
 1307     }
 1308 
 1309     fop->use_fd = 1;
 1310     fop->offset = offset;
 1311     fop->size = len;
 1312 
 1313     if (fd != NULL) {
 1314         fop->fd = fd_ref(fd);
 1315     }
 1316 
 1317     if (xdata != NULL) {
 1318         fop->xdata = dict_ref(xdata);
 1319     }
 1320 
 1321     error = 0;
 1322 
 1323 out:
 1324     if (fop != NULL) {
 1325         ec_manager(fop, error);
 1326     } else {
 1327         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
 1328     }
 1329 }
 1330 
 1331 /*********************************************************************
 1332  *
 1333  * File Operation : truncate
 1334  *
 1335  *********************************************************************/
 1336 
 1337 int32_t
 1338 ec_update_truncate_write(ec_fop_data_t *fop, uintptr_t mask)
 1339 {
 1340     ec_t *ec = fop->xl->private;
 1341     uint64_t size = fop->offset * ec->fragments - fop->user_size;
 1342     return ec_update_write(fop, mask, fop->user_size, size);
 1343 }
 1344 
 1345 int32_t
 1346 ec_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1347                      int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
 1348 {
 1349     ec_fop_data_t *fop = cookie;
 1350     int32_t err;
 1351 
 1352     fop->parent->good &= fop->good;
 1353     if (op_ret >= 0) {
 1354         fd_bind(fd);
 1355         err = ec_update_truncate_write(fop->parent, fop->answer->mask);
 1356         if (err != 0) {
 1357             ec_fop_set_error(fop->parent, -err);
 1358         }
 1359     }
 1360 
 1361     return 0;
 1362 }
 1363 
 1364 int32_t
 1365 ec_truncate_clean(ec_fop_data_t *fop)
 1366 {
 1367     if (fop->fd == NULL) {
 1368         fop->fd = fd_create(fop->loc[0].inode, fop->frame->root->pid);
 1369         if (fop->fd == NULL) {
 1370             return -ENOMEM;
 1371         }
 1372 
 1373         ec_open(fop->frame, fop->xl, fop->answer->mask, fop->minimum,
 1374                 ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd, NULL);
 1375 
 1376         return 0;
 1377     } else {
 1378         return ec_update_truncate_write(fop, fop->answer->mask);
 1379     }
 1380 }
 1381 
 1382 int32_t
 1383 ec_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1384                 int32_t op_ret, int32_t op_errno, struct iatt *prestat,
 1385                 struct iatt *poststat, dict_t *xdata)
 1386 {
 1387     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
 1388                               poststat, xdata);
 1389 }
 1390 
/*
 * Winds the truncate request described by 'fop' to the xlator stored at
 * ec->xl_list[idx].
 *
 * The index is passed as the wind cookie and ec_truncate_cbk() is registered
 * as the callback. The request uses fop->loc[0], fop->offset (the offset as
 * adjusted by ec_manager_truncate(), not the user supplied one) and
 * fop->xdata.
 *
 * NOTE(review): the argument text of STACK_WIND_COOKIE is presumably used
 * for debugging output by the macro, so keep it verbatim - confirm against
 * the macro definition before refactoring.
 */
void
ec_wind_truncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_truncate_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->truncate,
                      &fop->loc[0], fop->offset, fop->xdata);
}
 1400 
/*
 * State handler shared by the truncate and ftruncate fops (fop->id tells
 * them apart).
 *
 * Called with the current state of 'fop'; performs the work for that state
 * and returns the next state. Negative state values are the error variants
 * of the corresponding positive states.
 *
 * fop->user_size keeps the size requested by the caller while fop->offset
 * is replaced by the upwards adjusted offset.
 */
int32_t
ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
{
    ec_cbk_data_t *cbk;
    off_t offset_down;

    switch (state) {
        case EC_STATE_INIT:
            /* Save the requested size, then adjust the offset upwards.
             * The fragment range runs from the adjusted offset to
             * UINT64_MAX. */
            fop->user_size = fop->offset;
            ec_adjust_offset_up(fop->xl->private, &fop->offset, _gf_true);
            fop->frag_range.first = fop->offset;
            fop->frag_range.last = UINT64_MAX;

            /* Fall through */

        case EC_STATE_LOCK:
            /* The lock starts at the requested size adjusted downwards and
             * has size EC_RANGE_FULL. */
            offset_down = fop->user_size;
            ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);

            /* truncate locks through the location, ftruncate through the
             * fd. */
            if (fop->id == GF_FOP_TRUNCATE) {
                ec_lock_prepare_inode(
                    fop, &fop->loc[0],
                    EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
                    offset_down, EC_RANGE_FULL);
            } else {
                ec_lock_prepare_fd(
                    fop, fop->fd,
                    EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
                    offset_down, EC_RANGE_FULL);
            }
            ec_lock(fop);

            return EC_STATE_DISPATCH;

        case EC_STATE_DISPATCH:
            ec_dispatch_all(fop);

            return EC_STATE_PREPARE_ANSWER;

        case EC_STATE_PREPARE_ANSWER:
            cbk = ec_fop_prepare_answer(fop, _gf_false);
            if (cbk != NULL) {
                int32_t err;

                ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);

                /* This shouldn't fail because we have the inode locked. */
                /* Inode size doesn't need to be updated under locks, because
                 * conflicting operations won't be in-flight
                 */
                GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
                                            &cbk->iatt[0].ia_size));
                /* The post-operation size is the size the user asked for. */
                cbk->iatt[1].ia_size = fop->user_size;
                /* This shouldn't fail because we have the inode locked. */
                GF_ASSERT(ec_set_inode_size(fop, fop->locks[0].lock->loc.inode,
                                            fop->user_size));
                /* ec_truncate_clean() is only needed when the file shrank
                 * and the requested size differs from the adjusted offset
                 * sent to the bricks. */
                if ((cbk->iatt[0].ia_size > cbk->iatt[1].ia_size) &&
                    (fop->user_size != fop->offset)) {
                    err = ec_truncate_clean(fop);
                    if (err != 0) {
                        ec_cbk_set_error(cbk, -err, _gf_false);
                    }
                }
            }

            return EC_STATE_REPORT;

        case EC_STATE_REPORT:
            /* Success path: report the combined answer through the
             * callback matching the fop type. */
            cbk = fop->answer;

            GF_ASSERT(cbk != NULL);

            if (fop->id == GF_FOP_TRUNCATE) {
                if (fop->cbks.truncate != NULL) {
                    QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
                               fop->xl, cbk->op_ret, cbk->op_errno,
                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
                }
            } else {
                if (fop->cbks.ftruncate != NULL) {
                    QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
                               fop->xl, cbk->op_ret, cbk->op_errno,
                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
                }
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_INIT:
        case -EC_STATE_LOCK:
        case -EC_STATE_DISPATCH:
        case -EC_STATE_PREPARE_ANSWER:
        case -EC_STATE_REPORT:
            /* Error path: report -1 with fop->error and no iatt/xdata. */
            GF_ASSERT(fop->error != 0);

            if (fop->id == GF_FOP_TRUNCATE) {
                if (fop->cbks.truncate != NULL) {
                    fop->cbks.truncate(fop->req_frame, fop, fop->xl, -1,
                                       fop->error, NULL, NULL, NULL);
                }
            } else {
                if (fop->cbks.ftruncate != NULL) {
                    fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, -1,
                                        fop->error, NULL, NULL, NULL);
                }
            }

            return EC_STATE_LOCK_REUSE;

        case -EC_STATE_LOCK_REUSE:
        case EC_STATE_LOCK_REUSE:
            ec_lock_reuse(fop);

            return EC_STATE_UNLOCK;

        case -EC_STATE_UNLOCK:
        case EC_STATE_UNLOCK:
            ec_unlock(fop);

            return EC_STATE_END;

        default:
            /* Unknown state: log it and terminate the state machine. */
            gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
                   "Unhandled state %d for %s", state, ec_fop_name(fop->id));

            return EC_STATE_END;
    }
}
 1529 
 1530 void
 1531 ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
 1532             uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
 1533             off_t offset, dict_t *xdata)
 1534 {
 1535     ec_cbk_t callback = {.truncate = func};
 1536     ec_fop_data_t *fop = NULL;
 1537     int32_t error = ENOMEM;
 1538 
 1539     gf_msg_trace("ec", 0, "EC(TRUNCATE) %p", frame);
 1540 
 1541     VALIDATE_OR_GOTO(this, out);
 1542     GF_VALIDATE_OR_GOTO(this->name, frame, out);
 1543     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 1544 
 1545     fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
 1546                                fop_flags, ec_wind_truncate, ec_manager_truncate,
 1547                                callback, data);
 1548     if (fop == NULL) {
 1549         goto out;
 1550     }
 1551 
 1552     fop->offset = offset;
 1553 
 1554     if (loc != NULL) {
 1555         if (loc_copy(&fop->loc[0], loc) != 0) {
 1556             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
 1557                    "Failed to copy a location.");
 1558 
 1559             goto out;
 1560         }
 1561     }
 1562     if (xdata != NULL) {
 1563         fop->xdata = dict_copy_with_ref(xdata, NULL);
 1564         if (fop->xdata == NULL) {
 1565             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
 1566                    "Failed to reference a "
 1567                    "dictionary.");
 1568 
 1569             goto out;
 1570         }
 1571     }
 1572 
 1573     error = 0;
 1574 
 1575 out:
 1576     if (fop != NULL) {
 1577         ec_manager(fop, error);
 1578     } else {
 1579         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
 1580     }
 1581 }
 1582 
 1583 /* FOP: ftruncate */
 1584 
 1585 int32_t
 1586 ec_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 1587                  int32_t op_ret, int32_t op_errno, struct iatt *prestat,
 1588                  struct iatt *poststat, dict_t *xdata)
 1589 {
 1590     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
 1591                               poststat, xdata);
 1592 }
 1593 
/*
 * Winds the ftruncate request described by 'fop' to the xlator stored at
 * ec->xl_list[idx].
 *
 * The index is passed as the wind cookie and ec_ftruncate_cbk() is
 * registered as the callback. The request uses fop->fd, fop->offset (the
 * offset as adjusted by ec_manager_truncate(), not the user supplied one)
 * and fop->xdata.
 *
 * NOTE(review): the argument text of STACK_WIND_COOKIE is presumably used
 * for debugging output by the macro, so keep it verbatim - confirm against
 * the macro definition before refactoring.
 */
void
ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
{
    ec_trace("WIND", fop, "idx=%d", idx);

    STACK_WIND_COOKIE(fop->frame, ec_ftruncate_cbk, (void *)(uintptr_t)idx,
                      ec->xl_list[idx], ec->xl_list[idx]->fops->ftruncate,
                      fop->fd, fop->offset, fop->xdata);
}
 1603 
 1604 void
 1605 ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
 1606              uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
 1607              off_t offset, dict_t *xdata)
 1608 {
 1609     ec_cbk_t callback = {.ftruncate = func};
 1610     ec_fop_data_t *fop = NULL;
 1611     int32_t error = ENOMEM;
 1612 
 1613     gf_msg_trace("ec", 0, "EC(FTRUNCATE) %p", frame);
 1614 
 1615     VALIDATE_OR_GOTO(this, out);
 1616     GF_VALIDATE_OR_GOTO(this->name, frame, out);
 1617     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 1618 
 1619     fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
 1620                                fop_flags, ec_wind_ftruncate,
 1621                                ec_manager_truncate, callback, data);
 1622     if (fop == NULL) {
 1623         goto out;
 1624     }
 1625 
 1626     fop->use_fd = 1;
 1627 
 1628     fop->offset = offset;
 1629 
 1630     if (fd != NULL) {
 1631         fop->fd = fd_ref(fd);
 1632         if (fop->fd == NULL) {
 1633             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
 1634                    "Failed to reference a "
 1635                    "file descriptor.");
 1636 
 1637             goto out;
 1638         }
 1639     }
 1640     if (xdata != NULL) {
 1641         fop->xdata = dict_copy_with_ref(xdata, NULL);
 1642         if (fop->xdata == NULL) {
 1643             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
 1644                    "Failed to reference a "
 1645                    "dictionary.");
 1646 
 1647             goto out;
 1648         }
 1649     }
 1650 
 1651     error = 0;
 1652 
 1653 out:
 1654     if (fop != NULL) {
 1655         ec_manager(fop, error);
 1656     } else {
 1657         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
 1658     }
 1659 }
 1660 
 1661 /* FOP: writev */
 1662 static ec_stripe_t *
 1663 ec_allocate_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache)
 1664 {
 1665     ec_stripe_t *stripe = NULL;
 1666 
 1667     if (stripe_cache->count >= stripe_cache->max) {
 1668         GF_ASSERT(!list_empty(&stripe_cache->lru));
 1669         stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
 1670         list_move_tail(&stripe->lru, &stripe_cache->lru);
 1671         GF_ATOMIC_INC(ec->stats.stripe_cache.evicts);
 1672     } else {
 1673         stripe = GF_MALLOC(sizeof(ec_stripe_t) + ec->stripe_size,
 1674                            ec_mt_ec_stripe_t);
 1675         if (stripe != NULL) {
 1676             stripe_cache->count++;
 1677             list_add_tail(&stripe->lru, &stripe_cache->lru);
 1678             GF_ATOMIC_INC(ec->stats.stripe_cache.allocs);
 1679         } else {
 1680             GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
 1681         }
 1682     }
 1683 
 1684     return stripe;
 1685 }
 1686 
 1687 static void
 1688 ec_write_stripe_data(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
 1689 {
 1690     off_t base;
 1691 
 1692     base = fop->size - ec->stripe_size;
 1693     memcpy(stripe->data, fop->vector[0].iov_base + base, ec->stripe_size);
 1694     stripe->frag_offset = fop->frag_range.last - ec->fragment_size;
 1695 }
 1696 
 1697 static void
 1698 ec_add_stripe_in_cache(ec_t *ec, ec_fop_data_t *fop)
 1699 {
 1700     ec_inode_t *ctx = NULL;
 1701     ec_stripe_t *stripe = NULL;
 1702     ec_stripe_list_t *stripe_cache = NULL;
 1703     gf_boolean_t failed = _gf_true;
 1704 
 1705     LOCK(&fop->fd->inode->lock);
 1706 
 1707     ctx = __ec_inode_get(fop->fd->inode, fop->xl);
 1708     if (ctx == NULL) {
 1709         goto out;
 1710     }
 1711 
 1712     stripe_cache = &ctx->stripe_cache;
 1713     if (stripe_cache->max > 0) {
 1714         stripe = ec_allocate_stripe(ec, stripe_cache);
 1715         if (stripe == NULL) {
 1716             goto out;
 1717         }
 1718 
 1719         ec_write_stripe_data(ec, fop, stripe);
 1720     }
 1721 
 1722     failed = _gf_false;
 1723 
 1724 out:
 1725     UNLOCK(&fop->fd->inode->lock);
 1726 
 1727     if (failed) {
 1728         gf_msg(ec->xl->name, GF_LOG_DEBUG, ENOMEM, EC_MSG_FILE_DESC_REF_FAIL,
 1729                "Failed to create and add stripe in cache");
 1730     }
 1731 }
 1732 
 1733 int32_t
 1734 ec_writev_merge_tail(call_frame_t *frame, void *cookie, xlator_t *this,
 1735                      int32_t op_ret, int32_t op_errno, struct iovec *vector,
 1736                      int32_t count, struct iatt *stbuf, struct iobref *iobref,
 1737                      dict_t *xdata)
 1738 {
 1739     ec_t *ec = this->private;
 1740     ec_fop_data_t *fop = frame->local;
 1741     uint64_t size, base, tmp;
 1742 
 1743     if (op_ret >= 0) {
 1744         tmp = 0;
 1745         size = fop->size - fop->user_size - fop->head;
 1746         base = ec->stripe_size - size;
 1747         if (op_ret > base) {
 1748             tmp = min(op_ret - base, size);
 1749             ec_iov_copy_to(fop->vector[0].iov_base + fop->size - size, vector,
 1750                            count, base, tmp);
 1751 
 1752             size -= tmp;
 1753         }
 1754 
 1755         if (size > 0) {
 1756             memset(fop->vector[0].iov_base + fop->size - size, 0, size);
 1757         }
 1758 
 1759         if (ec->stripe_cache) {
 1760             ec_add_stripe_in_cache(ec, fop);
 1761         }
 1762     }
 1763     return 0;
 1764 }
 1765 
 1766 int32_t
 1767 ec_writev_merge_head(call_frame_t *frame, void *cookie, xlator_t *this,
 1768                      int32_t op_ret, int32_t op_errno, struct iovec *vector,
 1769                      int32_t count, struct iatt *stbuf, struct iobref *iobref,
 1770                      dict_t *xdata)
 1771 {
 1772     ec_t *ec = this->private;
 1773     ec_fop_data_t *fop = frame->local;
 1774     uint64_t size, base;
 1775 
 1776     if (op_ret >= 0) {
 1777         size = fop->head;
 1778         base = 0;
 1779 
 1780         if (op_ret > 0) {
 1781             base = min(op_ret, size);
 1782             ec_iov_copy_to(fop->vector[0].iov_base, vector, count, 0, base);
 1783 
 1784             size -= base;
 1785         }
 1786 
 1787         if (size > 0) {
 1788             memset(fop->vector[0].iov_base + base, 0, size);
 1789         }
 1790 
 1791         size = fop->size - fop->user_size - fop->head;
 1792         if ((size > 0) && (fop->size == ec->stripe_size)) {
 1793             ec_writev_merge_tail(frame, cookie, this, op_ret, op_errno, vector,
 1794                                  count, stbuf, iobref, xdata);
 1795         }
 1796     }
 1797 
 1798     return 0;
 1799 }
 1800 
 1801 static int
 1802 ec_make_internal_fop_xdata(dict_t **xdata)
 1803 {
 1804     dict_t *dict = NULL;
 1805 
 1806     if (*xdata)
 1807         return 0;
 1808 
 1809     dict = dict_new();
 1810     if (!dict)
 1811         goto out;
 1812 
 1813     if (dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"))
 1814         goto out;
 1815 
 1816     *xdata = dict;
 1817     return 0;
 1818 out:
 1819     if (dict)
 1820         dict_unref(dict);
 1821     return -1;
 1822 }
 1823 
 1824 static int32_t
 1825 ec_writev_prepare_buffers(ec_t *ec, ec_fop_data_t *fop)
 1826 {
 1827     struct iobref *iobref = NULL;
 1828     struct iovec *iov;
 1829     void *ptr;
 1830     int32_t err;
 1831 
 1832     fop->user_size = iov_length(fop->vector, fop->int32);
 1833     fop->head = ec_adjust_offset_down(ec, &fop->offset, _gf_false);
 1834     fop->frag_range.first = fop->offset / ec->fragments;
 1835     fop->size = fop->user_size + fop->head;
 1836     ec_adjust_size_up(ec, &fop->size, _gf_false);
 1837     fop->frag_range.last = fop->frag_range.first + fop->size / ec->fragments;
 1838 
 1839     if ((fop->int32 != 1) || (fop->head != 0) || (fop->size > fop->user_size) ||
 1840         !EC_ALIGN_CHECK(fop->vector[0].iov_base, EC_METHOD_WORD_SIZE)) {
 1841         err = ec_buffer_alloc(ec->xl, fop->size, &iobref, &ptr);
 1842         if (err != 0) {
 1843             goto out;
 1844         }
 1845 
 1846         ec_iov_copy_to(ptr + fop->head, fop->vector, fop->int32, 0,
 1847                        fop->user_size);
 1848 
 1849         fop->vector[0].iov_base = ptr;
 1850         fop->vector[0].iov_len = fop->size;
 1851 
 1852         iobref_unref(fop->buffers);
 1853         fop->buffers = iobref;
 1854     }
 1855 
 1856     if (fop->int32 != 2) {
 1857         iov = GF_MALLOC(VECTORSIZE(2), gf_common_mt_iovec);
 1858         if (iov == NULL) {
 1859             err = -ENOMEM;
 1860 
 1861             goto out;
 1862         }
 1863         iov[0].iov_base = fop->vector[0].iov_base;
 1864         iov[0].iov_len = fop->vector[0].iov_len;
 1865 
 1866         GF_FREE(fop->vector);
 1867         fop->vector = iov;
 1868     }
 1869 
 1870     fop->vector[1].iov_len = fop->size / ec->fragments;
 1871     err = ec_buffer_alloc(ec->xl, fop->vector[1].iov_len * ec->nodes,
 1872                           &fop->buffers, &fop->vector[1].iov_base);
 1873     if (err != 0) {
 1874         goto out;
 1875     }
 1876 
 1877     err = 0;
 1878 
 1879 out:
 1880     return err;
 1881 }
 1882 
 1883 static void
 1884 ec_merge_stripe_head_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
 1885 {
 1886     uint32_t head, size;
 1887 
 1888     head = fop->head;
 1889     memcpy(fop->vector[0].iov_base, stripe->data, head);
 1890 
 1891     size = ec->stripe_size - head;
 1892     if (size > fop->user_size) {
 1893         head += fop->user_size;
 1894         size = ec->stripe_size - head;
 1895         memcpy(fop->vector[0].iov_base + head, stripe->data + head, size);
 1896     }
 1897 }
 1898 
 1899 static void
 1900 ec_merge_stripe_tail_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
 1901 {
 1902     uint32_t head, tail;
 1903     off_t offset;
 1904 
 1905     offset = fop->user_size + fop->head;
 1906     tail = fop->size - offset;
 1907     head = ec->stripe_size - tail;
 1908 
 1909     memcpy(fop->vector[0].iov_base + offset, stripe->data + head, tail);
 1910 }
 1911 
 1912 static ec_stripe_t *
 1913 ec_get_stripe_from_cache_locked(ec_t *ec, ec_fop_data_t *fop,
 1914                                 uint64_t frag_offset)
 1915 {
 1916     ec_inode_t *ctx = NULL;
 1917     ec_stripe_t *stripe = NULL;
 1918     ec_stripe_list_t *stripe_cache = NULL;
 1919 
 1920     ctx = __ec_inode_get(fop->fd->inode, fop->xl);
 1921     if (ctx == NULL) {
 1922         GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
 1923         return NULL;
 1924     }
 1925 
 1926     stripe_cache = &ctx->stripe_cache;
 1927     list_for_each_entry(stripe, &stripe_cache->lru, lru)
 1928     {
 1929         if (stripe->frag_offset == frag_offset) {
 1930             list_move_tail(&stripe->lru, &stripe_cache->lru);
 1931             GF_ATOMIC_INC(ec->stats.stripe_cache.hits);
 1932             return stripe;
 1933         }
 1934     }
 1935 
 1936     GF_ATOMIC_INC(ec->stats.stripe_cache.misses);
 1937 
 1938     return NULL;
 1939 }
 1940 
 1941 static gf_boolean_t
 1942 ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
 1943 {
 1944     uint64_t frag_offset;
 1945     ec_stripe_t *stripe = NULL;
 1946     gf_boolean_t found = _gf_false;
 1947 
 1948     if (!ec->stripe_cache) {
 1949         return found;
 1950     }
 1951 
 1952     LOCK(&fop->fd->inode->lock);
 1953     if (which == EC_STRIPE_HEAD) {
 1954         frag_offset = fop->frag_range.first;
 1955         stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
 1956         if (stripe) {
 1957             ec_merge_stripe_head_locked(ec, fop, stripe);
 1958             found = _gf_true;
 1959         }
 1960     }
 1961 
 1962     if (which == EC_STRIPE_TAIL) {
 1963         frag_offset = fop->frag_range.last - ec->fragment_size;
 1964         stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
 1965         if (stripe) {
 1966             ec_merge_stripe_tail_locked(ec, fop, stripe);
 1967             found = _gf_true;
 1968         }
 1969     }
 1970     UNLOCK(&fop->fd->inode->lock);
 1971 
 1972     return found;
 1973 }
 1974 
 1975 static uintptr_t
 1976 ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
 1977 {
 1978     ec_lock_t *lock = NULL;
 1979     ec_inode_t *ictx = NULL;
 1980     LOCK(&inode->lock);
 1981     {
 1982         ictx = __ec_inode_get(inode, xl);
 1983         if (ictx)
 1984             lock = ictx->inode_lock;
 1985     }
 1986     UNLOCK(&inode->lock);
 1987     if (lock)
 1988         return lock->good_mask;
 1989     return 0;
 1990 }
 1991 
 1992 void
 1993 ec_writev_start(ec_fop_data_t *fop)
 1994 {
 1995     ec_t *ec = fop->xl->private;
 1996     ec_fd_t *ctx;
 1997     fd_t *fd;
 1998     dict_t *xdata = NULL;
 1999     uint64_t tail, current;
 2000     int32_t err = -ENOMEM;
 2001     gf_boolean_t found_stripe = _gf_false;
 2002 
 2003     /* This shouldn't fail because we have the inode locked. */
 2004     GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &current));
 2005 
 2006     fd = fd_anonymous(fop->fd->inode);
 2007     if (fd == NULL) {
 2008         goto failed;
 2009     }
 2010 
 2011     fop->frame->root->uid = 0;
 2012     fop->frame->root->gid = 0;
 2013 
 2014     ctx = ec_fd_get(fop->fd, fop->xl);
 2015     if (ctx != NULL) {
 2016         if ((ctx->flags & O_APPEND) != 0) {
 2017             /* Appending writes take full locks so size won't change because
 2018              * of any parallel operations
 2019              */
 2020             fop->offset = current;
 2021         }
 2022     }
 2023 
 2024     err = ec_writev_prepare_buffers(ec, fop);
 2025     if (err != 0) {
 2026         goto failed_fd;
 2027     }
 2028     tail = fop->size - fop->user_size - fop->head;
 2029     if (fop->head > 0) {
 2030         if (current > fop->offset) {
 2031             found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
 2032             if (!found_stripe) {
 2033                 if (ec_make_internal_fop_xdata(&xdata)) {
 2034                     err = -ENOMEM;
 2035                     goto failed_xdata;
 2036                 }
 2037                 ec_readv(fop->frame, fop->xl,
 2038                          ec_get_lock_good_mask(fop->fd->inode, fop->xl),
 2039                          EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
 2040                          ec->stripe_size, fop->offset, 0, xdata);
 2041             }
 2042         } else {
 2043             memset(fop->vector[0].iov_base, 0, fop->head);
 2044             memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
 2045             if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
 2046                 ec_add_stripe_in_cache(ec, fop);
 2047             }
 2048         }
 2049     }
 2050 
 2051     if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
 2052         /* Current locking scheme will make sure the 'current' below will
 2053          * never decrease while the fop is in progress, so the checks will
 2054          * work as expected
 2055          */
 2056         if (current > fop->offset + fop->head + fop->user_size) {
 2057             found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_TAIL);
 2058             if (!found_stripe) {
 2059                 if (ec_make_internal_fop_xdata(&xdata)) {
 2060                     err = -ENOMEM;
 2061                     goto failed_xdata;
 2062                 }
 2063                 ec_readv(fop->frame, fop->xl,
 2064                          ec_get_lock_good_mask(fop->fd->inode, fop->xl),
 2065                          EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
 2066                          ec->stripe_size,
 2067                          fop->offset + fop->size - ec->stripe_size, 0, xdata);
 2068             }
 2069         } else {
 2070             memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
 2071             if (ec->stripe_cache) {
 2072                 ec_add_stripe_in_cache(ec, fop);
 2073             }
 2074         }
 2075     }
 2076 
 2077     err = 0;
 2078 
 2079 failed_xdata:
 2080     if (xdata) {
 2081         dict_unref(xdata);
 2082     }
 2083 failed_fd:
 2084     fd_unref(fd);
 2085 failed:
 2086     ec_fop_set_error(fop, -err);
 2087 }
 2088 
 2089 int32_t
 2090 ec_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
 2091               int32_t op_errno, struct iatt *prestat, struct iatt *poststat,
 2092               dict_t *xdata)
 2093 {
 2094     ec_t *ec = NULL;
 2095     if (this && this->private) {
 2096         ec = this->private;
 2097         if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
 2098             op_ret = -1;
 2099             op_errno = EIO;
 2100         }
 2101     }
 2102     return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
 2103                               poststat, xdata);
 2104 }
 2105 
 2106 void
 2107 ec_wind_writev(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
 2108 {
 2109     ec_trace("WIND", fop, "idx=%d", idx);
 2110 
 2111     struct iovec vector[1];
 2112     size_t size;
 2113 
 2114     size = fop->vector[1].iov_len;
 2115 
 2116     vector[0].iov_base = fop->vector[1].iov_base + idx * size;
 2117     vector[0].iov_len = size;
 2118 
 2119     STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx,
 2120                       ec->xl_list[idx], ec->xl_list[idx]->fops->writev, fop->fd,
 2121                       vector, 1, fop->offset / ec->fragments, fop->uint32,
 2122                       fop->buffers, fop->xdata);
 2123 }
 2124 
 2125 static void
 2126 ec_writev_encode(ec_fop_data_t *fop)
 2127 {
 2128     ec_t *ec = fop->xl->private;
 2129     void *blocks[ec->nodes];
 2130     uint32_t i;
 2131 
 2132     blocks[0] = fop->vector[1].iov_base;
 2133     for (i = 1; i < ec->nodes; i++) {
 2134         blocks[i] = blocks[i - 1] + fop->vector[1].iov_len;
 2135     }
 2136     ec_method_encode(&ec->matrix, fop->vector[0].iov_len,
 2137                      fop->vector[0].iov_base, blocks);
 2138 }
 2139 
 2140 int32_t
 2141 ec_manager_writev(ec_fop_data_t *fop, int32_t state)
 2142 {
 2143     ec_cbk_data_t *cbk;
 2144     ec_fd_t *ctx = NULL;
 2145     ec_t *ec = fop->xl->private;
 2146     off_t fl_start = 0;
 2147     uint64_t fl_size = LONG_MAX;
 2148 
 2149     switch (state) {
 2150         case EC_STATE_INIT:
 2151         case EC_STATE_LOCK:
 2152             ctx = ec_fd_get(fop->fd, fop->xl);
 2153             if (ctx != NULL) {
 2154                 if ((ctx->flags & O_APPEND) == 0) {
 2155                     off_t user_size = 0;
 2156                     off_t head = 0;
 2157 
 2158                     fl_start = fop->offset;
 2159                     user_size = iov_length(fop->vector, fop->int32);
 2160                     head = ec_adjust_offset_down(ec, &fl_start, _gf_true);
 2161                     fl_size = user_size + head;
 2162                     ec_adjust_size_up(ec, &fl_size, _gf_true);
 2163                 }
 2164             }
 2165             ec_lock_prepare_fd(fop, fop->fd,
 2166                                EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
 2167                                fl_start, fl_size);
 2168             ec_lock(fop);
 2169 
 2170             return EC_STATE_DISPATCH;
 2171 
 2172         case EC_STATE_DISPATCH:
 2173             ec_writev_start(fop);
 2174 
 2175             return EC_STATE_DELAYED_START;
 2176 
 2177         case EC_STATE_DELAYED_START:
 2178             /* Restore uid, gid if they were changed to do some partial
 2179              * reads. */
 2180             fop->frame->root->uid = fop->uid;
 2181             fop->frame->root->gid = fop->gid;
 2182 
 2183             ec_writev_encode(fop);
 2184 
 2185             ec_dispatch_all(fop);
 2186 
 2187             return EC_STATE_PREPARE_ANSWER;
 2188 
 2189         case EC_STATE_PREPARE_ANSWER:
 2190             cbk = ec_fop_prepare_answer(fop, _gf_false);
 2191             if (cbk != NULL) {
 2192                 ec_t *ec = fop->xl->private;
 2193                 uint64_t size;
 2194 
 2195                 ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
 2196 
 2197                 /* This shouldn't fail because we have the inode locked. */
 2198                 LOCK(&fop->fd->inode->lock);
 2199                 {
 2200                     GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode,
 2201                                                   &cbk->iatt[0].ia_size));
 2202                     cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
 2203                     size = fop->offset + fop->head + fop->user_size;
 2204                     if (size > cbk->iatt[0].ia_size) {
 2205                         /* Only update inode size if this is a top level fop.
 2206                          * Otherwise this is an internal write and the top
 2207                          * level fop should take care of the real inode size.
 2208                          */
 2209                         if (fop->parent == NULL) {
 2210                             /* This shouldn't fail because we have the inode
 2211                              * locked. */
 2212                             GF_ASSERT(
 2213                                 __ec_set_inode_size(fop, fop->fd->inode, size));
 2214                         }
 2215                         cbk->iatt[1].ia_size = size;
 2216                     }
 2217                 }
 2218                 UNLOCK(&fop->fd->inode->lock);
 2219 
 2220                 if (fop->error == 0) {
 2221                     cbk->op_ret *= ec->fragments;
 2222                     if (cbk->op_ret < fop->head) {
 2223                         cbk->op_ret = 0;
 2224                     } else {
 2225                         cbk->op_ret -= fop->head;
 2226                     }
 2227                     if (cbk->op_ret > fop->user_size) {
 2228                         cbk->op_ret = fop->user_size;
 2229                     }
 2230                 }
 2231             }
 2232 
 2233             return EC_STATE_REPORT;
 2234 
 2235         case EC_STATE_REPORT:
 2236             cbk = fop->answer;
 2237 
 2238             GF_ASSERT(cbk != NULL);
 2239 
 2240             if (fop->cbks.writev != NULL) {
 2241                 QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
 2242                            cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
 2243                            &cbk->iatt[1], cbk->xdata);
 2244             }
 2245 
 2246             return EC_STATE_LOCK_REUSE;
 2247 
 2248         case -EC_STATE_DELAYED_START:
 2249             /* We have failed while doing partial reads. We need to restore
 2250              * original uid, gid. */
 2251             fop->frame->root->uid = fop->uid;
 2252             fop->frame->root->gid = fop->gid;
 2253 
 2254             /* Fall through */
 2255 
 2256         case -EC_STATE_INIT:
 2257         case -EC_STATE_LOCK:
 2258         case -EC_STATE_DISPATCH:
 2259         case -EC_STATE_PREPARE_ANSWER:
 2260         case -EC_STATE_REPORT:
 2261             GF_ASSERT(fop->error != 0);
 2262 
 2263             if (fop->cbks.writev != NULL) {
 2264                 fop->cbks.writev(fop->req_frame, fop, fop->xl, -1, fop->error,
 2265                                  NULL, NULL, NULL);
 2266             }
 2267 
 2268             return EC_STATE_LOCK_REUSE;
 2269 
 2270         case -EC_STATE_LOCK_REUSE:
 2271         case EC_STATE_LOCK_REUSE:
 2272             ec_lock_reuse(fop);
 2273 
 2274             return EC_STATE_UNLOCK;
 2275 
 2276         case -EC_STATE_UNLOCK:
 2277         case EC_STATE_UNLOCK:
 2278             ec_unlock(fop);
 2279 
 2280             return EC_STATE_END;
 2281 
 2282         default:
 2283             gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
 2284                    "Unhandled state %d for %s", state, ec_fop_name(fop->id));
 2285 
 2286             return EC_STATE_END;
 2287     }
 2288 }
 2289 
 2290 void
 2291 ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
 2292           uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
 2293           struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
 2294           struct iobref *iobref, dict_t *xdata)
 2295 {
 2296     ec_cbk_t callback = {.writev = func};
 2297     ec_fop_data_t *fop = NULL;
 2298     int32_t error = ENOMEM;
 2299 
 2300     gf_msg_trace("ec", 0, "EC(WRITE) %p", frame);
 2301 
 2302     VALIDATE_OR_GOTO(this, out);
 2303     GF_VALIDATE_OR_GOTO(this->name, frame, out);
 2304     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 2305 
 2306     fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
 2307                                ec_wind_writev, ec_manager_writev, callback,
 2308                                data);
 2309     if (fop == NULL) {
 2310         goto out;
 2311     }
 2312 
 2313     fop->int32 = count;
 2314     fop->offset = offset;
 2315     fop->uint32 = flags;
 2316 
 2317     fop->use_fd = 1;
 2318 
 2319     if (fd != NULL) {
 2320         fop->fd = fd_ref(fd);
 2321         if (fop->fd == NULL) {
 2322             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
 2323                    "Failed to reference a "
 2324                    "file descriptor.");
 2325 
 2326             goto out;
 2327         }
 2328     }
 2329     if (count > 0) {
 2330         fop->vector = iov_dup(vector, count);
 2331         if (fop->vector == NULL) {
 2332             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
 2333                    "Failed to duplicate a "
 2334                    "vector list.");
 2335 
 2336             goto out;
 2337         }
 2338         fop->int32 = count;
 2339     }
 2340     if (iobref != NULL) {
 2341         fop->buffers = iobref_ref(iobref);
 2342         if (fop->buffers == NULL) {
 2343             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL,
 2344                    "Failed to reference a "
 2345                    "buffer.");
 2346 
 2347             goto out;
 2348         }
 2349     }
 2350     if (xdata != NULL) {
 2351         fop->xdata = dict_copy_with_ref(xdata, NULL);
 2352         if (fop->xdata == NULL) {
 2353             gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
 2354                    "Failed to reference a "
 2355                    "dictionary.");
 2356 
 2357             goto out;
 2358         }
 2359     }
 2360 
 2361     error = 0;
 2362 
 2363 out:
 2364     if (fop != NULL) {
 2365         ec_manager(fop, error);
 2366     } else {
 2367         func(frame, NULL, this, -1, error, NULL, NULL, NULL);
 2368     }
 2369 }