"Fossies" - the Fresh Open Source Software Archive

Member "aoe-87/linux/drivers/block/aoe/aoecmd.c" (23 Feb 2017, 38163 Bytes) of package /linux/misc/aoe-87.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "aoecmd.c" see the Fossies "Dox" file reference documentation.

    1 /* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
    2 /*
    3  * aoecmd.c
    4  * Filesystem request handling methods
    5  */
    6 
    7 #include <linux/ata.h>
    8 #include <linux/hdreg.h>
    9 #include <linux/blkdev.h>
   10 #include <linux/skbuff.h>
   11 #include <linux/netdevice.h>
   12 #include <linux/genhd.h>
   13 #include <linux/moduleparam.h>
   14 #include <linux/workqueue.h>
   15 #include <linux/kthread.h>
   16 #include <net/net_namespace.h>
   17 #include <asm/unaligned.h>
   18 #include <linux/uio.h>
   19 #include "aoe.h"
   20 
   21 static void ktcomplete(struct frame *, struct sk_buff *);
   22 static int count_targets(struct aoedev *d, int *untainted);
   23 
   24 static struct buf *nextbuf(struct aoedev *);
   25 
   26 static int aoe_deadsecs = 60 * 3;
   27 module_param(aoe_deadsecs, int, 0644);
   28 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
   29 
   30 static int aoe_maxout = 128;
   31 module_param(aoe_maxout, int, 0644);
   32 MODULE_PARM_DESC(aoe_maxout,
   33     "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
   34 
   35 static int aoe_maxioc = 8192;   /* default meant to avoid most soft lockups */
   36 module_param(aoe_maxioc, int, 0644);
   37 MODULE_PARM_DESC(aoe_maxioc, "When nonzero, perform at most aoe_maxioc I/O completions before scheduling.");
   38 
    39 /* The number of online CPUs in the system;
    40  * this dictates the number of ktio threads
    41  * that will be spawned.
    42  */
   43 static int ncpus;
   44 
   45 /* mutex lock used for synchronization while thread spawning */
   46 static DEFINE_MUTEX(ktio_spawn_lock);
   47 
   48 static wait_queue_head_t *ktiowq;
   49 static struct ktstate *kts;
   50 
   51 /* io completion queue */
   52 struct iocq_ktio {
   53     struct list_head head;
   54     spinlock_t lock;
   55 };
   56 static struct iocq_ktio *iocq;
   57 
   58 /* empty_zero_page is not always exported */
   59 static struct page *empty_page;
   60 
   61 static struct sk_buff *
   62 new_skb(ulong len)
   63 {
   64     struct sk_buff *skb;
   65 
   66     skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
   67     if (skb) {
   68         skb_reserve(skb, MAX_HEADER);
   69         skb_reset_mac_header(skb);
   70         skb_reset_network_header(skb);
   71         skb->protocol = __constant_htons(ETH_P_AOE);
   72         skb->priority = 0;
   73         skb->next = skb->prev = NULL;
   74 
    75         /* tell the network layer not to perform IP checksums
    76          * and not to ask the NIC to do them
    77          */
   78         skb->ip_summed = CHECKSUM_NONE;
   79     }
   80     return skb;
   81 }
   82 
   83 static struct frame *
   84 getframe_deferred(struct aoedev *d, u32 tag)
   85 {
   86     struct list_head *head, *pos, *nx;
   87     struct frame *f;
   88 
   89     head = &d->rexmitq;
   90     list_for_each_safe(pos, nx, head) {
   91         f = list_entry(pos, struct frame, head);
   92         if (f->tag == tag) {
   93             list_del(pos);
   94             return f;
   95         }
   96     }
   97     return NULL;
   98 }
   99 
  100 static struct frame *
  101 getframe(struct aoedev *d, u32 tag)
  102 {
  103     struct frame *f;
  104     struct list_head *head, *pos, *nx;
  105     u32 n;
  106 
  107     n = tag % NFACTIVE;
  108     head = &d->factive[n];
  109     list_for_each_safe(pos, nx, head) {
  110         f = list_entry(pos, struct frame, head);
  111         if (f->tag == tag) {
  112             list_del(pos);
  113             return f;
  114         }
  115     }
  116     return NULL;
  117 }
  118 
  119 /*
  120  * Leave the top bit clear so we have tagspace for userland.
  121  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  122  * This driver reserves tag -1 to mean "unused frame."
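        *
        * For example (illustrative): if d->lasttag == 2 and
        * (jiffies & 0xffff) == 0x1234, the next tag is
        * ((3 & 0x7fff) << 16) | 0x1234 == 0x00031234; the high half
        * identifies the frame, the low half records the xmit tick.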
  123  */
  124 static int
  125 newtag(struct aoedev *d)
  126 {
  127     register ulong n;
  128 
  129     n = jiffies & 0xffff;
   130     return n | ((++d->lasttag & 0x7fff) << 16);
  131 }
  132 
  133 static u32
  134 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
  135 {
  136     u32 host_tag = newtag(d);
  137 
  138     memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
  139     memcpy(h->dst, t->addr, sizeof h->dst);
  140     h->type = __constant_cpu_to_be16(ETH_P_AOE);
  141     h->verfl = AOE_HVER;
  142     h->major = cpu_to_be16(d->aoemajor);
  143     h->minor = d->aoeminor;
  144     h->cmd = AOECMD_ATA;
  145     h->tag = cpu_to_be32(host_tag);
  146 
  147     return host_tag;
  148 }
  149 
  150 static inline void
  151 put_lba(struct aoe_atahdr *ah, sector_t lba)
  152 {
  153     ah->lba0 = lba;
  154     ah->lba1 = lba >>= 8;
  155     ah->lba2 = lba >>= 8;
  156     ah->lba3 = lba >>= 8;
  157     ah->lba4 = lba >>= 8;
  158     ah->lba5 = lba >>= 8;
  159 }
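       /* Illustrative: the low 48 bits of the LBA land in the six
        * one-byte fields lba0 (least significant) through lba5 (most
        * significant), so LBA 0x123456789abc yields lba0=0xbc,
        * lba1=0x9a, ..., lba5=0x12.
        */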
  160 
  161 static struct aoeif *
  162 ifrotate(struct aoetgt *t)
  163 {
  164     struct aoeif *ifp;
  165 
  166     ifp = t->ifp;
  167     ifp++;
  168     if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
  169         ifp = t->ifs;
  170     if (ifp->nd == NULL)
  171         return NULL;
  172     return t->ifp = ifp;
  173 }
  174 
  175 static void
  176 skb_pool_put(struct aoedev *d, struct sk_buff *skb)
  177 {
  178     if (!d->skbpool_hd)
  179         d->skbpool_hd = skb;
  180     else
  181         d->skbpool_tl->next = skb;
  182     d->skbpool_tl = skb;
  183 }
  184 
  185 static struct sk_buff *
  186 skb_pool_get(struct aoedev *d)
  187 {
  188     struct sk_buff *skb;
  189 
  190     skb = d->skbpool_hd;
  191     if (skb)
  192         if (atomic_read(&skb_shinfo(skb)->dataref) == 1) {
  193             d->skbpool_hd = skb->next;
  194             skb->next = NULL;
  195             return skb;
  196         }
  197     if (d->nskbpool < NSKBPOOLMAX && (skb = new_skb(ETH_ZLEN))) {
  198         d->nskbpool++;
  199         return skb;
  200     }
  201     return NULL;
  202 }
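       /* Note: a dataref of 1 above means nothing else (such as a
        * driver still transmitting a clone) references the skb data,
        * so a pooled skb is handed out only once it is safe to reuse.
        */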
  203 
  204 void
  205 aoe_freetframe(struct frame *f)
  206 {
  207     struct aoetgt *t;
  208 
  209     t = f->t;
  210     f->buf = NULL;
  211     memset(&f->iter, 0, sizeof(f->iter));
  212     f->r_skb = NULL;
  213     f->flags = 0;
  214     list_add(&f->head, &t->ffree);
  215 }
  216 
  217 struct frame *
  218 newtframe(struct aoedev *d, struct aoetgt *t)
  219 {
  220     struct frame *f;
  221     struct sk_buff *skb;
  222     struct list_head *pos;
  223 
  224     if (list_empty(&t->ffree)) {
  225         if (t->falloc >= NSKBPOOLMAX*2)
  226             return NULL;
  227         f = aoe_kcalloc(1, sizeof *f, GFP_ATOMIC);
  228         if (f == NULL)
  229             return NULL;
  230         t->falloc++;
  231         f->t = t;
  232     } else {
  233         pos = t->ffree.next;
  234         list_del(pos);
  235         f = list_entry(pos, struct frame, head);
  236     }
  237 
  238     skb = f->skb;
  239     if (skb == NULL) {
  240         f->skb = skb = new_skb(ETH_ZLEN);
  241         if (!skb) {
  242 bail:           aoe_freetframe(f);
  243             return NULL;
  244         }
  245     }
  246 
  247     if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
  248         skb = skb_pool_get(d);
  249         if (skb == NULL)
  250             goto bail;
  251         skb_pool_put(d, f->skb);
  252         f->skb = skb;
  253     }
  254 
  255     skb->truesize -= skb->data_len;
  256     skb_shinfo(skb)->nr_frags = skb->data_len = 0;
  257     skb_trim(skb, 0);
  258     return f;
  259 }
  260 
  261 static struct frame *
  262 newframe(struct aoedev *d)
  263 {
  264     struct frame *f;
  265     struct aoetgt *t, **tt;
  266     int totout = 0;
  267     int use_tainted;
  268     int has_untainted;
  269 
  270     if (!d->targets || !d->targets[0]) {
  271         printk(KERN_ERR "aoe: NULL TARGETS!\n");
  272         return NULL;
  273     }
  274     tt = d->tgt;    /* last used target */
  275     for (use_tainted = 0, has_untainted = 0;;) {
  276         tt++;
  277         if (tt >= &d->targets[d->ntargets] || !*tt)
  278             tt = d->targets;
  279         t = *tt;
  280         if (!t->taint) {
  281             has_untainted = 1;
  282             totout += t->nout;
  283         }
  284         if (t->nout < t->maxout
  285         && (use_tainted || !t->taint)
  286         && t->ifp->nd) {
  287             f = newtframe(d, t);
  288             if (f) {
  289                 ifrotate(t);
  290                 d->tgt = tt;
  291                 return f;
  292             }
  293         }
  294         if (tt == d->tgt) { /* we've looped and found nada */
  295             if (!use_tainted && !has_untainted)
  296                 use_tainted = 1;
  297             else
  298                 break;
  299         }
  300     }
  301     if (totout == 0) {
  302         d->kicked++;
  303         d->flags |= DEVFL_KICKME;
  304     }
  305     return NULL;
  306 }
  307 
  308 static void
  309 skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
  310 {
  311     int frag = 0;
  312     struct bio_vec bv;
  313 
  314     __bio_for_each_segment(bv, bio, iter, iter)
  315         skb_fill_page_desc(skb, frag++, bv.bv_page,
  316                    bv.bv_offset, bv.bv_len);
  317 }
  318 
  319 static void
  320 fhash(struct frame *f)
  321 {
  322     struct aoedev *d = f->t->d;
  323     u32 n;
  324 
  325     n = f->tag % NFACTIVE;
  326     list_add_tail(&f->head, &d->factive[n]);
  327 }
  328 
  329 static void
  330 ata_rw_frameinit(struct frame *f)
  331 {
  332     struct aoetgt *t;
  333     struct aoe_hdr *h;
  334     struct aoe_atahdr *ah;
  335     struct sk_buff *skb;
  336     char writebit, extbit;
  337 
  338     skb = f->skb;
  339     h = (struct aoe_hdr *) skb_mac_header(skb);
  340     ah = (struct aoe_atahdr *) (h + 1);
  341     skb_put(skb, sizeof(*h) + sizeof(*ah));
  342     memset(h, 0, skb->len);
  343 
  344     writebit = 0x10;
  345     extbit = 0x4;
  346 
  347     t = f->t;
  348     f->tag = aoehdr_atainit(t->d, t, h);
  349     fhash(f);
  350     t->nout++;
  351     f->nout = t->nout;
  352     f->waited = 0;
  353     f->waited_total = 0;
  354 
  355     /* set up ata header */
  356     ah->scnt = f->iter.bi_size >> 9;
  357     put_lba(ah, f->iter.bi_sector);
  358     if (t->d->flags & DEVFL_EXT) {
  359         ah->aflags |= AOEAFL_EXT;
  360     } else {
  361         extbit = 0;
  362         ah->lba3 &= 0x0f;
  363         ah->lba3 |= 0xe0;   /* LBA bit + obsolete 0xa0 */
  364     }
  365     if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
  366         skb_fillup(skb, f->buf->bio, f->iter);
  367         ah->aflags |= AOEAFL_WRITE;
  368         skb->len += f->iter.bi_size;
  369         skb->data_len = f->iter.bi_size;
  370         skb->truesize += f->iter.bi_size;
  371         t->wpkts++;
  372     } else {
  373         t->rpkts++;
  374         writebit = 0;
  375     }
  376 
  377     ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
  378     skb->dev = t->ifp->nd;
  379 }
  380 
  381 static int
  382 aoecmd_ata_rw(struct aoedev *d)
  383 {
  384     struct frame *f;
  385     struct buf *buf;
  386     struct aoetgt *t;
  387     struct sk_buff *skb;
  388 
  389     buf = nextbuf(d);
  390     if (buf == NULL)
  391         return 0;
  392     f = newframe(d);
  393     if (f == NULL)
  394         return 0;
  395     t = *d->tgt;
  396 
  397     /* initialize the headers & frame */
  398     f->buf = buf;
  399     f->iter = buf->iter;
  400     f->iter.bi_size = min_t(unsigned long,
  401                 d->maxbcnt ?: DEFAULTBCNT,
  402                 f->iter.bi_size);
  403     bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
  404 
  405     if (!buf->iter.bi_size)
  406         d->ip.buf = NULL;
  407 
  408     /* mark all tracking fields and load out */
  409     buf->nframesout += 1;
  410 
  411     ata_rw_frameinit(f);
  412 
  413     skb = skb_clone(f->skb, GFP_ATOMIC);
  414     if (skb) {
  415         do_gettimeofday(&f->sent);
  416         f->sent_jiffs = (u32) jiffies;
  417         aoenet_xmit(skb);
  418     }
  419     return 1;
  420 }
  421 
   422 /* Some callers cannot sleep; they can call this function now
   423  * and transmit the resulting packets later, when interrupts are on.
   424  */
  425 static struct sk_buff *
  426 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
  427 {
  428     struct aoe_hdr *h;
  429     struct aoe_cfghdr *ch;
  430     struct sk_buff *skb, *sl, *sl_tail;
  431     struct net_device *ifp;
  432 
  433     sl = sl_tail = NULL;
  434 
  435     read_lock(&dev_base_lock);
  436     for_each_netdev(&init_net, ifp) {
  437         dev_hold(ifp);
  438         if (!is_aoe_netif(ifp)) {
  439             dev_put(ifp);
  440             continue;
  441         }
  442 
  443         skb = new_skb(sizeof *h + sizeof *ch);
  444         if (skb == NULL) {
  445             printk(KERN_INFO "aoe: skb alloc failure\n");
  446             dev_put(ifp);
  447             continue;
  448         }
  449         skb->dev = ifp;
  450         skb_put(skb, sizeof *h + sizeof *ch);
  451         if (sl_tail == NULL)
  452             sl_tail = skb;
  453         h = (struct aoe_hdr *) skb_mac_header(skb);
  454         memset(h, 0, sizeof *h + sizeof *ch);
  455 
  456         memset(h->dst, 0xff, sizeof h->dst);
  457         memcpy(h->src, ifp->dev_addr, sizeof h->src);
  458         h->type = __constant_cpu_to_be16(ETH_P_AOE);
  459         h->verfl = AOE_HVER;
  460         h->major = cpu_to_be16(aoemajor);
  461         h->minor = aoeminor;
  462         h->cmd = AOECMD_CFG;
  463 
  464         skb->next = sl;
  465         sl = skb;
  466         dev_put(ifp);
  467     }
  468     read_unlock(&dev_base_lock);
  469 
  470     if (tail != NULL)
  471         *tail = sl_tail;
  472     return sl;
  473 }
  474 
  475 static void
  476 resend(struct aoedev *d, struct frame *f)
  477 {
  478     struct sk_buff *skb;
  479     struct aoe_hdr *h;
  480     struct aoe_atahdr *ah;
  481     struct aoetgt *t;
  482     char buf[128];
  483     u32 n;
  484 
  485     t = f->t;
  486     n = newtag(d);
  487     skb = f->skb;
  488     if (ifrotate(t) == NULL) {
  489         /* probably can't happen, but set it up to fail anyway */
   490         printk(KERN_INFO "aoe: resend: no interfaces to rotate to.\n");
  491         ktcomplete(f, NULL);
  492         return;
  493     }
  494     h = (struct aoe_hdr *) skb_mac_header(skb);
  495     ah = (struct aoe_atahdr *) (h+1);
  496 
  497     if (!(f->flags & FFL_PROBE)) {
  498         snprintf(buf, sizeof buf,
  499             "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x "
  500             "s=%012llx d=%012llx nout=%d\n",
  501             f->buf ? "retransmit" : "reident",
  502             d->aoemajor, d->aoeminor, f->tag, jiffies, n,
  503             mac_addr(h->src),
  504             mac_addr(h->dst), t->nout);
  505         aoechr_error(buf);
  506     }
  507 
  508     f->nout = t->nout;
  509     f->tag = n;
  510     fhash(f);
  511     h->tag = cpu_to_be32(n);
  512     memcpy(h->dst, t->addr, sizeof h->dst);
  513     memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
  514 
  515     skb->dev = t->ifp->nd;
  516     skb = skb_clone(skb, GFP_ATOMIC);
  517     if (skb) {
  518         do_gettimeofday(&f->sent);
  519         f->sent_jiffs = (u32) jiffies;
  520         aoenet_xmit(skb);
  521     }
  522 }
  523 
  524 static int
  525 tsince_hr(struct frame *f)
  526 {
  527     struct timeval now;
  528     int n;
  529 
  530     do_gettimeofday(&now);
  531     n = now.tv_usec - f->sent.tv_usec;
  532     n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
  533 
  534     if (n < 0)
  535         n = -n;
  536 
  537     /* For relatively long periods, use jiffies to avoid
  538      * discrepancies caused by updates to the system time.
  539      *
   540  * On a system with HZ of 1000, 32 bits is over 49 days
   541  * worth of jiffies, or over 71 minutes worth of usecs.
  542      *
  543      * Jiffies overflow is handled by subtraction of unsigned ints:
  544      * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
  545      * $3 = 4
  546      * (gdb)
  547      */
  548     if (n > USEC_PER_SEC / 4) {
  549         n = ((u32) jiffies) - f->sent_jiffs;
  550         n *= USEC_PER_SEC / HZ;
  551     }
  552 
  553     return n;
  554 }
  555 
  556 static int
  557 tsince(u32 tag)
  558 {
  559     int n;
  560 
  561     n = jiffies & 0xffff;
  562     n -= tag & 0xffff;
  563     if (n < 0)
  564         n += 1<<16;
  565     return jiffies_to_usecs(n + 1);
  566 }
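       /* tsince uses the low 16 bits of the tag, which newtag seeded
        * from jiffies at transmit time; the 16-bit difference is the
        * age of the tag in ticks, converted here to microseconds.
        */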
  567 
  568 static struct aoeif *
  569 getif(struct aoetgt *t, struct net_device *nd)
  570 {
  571     struct aoeif *p, *e;
  572 
  573     p = t->ifs;
  574     e = p + NAOEIFS;
  575     for (; p < e; p++)
  576         if (p->nd == nd)
  577             return p;
  578     return NULL;
  579 }
  580 
  581 static void
  582 ejectif(struct aoetgt *t, struct aoeif *ifp)
  583 {
  584     struct aoeif *e;
  585     struct net_device *nd;
  586     ulong n;
  587 
  588     nd = ifp->nd;
  589     e = t->ifs + NAOEIFS - 1;
  590     n = (e - ifp) * sizeof *ifp;
  591     memmove(ifp, ifp+1, n);
  592     e->nd = NULL;
  593     dev_put(nd);
  594 }
  595 
  596 static struct frame *
  597 reassign_frame(struct frame *f)
  598 {
  599     struct frame *nf;
  600     struct sk_buff *skb;
  601 
  602     nf = newframe(f->t->d);
  603     if (!nf)
  604         return NULL;
  605     if (nf->t == f->t) {
  606         aoe_freetframe(nf);
  607         return NULL;
  608     }
  609 
  610     skb = nf->skb;
  611     nf->skb = f->skb;
  612     nf->buf = f->buf;
  613     nf->iter = f->iter;
  614     nf->waited = 0;
  615     nf->waited_total = f->waited_total;
  616     nf->sent = f->sent;
  617     nf->sent_jiffs = f->sent_jiffs;
  618     f->skb = skb;
  619 
  620     return nf;
  621 }
  622 
  623 static void
  624 probe(struct aoetgt *t)
  625 {
  626     struct aoedev *d;
  627     struct frame *f;
  628     struct sk_buff *skb;
  629     size_t n, m;
  630     int frag;
  631 
  632     d = t->d;
  633     f = newtframe(d, t);
  634     if (!f) {
  635         printk(KERN_ERR
  636             "%s %012llx for e%ld.%d: %s\n",
  637             "aoe: cannot probe remote address",
  638             mac_addr(t->addr),
  639             (long) d->aoemajor, d->aoeminor,
  640             "no frame available");
  641         return;
  642     }
  643     f->flags |= FFL_PROBE;
  644     ifrotate(t);
  645     f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
  646     ata_rw_frameinit(f);
  647     skb = f->skb;
  648     for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
  649         if (n < PAGE_SIZE)
  650             m = n;
  651         else
  652             m = PAGE_SIZE;
  653         skb_fill_page_desc(skb, frag, empty_page, 0, m);
  654     }
  655     skb->len += f->iter.bi_size;
  656     skb->data_len = f->iter.bi_size;
  657     skb->truesize += f->iter.bi_size;
  658 
  659     skb = skb_clone(f->skb, GFP_ATOMIC);
  660     if (skb) {
  661         do_gettimeofday(&f->sent);
  662         f->sent_jiffs = (u32) jiffies;
  663         aoenet_xmit(skb);
  664     }
  665 }
  666 
  667 static long
  668 rto(struct aoedev *d)
  669 {
  670     long t;
  671 
  672     t = 2 * d->rttavg >> RTTSCALE;
  673     t += 8 * d->rttdev >> RTTDSCALE;
  674     if (t == 0)
  675         t = 1;
  676 
  677     return t;
  678 }
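       /* Roughly analogous to a TCP retransmission timeout: rttavg
        * and rttdev are kept scaled (see calc_rttavg below), so the
        * result is about 2 * srtt + 8 * mean deviation, in the same
        * units as the RTT samples.
        */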
  679 
  680 static void
  681 rexmit_deferred(struct aoedev *d)
  682 {
  683     struct aoetgt *t;
  684     struct frame *f;
  685     struct frame *nf;
  686     struct list_head *pos, *nx, *head;
  687     int since;
  688     int untainted;
  689 
  690     count_targets(d, &untainted);
  691 
  692     head = &d->rexmitq;
  693     list_for_each_safe(pos, nx, head) {
  694         f = list_entry(pos, struct frame, head);
  695         t = f->t;
  696         if (t->taint) {
  697             if (!(f->flags & FFL_PROBE)) {
  698                 nf = reassign_frame(f);
  699                 if (nf) {
  700                     if (t->nout_probes == 0
  701                     && untainted > 0) {
  702                         probe(t);
  703                         t->nout_probes++;
  704                     }
  705                     list_replace(&f->head, &nf->head);
  706                     pos = &nf->head;
  707                     aoe_freetframe(f);
  708                     f = nf;
  709                     t = f->t;
  710                 }
  711             } else if (untainted < 1) {
  712                 /* don't probe w/o other untainted aoetgts */
  713                 goto stop_probe;
  714             } else if (tsince_hr(f) < t->taint * rto(d)) {
  715                 /* reprobe slowly when taint is high */
  716                 continue;
  717             }
  718         } else if (f->flags & FFL_PROBE) {
  719 stop_probe:     /* don't probe untainted aoetgts */
  720             list_del(pos);
  721             aoe_freetframe(f);
  722             /* leaving d->kicked, because this is routine */
  723             f->t->d->flags |= DEVFL_KICKME;
  724             continue;
  725         }
  726         if (t->nout >= t->maxout)
  727             continue;
  728         list_del(pos);
  729         t->nout++;
  730         if (f->flags & FFL_PROBE)
  731             t->nout_probes++;
  732         since = tsince_hr(f);
  733         f->waited += since;
  734         f->waited_total += since;
  735         resend(d, f);
  736     }
  737 }
  738 
  739 /* An aoetgt accumulates demerits quickly, and successful
  740  * probing redeems the aoetgt slowly.
  741  */
  742 static void
  743 scorn(struct aoetgt *t)
  744 {
  745     int n;
  746 
  747     n = t->taint++;
  748     t->taint += t->taint * 2;
  749     if (n > t->taint)
  750         t->taint = n;
  751     if (t->taint > MAX_TAINT)
  752         t->taint = MAX_TAINT;
  753 }
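       /* Illustrative growth: taint goes 0 -> 3 -> 12 -> 39 over
        * successive calls (3 * (old taint + 1) per call), capped at
        * MAX_TAINT; the comparison with n guards against overflow.
        */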
  754 
  755 static int
  756 count_targets(struct aoedev *d, int *untainted)
  757 {
  758     int i, good;
  759 
  760     for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
  761         if (d->targets[i]->taint == 0)
  762             good++;
  763 
  764     if (untainted)
  765         *untainted = good;
  766     return i;
  767 }
  768 
  769 static void
  770 rexmit_timer(ulong vp)
  771 {
  772     struct aoedev *d;
  773     struct aoetgt *t;
  774     struct aoeif *ifp;
  775     struct frame *f;
  776     struct list_head *head, *pos, *nx;
  777     LIST_HEAD(flist);
  778     register long timeout;
  779     ulong flags, n;
  780     int i;
  781     int utgts;  /* number of aoetgt descriptors (not slots) */
  782     int since;
  783 
  784     d = (struct aoedev *) vp;
  785 
  786     spin_lock_irqsave(&d->lock, flags);
  787 
  788     /* timeout based on observed timings and variations */
  789     timeout = rto(d);
  790 
  791     utgts = count_targets(d, NULL);
  792 
  793     if (d->flags & DEVFL_TKILL) {
  794         spin_unlock_irqrestore(&d->lock, flags);
  795         return;
  796     }
  797 
  798     /* collect all frames to rexmit into flist */
  799     for (i = 0; i < NFACTIVE; i++) {
  800         head = &d->factive[i];
  801         list_for_each_safe(pos, nx, head) {
  802             f = list_entry(pos, struct frame, head);
  803             if (tsince_hr(f) < timeout)
  804                 break;  /* end of expired frames */
  805             /* move to flist for later processing */
  806             list_move_tail(pos, &flist);
  807         }
  808     }
  809 
  810     /* process expired frames */
  811     while (!list_empty(&flist)) {
  812         pos = flist.next;
  813         f = list_entry(pos, struct frame, head);
  814         since = tsince_hr(f);
  815         n = f->waited_total + since;
  816         n /= USEC_PER_SEC;
  817         if (aoe_deadsecs
  818         && n > aoe_deadsecs
  819         && !(f->flags & FFL_PROBE)) {
  820             /* Waited too long.  Device failure.
  821              * Hang all frames on first hash bucket for downdev
  822              * to clean up.
  823              */
  824             list_splice(&flist, &d->factive[0]);
  825             aoedev_downdev(d);
  826             goto out;
  827         }
  828 
  829         t = f->t;
  830         n = f->waited + since;
  831         n /= USEC_PER_SEC;
  832         if (aoe_deadsecs && utgts > 0
  833         && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
  834             scorn(t); /* avoid this target */
  835 
  836         t->lost += 1;   /* (decremented on unexpected response) */
  837         if (t->maxout != 1) {
  838             t->ssthresh = t->maxout / 2;
  839             t->maxout = 1;
  840         }
  841 
  842         if (f->flags & FFL_PROBE) {
  843             t->nout_probes--;
  844         } else {
  845             ifp = getif(t, f->skb->dev);
  846             if (ifp && ++ifp->lost > (t->nframes << 1)
  847             && (ifp != t->ifs || t->ifs[1].nd)) {
  848                 ejectif(t, ifp);
  849                 ifp = NULL;
  850             }
  851         }
  852 
  853         list_move_tail(pos, &d->rexmitq);
  854         t->nout--;
  855     }
  856     rexmit_deferred(d);
  857 
  858 out:
  859     if ((d->flags & DEVFL_KICKME) && d->blkq) {
  860         d->flags &= ~DEVFL_KICKME;
  861         d->blkq->request_fn(d->blkq);
  862     }
  863 
  864     d->timer.expires = jiffies + TIMERTICK;
  865     add_timer(&d->timer);
  866 
  867     spin_unlock_irqrestore(&d->lock, flags);
  868 }
  869 
  870 static unsigned long
  871 rqbiocnt(struct request *r)
  872 {
  873     struct bio *bio;
  874     unsigned long n = 0;
  875 
  876     __rq_for_each_bio(bio, r)
  877         n++;
  878     return n;
  879 }
  880 
  881 /* This can be removed if we are certain that no users of the block
  882  * layer will ever use zero-count pages in bios.  Otherwise we have to
  883  * protect against the put_page sometimes done by the network layer.
  884  *
  885  * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
  886  * discussion.
  887  *
  888  * We cannot use get_page in the workaround, because it insists on a
  889  * positive page count as a precondition.  So we use _count directly.
  890  */
  891 static void
  892 bio_pageinc(struct bio *bio)
  893 {
  894     struct bio_vec bv;
  895     struct page *page;
  896     struct bvec_iter iter;
  897 
  898     bio_for_each_segment(bv, bio, iter) {
  899         /* Non-zero page count for non-head members of
  900          * compound pages is no longer allowed by the kernel.
  901          */
  902         page = compound_trans_head(bv.bv_page);
  903         atomic_inc(&page->_count);
  904     }
  905 }
  906 
  907 static void
  908 bufinit(struct buf *buf, struct request *rq, struct bio *bio)
  909 {
  910     memset(buf, 0, sizeof *buf);
  911     buf->rq = rq;
  912     buf->bio = bio;
  913     buf->iter = bio->bi_iter;
  914     bio_pageinc(bio);
  915 }
  916 
  917 static struct buf *
  918 nextbuf(struct aoedev *d)
  919 {
  920     struct request *rq;
  921     struct request_queue *q;
  922     struct buf *buf;
  923     struct bio *bio;
  924 
  925     q = d->blkq;
  926     if (q == NULL)
  927         return NULL;    /* initializing */
  928     if (d->ip.buf)
  929         return d->ip.buf;
  930     rq = d->ip.rq;
  931     if (rq == NULL) {
  932         rq = blk_peek_request(q);
  933         if (rq == NULL)
  934             return NULL;
  935         blk_start_request(rq);
  936         d->ip.rq = rq;
  937         d->ip.nxbio = rq->bio;
  938         rq->special = (void *) rqbiocnt(rq);
  939     }
  940     buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
  941     if (buf == NULL) {
  942         printk(KERN_ERR "aoe: nextbuf: unable to mempool_alloc!\n");
   943         return NULL;
  944     }
  945     bio = d->ip.nxbio;
  946     bufinit(buf, rq, bio);
  947     bio = bio->bi_next;
  948     d->ip.nxbio = bio;
  949     if (bio == NULL)
  950         d->ip.rq = NULL;
  951     return d->ip.buf = buf;
  952 }
  953 
  954 /* enters with d->lock held */
  955 void
  956 aoecmd_work(struct aoedev *d)
  957 {
  958     rexmit_deferred(d);
  959     while (aoecmd_ata_rw(d))
  960         ;
  961 }
  962 
  963 /* this function performs work that has been deferred until sleeping is OK
  964  */
  965 void
  966 aoecmd_sleepwork(struct work_struct *work)
  967 {
  968     struct aoedev *d = container_of(work, struct aoedev, work);
  969     struct block_device *bd;
  970     u64 ssize;
  971 
  972     if (d->flags & DEVFL_GDALLOC) {
  973         aoedbg_devinit(d);
  974         aoeblk_gdalloc(d);
  975     }
  976 
  977     if (d->flags & DEVFL_NEWSIZE) {
  978         ssize = get_capacity(d->gd);
  979         bd = bdget_disk(d->gd, 0);
  980         if (bd) {
  981             mutex_lock(&bd->bd_inode->i_mutex);
  982             i_size_write(bd->bd_inode, (loff_t)ssize<<9);
  983             mutex_unlock(&bd->bd_inode->i_mutex);
  984             bdput(bd);
  985         }
  986         spin_lock_irq(&d->lock);
  987         d->flags |= DEVFL_UP;
  988         d->flags &= ~DEVFL_NEWSIZE;
  989         spin_unlock_irq(&d->lock);
  990     }
  991 }
  992 
  993 static void
  994 ata_ident_fixstring(u16 *id, int ns)
  995 {
  996     u16 s;
  997 
  998     while (ns-- > 0) {
  999         s = *id;
 1000         *id++ = s >> 8 | s << 8;
 1001     }
 1002 }
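       /* ATA IDENTIFY strings carry two characters per 16-bit word
        * in byte-swapped order; swapping each of the ns words in
        * place makes the serial, firmware, and model strings (below)
        * readable.
        */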
 1003 
 1004 static void
 1005 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 1006 {
 1007     u64 ssize;
 1008     u16 n;
 1009 
 1010     /* word 83: command set supported */
 1011     n = le16_to_cpu(get_unaligned((u16 *) &id[83<<1]));
 1012 
 1013     /* word 86: command set/feature enabled */
 1014     n |= le16_to_cpu(get_unaligned((u16 *) &id[86<<1]));
 1015 
 1016     if (n & (1<<10)) {  /* bit 10: LBA 48 */
 1017         d->flags |= DEVFL_EXT;
 1018 
 1019         /* word 100: number lba48 sectors */
 1020         ssize = le64_to_cpu(get_unaligned((u64 *) &id[100<<1]));
 1021 
 1022         /* set as in ide-disk.c:init_idedisk_capacity */
 1023         d->geo.cylinders = ssize;
 1024         d->geo.cylinders /= (255 * 63);
 1025         d->geo.heads = 255;
 1026         d->geo.sectors = 63;
 1027     } else {
 1028         d->flags &= ~DEVFL_EXT;
 1029 
 1030         /* number lba28 sectors */
 1031         ssize = le32_to_cpu(get_unaligned((u32 *) &id[60<<1]));
 1032 
 1033         /* NOTE: obsolete in ATA 6 */
 1034         d->geo.cylinders = le16_to_cpu(get_unaligned((u16 *) &id[54<<1]));
 1035         d->geo.heads = le16_to_cpu(get_unaligned((u16 *) &id[55<<1]));
 1036         d->geo.sectors = le16_to_cpu(get_unaligned((u16 *) &id[56<<1]));
 1037     }
 1038 
 1039     ata_ident_fixstring((u16 *) &id[10<<1], 10);    /* serial */
 1040     ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */
 1041     ata_ident_fixstring((u16 *) &id[27<<1], 20);    /* model */
 1042     memcpy(d->ident, id, sizeof(d->ident));
 1043 
 1044     if (d->ssize != ssize)
 1045         printk(KERN_INFO
 1046             "aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
 1047             mac_addr(t->addr),
 1048             d->aoemajor, d->aoeminor,
 1049             d->fw_ver, (long long)ssize);
 1050     d->ssize = ssize;
 1051     d->geo.start = 0;
 1052     if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 1053         return;
 1054     if (d->gd != NULL) {
 1055         set_capacity(d->gd, ssize);
 1056         d->flags |= DEVFL_NEWSIZE;
 1057     } else
 1058         d->flags |= DEVFL_GDALLOC;
 1059     schedule_work(&d->work);
 1060 }
 1061 
 1062 static void
 1063 calc_rttavg(struct aoedev *d, struct aoe_hdr *h,
 1064         struct frame *f,    /* for tracing RTTs */
 1065         struct aoetgt *t, int rtt)
 1066 {
 1067     register long n;
 1068 
 1069     n = rtt;
 1070     aoedbg_rtt(d, h, f, rtt);
 1071 
 1072     /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
 1073     n -= d->rttavg >> RTTSCALE;
 1074     d->rttavg += n;
 1075     if (n < 0)
 1076         n = -n;
 1077     n -= d->rttdev >> RTTDSCALE;
 1078     d->rttdev += n;
 1079 
 1080     if (!t || t->maxout >= t->nframes)
 1081         return;
 1082     if (t->maxout < t->ssthresh)
 1083         t->maxout += 1;
 1084     else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
 1085         t->maxout += 1;
 1086         t->next_cwnd = t->maxout;
 1087     }
 1088 }
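       /* The updates above form an exponentially weighted moving
        * average kept scaled for precision: effectively
        * srtt += (rtt - srtt) / 2^RTTSCALE, and likewise for the
        * mean deviation, per the cited Jacobson & Karels scheme.
        */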
 1089 
 1090 static struct aoetgt *
 1091 gettgt(struct aoedev *d, char *addr)
 1092 {
 1093     struct aoetgt **t, **e;
 1094 
 1095     t = d->targets;
 1096     e = t + d->ntargets;
 1097     for(; t < e && *t; t++)
 1098         if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
 1099             return *t;
 1100     return NULL;
 1101 }
 1102 
 1103 static void
 1104 bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
 1105 {
 1106     int soff = 0;
 1107     struct bio_vec bv;
 1108 
 1109     iter.bi_size = cnt;
 1110 
 1111     __bio_for_each_segment(bv, bio, iter, iter) {
 1112         char *p = page_address(bv.bv_page) + bv.bv_offset;
 1113         skb_copy_bits(skb, soff, p, bv.bv_len);
 1114         soff += bv.bv_len;
 1115     }
 1116 }
 1117 
 1118 void
 1119 aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
 1120 {
 1121     struct bio *bio;
 1122     int bok;
 1123     struct request_queue *q;
 1124 
 1125     q = d->blkq;
 1126     if (rq == d->ip.rq)
 1127         d->ip.rq = NULL;
 1128     do {
 1129         bio = rq->bio;
 1130         bok = !fastfail && !bio->bi_error;
 1131     } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
 1132 
 1133     /* cf. http://lkml.org/lkml/2006/10/31/28 */
 1134     if (!fastfail)
 1135         __blk_run_queue(q);
 1136 }
 1137 
 1138 static void
 1139 aoe_end_buf(struct aoedev *d, struct buf *buf)
 1140 {
 1141     struct request *rq;
 1142     unsigned long n;
 1143 
 1144     if (buf == d->ip.buf)
 1145         d->ip.buf = NULL;
 1146     rq = buf->rq;
 1147     aoe_bio_pagedec(buf->bio);
 1148     mempool_free(buf, d->bufpool);
 1149     n = (unsigned long) rq->special;
 1150     rq->special = (void *) --n;
 1151     if (n == 0)
 1152         aoe_end_request(d, rq, 0);
 1153 }
 1154 
 1155 static void
 1156 ktiocomplete(struct frame *f)
 1157 {
 1158     struct aoe_hdr *hin, *hout;
 1159     struct aoe_atahdr *ahin, *ahout;
 1160     struct buf *buf;
 1161     struct sk_buff *skb;
 1162     struct aoetgt *t;
 1163     struct aoeif *ifp;
 1164     struct aoedev *d;
 1165     long n;
 1166     int untainted;
 1167 
 1168     if (f == NULL)
 1169         return;
 1170 
 1171     t = f->t;
 1172     d = t->d;
 1173     skb = f->r_skb;
 1174     buf = f->buf;
 1175     if (f->flags & FFL_PROBE)
 1176         goto out;
 1177     if (!skb)       /* just fail the buf. */
 1178         goto noskb;
 1179 
 1180     hout = (struct aoe_hdr *) skb_mac_header(f->skb);
 1181     ahout = (struct aoe_atahdr *) (hout+1);
 1182 
 1183     hin = (struct aoe_hdr *) skb->data;
 1184     skb_pull(skb, sizeof(*hin));
 1185     ahin = (struct aoe_atahdr *) skb->data;
 1186     skb_pull(skb, sizeof(*ahin));
 1187 
 1188     if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
 1189         printk(KERN_ERR
 1190             "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
 1191             ahout->cmdstat, ahin->cmdstat,
 1192             d->aoemajor, d->aoeminor);
 1193 noskb:      if (buf)
 1194             buf->bio->bi_error = -EIO;
 1195         goto out;
 1196     }
 1197 
 1198     n = ahout->scnt << 9;
 1199     switch (ahout->cmdstat) {
 1200     case ATA_CMD_PIO_READ:
 1201     case ATA_CMD_PIO_READ_EXT:
 1202         if (skb->len < n) {
 1203             printk(KERN_ERR
 1204                 "aoe: runt data size in read from e%ld.%d."
 1205                 "  skb->len=%d need=%ld\n",
 1206                 (long) d->aoemajor, d->aoeminor,
 1207                    skb->len, n);
 1208             buf->bio->bi_error = -EIO;
 1209             break;
 1210         }
 1211         if (n > f->iter.bi_size) {
 1212             pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
 1213                 "aoe: too-large data size in read from",
 1214                 (long) d->aoemajor, d->aoeminor,
 1215                 n, f->iter.bi_size);
 1216             buf->bio->bi_error = -EIO;
 1217             break;
 1218         }
 1219         bvcpy(skb, f->buf->bio, f->iter, n);
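               /* fall through: reads also clear ifp->lost below */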
 1220     case ATA_CMD_PIO_WRITE:
 1221     case ATA_CMD_PIO_WRITE_EXT:
 1222         spin_lock_irq(&d->lock);
 1223         ifp = getif(t, skb->dev);
 1224         if (ifp)
 1225             ifp->lost = 0;
 1226         spin_unlock_irq(&d->lock);
 1227         break;
 1228     case ATA_CMD_ID_ATA:
 1229         if (skb->len < 512) {
 1230             printk(KERN_INFO
 1231                 "aoe: runt data size in ataid from e%ld.%d."
 1232                 "  skb->len=%d need=512\n",
 1233                 (long) d->aoemajor, d->aoeminor,
 1234                 skb->len);
 1235             break;
 1236         }
 1237         if (skb_linearize(skb))
 1238             break;
 1239         spin_lock_irq(&d->lock);
 1240         ataid_complete(d, t, skb->data);
 1241         spin_unlock_irq(&d->lock);
 1242         break;
 1243     default:
 1244         printk(KERN_INFO
 1245             "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
 1246             ahout->cmdstat,
 1247             be16_to_cpu(get_unaligned(&hin->major)),
 1248             hin->minor);
 1249     }
 1250 out:
 1251     spin_lock_irq(&d->lock);
 1252     if (t->taint > 0
 1253     && --t->taint > 0
 1254     && t->nout_probes == 0) {
 1255         count_targets(d, &untainted);
 1256         if (untainted > 0) {
 1257             probe(t);
 1258             t->nout_probes++;
 1259         }
 1260     }
 1261 
 1262     aoe_freetframe(f);
 1263 
 1264     if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
 1265         aoe_end_buf(d, buf);
 1266 
 1267     spin_unlock_irq(&d->lock);
 1268     aoedev_put(d);
 1269     dev_kfree_skb(skb);
 1270 }
 1271 
 1272 /* Enters with iocq.lock held.
 1273  * Returns true iff responses needing processing remain.
 1274  */
 1275 static int
 1276 ktio(int id)
 1277 {
 1278     struct frame *f;
 1279     struct list_head *pos;
 1280     int i;
 1281     int actual_id;
 1282 
 1283     for (i = 0; ; ++i) {
 1284         if (aoe_maxioc && i >= aoe_maxioc)
 1285             return 1;
 1286         if (list_empty(&iocq[id].head))
 1287             return 0;
 1288         pos = iocq[id].head.next;
 1289         list_del(pos);
 1290         f = list_entry(pos, struct frame, head);
 1291         spin_unlock_irq(&iocq[id].lock);
 1292         ktiocomplete(f);
 1293 
 1294         /* Figure out if extra threads are required. */
 1295         actual_id = f->t->d->aoeminor % ncpus;
 1296 
 1297         if (!kts[actual_id].active) {
 1298             BUG_ON(id != 0);
 1299             mutex_lock(&ktio_spawn_lock);
 1300             if (!kts[actual_id].active
 1301                 && aoe_ktstart(&kts[actual_id]) == 0)
 1302                 kts[actual_id].active = 1;
 1303             mutex_unlock(&ktio_spawn_lock);
 1304         }
 1305         spin_lock_irq(&iocq[id].lock);
 1306     }
 1307 }
 1308 
 1309 static int
 1310 kthread(void *vp)
 1311 {
 1312     struct ktstate *k;
 1313     DECLARE_WAITQUEUE(wait, current);
 1314     int more;
 1315 
 1316     k = vp;
 1317     current->flags |= PF_NOFREEZE;
 1318     set_user_nice(current, -10);
 1319     complete(&k->rendez);   /* tell spawner we're running */
 1320     do {
 1321         spin_lock_irq(k->lock);
 1322         more = k->fn(k->id);
 1323         if (!more) {
 1324             add_wait_queue(k->waitq, &wait);
 1325             __set_current_state(TASK_INTERRUPTIBLE);
 1326         }
 1327         spin_unlock_irq(k->lock);
 1328         if (!more) {
 1329             schedule();
 1330             remove_wait_queue(k->waitq, &wait);
 1331         } else
 1332             cond_resched();
 1333     } while (!kthread_should_stop());
 1334     complete(&k->rendez);   /* tell spawner we're stopping */
 1335     return 0;
 1336 }
 1337 
 1338 void
 1339 aoe_ktstop(struct ktstate *k)
 1340 {
 1341     kthread_stop(k->task);
 1342     wait_for_completion(&k->rendez);
 1343 }
 1344 
 1345 int
 1346 aoe_ktstart(struct ktstate *k)
 1347 {
 1348     struct task_struct *task;
 1349 
 1350     init_completion(&k->rendez);
 1351     task = kthread_run(kthread, k, k->name);
 1352     if (task == NULL || IS_ERR(task))
 1353         return -ENOMEM;
 1354     k->task = task;
 1355     wait_for_completion(&k->rendez); /* allow kthread to start */
 1356     init_completion(&k->rendez);    /* for waiting for exit later */
 1357     return 0;
 1358 }
 1359 
 1360 /* pass it off to kthreads for processing */
 1361 static void
 1362 ktcomplete(struct frame *f, struct sk_buff *skb)
 1363 {
 1364     int id;
 1365     ulong flags;
 1366 
 1367     f->r_skb = skb;
 1368     id = f->t->d->aoeminor % ncpus;
 1369     spin_lock_irqsave(&iocq[id].lock, flags);
 1370     if (!kts[id].active) {
 1371         spin_unlock_irqrestore(&iocq[id].lock, flags);
 1372         /* The thread with id has not been spawned yet,
 1373          * so delegate the work to the main thread and
 1374          * try spawning a new thread.
 1375          */
 1376         id = 0;
 1377         spin_lock_irqsave(&iocq[id].lock, flags);
 1378     }
 1379     list_add_tail(&f->head, &iocq[id].head);
 1380     spin_unlock_irqrestore(&iocq[id].lock, flags);
 1381     wake_up(&ktiowq[id]);
 1382 }
 1383 
 1384 struct sk_buff *
 1385 aoecmd_ata_rsp(struct sk_buff *skb)
 1386 {
 1387     struct aoedev *d;
 1388     struct aoe_hdr *h;
 1389     struct frame *f;
 1390     struct aoetgt *t;
 1391     u32 n;
 1392     ulong flags;
 1393     char ebuf[128];
 1394     u16 aoemajor;
 1395 
 1396     h = (struct aoe_hdr *) skb->data;
 1397     aoemajor = be16_to_cpu(get_unaligned(&h->major));
 1398     d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
 1399     if (d == NULL) {
 1400         snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
 1401             "for unknown device %d.%d\n",
 1402             aoemajor, h->minor);
 1403         aoechr_error(ebuf);
 1404         return skb;
 1405     }
 1406 
 1407     spin_lock_irqsave(&d->lock, flags);
 1408 
 1409     n = be32_to_cpu(get_unaligned(&h->tag));
 1410     f = getframe(d, n);
 1411     if (f) {
 1412         calc_rttavg(d, h, f, f->t, tsince_hr(f));
 1413         f->t->nout--;
 1414         if (f->flags & FFL_PROBE)
 1415             f->t->nout_probes--;
 1416     } else {
 1417         f = getframe_deferred(d, n);
 1418         if (f) {
 1419             calc_rttavg(d, h, f, NULL, tsince_hr(f));
 1420         } else {
 1421             calc_rttavg(d, h, NULL, NULL, tsince(n));
 1422             t = gettgt(d, h->src);
 1423             if (t)
 1424                 t->lost--; /* packet wasn't lost, just late */
 1425             spin_unlock_irqrestore(&d->lock, flags);
 1426             aoedev_put(d);
 1427             snprintf(ebuf, sizeof ebuf,
 1428                 "%15s e%d.%d    tag=%08x@%08lx s=%012llx d=%012llx\n",
 1429                 "unexpected rsp",
 1430                 be16_to_cpu(get_unaligned(&h->major)),
 1431                 h->minor,
 1432                 be32_to_cpu(get_unaligned(&h->tag)),
 1433                 jiffies,
 1434                 mac_addr(h->src),
 1435                 mac_addr(h->dst));
 1436             aoechr_error(ebuf);
 1437             return skb;
 1438         }
 1439     }
 1440     aoecmd_work(d);
 1441 
 1442     spin_unlock_irqrestore(&d->lock, flags);
 1443 
 1444     ktcomplete(f, skb);
 1445 
 1446     /*
 1447      * Note here that we do not perform an aoedev_put, as we are
 1448      * leaving this reference for the ktio to release.
 1449      */
 1450     return NULL;
 1451 }
 1452 
 1453 void
 1454 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 1455 {
 1456     struct sk_buff *sl;
 1457 
 1458     sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
 1459 
 1460     aoenet_xmit(sl);
 1461 }
 1462 
 1463 struct sk_buff *
 1464 aoecmd_ata_id(struct aoedev *d)
 1465 {
 1466     struct aoe_hdr *h;
 1467     struct aoe_atahdr *ah;
 1468     struct frame *f;
 1469     struct sk_buff *skb;
 1470     struct aoetgt *t;
 1471 
 1472     f = newframe(d);
 1473     if (f == NULL)
 1474         return NULL;
 1475 
 1476     t = *d->tgt;
 1477 
 1478     /* initialize the headers & frame */
 1479     skb = f->skb;
 1480     h = (struct aoe_hdr *) skb_mac_header(skb);
 1481     ah = (struct aoe_atahdr *) (h+1);
 1482     skb_put(skb, sizeof *h + sizeof *ah);
 1483     memset(h, 0, skb->len);
 1484     f->tag = aoehdr_atainit(d, t, h);
 1485     fhash(f);
 1486     t->nout++;
 1487     f->waited = 0;
 1488     f->waited_total = 0;
 1489 
 1490     /* set up ata header */
 1491     ah->scnt = 1;
 1492     ah->cmdstat = ATA_CMD_ID_ATA;
 1493     ah->lba3 = 0xa0;
 1494 
 1495     skb->dev = t->ifp->nd;
 1496 
 1497     d->rttavg = RTTAVG_INIT;
 1498     d->rttdev = RTTDEV_INIT;
 1499     d->timer.function = rexmit_timer;
 1500 
 1501     skb = skb_clone(skb, GFP_ATOMIC);
 1502     if (skb) {
 1503         do_gettimeofday(&f->sent);
 1504         f->sent_jiffs = (u32) jiffies;
 1505     }
 1506 
 1507     return skb;
 1508 }
 1509 
 1510 static struct aoetgt **
 1511 grow_targets(struct aoedev *d)
 1512 {
 1513     ulong oldn, newn;
 1514     struct aoetgt **tt;
 1515 
 1516     oldn = d->ntargets;
 1517     newn = oldn * 2;
 1518     tt = aoe_kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
 1519     if (!tt)
 1520         return NULL;
 1521     memmove(tt, d->targets, sizeof(*d->targets) * oldn);
 1522     d->tgt = tt + (d->tgt - d->targets);
 1523     kfree(d->targets);
 1524     d->targets = tt;
 1525     d->ntargets = newn;
 1526 
 1527     return &d->targets[oldn];
 1528 }
 1529 
 1530 static struct aoetgt *
 1531 addtgt(struct aoedev *d, char *addr, ulong nframes)
 1532 {
 1533     struct aoetgt *t, **tt, **te;
 1534 
 1535     tt = d->targets;
 1536     te = tt + d->ntargets;
 1537     for (; tt < te && *tt; tt++)
 1538         ;
 1539 
 1540     if (tt == te) {
 1541         tt = grow_targets(d);
 1542         if (!tt)
 1543             goto nomem;
 1544     }
 1545     t = aoe_kcalloc(1, sizeof(*t), GFP_ATOMIC);
 1546     if (!t)
 1547         goto nomem;
 1548     t->nframes = nframes;
 1549     t->d = d;
 1550     memcpy(t->addr, addr, sizeof t->addr);
 1551     t->ifp = t->ifs;
 1552     aoecmd_wreset(t);
 1553     t->maxout = t->nframes / 2;
 1554     INIT_LIST_HEAD(&t->ffree);
 1555     return *tt = t;
 1556 
 1557  nomem:
 1558     printk(KERN_INFO
 1559         "aoe: cannot allocate memory to add target\n");
 1560     return NULL;
 1561 }
 1562 
 1563 static void
 1564 setdbcnt(struct aoedev *d)
 1565 {
 1566     struct aoetgt **t, **e;
 1567     int bcnt = 0;
 1568 
 1569     t = d->targets;
 1570     e = t + d->ntargets;
 1571     for (; t<e && *t; t++)
 1572         if (bcnt == 0 || bcnt > (*t)->minbcnt)
 1573             bcnt = (*t)->minbcnt;
 1574     if (bcnt != d->maxbcnt) {
 1575         d->maxbcnt = bcnt;
 1576         printk(KERN_INFO
 1577             "aoe: e%ld.%d: setting %d byte data frames\n",
 1578             d->aoemajor, d->aoeminor, bcnt);
 1579     }
 1580 }
 1581 
 1582 static void
 1583 setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
 1584 {
 1585     struct aoedev *d;
 1586     struct aoeif *p, *e;
 1587     int minbcnt;
 1588 
 1589     d = t->d;
 1590     minbcnt = bcnt;
 1591     p = t->ifs;
 1592     e = p + NAOEIFS;
 1593     for (; p < e; p++) {
 1594         if (p->nd == NULL)
 1595             break;      /* end of the valid interfaces */
 1596         if (p->nd == nd) {
 1597             p->bcnt = bcnt; /* we're updating */
 1598             nd = NULL;
 1599         } else if (minbcnt > p->bcnt)
 1600             minbcnt = p->bcnt; /* find the min interface */
 1601     }
 1602     if (nd) {
 1603         if (p == e) {
 1604             printk(KERN_ERR
 1605                 "aoe: device setifbcnt failure; "
 1606                 "too many interfaces\n");
 1607             return;
 1608         }
 1609         dev_hold(nd);
 1610         p->nd = nd;
 1611         p->bcnt = bcnt;
 1612     }
 1613     t->minbcnt = minbcnt;
 1614     setdbcnt(d);
 1615 }
 1616 
 1617 void
 1618 aoecmd_cfg_rsp(struct sk_buff *skb)
 1619 {
 1620     struct aoedev *d;
 1621     struct aoe_hdr *h;
 1622     struct aoe_cfghdr *ch;
 1623     struct aoetgt *t;
 1624     ulong flags, aoemajor;
 1625     struct sk_buff *sl;
 1626     u16 n;
 1627 
 1628     sl = NULL;
 1629     h = (struct aoe_hdr *) skb_mac_header(skb);
 1630     ch = (struct aoe_cfghdr *) (h+1);
 1631 
 1632     /*
 1633      * Enough people have their dip switches set backwards to
 1634      * warrant a loud message for this special case.
 1635      */
 1636     aoemajor = be16_to_cpu(get_unaligned(&h->major));
 1637     if (aoemajor == 0xfff) {
 1638         printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
 1639             "Check shelf dip switches.\n");
 1640         return;
 1641     }
 1642     if (aoemajor == 0xffff) {
 1643         printk(KERN_INFO "aoe: e%ld.%d: broadcast shelf number invalid\n",
 1644             aoemajor, (int) h->minor);
 1645         return;
 1646     }
 1647     if (h->minor == 0xff) {
 1648         printk(KERN_INFO "aoe: e%ld.%d: broadcast slot number invalid\n",
 1649             aoemajor, (int) h->minor);
 1650         return;
 1651     }
 1652 
 1653     n = be16_to_cpu(ch->bufcnt);
 1654     if (n > aoe_maxout) /* keep it reasonable */
 1655         n = aoe_maxout;
 1656 
 1657     d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
 1658     if (d == NULL) {
 1659         printk(KERN_INFO "aoe: device allocation failure\n");
 1660         return;
 1661     }
 1662 
 1663     spin_lock_irqsave(&d->lock, flags);
 1664 
 1665     t = gettgt(d, h->src);
 1666     if (t) {
 1667         t->nframes = n;
 1668         if (n < t->maxout)
 1669             aoecmd_wreset(t);
 1670     } else {
 1671         t = addtgt(d, h->src, n);
 1672         if (!t)
 1673             goto bail;
 1674     }
 1675     n = skb->dev->mtu;
 1676     n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
 1677     n /= 512;
 1678     if (n > ch->scnt)
 1679         n = ch->scnt;
 1680     n = n ? n * 512 : DEFAULTBCNT;
 1681     setifbcnt(t, skb->dev, n);
 1682 
 1683     /* don't change users' perspective */
 1684     if (d->nopen == 0) {
 1685         d->fw_ver = be16_to_cpu(ch->fwver);
 1686         sl = aoecmd_ata_id(d);
 1687     }
 1688 bail:
 1689     spin_unlock_irqrestore(&d->lock, flags);
 1690     aoedev_put(d);
 1691     aoenet_xmit(sl);
 1692 }
 1693 
 1694 void
 1695 aoecmd_wreset(struct aoetgt *t)
 1696 {
 1697     t->maxout = 1;
 1698     t->ssthresh = t->nframes / 2;
 1699     t->next_cwnd = t->nframes;
 1700 }
 1701 
 1702 void
 1703 aoecmd_cleanslate(struct aoedev *d)
 1704 {
 1705     struct aoetgt **t, **te;
 1706 
 1707     d->rttavg = RTTAVG_INIT;
 1708     d->rttdev = RTTDEV_INIT;
 1709     d->maxbcnt = 0;
 1710 
 1711     t = d->targets;
 1712     te = t + d->ntargets;
 1713     for (; t < te && *t; t++)
 1714         aoecmd_wreset(*t);
 1715 }
 1716 
 1717 void
 1718 aoe_failbuf(struct aoedev *d, struct buf *buf)
 1719 {
 1720     if (buf == NULL)
 1721         return;
 1722     buf->iter.bi_size = 0;
 1723     buf->bio->bi_error = -EIO;
 1724     if (buf->nframesout == 0)
 1725         aoe_end_buf(d, buf);
 1726 }
 1727 
 1728 void
 1729 aoe_flush_iocq(void)
 1730 {
 1731     int i;
 1732 
 1733     for (i = 0; i < ncpus; i++) {
 1734         if (kts[i].active)
 1735             aoe_flush_iocq_by_index(i);
 1736     }
 1737 }
 1738 
 1739 void
 1740 aoe_flush_iocq_by_index(int id)
 1741 {
 1742     struct frame *f;
 1743     struct aoedev *d;
 1744     LIST_HEAD(flist);
 1745     struct list_head *pos;
 1746     struct sk_buff *skb;
 1747     ulong flags;
 1748 
 1749     spin_lock_irqsave(&iocq[id].lock, flags);
 1750     list_splice(&iocq[id].head, &flist);
 1751     INIT_LIST_HEAD(&iocq[id].head);
 1752     spin_unlock_irqrestore(&iocq[id].lock, flags);
 1753     while (!list_empty(&flist)) {
 1754         pos = flist.next;
 1755         list_del(pos);
 1756         f = list_entry(pos, struct frame, head);
 1757         d = f->t->d;
 1758         skb = f->r_skb;
 1759         spin_lock_irqsave(&d->lock, flags);
 1760         if (f->buf) {
 1761             f->buf->nframesout--;
 1762             aoe_failbuf(d, f->buf);
 1763         }
 1764         aoe_freetframe(f);
 1765         spin_unlock_irqrestore(&d->lock, flags);
 1766         dev_kfree_skb(skb);
 1767         aoedev_put(d);
 1768     }
 1769 }
 1770 
 1771 int __init
 1772 aoecmd_init(void)
 1773 {
 1774     void *p;
 1775     int i;
 1776     int ret;
 1777 
 1778     /* get_zeroed_page returns page with ref count 1 */
 1779     p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
 1780     if (!p)
 1781         return -ENOMEM;
 1782     empty_page = virt_to_page(p);
 1783 
  1785     /* The number of ktio threads to be spawned is governed by
  1786      * the number of online CPUs available in the system.
  1787      */
 1788     ncpus = num_online_cpus();
 1789 
 1790     iocq = aoe_kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
 1791     if (!iocq)
 1792         return -ENOMEM;
 1793 
 1794     kts = aoe_kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
 1795     if (!kts) {
 1796         ret = -ENOMEM;
 1797         goto kts_fail;
 1798     }
 1799 
 1800     ktiowq = aoe_kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
 1801     if (!ktiowq) {
 1802         ret = -ENOMEM;
 1803         goto ktiowq_fail;
 1804     }
 1805 
 1806     mutex_init(&ktio_spawn_lock);
 1807 
 1808     for (i = 0; i < ncpus; i++) {
 1809         INIT_LIST_HEAD(&iocq[i].head);
 1810         spin_lock_init(&iocq[i].lock);
 1811         init_waitqueue_head(&ktiowq[i]);
 1812         snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
 1813         kts[i].fn = ktio;
 1814         kts[i].waitq = &ktiowq[i];
 1815         kts[i].lock = &iocq[i].lock;
 1816         kts[i].id = i;
 1817         kts[i].active = 0;
 1818     }
 1819     kts[0].active = 1;
 1820     if (aoe_ktstart(&kts[0])) {
 1821         ret = -ENOMEM;
 1822         goto ktstart_fail;
 1823     }
 1824     return 0;
 1825 
 1826 ktstart_fail:
 1827     kfree(ktiowq);
 1828 ktiowq_fail:
 1829     kfree(kts);
 1830 kts_fail:
 1831     kfree(iocq);
 1832 
 1833     return ret;
 1834 }
 1835 
 1836 void
 1837 aoecmd_exit(void)
 1838 {
 1839     int i;
 1840 
 1841     for (i = 0; i < ncpus; i++)
 1842         if (kts[i].active)
 1843             aoe_ktstop(&kts[i]);
 1844 
 1845     aoe_flush_iocq();
 1846 
  1847     /* Free up the iocq and thread-specific configuration
  1848      * allocated during startup.
  1849      */
 1850     kfree(iocq);
 1851     kfree(kts);
 1852     kfree(ktiowq);
 1853 
 1854     free_page((unsigned long) page_address(empty_page));
 1855     empty_page = NULL;
 1856 }