"Fossies" - the Fresh Open Source Software Archive

Member "NetPIPE-3.7.2/src/netpipe.c" (19 Aug 2010, 49538 Bytes) of package /linux/privat/old/NetPIPE-3.7.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "netpipe.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.x_vs_3.7.2.

    1 /*****************************************************************************/
    2 /* "NetPIPE" -- Network Protocol Independent Performance Evaluator.          */
    3 /* Copyright 1997, 1998 Iowa State University Research Foundation, Inc.      */
    4 /*                                                                           */
    5 /* This program is free software; you can redistribute it and/or modify      */
    6 /* it under the terms of the GNU General Public License as published by      */
    7 /* the Free Software Foundation.  You should have received a copy of the     */
    8 /* GNU General Public License along with this program; if not, write to the  */
    9 /* Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.   */
   10 /*                                                                           */
   11 /* Files needed for use:                                                     */
   12 /*     * netpipe.c       ---- Driver source                                  */
   13 /*     * netpipe.h       ---- General include file                           */
   14 /*     * tcp.c           ---- TCP calls source                               */
   15 /*     * tcp.h           ---- Include file for TCP calls and data structs    */
   16 /*     * mpi.c           ---- MPI calls source                               */
   17 /*     * pvm.c           ---- PVM calls source                               */
   18 /*     * pvm.h           ---- Include file for PVM calls and data structs    */
   19 /*     * tcgmsg.c        ---- TCGMSG calls source                            */
   20 /*     * tcgmsg.h        ---- Include file for TCGMSG calls and data structs */
   21 /*     * udapl.c         ---- uDAPL calls source                             */
   22 /*     * knem.c          ---- knem calls source                              */
   23 /*     * vmsplice.c      ---- vmsplice calls source                          */
   24 /*****************************************************************************/
   25 
   26 #include "netpipe.h"
   27 
   28 #if defined(MPLITE)
   29 #include "mplite.h" /* Included for the malloc wrapper to protect from */
   30 #endif
   31 
   32 
   33 extern char *optarg;
   34 
   35 int main(int argc, char **argv)
   36 {
   37     FILE        *out;           /* Output data file                          */
   38     char        s[255],s2[255],delim[255],*pstr; /* Generic strings          */
   39     int         *memcache;      /* used to flush cache                       */
   40 
   41     int         len_buf_align,  /* meaningful when args.cache is 0. buflen   */
   42                                 /* rounded up to be divisible by 8           */
   43                 num_buf_align;  /* meaningful when args.cache is 0. number   */
   44                                 /* of aligned buffers in memtmp              */
   45 
   46     int         c,              /* option index                              */
   47                 i, j, n, nq,    /* Loop indices                              */
   48                 asyncReceive=0, /* Pre-post a receive buffer?                */
   49                 bufalign=16*1024,/* Boundary to align buffer to              */
   50                 errFlag,        /* Error occurred in inner testing loop      */
   51                 nrepeat,        /* Number of time to do the transmission     */
   52                 nrepeat_const=0,/* Set if we are using a constant nrepeat    */
   53                 len,            /* Number of bytes to be transmitted         */
   54                 inc=0,          /* Increment value                           */
   55                 perturbation=DEFPERT, /* Perturbation value                  */
   56                 pert,
   57                 start= 1,       /* Starting value for signature curve        */
   58                 end=MAXINT,     /* Ending value for signature curve          */
   59                 streamopt=0,    /* Streaming mode flag                       */
   60                 reset_connection,/* Reset the connection between trials      */
   61         debug_wait=0;   /* spin and wait for a debugger          */
   62    
   63     ArgStruct   args;           /* Arguments for all the calls               */
   64 
   65     double      t, t0, t1, t2,  /* Time variables                            */
   66                 tlast,          /* Time for the last transmission            */
   67                 latency;        /* Network message latency                   */
   68 
   69     Data        bwdata[NSAMP];  /* Bandwidth curve data                      */
   70 
   71     int         integCheck=0;   /* Integrity check                           */
   72 
   73     /* Initialize vars that may change from default due to arguments */
   74 
   75     strcpy(s, "np.out");   /* Default output file */
   76 
   77     /* Let modules initialize related vars, and possibly call a library init
   78        function that requires argc and argv */
   79 
   80 
   81     Init(&args, &argc, &argv);   /* This will set args.tr and args.rcv */
   82 
   83     args.preburst = 0; /* Default to not bursting preposted receives */
   84     args.bidir = 0; /* Turn bi-directional mode off initially */
   85     args.cache = 1; /* Default to use cache */
   86     args.upper = end;
   87     args.host  = NULL;
   88     args.soffset=0; /* default to no offsets */
   89     args.roffset=0; 
   90     args.syncflag=0; /* use normal mpi_send */
   91     args.use_sdp=0; /* default to no SDP */
   92     args.port = DEFPORT; /* just in case the user doesn't set this. */
   93 
   94 
   95     /* TCGMSG launches NPtcgmsg with a -master master_hostname
   96      * argument, so ignore all arguments and set them manually 
   97      * in netpipe.c instead.
   98      */
   99 
  100 #if ! defined(TCGMSG)
  101 
  102     /* Parse the arguments. See Usage for description */
  103     while ((c = getopt(argc, argv, "AXSO:rIiszgfaB2h:p:o:l:u:b:m:n:t:c:d:D:P:")) != -1)
  104     {
  105         switch(c)
  106         {
  107         case 'A':
  108               args.use_sdp=1;
  109               break;
  110             case 'O':
  111                       strcpy(s2,optarg);
  112                       strcpy(delim,",");
  113                       if((pstr=strtok(s2,delim))!=NULL) {
  114                          args.soffset=atoi(pstr);
  115                          if((pstr=strtok((char *)NULL,delim))!=NULL)
  116                             args.roffset=atoi(pstr);
  117                          else /* only got one token */
  118                             args.roffset=args.soffset;
  119                       } else {
  120                          args.soffset=0; args.roffset=0;
  121                       }
  122                       printf("Transmit buffer offset: %d\nReceive buffer offset: %d\n",args.soffset,args.roffset);
  123                       break;
  124             case 'p': perturbation = atoi(optarg);
  125                       if( perturbation > 0 ) {
  126                          printf("Using a perturbation value of %d\n\n", perturbation);
  127                       } else {
  128                          perturbation = 0;
  129                          printf("Using no perturbations\n\n");
  130                       }
  131                       break;
  132 
  133             case 'B': if(integCheck == 1) {
  134                         fprintf(stderr, "Integrity check not supported with prepost burst\n");
  135                         exit(-1);
  136                       }
  137                       args.preburst = 1;
  138                       asyncReceive = 1;
  139                       printf("Preposting all receives before a timed run.\n");
  140                       printf("Some would consider this cheating,\n");
  141                       printf("but it is needed to match some vendor tests.\n"); fflush(stdout);
  142                       break;
  143 
  144             case 'I': args.cache = 0;
  145                       printf("Performance measured without cache effects\n\n"); fflush(stdout);
  146                       break;
  147 
  148             case 'o': strcpy(s,optarg);
  149                       printf("Sending output to %s\n", s); fflush(stdout);
  150                       break;
  151 
  152             case 's': streamopt = 1;
  153                       printf("Streaming in one direction only.\n\n");
  154 #if defined(TCP) && !defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT) 
  155                       printf("Sockets are reset between trials to avoid\n");
  156                       printf("degradation from a collapsing window size.\n\n");
  157 #endif
  158                       args.reset_conn = 1;
  159                       printf("Streaming does not provide an accurate\n");
  160                       printf("measurement of the latency since small\n");
  161                       printf("messages may get bundled together.\n\n");
  162                       if( args.bidir == 1 ) {
  163                         printf("You can't use -s and -2 together\n");
  164                         exit(0);
  165                       }
  166                       fflush(stdout);
  167                       break;
  168 
  169             case 'l': start = atoi(optarg);
  170                       if (start < 1)
  171                       {
  172                         fprintf(stderr,"Need a starting value >= 1\n");
  173                         exit(0);
  174                       }
  175                       break;
  176 
  177             case 'u': end = atoi(optarg);
  178                       break;
  179 
  180 #if defined(TCP) && ! defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
  181             case 'b': /* -b # resets the buffer size, -b 0 keeps system defs */
  182                       args.prot.sndbufsz = args.prot.rcvbufsz = atoi(optarg);
  183                       break;
  184 #endif
  185 
  186             case '2': args.bidir = 1;    /* Both procs are transmitters */
  187                          /* end will be maxed at sndbufsz+rcvbufsz */
  188                       printf("Passing data in both directions simultaneously.\n");
  189                       printf("Output is for the combined bandwidth.\n");
  190 #if defined(TCP) && ! defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
  191                       printf("The socket buffer size limits the maximum test size.\n\n");
  192 #endif
  193                       if( streamopt ) {
  194                         printf("You can't use -s and -2 together\n");
  195                         exit(0);
  196                       }
  197                       break;
  198 
  199             case 'h': args.tr = 1;       /* -h implies transmit node */
  200                       args.rcv = 0;
  201                       args.host = (char *)malloc(strlen(optarg)+1);
  202                       strcpy(args.host, optarg);
  203                       break;
  204 
  205 #ifdef DISK
  206             case 'd': args.tr = 1;      /* -d to specify input/output file */
  207                       args.rcv = 0;
  208                       args.prot.read = 0;
  209                       args.prot.read_type = 'c';
  210                       args.prot.dfile_name = (char *)malloc(strlen(optarg)+1);
  211                       strcpy(args.prot.dfile_name, optarg);
  212                       break;
  213 
  214             case 'D': if( optarg[0] == 'r' )
  215                          args.prot.read = 1;
  216                       else
  217                          args.prot.read = 0;
  218                       args.prot.read_type = optarg[1];
  219                       break;
  220 #endif
  221 
  222             case 'i': if(args.preburst == 1) {
  223                         fprintf(stderr, "Integrity check not supported with prepost burst\n");
  224                         exit(-1);
  225                       }
  226                       integCheck = 1;
  227                       perturbation = 0;
  228                       start = sizeof(int)+1; /* Start with integer size */
  229                       printf("Doing an integrity check instead of measuring performance\n"); fflush(stdout);
  230                       break;
  231 
  232 #if defined(MPI)
  233             case 'z': args.source_node = -1;
  234                       printf("Receive using the ANY_SOURCE flag\n"); fflush(stdout);
  235                       break;
  236 
  237             case 'a': asyncReceive = 1;
  238                       printf("Preposting asynchronous receives\n"); fflush(stdout);
  239                       break;
  240 
  241             case 'S': args.syncflag=1;
  242                       fprintf(stderr,"Using synchronous sends\n");
  243                       break;
  244 #endif
  245 #if defined(MPI2)
  246             case 'g': if(args.prot.no_fence == 1) {
  247                         fprintf(stderr, "-f cannot be used with -g\n");
  248                         exit(-1);
  249                       } 
  250                       args.prot.use_get = 1;
  251                       printf("Using MPI-2 Get instead of Put\n");
  252                       break;
  253 
  254             case 'f': if(args.prot.use_get == 1) {
  255                          fprintf(stderr, "-f cannot be used with -g\n");
  256                          exit(-1);
  257                       }
  258                       args.prot.no_fence = 1;
  259                       bufalign = 0;
  260                       printf("Buffer alignment off (Required for no fence)\n");
  261                       break;
  262 #endif /* MPI2 */
  263 
  264 #if defined(INFINIBAND) && !defined(DAT)
  265             case 'm': switch(atoi(optarg)) {
  266                         case 256: args.prot.ib_mtu = MTU256;
  267                           break;
  268                         case 512: args.prot.ib_mtu = MTU512;
  269                           break;
  270                         case 1024: args.prot.ib_mtu = MTU1024;
  271                           break;
  272                         case 2048: args.prot.ib_mtu = MTU2048;
  273                           break;
  274                         case 4096: args.prot.ib_mtu = MTU4096;
  275                           break;
  276                         default: 
  277                           fprintf(stderr, "Invalid MTU size, must be one of "
  278                                           "256, 512, 1024, 2048, 4096\n");
  279                           exit(-1);
  280                       }
  281                       break;
  282 #endif
  283 
  284 #if defined(OPENIB) && !defined(DAT)
  285             case 'm': switch(atoi(optarg)) {
  286                         case 256: args.prot.ib_mtu = IBV_MTU_256;
  287                           break;
  288                         case 512: args.prot.ib_mtu = IBV_MTU_512;
  289                           break;
  290                         case 1024: args.prot.ib_mtu = IBV_MTU_1024;
  291                           break;
  292                         case 2048: args.prot.ib_mtu = IBV_MTU_2048;
  293                           break;
  294                         case 4096: args.prot.ib_mtu = IBV_MTU_4096;
  295                           break;
  296                         default: 
  297                           fprintf(stderr, "Invalid MTU size, must be one of "
  298                                           "256, 512, 1024, 2048, 4096\n");
  299                           exit(-1);
  300                       }
  301                       break;
  302 #endif
  303 
  304 #if defined(OPENIB)
  305             case 'D': args.prot.device_and_port = strdup(optarg);
  306                       break;
  307 #endif
  308 
  309 #if ( defined(OPENIB) || defined(INFINIBAND) ) && !defined(DAT)
  310             case 't': if( !strcmp(optarg, "send_recv") ) {
  311                          printf("Using Send/Receive communications\n");
  312                          args.prot.commtype = NP_COMM_SENDRECV;
  313                       } else if( !strcmp(optarg, "send_recv_with_imm") ) {
  314                          printf("Using Send/Receive communications with immediate data\n");
  315                          args.prot.commtype = NP_COMM_SENDRECV_WITH_IMM;
  316                       } else if( !strcmp(optarg, "rdma_write") ) {
  317                          printf("Using RDMA Write communications\n");
  318                          args.prot.commtype = NP_COMM_RDMAWRITE;
  319                       } else if( !strcmp(optarg, "rdma_write_with_imm") ) {
  320                          printf("Using RDMA Write communications with immediate data\n");
  321                          args.prot.commtype = NP_COMM_RDMAWRITE_WITH_IMM;
  322                       } else {
  323                          fprintf(stderr, "Invalid transfer type "
  324                                  "specified, please choose one of:\n\n"
  325                                  "\tsend_recv\t\tUse Send/Receive communications\t(default)\n"
  326                                  "\tsend_recv_with_imm\tSame as above with immediate data\n"
  327                                  "\trdma_write\t\tUse RDMA Write communications\n"
  328                                  "\trdma_write_with_imm\tSame as above with immediate data\n\n");
  329                          exit(-1);
  330                       }
  331                       break;
  332 
  333             case 'c': if( !strcmp(optarg, "local_poll") ) {
  334                          printf("Using local polling completion\n");
  335                          args.prot.comptype = NP_COMP_LOCALPOLL;
  336                       } else if( !strcmp(optarg, "vapi_poll") ) {
  337                          printf("Using VAPI polling completion\n");
  338                          args.prot.comptype = NP_COMP_VAPIPOLL;
  339                       } else if( !strcmp(optarg, "event") ) {
  340                          printf("Using VAPI event completion\n");
  341                          args.prot.comptype = NP_COMP_EVENT;
  342                       } else {
  343                          fprintf(stderr, "Invalid completion type specified, "
  344                                  "please choose one of:\n\n"
  345                                  "\tlocal_poll\tWait for last byte of data\t(default)\n"
  346                                  "\tvapi_poll\tUse VAPI polling function\n"
  347                                  "\tevent\t\tUse VAPI event handling function\n\n");
  348                          exit(-1);
  349                       }
  350                       break;
  351 #endif
  352 
  353 #if defined(DAT)
  354             case 't': if( !strcmp(optarg, "send_recv") ) {
  355                          printf("Using Send/Receive communications\n");
  356                          args.prot.commtype = NP_COMM_SENDRECV;
  357                       } else if( !strcmp(optarg, "rdma_write") ) {
  358                          printf("Using RDMA Write communications\n");
  359                          args.prot.commtype = NP_COMM_RDMAWRITE;
  360                       } else {
  361                          fprintf(stderr, "Invalid transfer type "
  362                                  "specified, please choose one of:\n\n"
  363                                  "\tsend_recv\t\tUse Send/Receive communications\t(default)\n"
  364                                  "\trdma_write\t\tUse RDMA Write communications\n");
  365                          exit(-1);
  366                       }
  367                       break;
  368 
  369             case 'c': if( !strcmp(optarg, "local_poll") ) {
  370                          printf("Using local polling completion\n");
  371                          args.prot.comptype = NP_COMP_LOCALPOLL;
  372                       } else if( !strcmp(optarg, "dq_poll") ) {
  373                          printf("Using EVD Dequeue to poll for completion\n");
  374                          args.prot.comptype = NP_COMP_DQPOLL;
  375                       } else if( !strcmp(optarg, "evd_wait") ) {
  376                          printf("Using EVD Wait for completion\n");
  377                          args.prot.comptype = NP_COMP_EVD;
  378                       } else if( !strcmp(optarg, "cno_wait") ) {
  379                          printf("Using CNO Wait for completion\n");
  380                          args.prot.comptype = NP_COMP_CNO;
  381                       } else {
  382                          fprintf(stderr, "Invalid completion type specified, "
  383                                  "please choose one of:\n\n"
  384                                  "\tlocal_poll\tWait for last byte of data (default)\n"
  385                                  "\tdq_poll\t\tUse EVD Dequeue to poll for completion\n"
  386                                  "\tevd_wait\tUse EVD wait for completion events\n"
  387                                  "\tcno_wait\tUse CNO wait for completion events\n\n");
  388                          exit(-1);
  389                       }
  390                       break;
  391 #endif
  392 
  393 #if defined(KNEM)
  394             case 't': if( !strcasecmp(optarg, "copy") ) {
  395                          printf("Using software copy mode\n");
  396                          args.prot.flags = 0;
  397                       } else if( !strcasecmp(optarg, "dma") ) {
  398                          printf("Using DMA copy mode\n");
  399                          args.prot.flags = KNEM_FLAG_DMA;
  400                       } else {
  401                          fprintf(stderr, "Invalid transfer type "
  402                                  "specified, please choose one of:\n\n"
  403                                  "\tcopy\t\tUse software copy mode\t(default)\n"
  404                                  "\tdma\t\tUse DMA copy mode\n");
  405                          exit(-1);
  406                       }
  407               break;
  408 
  409 #endif
  410 
  411         case 'P': 
  412               args.port = atoi(optarg);
  413               break;
  414 
  415             case 'n': nrepeat_const = atoi(optarg);
  416                       break;
  417 
  418 #if defined(TCP) && ! defined(INFINIBAND) && !defined(OPENIB)
  419             case 'r': args.reset_conn = 1;
  420                       printf("Resetting connection after every trial\n");
  421                       break;
  422 #endif
  423         case 'X': debug_wait = 1;
  424               printf("Enableing debug wait!\n");
  425               printf("Attach to pid %d and set debug_wait to 0 to conttinue\n", getpid());
  426               break;
  427 
  428             default: 
  429                      PrintUsage(); 
  430                      exit(-12);
  431        }
  432    }
  433 
  434    while(debug_wait){
  435        for(i=0;i<10000;i++){};
  436     };
  437 #endif /* ! defined TCGMSG */
  438 
  439 #if defined(OPENIB) || defined(INFINIBAND) || defined(DAT)
  440 
  441 #if defined(DAT)
  442    // if using RDMA_WRITE, no need to prepost receives since no receive requests are 
  443    // consumed by the RDMA_WRITE operations. however, if using a completion notification
  444    // method other than LOCAL_POLL along with RDMA_WRITES, the handshake receive packet
  445    // will need to be preposted.
  446    if ( (NP_COMM_RDMAWRITE == args.prot.commtype) && (NP_COMP_LOCALPOLL == args.prot.comptype))
  447    {
  448        asyncReceive = 0;
  449    } 
  450    else
  451    {
  452        asyncReceive = 1;
  453        fprintf(stderr, "Preposting asynchronous receive.\n");
  454    }
  455 #else
  456    asyncReceive = 1;
  457    fprintf(stderr, "Preposting asynchronous receives (required for Infiniband)\n");
  458 #endif
  459 
  460    if(args.bidir && (
  461           (args.cache && args.prot.commtype == NP_COMM_RDMAWRITE) || /* rdma_write only works with no-cache mode */
  462           (!args.preburst && args.prot.commtype != NP_COMM_RDMAWRITE) || /* anything besides rdma_write requires prepost burst */
  463           (args.preburst && args.prot.comptype == NP_COMP_LOCALPOLL && args.cache) || /* preburst with local polling in cache mode doesn't work */
  464           0)) {
  465 
  466       fprintf(stderr, 
  467          "\n"
  468          "Bi-directional mode currently only works with a subset of the\n"
  469          "Infiniband options. Restrictions are:\n"
  470          "\n"
  471          "  RDMA write (-t rdma_write) requires no-cache mode (-I).\n"
  472          "\n"
  473          "  Local polling (-c local_poll, default if no -c given) requires\n"
  474          "    no-cache mode (-I), and if not using RDMA write communication,\n"
  475          "    burst mode (-B).\n"
  476          "\n"
  477          "  Any other communication type and any other completion type\n"
  478          "    require burst mode (-B). No-cache mode (-I) may be used\n"
  479          "    optionally.\n"
  480          "\n"
  481          "  All other option combinations will fail.\n"
  482          "\n");
  483                
  484       exit(-1);      
  485 
  486    }
  487 #endif
  488 
  489    if (start > end)
  490    {
  491        fprintf(stderr, "Start MUST be LESS than end\n");
  492        exit(420132);
  493    }
  494    args.nbuff = TRIALS;
  495 
  496    Setup(&args);
  497 
  498    if( args.bidir && end > args.upper ) {
  499       end = args.upper;
  500       if( args.tr ) {
  501          printf("The upper limit is being set to %d Bytes\n", end);
  502 #if defined(TCP) && ! defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
  503          printf("due to socket buffer size limitations\n\n");
  504 #endif
  505    }  }
  506 
  507 #if defined(GM)
  508 
  509    if(streamopt && (!nrepeat_const || nrepeat_const > args.prot.num_stokens)) {
  510      printf("\nGM is currently limited by the driver software to %d\n", 
  511             args.prot.num_stokens);
  512      printf("outstanding sends. The number of repeats will be set\n");
  513      printf("to this limit for every trial in streaming mode.  You\n");
  514      printf("may use the -n switch to set a smaller number of repeats\n\n");
  515 
  516      nrepeat_const = args.prot.num_stokens;
  517    }
  518 
  519 #endif
  520 
  521    if( args.tr )                     /* Primary transmitter */
  522    {
  523        if ((out = fopen(s, "w")) == NULL)
  524        {
  525            fprintf(stderr,"Can't open %s for output\n", s);
  526            exit(1);
  527        }
  528    }
  529    else out = stdout;
  530 
  531       /* Set a starting value for the message size increment. */
  532 
  533    inc = (start > 1) ? start / 2 : 1;
  534    nq = (start > 1) ? 1 : 0;
  535 
  536       /* Test the timing to set tlast for the first test */
  537 
  538    args.bufflen = start;
  539    MyMalloc(&args, args.bufflen, 0, 0);
  540    InitBufferData(&args, args.bufflen, 0, 0);
  541 
  542    if(args.cache) args.s_buff = args.r_buff;
  543    
  544    args.r_ptr = args.r_buff_orig = args.r_buff;
  545    args.s_ptr = args.s_buff_orig = args.s_buff;
  546       
  547    AfterAlignmentInit(&args);  /* MPI-2 needs this to create a window */
  548 
  549    /* Infiniband requires use of asynchronous communications, so we need
  550     * the PrepareToReceive calls below
  551     */
  552    if( asyncReceive )
  553       PrepareToReceive(&args);
  554    
  555    Sync(&args);    /* Sync to prevent race condition in armci module */
  556 
  557    /* For simplicity's sake, even if the real test below will be done in
  558     * bi-directional mode, we still do the ping-pong one-way-at-a-time test
  559     * here to estimate the one-way latency. Unless it takes significantly
  560     * longer to send data in both directions at once than it does to send data
  561     * one way at a time, this shouldn't be too far off anyway.
  562     */
  563    t0 = When();
  564       for( n=0; n<100; n++) {
  565          if( args.tr) {
  566             SendData(&args);
  567             RecvData(&args);
  568             if( asyncReceive && n<99 )
  569                PrepareToReceive(&args);
  570          } else if( args.rcv) {
  571             RecvData(&args);
  572             if( asyncReceive && n<99 )
  573                PrepareToReceive(&args);
  574             SendData(&args);
  575          }
  576       }
  577    tlast = (When() - t0)/200;
  578 
  579    /* Sync up and Reset before freeing the buffers */
  580 
  581    Sync(&args); 
  582 
  583    Reset(&args);
  584    
  585    /* Free the buffers and any other module-specific resources. */
  586    if(args.cache)
  587       FreeBuff(args.r_buff_orig, NULL);
  588    else
  589       FreeBuff(args.r_buff_orig, args.s_buff_orig);
  590 
  591       /* Do setup for no-cache mode, using two distinct buffers. */
  592 
  593    if (!args.cache)
  594    {
  595 
  596        /* Allocate dummy pool of memory to flush cache with */
  597 
  598        if ( (memcache = (int *)malloc(MEMSIZE)) == NULL)
  599        {
  600            perror("malloc");
  601            exit(1);
  602        }
  603        mymemset(memcache, 0, MEMSIZE/sizeof(int)); 
  604 
  605        /* Allocate large memory pools */
  606 
  607        MyMalloc(&args, MEMSIZE+bufalign, args.soffset, args.roffset); 
  608 
  609        /* Save buffer addresses */
  610        
  611        args.s_buff_orig = args.s_buff;
  612        args.r_buff_orig = args.r_buff;
  613 
  614        /* Align buffers */
  615 
  616        args.s_buff = AlignBuffer(args.s_buff, bufalign);
  617        args.r_buff = AlignBuffer(args.r_buff, bufalign);
  618 
  619        /* Post alignment initialization */
  620 
  621        AfterAlignmentInit(&args);
  622 
  623        /* Initialize send buffer pointer */
  624        
  625 /* both soffset and roffset should be zero if we don't have any offset stuff, so this should be fine */
  626        args.s_ptr = args.s_buff+args.soffset;
  627        args.r_ptr = args.r_buff+args.roffset;
  628    }
  629 
  630        /**************************
  631         * Main loop of benchmark *
  632         **************************/
  633 
  634    if( args.tr ) fprintf(stderr,"Now starting the main loop\n");
  635 
  636    for ( n = 0, len = start, errFlag = 0; 
  637         n < NSAMP - 3 && tlast < STOPTM && len <= end && !errFlag; 
  638         len = len + inc, nq++ )
  639    {
  640 
  641            /* Exponentially increase the block size.  */
  642 
  643        if (nq > 2) inc = ((nq % 2))? inc + inc: inc;
  644        
  645           /* This is a perturbation loop to test nearby values */
  646 
  647        for (pert = ((perturbation > 0) && (inc > perturbation+1)) ? -perturbation : 0;
  648             pert <= perturbation; 
  649             n++, pert += ((perturbation > 0) && (inc > perturbation+1)) ? perturbation : perturbation+1)
  650        {
  651 
  652            Sync(&args);    /* Sync to prevent race condition in armci module */
  653 
  654                /* Calculate how many times to repeat the experiment. */
  655 
  656            if( args.tr )
  657            {
  658                if (nrepeat_const) {
  659                    nrepeat = nrepeat_const;
  660 /*               } else if (len == start) {*/
  661 /*                   nrepeat = MAX( RUNTM/( 0.000020 + start/(8*1000) ), TRIALS);*/
  662                } else {
  663                    nrepeat = MAX((RUNTM / (((double)args.bufflen /
  664                                   (args.bufflen - inc + 1.0)) * tlast)),TRIALS);
  665                }
  666                SendRepeat(&args, nrepeat);
  667            }
  668            else if( args.rcv )
  669            {
  670                RecvRepeat(&args, &nrepeat);
  671            }
  672 
  673            args.bufflen = len + pert;
  674 
  675            if( args.tr )
  676                fprintf(stderr,"%3d: %7d bytes %6d times --> ",
  677                        n,args.bufflen,nrepeat);
  678 
  679            if (args.cache) /* Allow cache effects.  We use only one buffer */
  680            {
  681                /* Allocate the buffer with room for alignment*/
  682 
  683                MyMalloc(&args, args.bufflen+bufalign, args.soffset, args.roffset); 
  684 
  685                /* Save buffer address */
  686 
  687                args.r_buff_orig = args.r_buff;
  688                args.s_buff_orig = args.r_buff;
  689 
  690                /* Align buffer */
  691 
  692                args.r_buff = AlignBuffer(args.r_buff, bufalign);
  693                args.s_buff = args.r_buff;
  694                
  695                /* Initialize buffer with data
  696                 *
  697                 * NOTE: The buffers should be initialized with some sort of
  698                 * valid data, whether it is actually used for anything else,
  699                 * to get accurate results.  Performance increases noticeably
  700                 * if the buffers are left uninitialized, but this does not
  701                 * give very useful results as realworld apps tend to actually
  702                 * have data stored in memory.  We are not sure what causes
  703                 * the difference in performance at this time.
  704                 */
  705 
  706                InitBufferData(&args, args.bufflen, args.soffset, args.roffset);
  707 
  708 
  709                /* Post-alignment initialization */
  710 
  711                AfterAlignmentInit(&args);
  712 
  713                /* Initialize buffer pointers (We use r_ptr and s_ptr for
  714                 * compatibility with no-cache mode, as this makes the code
  715                 * simpler) 
  716                 */
  717                /* offsets are zero by default so this saves an #ifdef */
  718                args.r_ptr = args.r_buff+args.roffset;
  719                args.s_ptr = args.r_buff+args.soffset;
  720 
  721            }
  722            else /* Eliminate cache effects.  We use two distinct buffers */
  723            {
  724 
  725                /* this isn't truly set up for offsets yet */
  726                /* Size of an aligned memory block including trailing padding */
  727 
  728                len_buf_align = args.bufflen;
  729                if(bufalign != 0)
  730                  len_buf_align += bufalign - args.bufflen % bufalign;
  731  
  732                /* Initialize the buffers with data
  733                 *
  734                 * See NOTE above.
  735                 */
  736                InitBufferData(&args, MEMSIZE, args.soffset, args.roffset); 
  737                
  738 
  739                /* Reset buffer pointers to beginning of pools */
  740                args.r_ptr = args.r_buff+args.roffset;
  741                args.s_ptr = args.s_buff+args.soffset;
  742             }
  743 
  744             bwdata[n].t = LONGTIME;
  745 /*            t2 = t1 = 0;*/
  746 
  747             /* Finally, we get to transmit or receive and time */
  748 
  749             /* NOTE: If a module is running that uses only one process (e.g.
  750              * memcpy), we assume that it will always have the args.tr flag
  751              * set.  Thus we make some special allowances in the transmit 
  752              * section that are not in the receive section.
  753              */
  754 
  755             if( args.tr || args.bidir )
  756             {
  757                 /*
  758                    This is the transmitter: send the block TRIALS times, and
  759                    if we are not streaming, expect the receiver to return each
  760                    block.
  761                 */
  762 
  763                 for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
  764                 {                    
  765                     if(args.preburst && asyncReceive && !streamopt)
  766                     {
  767 
  768                       /* We need to save the value of the recv ptr so
  769                        * we can reset it after we do the preposts, in case
  770                        * the module needs to use the same ptr values again
  771                        * so it can wait on the last byte to change to indicate
  772                        * the recv is finished.
  773                        */
  774 
  775                       SaveRecvPtr(&args);
  776 
  777                       for(j=0; j<nrepeat; j++)
  778                       {
  779                         PrepareToReceive(&args);
  780                         if(!args.cache)
  781                           AdvanceRecvPtr(&args, len_buf_align);
  782                       }
  783 
  784                       ResetRecvPtr(&args);
  785                     }
  786 
  787                     /* Flush the cache using the dummy buffer */
  788                     if (!args.cache)
  789                       flushcache(memcache, MEMSIZE/sizeof(int));
  790 
  791                     Sync(&args);
  792 
  793                     t0 = When();
  794 
  795                     for (j = 0; j < nrepeat; j++)
  796                     {
  797                         if (!args.preburst && asyncReceive && !streamopt)
  798                         {
  799                             PrepareToReceive(&args);
  800                         }
  801 
  802                         if (integCheck) SetIntegrityData(&args);
  803 
  804                         SendData(&args);
  805 
  806                         if (!streamopt)
  807                         {
  808                             RecvData(&args);
  809 
  810                             if (integCheck) VerifyIntegrity(&args);
  811 
  812                             if(!args.cache)
  813                               AdvanceRecvPtr(&args, len_buf_align);
  814 
  815                         }
  816                         
  817                         /* Wait to advance send pointer in case RecvData uses
  818                          * it (e.g. memcpy module).
  819                          */
  820                         if (!args.cache)
  821                           AdvanceSendPtr(&args, len_buf_align);
  822 
  823                     }
  824 
  825                        /* t is the 1-directional transmission time */
  826 
  827                     t = (When() - t0)/ nrepeat;
  828 
  829                     if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */
  830 
  831                     Reset(&args);
  832 
  833 /* NOTE: NetPIPE does each data point TRIALS times, bouncing the message
  834  * nrepeats times for each trial, then reports the lowest of the TRIALS
  835  * times.  -Dave Turner
  836  */
  837                     bwdata[n].t = MIN(bwdata[n].t, t);
  838 /*                    t1 += t;*/
  839 /*                    t2 += t*t;*/
  840                 }
  841 
  842                 if (streamopt){  /* Get time info from Recv node */
  843                     RecvTime(&args, &bwdata[n].t);
  844 /*                    RecvTime(&args, &t1);*/
  845 /*                    RecvTime(&args, &t2);*/
  846                 }
  847 
  848                    /* Calculate variance after completing this set of trials */
  849 
  850 /*                bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;*/
  851 
  852             }
  853             else if( args.rcv )
  854             {
  855                 /*
  856                    This is the receiver: receive the block TRIALS times, and
  857                    if we are not streaming, send the block back to the
  858                    sender.
  859                 */
  860                 for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
  861                 {
  862                     if (asyncReceive)
  863                     {
  864                        if (args.preburst)
  865                        {
  866 
  867                          /* We need to save the value of the recv ptr so
  868                           * we can reset it after we do the preposts, in case
  869                           * the module needs to use the same ptr values again
  870                           * so it can wait on the last byte to change to 
  871                           * indicate the recv is finished.
  872                           */
  873 
  874                          SaveRecvPtr(&args);
  875 
  876                          for (j=0; j < nrepeat; j++)
  877                          {
  878                               PrepareToReceive(&args);
  879                               if (!args.cache)
  880                                  AdvanceRecvPtr(&args, len_buf_align);
  881                          }
  882                          
  883                          ResetRecvPtr(&args);
  884                          
  885                        }
  886                        else
  887                        {
  888                            PrepareToReceive(&args);
  889                        }
  890                       
  891                     }
  892                     
  893                     /* Flush the cache using the dummy buffer */
  894                     if (!args.cache)
  895                       flushcache(memcache, MEMSIZE/sizeof(int));
  896 
  897                     Sync(&args);
  898 
  899                     t0 = When();
  900                     for (j = 0; j < nrepeat; j++)
  901                     {
  902                         RecvData(&args);
  903 
  904                         if (integCheck) VerifyIntegrity(&args);
  905 
  906                         if (!args.cache)
  907                         { 
  908                             AdvanceRecvPtr(&args, len_buf_align);
  909                         }
  910                         
  911                         if (!args.preburst && asyncReceive && (j < nrepeat-1))
  912                         {
  913                             PrepareToReceive(&args);
  914                         }
  915 
  916                         if (!streamopt)
  917                         {
  918                             if (integCheck) SetIntegrityData(&args);
  919                             
  920                             SendData(&args);
  921 
  922                             if(!args.cache) 
  923                               AdvanceSendPtr(&args, len_buf_align);
  924                         }
  925 
  926                     }
  927                     t = (When() - t0)/ nrepeat;
  928 
  929                     if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */
  930 
  931                     Reset(&args);
  932                     
  933                     bwdata[n].t = MIN(bwdata[n].t, t);
  934 /*                    t1 += t;*/
  935 /*                    t2 += t*t;*/
  936                 }
  937                 if (streamopt){  /* Recv proc calcs time and sends to Trans */
  938                     SendTime(&args, &bwdata[n].t);
  939 /*                    SendTime(&args, &t1);*/
  940 /*                    SendTime(&args, &t2);*/
  941                 }
  942             }
  943             else  /* Just going along for the ride */
  944             {
  945                 for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
  946                 {
  947                     Sync(&args);
  948                 }
  949             }
  950 
  951             /* Streaming mode doesn't really calculate correct latencies
  952              * for small message sizes, and on some nics we can get
  953              * zero second latency after doing the math.  Protect against
  954              * this.
  955              */
  956             if(bwdata[n].t == 0.0) {
  957               bwdata[n].t = 0.000001;
  958             }
  959             
  960             tlast = bwdata[n].t;
  961             bwdata[n].bits = args.bufflen * CHARSIZE * (1+args.bidir);
  962             bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024);
  963             bwdata[n].repeat = nrepeat;
  964             
  965             if (args.tr)
  966             {
  967                 if(integCheck) {
  968                   fprintf(out,"%8d %d", bwdata[n].bits / 8, nrepeat);
  969 
  970                 } else {
  971                   fprintf(out,"%8d %lf %12.8lf",
  972                         bwdata[n].bits / 8, bwdata[n].bps, bwdata[n].t);
  973 
  974                 }
  975                 fprintf(out, "\n");
  976                 fflush(out);
  977             }
  978     
  979             /* Free using original buffer addresses since we may have aligned
  980                r_buff and s_buff */
  981 
  982             if (args.cache)
  983                 FreeBuff(args.r_buff_orig, NULL);
  984             
  985             if ( args.tr ) {
  986                if(integCheck) {
  987                  fprintf(stderr, " Integrity check passed\n");
  988 
  989                } else {
  990                  fprintf(stderr," %8.2lf Mbps in %10.2lf usec\n", 
  991                          bwdata[n].bps, tlast*1.0e6);
  992                }
  993             }
  994 
  995 
  996         } /* End of perturbation loop */
  997 
  998     } /* End of main loop  */
  999  
 1000    /* Free using original buffer addresses since we may have aligned
 1001       r_buff and s_buff */
 1002 
 1003    if (!args.cache) {
 1004         FreeBuff(args.s_buff_orig, args.r_buff_orig);
 1005    }
 1006     if (args.tr) fclose(out);
 1007          
 1008     CleanUp(&args);
 1009     return 0;
 1010 }
 1011 
 1012 
 1013 /* Return the current time in seconds, using a double precision number.      */
 1014 double When()
 1015 {
 1016     struct timeval tp;
 1017     gettimeofday(&tp, NULL);
 1018     return ((double) tp.tv_sec + (double) tp.tv_usec * 1e-6);
 1019 }
 1020 
 1021 /* 
 1022  * The mymemset() function fills the first n integers of the memory area 
 1023  * pointed to by ptr with the constant integer c. 
 1024  */
 1025 void mymemset(int *ptr, int c, int n)  
 1026 {
 1027     int i;
 1028 
 1029     for (i = 0; i < n; i++) 
 1030         *(ptr + i) = c;
 1031 }
 1032 
 1033 /* Read the first n integers of the memmory area pointed to by ptr, to flush  
 1034  * out the cache   
 1035  */
 1036 void flushcache(int *ptr, int n)
 1037 {
 1038    static int flag = 0;
 1039    int    i; 
 1040 
 1041    flag = (flag + 1) % 2; 
 1042    if ( flag == 0) 
 1043        for (i = 0; i < n; i++)
 1044            *(ptr + i) = *(ptr + i) + 1;
 1045    else
 1046        for (i = 0; i < n; i++) 
 1047            *(ptr + i) = *(ptr + i) - 1; 
 1048     
 1049 }
 1050 
 1051 /* For integrity check, set each integer-sized block to the next consecutive
 1052  * integer, starting with the value 0 in the first block, and so on.  Earlier
 1053  * we made sure the memory allocated for the buffer is of size i*sizeof(int) +
 1054  * 1 so there is an extra byte that can be used as a flag to detect the end
 1055  * of a receive.
 1056  */
 1057 void SetIntegrityData(ArgStruct *p)
 1058 {
 1059   int i;
 1060   int num_segments;
 1061 
 1062   num_segments = p->bufflen / sizeof(int);
 1063 
 1064   for(i=0; i<num_segments; i++) {
 1065 
 1066     *( (int*)p->s_ptr + i ) = i;
 1067 
 1068   }
 1069 }
 1070 
 1071 void VerifyIntegrity(ArgStruct *p)
 1072 {
 1073   int i;
 1074   int num_segments;
 1075   int integrityVerified = 1;
 1076 
 1077   num_segments = p->bufflen / sizeof(int);
 1078 
 1079   for(i=0; i<num_segments; i++) {
 1080 
 1081     if( *( (int*)p->r_ptr + i )  != i ) {
 1082 
 1083       integrityVerified = 0;
 1084       break;
 1085 
 1086     }
 1087 
 1088   }
 1089 
 1090 
 1091   if(!integrityVerified) {
 1092     
 1093     fprintf(stderr, "Integrity check failed: Expecting %d but received %d\n",
 1094             i, *( (int*)p->r_ptr + i ) );
 1095 
 1096     /* Dump argstruct */
 1097     /*
 1098     fprintf(stderr, " args struct:\n");
 1099     fprintf(stderr, "  r_buff_orig %p [%c%c%c...]\n", p->r_buff_orig, p->r_buff_orig[i], p->r_buff_orig[i+1], p->r_buff_orig[i+2]);
 1100     fprintf(stderr, "  r_buff      %p [%c%c%c...]\n", p->r_buff,      p->r_buff[i],      p->r_buff[i+1],      p->r_buff[i+2]);
 1101     fprintf(stderr, "  r_ptr       %p [%c%c%c...]\n", p->r_ptr,       p->r_ptr[i],       p->r_ptr[i+1],       p->r_ptr[i+2]);
 1102     fprintf(stderr, "  s_buff_orig %p [%c%c%c...]\n", p->s_buff_orig, p->s_buff_orig[i], p->s_buff_orig[i+1], p->s_buff_orig[i+2]);
 1103     fprintf(stderr, "  s_buff      %p [%c%c%c...]\n", p->s_buff,      p->s_buff[i],      p->s_buff[i+1],      p->s_buff[i+2]);
 1104     fprintf(stderr, "  s_ptr       %p [%c%c%c...]\n", p->s_ptr,       p->s_ptr[i],       p->s_ptr[i+1],       p->s_ptr[i+2]);
 1105     */
 1106     exit(-1);
 1107 
 1108   }
 1109 
 1110 }  
 1111     
 1112 void PrintUsage()
 1113 {
 1114     printf("\n NETPIPE USAGE \n\n");
 1115 #if ! defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
 1116     printf("a: asynchronous receive (a.k.a. preposted receive)\n");
 1117 #endif
 1118     printf("B: burst all preposts before measuring performance\n");
 1119 #if (defined(TCP) || defined(TCP6)) && ! defined(INFINIBAND) && !defined(DAT)
 1120     printf("b: specify TCP send/receive socket buffer sizes\n");
 1121 #endif
 1122 
 1123 #if defined(INFINIBAND) || defined(OPENIB)
 1124     printf("c: specify type of completion <-c type>\n"
 1125            "   valid types: local_poll, vapi_poll, event\n"
 1126            "   default: local_poll\n");
 1127 #endif
 1128 
 1129 #if defined(DAT)
 1130     printf("c: specify type of completion <-c type>\n"
 1131            "   valid types: local_poll, dq_poll, evd_wait, cno_wait\n"
 1132            "   default: local_poll\n");
 1133 #endif
 1134     
 1135 #if defined(MPI2)
 1136     printf("g: use get instead of put\n");
 1137     printf("f: do not use fence during timing segment; may not work with\n");
 1138     printf("   all MPI-2 implementations\n");
 1139 #endif
 1140 
 1141 #if defined(TCP) || defined(TCP6) || defined(SCTP) || defined(SCTP6) || defined(INFINIBAND) || defined(OPENIB) || defined(DAT)
 1142     printf("h: specify hostname of the receiver <-h host>\n");
 1143 #endif
 1144 
 1145     printf("I: Invalidate cache (measure performance without cache effects).\n"
 1146            "   This simulates data coming from main memory instead of cache.\n");
 1147     printf("i: Do an integrity check instead of measuring performance\n");
 1148     printf("l: lower bound start value e.g. <-l 1>\n");
 1149 
 1150 #if defined(INFINIBAND) || defined(OPENIB)
 1151     printf("m: set MTU for Infiniband adapter <-m mtu_size>\n");
 1152     printf("   valid sizes: 256, 512, 1024, 2048, 4096 (default 1024)\n");
 1153 #endif
 1154 
 1155     printf("n: Set a constant value for number of repeats <-n 50>\n");
 1156     printf("o: specify output filename <-o filename>\n");
 1157     printf("O: specify transmit and optionally receive buffer offsets <-O 1,3>\n");
 1158     printf("p: set the perturbation number <-p 1>\n"
 1159            "   (default = 3 Bytes, set to 0 for no perturbations)\n");
 1160 
 1161 #if (defined(TCP) || defined(TCP6) || defined(SCTP) || defined(SCTP6)) && ! defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
 1162     printf("r: reset sockets for every trial\n");
 1163 #endif
 1164 
 1165     printf("s: stream data in one direction only.\n");
 1166 #if defined(MPI)
 1167     printf("S: Use synchronous sends.\n");
 1168 #endif
 1169 
 1170 #if defined(INFINIBAND) || defined(OPENIB)
 1171     printf("t: specify type of communications <-t type>\n"
 1172            "   valid types: send_recv, send_recv_with_imm,\n"
 1173            "                rdma_write, rdma_write_with_imm\n"
 1174            "   default: send_recv\n");
 1175 #endif
 1176 #if defined(OPENIB)
 1177     printf("D: specify an OpenFabrics device/port combination\n"
 1178            "   to use on the local host.  For example:\n"
 1179            "      -D mthca0:1\n"
 1180            "   Uses the first port on the \"mthca0\" device\n"
 1181            "   (NOTE: ports are indexed from 1, not 0)\n"
 1182            "      -D mthca1\n"
 1183            "   Uses the first active port on the mtcha1 device\n"
 1184            "   No specification will result in using the first\n"
 1185            "   active port on any valid device.\n");
 1186 #endif
 1187 
 1188 #if defined(DAT)
 1189     printf("t: specify type of communications <-t type>\n"
 1190            "   valid types: send_recv, rdma_write\n"
 1191            "   default: send_recv\n");
 1192 #endif
 1193     
 1194 #if defined(KNEM)
 1195     printf("t: specify type of communications <-t type>\n"
 1196            "   valid types: copy, dma\n"
 1197            "   default: copy\n");
 1198 #endif
 1199 
 1200 #if defined(KNEM) || defined(VMSPLICE)
 1201     printf("h: specify a filename to use to synchronize\n"
 1202            "   When run with this option, two copies of Netpipe\n"
 1203            "   must be invoked on the same machine with the\n"
 1204            "   -h argument values.\n");
 1205 #endif
 1206 
 1207     printf("u: upper bound stop value e.g. <-u 1048576>\n");
 1208  
 1209 #if defined(MPI)
 1210     printf("z: receive messages using the MPI_ANY_SOURCE flag\n");
 1211 #endif
 1212 
 1213     printf("2: Send data in both directions at the same time.\n");
 1214     printf("P: Set the port number to one other than the default.\n");
 1215 #if defined(MPI)
 1216     printf("   May need to use -a to choose asynchronous communications for MPI/n");
 1217 #endif
 1218 #if (defined(TCP) || defined(TCP6) || defined(SCTP) || defined (SCTP6)) && !defined(INFINIBAND) && !defined(OPENIB) && !defined(DAT)
 1219     printf("   The maximum test size is limited by the TCP buffer size\n");
 1220 #endif
 1221 #if defined(TCP)
 1222     printf("A: Use SDP Address familty (AF_INET_SDP)\n");
 1223 #endif
 1224     printf("\n");
 1225 }
 1226 
 1227 void* AlignBuffer(void* buff, int boundary)
 1228 {
 1229   if(boundary == 0)
 1230     return buff;
 1231   else
 1232     /* char* typecast required for cc on IRIX */
 1233     return ((char*)buff) + (boundary - ((unsigned long)buff % boundary) );
 1234 }
 1235 
 1236 void AdvanceSendPtr(ArgStruct* p, int blocksize)
 1237 {
 1238   /* Move the send buffer pointer forward if there is room */
 1239 
 1240   if(p->s_ptr + blocksize < p->s_buff + MEMSIZE - blocksize)
 1241     
 1242     p->s_ptr += blocksize;
 1243 
 1244   else /* Otherwise wrap around to the beginning of the aligned buffer */
 1245 
 1246     p->s_ptr = p->s_buff;
 1247 }
 1248 
 1249 void AdvanceRecvPtr(ArgStruct* p, int blocksize)
 1250 {
 1251   /* Move the send buffer pointer forward if there is room */
 1252 
 1253   if(p->r_ptr + blocksize < p->r_buff + MEMSIZE - blocksize)
 1254     
 1255     p->r_ptr += blocksize;
 1256 
 1257   else /* Otherwise wrap around to the beginning of the aligned buffer */
 1258 
 1259     p->r_ptr = p->r_buff;
 1260 }
 1261 
 1262 void SaveRecvPtr(ArgStruct* p)
 1263 {
 1264   /* Typecast prevents warning about loss of volatile qualifier */
 1265 
 1266   p->r_ptr_saved = (void*)p->r_ptr; 
 1267 }
 1268 
 1269 void ResetRecvPtr(ArgStruct* p)
 1270 {
 1271   p->r_ptr = p->r_ptr_saved;
 1272 }
 1273 
 1274 /* This is generic across all modules */
 1275 void InitBufferData(ArgStruct *p, int nbytes, int soffset, int roffset)
 1276 {
 1277   memset(p->r_buff, 'a', nbytes+MAX(soffset,roffset));
 1278 
 1279   /* If using cache mode, then we need to initialize the last byte
 1280    * to the proper value since the transmitter and receiver are waiting
 1281    * on different values to determine when the message has completely
 1282    * arrive.
 1283    */   
 1284   if(p->cache)
 1285 
 1286     p->r_buff[(nbytes+MAX(soffset,roffset))-1] = 'a' + p->tr;
 1287 
 1288   /* If using no-cache mode, then we have distinct send and receive
 1289    * buffers, so the send buffer starts out containing different values
 1290    * from the receive buffer
 1291    */
 1292   else
 1293 
 1294     memset(p->s_buff, 'b', nbytes+soffset);
 1295 }
 1296 #if !defined(OPENIB) && !defined(INFINIBAND) && !defined(DAT) && !defined(ARMCI) && !defined(LAPI) && !defined(GPSHMEM) && !defined(SHMEM) && !defined(GM) 
 1297 
 1298 void MyMalloc(ArgStruct *p, int bufflen, int soffset, int roffset)
 1299 {
 1300     if((p->r_buff=(char *)malloc(bufflen+MAX(soffset,roffset)))==(char *)NULL)
 1301     {
 1302         fprintf(stderr,"couldn't allocate memory for receive buffer\n");
 1303         exit(-1);
 1304     }
 1305        /* if pcache==1, use cache, so this line happens only if flushing cache */
 1306     
 1307     if(!p->cache) /* Allocate second buffer if limiting cache */
 1308       if((p->s_buff=(char *)malloc(bufflen+soffset))==(char *)NULL)
 1309       {
 1310           fprintf(stderr,"couldn't allocate memory for send buffer\n");
 1311           exit(-1);
 1312       }
 1313 }
 1314 
 1315 void FreeBuff(char *buff1, char *buff2)
 1316 {
 1317   if(buff1 != NULL)
 1318 
 1319    free(buff1);
 1320 
 1321 
 1322   if(buff2 != NULL)
 1323 
 1324    free(buff2);
 1325 }
 1326 
 1327 #endif