"Fossies" - the Fresh Open Source Software Archive

Member "openmpi-4.0.4/ompi/mca/io/romio321/romio/adio/common/ad_aggregate.c" (10 Jun 2020, 22497 Bytes) of package /linux/misc/openmpi-4.0.4.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page as HTML using (guessed) C and C++ syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ad_aggregate.c" see the Fossies "Dox" file reference documentation.

    1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
    2 /* 
    3  *   Copyright (C) 1997-2001 University of Chicago. 
    4  *   See COPYRIGHT notice in top-level directory.
    5  */
    6 
    7 #include "adio.h"
    8 #include "adio_extern.h"
    9 
   10 #ifdef AGGREGATION_PROFILE
   11 #include "mpe.h"
   12 #endif
   13 
   14 #undef AGG_DEBUG
   15 
   16 /* This file contains four functions:
   17  *
   18  * ADIOI_Calc_aggregator()
   19  * ADIOI_Calc_file_domains()
   20  * ADIOI_Calc_my_req()
   21  * ADIOI_Calc_others_req()
   22  *
   23  * The last three of these were originally in ad_read_coll.c, but they are
   24  * also shared with ad_write_coll.c.  I felt that they were better kept with
   25  * the rest of the shared aggregation code.  
   26  */
   27 
   28 /* Discussion of values available from above:
   29  *
   30  * ADIO_Offset st_offsets[0..nprocs-1]
   31  * ADIO_Offset end_offsets[0..nprocs-1]
   32  *    These contain a list of start and end offsets for each process in 
   33  *    the communicator.  For example, an access at loc 10, size 10 would
   34  *    have a start offset of 10 and end offset of 19.
   35  * int nprocs
   36  *    number of processors in the collective I/O communicator
   37  * ADIO_Offset min_st_offset
   38  * ADIO_Offset fd_start[0..nprocs_for_coll-1]
   39  *    starting location of "file domain"; region that a given process will
   40  *    perform aggregation for (i.e. actually do I/O)
   41  * ADIO_Offset fd_end[0..nprocs_for_coll-1]
   42  *    start + size - 1 roughly, but it can be less, or 0, in the case of 
   43  *    uneven distributions
   44  */
   45 
   46 /* ADIOI_Calc_aggregator()
   47  *
   48  * The intention here is to implement a function which provides basically 
   49  * the same functionality as in Rajeev's original version of 
   50  * ADIOI_Calc_my_req().  He used a ceiling division approach to assign the 
   51  * file domains, and we use the same approach here when calculating the
   52  * location of an offset/len in a specific file domain.  Further we assume
   53  * this same distribution when calculating the rank_index, which is later
   54  *  used to map to a specific process rank in charge of the file domain.
   55  *
   56  * A better (i.e. more general) approach would be to use the list of file
   57  * domains only.  This would be slower in the case where the
   58  * original ceiling division was used, but it would allow for arbitrary
   59  * distributions of regions to aggregators.  We'd need to know the 
   60  * nprocs_for_coll in that case though, which we don't have now.
   61  *
   62  * Note a significant difference between this function and Rajeev's old code:
   63  * this code doesn't necessarily return a rank in the range
   64  * 0..nprocs_for_coll; instead you get something in 0..nprocs.  This is a
   65  * result of the rank mapping; any set of ranks in the communicator could be
   66  * used now.
   67  *
   68  * Returns an integer representing a rank in the collective I/O communicator.
   69  *
   70  * The "len" parameter is also modified to indicate the amount of data
   71  * actually available in this file domain.
   72  */
   73 int ADIOI_Calc_aggregator(ADIO_File fd,
   74              ADIO_Offset off, 
   75              ADIO_Offset min_off, 
   76              ADIO_Offset *len, 
   77              ADIO_Offset fd_size,
   78              ADIO_Offset *fd_start,
   79              ADIO_Offset *fd_end)
   80 {
   81     int rank_index, rank;
   82     ADIO_Offset avail_bytes;
   83 
   84     ADIOI_UNREFERENCED_ARG(fd_start);
   85 
   86     /* get an index into our array of aggregators */
   87     rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
   88 
   89     if (fd->hints->striping_unit > 0) {
   90         /* wkliao: implementation for file domain alignment
   91            fd_start[] and fd_end[] have been aligned with file lock
   92        boundaries when returned from ADIOI_Calc_file_domains() so cannot
   93        just use simple arithmatic as above */
   94         rank_index = 0;
   95         while (off > fd_end[rank_index]) rank_index++;
   96     }
   97 
   98     /* we index into fd_end with rank_index, and fd_end was allocated to be no
   99      * bigger than fd->hins->cb_nodes.   If we ever violate that, we're
  100      * overrunning arrays.  Obviously, we should never ever hit this abort */
  101     if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
  102         FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
  103             rank_index,fd->hints->cb_nodes,fd_size,off);
  104         MPI_Abort(MPI_COMM_WORLD, 1);
  105     }
  106 
  107     /* remember here that even in Rajeev's original code it was the case that
  108      * different aggregators could end up with different amounts of data to
  109      * aggregate.  here we use fd_end[] to make sure that we know how much
  110      * data this aggregator is working with.  
  111      *
  112      * the +1 is to take into account the end vs. length issue.
  113      */
  114     avail_bytes = fd_end[rank_index] + 1 - off;
  115     if (avail_bytes < *len) {
  116     /* this file domain only has part of the requested contig. region */
  117     *len = avail_bytes;
  118     }
  119 
  120     /* map our index to a rank */
  121     /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
  122     rank = fd->hints->ranklist[rank_index];
  123 
  124     return rank;
  125 }
  126 
/* ADIOI_Calc_file_domains() - partition the aggregate access range
 * [min(st_offsets), max(end_offsets)] into one "file domain" per
 * aggregator.
 *
 * Outputs: *min_st_offset_ptr, *fd_size_ptr, and newly allocated
 * *fd_start_ptr / *fd_end_ptr arrays (nprocs_for_coll entries each;
 * ownership passes to the caller) holding each aggregator's inclusive
 * [start, end] byte range.  Domains that fall entirely beyond the access
 * range are marked with start = end = -1.
 */
void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset
                 *end_offsets, int nprocs, int nprocs_for_coll,
                 ADIO_Offset *min_st_offset_ptr,
                 ADIO_Offset **fd_start_ptr, ADIO_Offset 
                 **fd_end_ptr, int min_fd_size, 
                 ADIO_Offset *fd_size_ptr,
                 int striping_unit)
{
/* Divide the I/O workload among "nprocs_for_coll" processes. This is
   done by (logically) dividing the file into file domains (FDs); each
   process may directly access only its own file domain. */

    ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, fd_size;
    int i;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5004, 0, NULL);
#endif

#ifdef AGG_DEBUG
    FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n", 
        nprocs_for_coll);
#endif

/* find min of start offsets and max of end offsets of all processes */

    min_st_offset = st_offsets[0];
    max_end_offset = end_offsets[0];

    for (i=1; i<nprocs; i++) {
    min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
    max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
    }

/* determine the "file domain (FD)" of each process, i.e., the portion of
   the file that will be "owned" by each process */

/* partition the total file access range equally among nprocs_for_coll
   processes */ 
    fd_size = ((max_end_offset - min_st_offset + 1) + nprocs_for_coll -
           1)/nprocs_for_coll; 
    /* ceiling division as in HPF block distribution */

    /* Tweak the file domains so that no fd is smaller than a threshold.  We
     * have to strike a balance between efficiency and parallelism: somewhere
     * between 10k processes sending 32-byte requests and one process sending a
     * 320k request is a (system-dependent) sweet spot */

    if (fd_size < min_fd_size)
    fd_size = min_fd_size;

    *fd_start_ptr = (ADIO_Offset *)
    ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); 
    *fd_end_ptr = (ADIO_Offset *)
    ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); 

    fd_start = *fd_start_ptr;
    fd_end = *fd_end_ptr;

    /* Wei-keng Liao: implementation for file domain alignment to nearest file
     * lock boundary (as specified by striping_unit hint).  Could also
     * experiment with other alignment strategies here */
    if (striping_unit > 0) {
        ADIO_Offset end_off;
        int         rem_front, rem_back;

        /* align fd_end[0] to the nearest file lock boundary: snap the
         * nominal domain end either backward (by rem_front) or forward (by
         * rem_back), whichever adjustment is smaller */
        fd_start[0] = min_st_offset;
        end_off     = fd_start[0] + fd_size;
        rem_front   = end_off % striping_unit;
        rem_back    = striping_unit - rem_front;
        if (rem_front < rem_back) 
        end_off -= rem_front;
        else                      
        end_off += rem_back;
        fd_end[0] = end_off - 1;
    
        /* align fd_end[i] to the nearest file lock boundary; note each
         * fd_start[i] chains off the previous (aligned) fd_end so the
         * domains remain contiguous and non-overlapping */
        for (i=1; i<nprocs_for_coll; i++) {
            fd_start[i] = fd_end[i-1] + 1;
            end_off     = min_st_offset + fd_size * (i+1);
            rem_front   = end_off % striping_unit;
            rem_back    = striping_unit - rem_front;
            if (rem_front < rem_back) 
            end_off -= rem_front;
            else                      
            end_off += rem_back;
            fd_end[i] = end_off - 1;
        }
        /* the last domain always extends to the end of the access range */
        fd_end[nprocs_for_coll-1] = max_end_offset;
    }
    else { /* no hints set: do things the 'old' way */
        fd_start[0] = min_st_offset;
        fd_end[0] = min_st_offset + fd_size - 1;

        for (i=1; i<nprocs_for_coll; i++) {
            fd_start[i] = fd_end[i-1] + 1;
            fd_end[i] = fd_start[i] + fd_size - 1;
        }
    }

/* take care of cases in which the total file access range is not
   divisible by the number of processes. In such cases, the last
   process, or the last few processes, may have unequal load (even 0).
   For example, a range of 97 divided among 16 processes.
   Note that the division is ceiling division. */

    for (i=0; i<nprocs_for_coll; i++) {
    if (fd_start[i] > max_end_offset)
        fd_start[i] = fd_end[i] = -1;
    if (fd_end[i] > max_end_offset)
        fd_end[i] = max_end_offset;
    }

    *fd_size_ptr = fd_size;
    *min_st_offset_ptr = min_st_offset;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5005, 0, NULL);
#endif
}
  248 
  249 
/* ADIOI_Calc_my_req() - calculate what portions of the access requests
 * of this process are located in the file domains of various processes
 * (including this one)
 *
 * Works in two passes: the first counts how many contiguous pieces land
 * in each aggregator's file domain (so exact allocation sizes are known),
 * the second fills in the per-aggregator offset/length lists.
 *
 * Outputs (all allocated here; caller frees):
 *   *count_my_req_procs_ptr    - number of aggregators this process talks to
 *   *count_my_req_per_proc_ptr - per-rank piece counts (size nprocs)
 *   *my_req_ptr                - per-rank offset/length lists (size nprocs)
 *   *buf_idx_ptr               - per-rank index into the user buffer where
 *                                that rank's data begins (-1 if none);
 *                                meaningful only for contiguous buftypes
 */
void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, 
               int contig_access_count, ADIO_Offset 
               min_st_offset, ADIO_Offset *fd_start,
               ADIO_Offset *fd_end, ADIO_Offset fd_size,
                       int nprocs,
                       int *count_my_req_procs_ptr,
               int **count_my_req_per_proc_ptr,
               ADIOI_Access **my_req_ptr,
               int **buf_idx_ptr)
/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets? 
   They are used as memory buffer indices so it seems like the 2G limit is in effect */
{
    int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
    int i, l, proc;
    ADIO_Offset fd_len, rem_len, curr_idx, off;
    ADIOI_Access *my_req;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5024, 0, NULL);
#endif

    *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); 
    count_my_req_per_proc = *count_my_req_per_proc_ptr;
/* count_my_req_per_proc[i] gives the no. of contig. requests of this
   process in process i's file domain. calloc initializes to zero.
   I'm allocating memory of size nprocs, so that I can do an 
   MPI_Alltoall later on.*/

    buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));
/* buf_idx is relevant only if buftype_is_contig.
   buf_idx[i] gives the index into user_buf where data received
   from proc. i should be placed. This allows receives to be done
   without extra buffer. This can't be done if buftype is not contig. */
   
    /* initialize buf_idx to -1 (meaning "no data for this rank yet") */
    for (i=0; i < nprocs; i++) buf_idx[i] = -1;

    /* one pass just to calculate how much space to allocate for my_req;
     * contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
     */
    for (i=0; i < contig_access_count; i++) {
    /* short circuit offset/len processing if len == 0 
     *  (zero-byte read/write) */
    if (len_list[i] == 0) 
        continue;
    off = offset_list[i];
    fd_len = len_list[i];
    /* note: we set fd_len to be the total size of the access.  then
     * ADIOI_Calc_aggregator() will modify the value to return the 
     * amount that was available from the file domain that holds the
     * first part of the access.
     */
    proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, 
                     fd_start, fd_end);
    count_my_req_per_proc[proc]++;

    /* figure out how much data is remaining in the access (i.e. wasn't 
     * part of the file domain that had the starting byte); we'll take 
     * care of this data (if there is any) in the while loop below.
     */
    rem_len = len_list[i] - fd_len;

    while (rem_len != 0) {
        off += fd_len; /* point to first remaining byte */
        fd_len = rem_len; /* save remaining size, pass to calc */
        proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, 
                     fd_size, fd_start, fd_end);

        count_my_req_per_proc[proc]++;
        rem_len -= fd_len; /* reduce remaining length by amount from fd */
    }
    }

/* now allocate space for my_req, offset, and len */

    *my_req_ptr = (ADIOI_Access *)
    ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); 
    my_req = *my_req_ptr;

    count_my_req_procs = 0;
    for (i=0; i < nprocs; i++) {
    if (count_my_req_per_proc[i]) {
        my_req[i].offsets = (ADIO_Offset *)
        ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset));
        my_req[i].lens =
        ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset));
        count_my_req_procs++;
    }       
    my_req[i].count = 0;  /* will be incremented where needed
                      later */
    }

/* now fill in my_req: second pass repeats the same domain walk, recording
   each piece's offset/length under the aggregator that owns it.  curr_idx
   tracks the running position within the (contiguous) user buffer. */
    curr_idx = 0;
    for (i=0; i<contig_access_count; i++) { 
    /* short circuit offset/len processing if len == 0 
     *  (zero-byte read/write) */
    if (len_list[i] == 0)
        continue;
    off = offset_list[i];
    fd_len = len_list[i];
    proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, 
                     fd_start, fd_end);

    /* for each separate contiguous access from this process: remember
     * where in the user buffer this aggregator's data starts, the first
     * time we send it anything (assert guards the int truncation) */
    if (buf_idx[proc] == -1) 
  {
    ADIOI_Assert(curr_idx == (int) curr_idx);
    buf_idx[proc] = (int) curr_idx;
  }

    l = my_req[proc].count;
    curr_idx += fd_len; 

    rem_len = len_list[i] - fd_len;

    /* store the proc, offset, and len information in an array
         * of structures, my_req. Each structure contains the 
         * offsets and lengths located in that process's FD, 
     * and the associated count. 
     */
    my_req[proc].offsets[l] = off;
    my_req[proc].lens[l] = fd_len;
    my_req[proc].count++;

    while (rem_len != 0) {
        off += fd_len;
        fd_len = rem_len;
        proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, 
                     fd_size, fd_start, fd_end);

        if (buf_idx[proc] == -1) 
      {
        ADIOI_Assert(curr_idx == (int) curr_idx);
        buf_idx[proc] = (int) curr_idx;
      }

        l = my_req[proc].count;
        curr_idx += fd_len;
        rem_len -= fd_len;

        my_req[proc].offsets[l] = off;
        my_req[proc].lens[l] = fd_len;
        my_req[proc].count++;
    }
    }

#ifdef AGG_DEBUG
    for (i=0; i<nprocs; i++) {
    if (count_my_req_per_proc[i] > 0) {
        FPRINTF(stdout, "data needed from %d (count = %d):\n", i, 
            my_req[i].count);
        for (l=0; l < my_req[i].count; l++) {
        FPRINTF(stdout, "   off[%d] = %lld, len[%d] = %d\n", l,
            my_req[i].offsets[l], l, my_req[i].lens[l]);
        }
    FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
    }
    }
#endif

    *count_my_req_procs_ptr = count_my_req_procs;
    *buf_idx_ptr = buf_idx;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5025, 0, NULL);
#endif
}
  421 
  422 
  423 
  424 void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, 
  425                 int *count_my_req_per_proc,
  426                 ADIOI_Access *my_req, 
  427                 int nprocs, int myrank,
  428                 int *count_others_req_procs_ptr,
  429                 ADIOI_Access **others_req_ptr)  
  430 {
  431 /* determine what requests of other processes lie in this process's
  432    file domain */
  433 
  434 /* count_others_req_procs = number of processes whose requests lie in
  435    this process's file domain (including this process itself) 
  436    count_others_req_per_proc[i] indicates how many separate contiguous
  437    requests of proc. i lie in this process's file domain. */
  438 
  439     int *count_others_req_per_proc, count_others_req_procs;
  440     int i, j;
  441     MPI_Request *requests;
  442     MPI_Status *statuses;
  443     ADIOI_Access *others_req;
  444 
  445 /* first find out how much to send/recv and from/to whom */
  446 #ifdef AGGREGATION_PROFILE
  447     MPE_Log_event (5026, 0, NULL);
  448 #endif
  449     count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
  450 
  451     MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
  452          count_others_req_per_proc, 1, MPI_INT, fd->comm);
  453 
  454     *others_req_ptr = (ADIOI_Access *)
  455     ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); 
  456     others_req = *others_req_ptr;
  457 
  458     count_others_req_procs = 0;
  459     for (i=0; i<nprocs; i++) {
  460     if (count_others_req_per_proc[i]) {
  461         others_req[i].count = count_others_req_per_proc[i];
  462         others_req[i].offsets = (ADIO_Offset *)
  463         ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
  464         others_req[i].lens =
  465         ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
  466         others_req[i].mem_ptrs = (MPI_Aint *)
  467         ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); 
  468         count_others_req_procs++;
  469     }
  470     else others_req[i].count = 0;
  471     }
  472     
  473 /* now send the calculated offsets and lengths to respective processes */
  474 
  475     requests = (MPI_Request *)
  476     ADIOI_Malloc(1+2*(count_my_req_procs+count_others_req_procs)*sizeof(MPI_Request)); 
  477 /* +1 to avoid a 0-size malloc */
  478 
  479     j = 0;
  480     for (i=0; i<nprocs; i++) {
  481     if (others_req[i].count) {
  482         MPI_Irecv(others_req[i].offsets, others_req[i].count, 
  483                       ADIO_OFFSET, i, i+myrank, fd->comm, &requests[j]);
  484         j++;
  485         MPI_Irecv(others_req[i].lens, others_req[i].count, 
  486                       ADIO_OFFSET, i, i+myrank+1, fd->comm, &requests[j]);
  487         j++;
  488     }
  489     }
  490 
  491     for (i=0; i < nprocs; i++) {
  492     if (my_req[i].count) {
  493         MPI_Isend(my_req[i].offsets, my_req[i].count, 
  494                       ADIO_OFFSET, i, i+myrank, fd->comm, &requests[j]);
  495         j++;
  496         MPI_Isend(my_req[i].lens, my_req[i].count, 
  497                       ADIO_OFFSET, i, i+myrank+1, fd->comm, &requests[j]);
  498         j++;
  499     }
  500     }
  501 
  502     if (j) {
  503     statuses = (MPI_Status *) ADIOI_Malloc(j * sizeof(MPI_Status));
  504     MPI_Waitall(j, requests, statuses);
  505     ADIOI_Free(statuses);
  506     }
  507 
  508     ADIOI_Free(requests);
  509     ADIOI_Free(count_others_req_per_proc);
  510 
  511     *count_others_req_procs_ptr = count_others_req_procs;
  512 #ifdef AGGREGATION_PROFILE
  513     MPE_Log_event (5027, 0, NULL);
  514 #endif
  515 }
  516 
  517 
  518 /* Nonblocking version of ADIOI_Calc_others_req().
  519    It consists of three functions - ADIOI_Icalc_others_req(),
  520    ADIOI_Icalc_others_req_main(), and ADIOI_Icalc_others_req_fini(). */
  521 void ADIOI_Icalc_others_req(ADIOI_NBC_Request *nbc_req, int *error_code)
  522 {
  523     ADIOI_Icalc_others_req_vars *vars = nbc_req->cor_vars;
  524 
  525     /* count_others_req_per_proc[i] indicates how many separate contiguous
  526        requests of proc. i lie in this process's file domain. */
  527 
  528     /* first find out how much to send/recv and from/to whom */
  529 #ifdef AGGREGATION_PROFILE
  530     MPE_Log_event(5026, 0, NULL);
  531 #endif
  532     vars->count_others_req_per_proc =
  533         (int *)ADIOI_Malloc(vars->nprocs * sizeof(int));
  534 
  535     *error_code = MPI_Ialltoall(vars->count_my_req_per_proc, 1, MPI_INT,
  536             vars->count_others_req_per_proc, 1, MPI_INT, vars->fd->comm,
  537             &vars->req1);
  538 
  539     if (nbc_req->rdwr == ADIOI_READ) {
  540         nbc_req->data.rd.state = ADIOI_IRC_STATE_ICALC_OTHERS_REQ;
  541     } else {
  542         ADIOI_Assert(nbc_req->rdwr == ADIOI_WRITE);
  543         nbc_req->data.wr.state = ADIOI_IWC_STATE_ICALC_OTHERS_REQ;
  544     }
  545 }
  546 
  547 void ADIOI_Icalc_others_req_main(ADIOI_NBC_Request *nbc_req, int *error_code)
  548 {
  549     ADIOI_Icalc_others_req_vars *vars = nbc_req->cor_vars;
  550     ADIO_File fd = vars->fd;
  551     int count_my_req_procs = vars->count_my_req_procs;
  552     ADIOI_Access *my_req = vars->my_req;
  553     int nprocs = vars->nprocs;
  554     int myrank = vars->myrank;
  555     ADIOI_Access **others_req_ptr = vars->others_req_ptr;
  556 
  557     /* determine what requests of other processes lie in this process's
  558        file domain */
  559 
  560     /* count_others_req_procs = number of processes whose requests lie in
  561        this process's file domain (including this process itself)
  562        count_others_req_per_proc[i] indicates how many separate contiguous
  563        requests of proc. i lie in this process's file domain. */
  564 
  565     int *count_others_req_per_proc = vars->count_others_req_per_proc;
  566     int count_others_req_procs;
  567     int i, j;
  568     ADIOI_Access *others_req;
  569 
  570     *others_req_ptr = (ADIOI_Access *)ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
  571     others_req = *others_req_ptr;
  572 
  573     count_others_req_procs = 0;
  574     for (i = 0; i < nprocs; i++) {
  575         if (count_others_req_per_proc[i]) {
  576             others_req[i].count = count_others_req_per_proc[i];
  577             others_req[i].offsets = (ADIO_Offset *)
  578                 ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
  579             others_req[i].lens =
  580                 ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
  581             others_req[i].mem_ptrs = (MPI_Aint *)
  582                 ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint));
  583             count_others_req_procs++;
  584         }
  585         else others_req[i].count = 0;
  586     }
  587     vars->count_others_req_procs = count_others_req_procs;
  588 
  589     /* now send the calculated offsets and lengths to respective processes */
  590 
  591     vars->req2 = (MPI_Request *)
  592         ADIOI_Malloc(1+2*(count_my_req_procs+count_others_req_procs)
  593                      *sizeof(MPI_Request));
  594     /* +1 to avoid a 0-size malloc */
  595 
  596     j = 0;
  597     for (i = 0; i < nprocs; i++) {
  598         if (others_req[i].count) {
  599             MPI_Irecv(others_req[i].offsets, others_req[i].count,
  600                     ADIO_OFFSET, i, i+myrank, fd->comm, &vars->req2[j]);
  601             j++;
  602             MPI_Irecv(others_req[i].lens, others_req[i].count,
  603                     ADIO_OFFSET, i, i+myrank+1, fd->comm, &vars->req2[j]);
  604             j++;
  605         }
  606     }
  607 
  608     for (i=0; i < nprocs; i++) {
  609         if (my_req[i].count) {
  610             MPI_Isend(my_req[i].offsets, my_req[i].count,
  611                     ADIO_OFFSET, i, i+myrank, fd->comm, &vars->req2[j]);
  612             j++;
  613             MPI_Isend(my_req[i].lens, my_req[i].count,
  614                     ADIO_OFFSET, i, i+myrank+1, fd->comm, &vars->req2[j]);
  615             j++;
  616         }
  617     }
  618 
  619     /* keep the number of requests */
  620     vars->num_req2 = j;
  621 
  622     if (nbc_req->rdwr == ADIOI_READ) {
  623         nbc_req->data.rd.state = ADIOI_IRC_STATE_ICALC_OTHERS_REQ_MAIN;
  624     } else {
  625         ADIOI_Assert(nbc_req->rdwr == ADIOI_WRITE);
  626         nbc_req->data.wr.state = ADIOI_IWC_STATE_ICALC_OTHERS_REQ_MAIN;
  627     }
  628 }
  629 
  630 void ADIOI_Icalc_others_req_fini(ADIOI_NBC_Request *nbc_req, int *error_code)
  631 {
  632     ADIOI_Icalc_others_req_vars *vars = nbc_req->cor_vars;
  633     void (*next_fn)(ADIOI_NBC_Request *, int *);
  634 
  635     ADIOI_Free(vars->req2);
  636     ADIOI_Free(vars->count_others_req_per_proc);
  637 
  638     *vars->count_others_req_procs_ptr = vars->count_others_req_procs;
  639 #ifdef AGGREGATION_PROFILE
  640     MPE_Log_event(5027, 0, NULL);
  641 #endif
  642     /* end of the calculation */
  643 
  644     next_fn = vars->next_fn;
  645 
  646     /* free the struct for parameters and variables */
  647     ADIOI_Free(vars);
  648     nbc_req->cor_vars = NULL;
  649 
  650     /* move to the next function */
  651     next_fn(nbc_req, error_code);
  652 }
  653