"Fossies" - the Fresh Open Source Software Archive

Member "openmpi-4.1.2/ompi/mca/io/romio321/romio/adio/common/ad_coll_build_req_new.c" (24 Nov 2021, 61180 Bytes) of package /linux/misc/openmpi-4.1.2.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ad_coll_build_req_new.c", see the Fossies "Dox" file reference documentation.

    1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
    2 /* 
    3  *
    4  *   Copyright (C) 1997 University of Chicago. 
    5  *   See COPYRIGHT notice in top-level directory.
    6  */
    7 
    8 #include <assert.h>
    9 #include "adio.h"
   10 #include "adio_extern.h"
   11 #ifdef AGGREGATION_PROFILE
   12 #include "mpe.h"
   13 #endif
   14 #include "heap-sort.h"
   15 
   16 /*
   17 #define DEBUG1
   18 #define DEBUG2
   19 #define DEBUG3
   20 */
   21 /* #define DEBUG_HEAP */
   22 
   23 #define DTYPE_SKIP
   24 
   25 #ifdef DEBUG3
   26 static char *off_type_name[MAX_OFF_TYPE] = {"TEMP_OFFSETS",
   27                      "REAL_OFFSETS"};
   28 #endif
   29 
   30 /* Simple function to return the size of the view_state. */
   31 static inline ADIO_Offset view_state_get_cur_sz(view_state *tmp_view_state_p,
   32                                                 int op_type)
   33 {
   34     flatten_state *tmp_state_p = NULL;
   35     switch(op_type)
   36     {   
   37         case TEMP_OFF:
   38             tmp_state_p = &(tmp_view_state_p->tmp_state);
   39             break;
   40         case REAL_OFF:
   41             tmp_state_p = &(tmp_view_state_p->cur_state);
   42             break;
   43         default:
   44             fprintf(stderr, "op_type invalid\n");
   45     }
   46     return tmp_state_p->cur_sz;
   47 }
   48 
   49 /* Simple function to return the len of the next piece of the view_state. */
   50 static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p,
   51                           int op_type)
   52 {
   53     flatten_state *tmp_state_p = NULL;
   54     switch(op_type)
   55     {
   56     case TEMP_OFF:
   57         tmp_state_p = &(tmp_view_state_p->tmp_state);
   58         break;
   59     case REAL_OFF:
   60         tmp_state_p = &(tmp_view_state_p->cur_state);
   61         break;
   62     default:
   63         fprintf(stderr, "op_type invalid\n");
   64     }
   65     return (ADIO_Offset) 
   66     tmp_view_state_p->flat_type_p->blocklens[tmp_state_p->idx] -
   67     tmp_state_p->cur_reg_off;
   68 }
   69 
   70 /* Add up to a region of a file view and no larger than a max size.
   71  * The view_state is always consistent with the abs_off and where the
   72  * index and cur_reg_off point to.  The regions should be coalesced if
   73  * possible later on. */
   74 static inline int view_state_add_region(
   75     ADIO_Offset max_sz,
   76     view_state *tmp_view_state_p, 
   77     ADIO_Offset *st_reg_p,
   78     ADIO_Offset *tmp_reg_sz_p,
   79     int op_type)
   80 {
   81     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
   82     flatten_state *tmp_state_p = NULL;
   83     int64_t data_sz = 0;
   84 
   85 #ifdef AGGREGATION_PROFILE
   86     /* MPE_Log_event (5020, 0, NULL); */
   87 #endif
   88 
   89     switch(op_type)
   90     {
   91     case TEMP_OFF:
   92         tmp_state_p = &(tmp_view_state_p->tmp_state);
   93         break;
   94     case REAL_OFF:
   95         tmp_state_p = &(tmp_view_state_p->cur_state);
   96         break;
   97     default:
   98         fprintf(stderr, "op_type invalid\n");
   99     }
  100 
  101     tmp_flat_type_p = tmp_view_state_p->flat_type_p;
  102 
  103     *st_reg_p = tmp_state_p->abs_off;
  104 
  105     /* Should be looking at some data (or it's a zero len blocklens
  106      * (i.e. placeholder). */
  107     assert(tmp_state_p->cur_reg_off != 
  108        tmp_flat_type_p->blocklens[tmp_state_p->idx]);
  109     /* Shouldn't have been called if the view_state is done. */
  110     assert(tmp_state_p->cur_sz != tmp_view_state_p->sz);
  111 
  112     /* Make sure we are not in a non-zero region in the flat_type */
  113     assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
  114     
  115 #ifdef DEBUG3
  116     fprintf(stderr, "view_state:(blocklens[%Ld]=%d,cur_reg_off=%Ld,"
  117         "max_sz=%Ld)\n", tmp_state_p->idx, 
  118         tmp_flat_type_p->blocklens[tmp_state_p->idx], 
  119         tmp_state_p->cur_reg_off, max_sz);
  120 #endif
  121 
  122     /* Can it add the whole piece? */
  123     if (tmp_flat_type_p->blocklens[tmp_state_p->idx] - 
  124     tmp_state_p->cur_reg_off <= max_sz)
  125     {
  126     data_sz = tmp_flat_type_p->blocklens[tmp_state_p->idx] -
  127             tmp_state_p->cur_reg_off;
  128 
  129     tmp_state_p->cur_sz += data_sz;
  130 
  131     /* Advance the abs_off to the beginning of the next piece */
  132     if (tmp_flat_type_p->count == 1)
  133     {
  134         assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
  135         tmp_state_p->abs_off += data_sz;
  136 #ifdef DEBUG3 
  137         fprintf(stderr, "view_state_add_region: %s contig type "
  138             "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
  139             off_type_name[op_type], tmp_state_p->abs_off - data_sz, 
  140             tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
  141 #endif
  142     }
  143     else
  144     { 
  145         /* Is this the last region in the datatype? */
  146         if (tmp_state_p->idx == (tmp_flat_type_p->count - 1))
  147         {
  148         tmp_state_p->abs_off += data_sz -
  149             tmp_flat_type_p->indices[tmp_flat_type_p->count-1] -
  150             tmp_flat_type_p->blocklens[tmp_flat_type_p->count-1] +
  151             tmp_view_state_p->ext;
  152 #ifdef DEBUG3
  153         fprintf(stderr, "view_state_add_region: %s last region for type "
  154             "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
  155             off_type_name[op_type], tmp_state_p->abs_off - data_sz, 
  156             tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
  157 #endif
  158         }
  159         else
  160         {
  161         tmp_state_p->abs_off += 
  162             tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
  163             (tmp_flat_type_p->indices[tmp_state_p->idx] +
  164              tmp_state_p->cur_reg_off);
  165 #ifdef DEBUG3
  166         fprintf(stderr, "view_state_add_region: %s inner region type "
  167             "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", 
  168             off_type_name[op_type], tmp_state_p->abs_off - 
  169             (tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
  170                     (tmp_flat_type_p->indices[tmp_state_p->idx] +
  171                      tmp_state_p->cur_reg_off)), tmp_state_p->abs_off, 
  172             tmp_state_p->cur_sz, data_sz);
  173 #endif
  174         }
  175         /* Increment idx to next non-zero region in the flat_type */
  176         do {
  177         tmp_state_p->idx = 
  178             (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
  179         } while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0);
  180     }
  181     tmp_state_p->cur_reg_off = 0;
  182     }
  183     else /* Add part of the piece */
  184     {
  185     data_sz = max_sz;
  186     tmp_state_p->cur_reg_off += data_sz;
  187     tmp_state_p->abs_off += data_sz;
  188     tmp_state_p->cur_sz += data_sz;
  189 #ifdef DEBUG3 
  190         fprintf(stderr, "view_state_add_region: %s partial region type "
  191             "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n", 
  192             off_type_name[op_type], tmp_state_p->cur_reg_off, 
  193             tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
  194 #endif
  195     }
  196 
  197     *tmp_reg_sz_p = data_sz;
  198 #ifdef AGGREGATION_PROFILE
  199     /* MPE_Log_event (5021, 0, NULL); */
  200 #endif
  201     return 0;
  202 }
  203 
  204 /* Set up the abs_off, idx, and cur_reg_off of a view_state for the
  205  * tmp_state or the cur_state. */
  206 int ADIOI_init_view_state(int file_ptr_type,
  207             int nprocs, 
  208             view_state *view_state_arr,
  209             int op_type)
  210 {
  211     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
  212     ADIO_Offset tmp_off_used = 0, st_reg = 0, tmp_reg_sz = 0;
  213     int i;
  214     flatten_state *tmp_state_p = NULL;
  215     view_state *tmp_view_p = NULL;
  216 
  217     for (i = 0; i < nprocs; i++)
  218     {
  219     switch(op_type)
  220     {
  221         case TEMP_OFF:
  222         tmp_state_p = &(view_state_arr[i].tmp_state);
  223         break;
  224         case REAL_OFF:
  225         tmp_state_p = &(view_state_arr[i].cur_state);
  226         break;
  227         default:
  228         fprintf(stderr, "op_type invalid\n");
  229     }
  230     
  231     tmp_view_p = &(view_state_arr[i]);
  232     tmp_flat_type_p = tmp_view_p->flat_type_p;
  233 
  234     if (file_ptr_type == ADIO_INDIVIDUAL)
  235         tmp_state_p->abs_off = tmp_view_p->fp_ind;
  236     else
  237         tmp_state_p->abs_off = tmp_view_p->disp;
  238     
  239     tmp_off_used = 0;
  240 
  241     /* initialize tmp_state idx */
  242     while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0)
  243         tmp_state_p->idx = (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
  244     if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
  245         tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx];
  246 
  247     /* Initialize the abs_off by moving into the datatype 
  248      * byte_off bytes.  Since we only do this in the beginning, we
  249      * make the assumption that pieces are added whole until the last
  250      * piece which MAY be partial. */
  251     while (tmp_off_used != tmp_view_p->byte_off)
  252     {
  253         view_state_add_region(
  254         tmp_view_p->byte_off - tmp_off_used,
  255         &(view_state_arr[i]), &st_reg, &tmp_reg_sz, 
  256         op_type);
  257     }
  258     
  259     /* Re-initialize the cur_size so that the abs_off was set to
  260      * the proper position while the actual size = 0.*/
  261     tmp_state_p->cur_sz = 0;
  262 #ifdef DEBUG1
  263     fprintf(stderr, "init_view_state: %s (idx=%d,byte_off=%Ld,"
  264         "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type], 
  265         i, tmp_view_p->byte_off, tmp_state_p->abs_off,
  266         tmp_state_p->cur_reg_off, tmp_view_p->sz);
  267 #endif  
  268 
  269     }
  270     return 0;
  271 }
  272 
  273 /* Return the next file realm offset and length for this datatype state
  274  * within a particular file realm. */
  275 static inline int get_next_fr_off(ADIO_File fd,
  276                   ADIO_Offset off,
  277                   ADIO_Offset fr_st_off,
  278                   MPI_Datatype *fr_type_p,
  279                   ADIO_Offset *fr_next_off_p,
  280                   ADIO_Offset *fr_max_len_p) 
  281 {
  282     MPI_Aint fr_extent = -1, lb;
  283     ADIO_Offset tmp_off, off_rem;
  284     ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist;
  285     int i = -1, fr_dtype_ct = 0;
  286 
  287     /* Should have already been flattened in calc_file_realms() */
  288     while (fr_node_p->type != (*fr_type_p))
  289     fr_node_p = fr_node_p->next;
  290     assert(fr_node_p != NULL);
  291 
  292     /* Did we get to the first region of the file realm? */
  293     if (off - fr_st_off < 0)
  294     {
  295     *fr_next_off_p = fr_st_off + fr_node_p->indices[0];
  296     *fr_max_len_p = fr_node_p->blocklens[0];
  297     return 0;
  298     }
  299 
  300     /* Calculate how many times to loop through the fr_type 
  301      * and where the next fr_off is. */
  302     MPI_Type_get_extent(*fr_type_p, &lb, &fr_extent);
  303     tmp_off = off - fr_st_off;
  304     fr_dtype_ct = tmp_off / fr_extent;
  305     off_rem = tmp_off % fr_extent;
  306     for (i = 0; i < fr_node_p->count; i++)
  307     {
  308     if (off_rem < fr_node_p->indices[i])
  309     {
  310         *fr_next_off_p = fr_st_off +
  311         (fr_dtype_ct * fr_extent) + fr_node_p->indices[i];
  312         *fr_max_len_p = fr_node_p->blocklens[i];
  313         return 0;
  314     }
  315     else if (off_rem < fr_node_p->indices[i] + fr_node_p->blocklens[i])
  316     {
  317         *fr_next_off_p = off;
  318         *fr_max_len_p = fr_node_p->blocklens[i] - 
  319         (off_rem - fr_node_p->indices[i]);
  320         return off;
  321     }
  322     }
  323     
  324     /* Shouldn't get here. */
  325     fprintf(stderr, "get_next_fr_off: Couldn't find the correct "
  326         "location of the next offset for this file realm.\n");
  327     return -1;
  328 }
  329 
  330 /* Look in all the view states for the first offset within a given
  331  * file realm.  Report the end of a contiguous region within the file
  332  * realm (possibly more than the actual view state may be able to
  333  * process contiguously). */
  334 static inline int find_next_off(ADIO_File fd,
  335                 view_state *view_state_p,
  336                 ADIO_Offset fr_st_off,
  337                 MPI_Datatype *fr_type_p,
  338                 int op_type,
  339                 ADIO_Offset *cur_off_p,
  340                 ADIO_Offset *cur_reg_max_len_p)
  341 {
  342     ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
  343     ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1, 
  344     tmp_fr_max_len = -1;
  345     int ret = 0;
  346     flatten_state *tmp_state_p = NULL;
  347     ADIO_Offset tmp_st_off = 0, tmp_reg_sz = 0;
  348 #ifdef DTYPE_SKIP
  349     int skip_type_ct;
  350 #endif
  351 
  352 #ifdef AGGREGATION_PROFILE
  353     /* MPE_Log_event (5022, 0, NULL); */
  354 #endif
  355 
  356     switch(op_type)
  357     {
  358     case TEMP_OFF:
  359         tmp_state_p = &(view_state_p->tmp_state);
  360         break;
  361     case REAL_OFF:
  362         tmp_state_p = &(view_state_p->cur_state);
  363         break;
  364     default:
  365         fprintf(stderr, "op_type invalid\n");
  366     }
  367     
  368     tmp_flat_type_p = view_state_p->flat_type_p;
  369 
  370     /* Can we use this proc? */
  371     if (tmp_state_p->cur_sz < view_state_p->sz) {
  372     tmp_st_off = 0;
  373     tmp_reg_sz = 0;
  374     /* If the current region is not within the file realm, advance
  375      * the state until it is and calculate the end of the next file 
  376      * realm in fr_max_len. */
  377     ret = get_next_fr_off(fd,
  378                   tmp_state_p->abs_off, 
  379                   fr_st_off,
  380                   fr_type_p,
  381                   &fr_next_off,
  382                   &fr_max_len);
  383     
  384     while ((tmp_state_p->abs_off < fr_next_off) &&
  385            (tmp_state_p->cur_sz != view_state_p->sz))
  386     {
  387         
  388     /* While this might appear to be erroneous at first,
  389      * view_state_add_region can only add a single piece at a
  390      * time.  Therefore, it will never overshoot the beginning
  391      * of the next file realm.  When it finally does enter the
  392      * next file realm it will not be able to go beyond its
  393      * first piece. */
  394         
  395 #ifdef DTYPE_SKIP
  396         if (tmp_flat_type_p->count > 1) {
  397         /* let's see if we can skip whole datatypes */
  398         skip_type_ct = (fr_next_off - tmp_state_p->abs_off) /
  399             view_state_p->ext;
  400         if (skip_type_ct > 0) {
  401             /* before we go on, let's check if we've actually
  402              * finished up already */
  403             tmp_state_p->cur_sz += skip_type_ct *
  404             view_state_p->type_sz;
  405             if (tmp_state_p->cur_sz >= view_state_p->sz) {
  406             tmp_state_p->cur_sz = view_state_p->sz;
  407             break;
  408             }
  409             tmp_state_p->abs_off += skip_type_ct * view_state_p->ext;
  410         }
  411         }
  412 #endif
  413         view_state_add_region(
  414         fr_next_off - tmp_state_p->abs_off,
  415         view_state_p,
  416         &tmp_st_off,
  417         &tmp_reg_sz,
  418         op_type);
  419 
  420         ret = get_next_fr_off(fd,
  421                   tmp_state_p->abs_off, 
  422                   fr_st_off,
  423                   fr_type_p,
  424                   &fr_next_off,
  425                   &fr_max_len);
  426     }
  427 
  428     if (tmp_state_p->cur_sz != view_state_p->sz) {
  429         tmp_off = tmp_state_p->abs_off;
  430         /* Calculate how much of the remaining file realm there is from the
  431          * current offset */
  432         tmp_fr_max_len = fr_next_off + fr_max_len - tmp_off;
  433     }
  434     }
  435 
  436     *cur_off_p = tmp_off;
  437     *cur_reg_max_len_p = tmp_fr_max_len;
  438 #ifdef AGGREGATION_PROFILE
  439     /* MPE_Log_event (5023, 0, NULL); */
  440 #endif
  441     return ret;
  442 }
  443 
  444 /* Upon completion of a full collective buffer, end of a file realm
  445  * region (data sieving), or the end of all I/O for an aggregator, we
  446  * should return a list of MPI_Datatypes that correspond to client
  447  * communication into a collective buffer, a list of corresponding
  448  * sizes, and an aggregate MPI_Datatype which will be used as a
  449  * filetype in MPI_File_write/read on the aggregator. */ 
  450 int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
  451              view_state *client_file_view_state_arr,
  452              MPI_Datatype *client_comm_dtype_arr,
  453              ADIO_Offset *client_comm_sz_arr,
  454              ADIO_Offset *agg_dtype_offset_p,
  455              MPI_Datatype *agg_dtype_p)
  456 {
  457     MPI_Aint **client_disp_arr = NULL, *agg_disp_arr = NULL;
  458     int **client_blk_arr = NULL, *agg_blk_arr = NULL;
  459     ADIO_Offset tmp_coll_buf_sz = 0, st_reg = 0, act_reg_sz = 0;
  460     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
  461     ADIO_Offset ds_fr_end = -1;
  462     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
  463     MPI_Datatype *fr_type_arr = fd->file_realm_types;
  464     int *client_ol_ct_arr = NULL;
  465     int *client_ol_cur_ct_arr = NULL;
  466     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
  467     int cur_off_proc = -1;
  468     int next_off_idx = -1;
  469     int i = 0, j = 0, all_done = -1;
  470     int agg_idx = fd->my_cb_nodes_index;
  471     heap_t offset_heap;
  472     ADIO_Offset next_off = -1, next_reg_max_len = -1;
  473 
  474     /* Used for coalescing ol pairs next to each other. */
  475     ADIO_Offset *client_comm_next_off_arr = NULL;
  476     ADIO_Offset agg_next_off = -1;
  477 #ifdef AGGREGATION_PROFILE
  478     MPE_Log_event (5016, 0, NULL);
  479 #endif
  480 
  481     memset(client_comm_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
  482 
  483     if ((client_comm_next_off_arr = (ADIO_Offset *) 
  484      ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
  485     {
  486     fprintf(stderr, "ADIOI_Build_agg_reqs: malloc client_next_off_arr "
  487         "failed\n");
  488     return -1;
  489     }
  490     
  491     if ((client_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
  492     {
  493     fprintf(stderr, "ADIOI_Build_agg_reqs: "
  494         "malloc client_ol_ct_arr failed\n");
  495     return -1;
  496     }
  497     if ((client_ol_cur_ct_arr = 
  498      (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
  499     {
  500     fprintf(stderr, "ADIOI_Build_agg_reqs: "
  501         "malloc client_ol_cur_ct_arr failed\n");
  502     return -1;
  503     }
  504 
  505     /* On the first pass see how many offset-length pairs are
  506      * necessary for each client.  Then allocate the correct amount of
  507      * offset-length pairs for describing the collective buffer.  All
  508      * data is processed in order by the aggregator's file realm.  On
  509      * the second pass, set the offset-length pairs to the correct
  510      * values. */
  511     for (i = 0; i < MAX_OFF_TYPE; i++)
  512     {
  513     memset(client_comm_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
  514     tmp_coll_buf_sz = 0;
  515     ds_fr_end = -1;
  516 
  517     /* initialize heap */
  518     ADIOI_Heap_create(&offset_heap, nprocs);
  519     offset_heap.size = 0;
  520     
  521     for (j=0; j<nprocs; j++) {
  522         find_next_off(fd, 
  523               &client_file_view_state_arr[j],
  524               fr_st_off_arr[agg_idx],
  525               &(fr_type_arr[agg_idx]),
  526               i,
  527               &cur_off,
  528               &cur_reg_max_len);
  529         if ((cur_off != -1) && (cur_reg_max_len > 0)) {
  530         ADIOI_Heap_insert(&offset_heap, cur_off, j, cur_reg_max_len);
  531 #ifdef DEBUG_HEAP
  532         printf ("initial: inserting offset %lld with "
  533             "cur_reg_max_len = %lld for p%d\n",
  534             cur_off, cur_reg_max_len, j);
  535 #endif
  536         }
  537 
  538     }
  539     if (!offset_heap.size)
  540         ADIOI_Heap_insert(&offset_heap, -1, -1, -1);
  541 
  542     while (tmp_coll_buf_sz < fd->hints->cb_buffer_size)
  543     {
  544         /* Find the next process with the next region within the
  545          * file realm and the maximum amount that can be added for
  546          * this particular file realm as a contiguous region. */
  547         ADIOI_Heap_extract_min(&offset_heap, &cur_off, &cur_off_proc,
  548                  &cur_reg_max_len);
  549 #ifdef DEBUG_HEAP
  550         printf ("extracted cur_off %lld from proc %d\n",
  551             cur_off, cur_off_proc);
  552 #endif
  553 
  554         if (cur_off == -1)
  555         break;
  556         
  557 #ifdef DEBUG3
  558         fprintf(stderr, "ADIOI_Build_agg_reqs: %s proc %d start/add to"
  559             " list (max_reg_fr=%Ld,tmp_coll_buf_sz=%Ld,"
  560             "cb_buffer_size=%d)\n", off_type_name[i], cur_off_proc,
  561             cur_reg_max_len, tmp_coll_buf_sz, 
  562             fd->hints->cb_buffer_size);
  563 #endif
  564         
  565         /* We process only contiguous file realm regions if we are
  566          * using data sieving. Note that we only do this for
  567          * writes since reads can be data sieved across each other
  568          * without consistency issues. */
  569         if ((fd->hints->ds_write == ADIOI_HINT_ENABLE ||
  570          fd->hints->ds_write == ADIOI_HINT_AUTO) &&
  571         rw_type == ADIOI_WRITE && fd->hints->cb_nodes > 1)
  572         {
  573 #ifdef DEBUG2
  574         fprintf(stderr, "ADIOI_Build_agg_reqs: "
  575             "Warning - Data sieving writes on\n");
  576 #endif
  577         if (ds_fr_end == -1)
  578         {
  579             ds_fr_end = cur_off + cur_reg_max_len;
  580 #ifdef DEBUG1
  581         fprintf(stderr, "ADIOI_Build_agg_reqs: "
  582             "cur_off=%Ld, cur_reg_max_len=%Ld\n"
  583             "Data sieving file realm end initialized to %Ld\n",
  584             cur_off,
  585             cur_reg_max_len,
  586             ds_fr_end);
  587 #endif
  588         }
  589         else
  590         {
  591             /* The next off switched file realms, so we will stop
  592              * here. */
  593             if (ds_fr_end != cur_off + cur_reg_max_len)
  594             {
  595 #ifdef DEBUG1
  596             fprintf(stderr, "ADIOI_Build_agg_reqs: "
  597                 "Data sieving file realm end changed from "
  598                 "%Ld to %Ld\n", ds_fr_end, 
  599                 cur_off + cur_reg_max_len);
  600 #endif
  601             break;
  602             }
  603         }
  604         }
  605         
  606         /* Add up to the end of the file realm or the collective
  607          * buffer. */
  608         if (cur_reg_max_len > (fd->hints->cb_buffer_size - 
  609                    tmp_coll_buf_sz))
  610         cur_reg_max_len = fd->hints->cb_buffer_size - tmp_coll_buf_sz;
  611 
  612         view_state_add_region(
  613         cur_reg_max_len,
  614         &(client_file_view_state_arr[cur_off_proc]), 
  615         &st_reg, &act_reg_sz, i);
  616 
  617         switch(i)
  618         {
  619         case TEMP_OFF:
  620             /* Increment the ol list count for each proc and
  621              * the used part of the collective buffer if the
  622              * next region is not adjacent to the previous
  623              * region. */
  624             if (client_comm_next_off_arr[cur_off_proc] != 
  625             tmp_coll_buf_sz)
  626             {
  627             (client_ol_ct_arr[cur_off_proc])++;
  628             }
  629             client_comm_next_off_arr[cur_off_proc] = 
  630             tmp_coll_buf_sz + act_reg_sz;
  631             
  632             if (agg_next_off != st_reg)
  633             agg_ol_ct++;
  634             agg_next_off = st_reg + act_reg_sz;
  635             break;
  636         case REAL_OFF:
  637             /* Add this region to the proper client ol list if
  638              * the next region is not adjacent to the previous
  639              * region. */
  640             next_off_idx = client_ol_cur_ct_arr[cur_off_proc];
  641             if (client_comm_next_off_arr[cur_off_proc] != 
  642             tmp_coll_buf_sz)
  643             {
  644             client_disp_arr[cur_off_proc][next_off_idx] =
  645                 tmp_coll_buf_sz;
  646             client_blk_arr[cur_off_proc][next_off_idx] = 
  647                 act_reg_sz;
  648             (client_ol_cur_ct_arr[cur_off_proc])++;
  649             }
  650             else
  651             {
  652             client_blk_arr[cur_off_proc][next_off_idx - 1] 
  653                 += act_reg_sz;
  654             }
  655             client_comm_sz_arr[cur_off_proc] += act_reg_sz;
  656             client_comm_next_off_arr[cur_off_proc] =
  657             tmp_coll_buf_sz + act_reg_sz;
  658             
  659             /* Add to the aggregator filetype if the next
  660              * region is not adjacent to the previous
  661              * region. */
  662             if (agg_next_off != st_reg)
  663             {
  664             /* this will enable initial offsets much further into
  665              * the file than an MPI_Aint */
  666             if (!agg_ol_cur_ct)
  667                 *agg_dtype_offset_p = st_reg;
  668             agg_disp_arr[agg_ol_cur_ct] = st_reg -
  669                 (MPI_Aint) *agg_dtype_offset_p;
  670             agg_blk_arr[agg_ol_cur_ct] = act_reg_sz;    
  671             agg_ol_cur_ct++;
  672             }
  673             else
  674             {
  675             agg_blk_arr[agg_ol_cur_ct - 1] += act_reg_sz;
  676             }
  677             agg_next_off = st_reg + act_reg_sz;
  678             
  679             break;
  680         default:
  681             fprintf(stderr, "ADIOI_Build_agg_reqs: Impossible type\n");
  682         }
  683         tmp_coll_buf_sz += act_reg_sz;
  684 
  685         find_next_off(fd,
  686               &client_file_view_state_arr[cur_off_proc],
  687               fr_st_off_arr[agg_idx],
  688               &(fr_type_arr[agg_idx]),
  689               i,
  690               &next_off,
  691               &next_reg_max_len);
  692 
  693         if ((next_off != -1) || (!offset_heap.size)) {
  694         ADIOI_Heap_insert(&offset_heap, next_off, cur_off_proc,
  695                 next_reg_max_len);
  696 #ifdef DEBUG_HEAP
  697         printf ("inserting offset %lld for p%d\n", next_off,
  698             cur_off_proc);
  699 #endif
  700         }
  701     }
  702     
  703     if (i == TEMP_OFF)
  704     {
  705         /* Allocate offset-length pairs for creating hindexed
  706          * MPI_Datatypes for both the client and the aggregator. */
  707         if ((client_disp_arr = (MPI_Aint **) 
  708          ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
  709         {
  710         fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
  711             "client_disp_arr failed\n");
  712         return -1;
  713         }
  714         if ((client_blk_arr = (int **) ADIOI_Malloc(
  715              nprocs*sizeof(int *))) == NULL)
  716         {
  717         ADIOI_Free(client_disp_arr);
  718         fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
  719             "client_blk_arr failed\n");
  720         return -1;
  721         }    
  722         for (j = 0; j < nprocs; j++)
  723         {
  724         if ((client_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc(
  725              client_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
  726         {
  727             fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
  728                 "client_disp_arr[%d] failed\n", j);
  729             return -1;
  730         }
  731         if ((client_blk_arr[j] = (int *) 
  732              ADIOI_Malloc(client_ol_ct_arr[j]*sizeof(int))) == NULL)
  733         {
  734             ADIOI_Free(client_disp_arr[j]);
  735             fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
  736                 "client_blk_arr[%d] failed\n", j);
  737             return -1;
  738         }
  739         }
  740         
  741         if (agg_ol_ct > 0) 
  742         {
  743         if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc(
  744              agg_ol_ct*sizeof(MPI_Aint))) == NULL)
  745         {
  746             fprintf(stderr, 
  747                 "ADIOI_Build_agg_reqs: malloc disp_arr failed\n");
  748             return -1;
  749         }
  750         if ((agg_blk_arr = (int *) 
  751              ADIOI_Malloc(agg_ol_ct*sizeof(int))) == NULL)
  752         {
  753             ADIOI_Free(agg_disp_arr);
  754             fprintf(stderr, 
  755                 "ADIOI_Build_agg_reqs: malloc blk_arr failed\n");
  756             return -1;
  757         }
  758         }
  759     }
  760     ADIOI_Heap_free(&offset_heap);
  761     }
  762     
  763     /* Let the clients know if this aggregator is totally finished
  764      * with all possible client requests. */
  765     all_done = 1;
  766     for (i = 0; i < nprocs; i++)
  767     {
  768     if ((client_file_view_state_arr[i].cur_state.cur_sz !=
  769             client_file_view_state_arr[i].sz) ||
  770             client_comm_sz_arr[i] != 0)
  771     {
  772         all_done = 0;
  773         break;
  774     }
  775     }
  776     if (all_done == 1)
  777     {
  778     for (i = 0; i < nprocs; i++)
  779     {
  780         client_comm_sz_arr[i] = -1;
  781     }
  782     }
  783 
  784     /* Quick check to make sure we found all the ol pairs we thought
  785      * we did */
  786     for (i = 0; i < nprocs; i++)
  787     {
  788     if (client_ol_cur_ct_arr[i] != client_ol_ct_arr[i])
  789     {
  790         fprintf(stderr, "ADIOI_Build_agg_reqs: ERROR Process %d "
  791             "processed only %d out of %d ol pairs\n", i, 
  792             client_ol_cur_ct_arr[i],
  793             client_ol_ct_arr[i]);
  794         return -1;
  795     }
  796     }
  797 #ifdef DEBUG1
  798     fprintf(stderr, "ADIOI_Build_agg_reqs:(client,ol_pairs,size_req)=");
  799     for (i = 0; i < nprocs; i++)
  800     {
  801     fprintf(stderr, "(%d,%d,%Ld)", i, client_ol_ct_arr[i],
  802         client_comm_sz_arr[i]);
  803     if (i != nprocs - 1)
  804         fprintf(stderr, ",");
  805     }
  806     fprintf(stderr, "\n");
  807 #endif
  808 #ifdef DEBUG1
  809     fprintf(stderr, "ADIOI_Build_agg_reqs: Generated %d of %d "
  810         "aggregate offset-length pairs\n", agg_ol_cur_ct, agg_ol_ct);
  811 #endif
  812 #ifdef DEBUG2
  813     for (i = 0; i < nprocs; i++)
  814     {
  815     if (client_ol_ct_arr[i] > 0)
  816     {
  817         fprintf(stderr, "ADIOI_Build_agg_reqs: p %d (off,len) = ", i);
  818         for (j = 0; j < client_ol_ct_arr[i]; j++)
  819         {
  820         fprintf(stderr, "[%d](%d,%d) ", j, 
  821             client_disp_arr[i][j],
  822             client_blk_arr[i][j]);
  823         }
  824         fprintf(stderr, "\n");
  825     }
  826     }    
  827     if (agg_ol_ct) {
  828     fprintf(stderr, "ADIOI_Build_agg_reqs:agg_type(off,len)=");
  829     for (i = 0; i < agg_ol_ct; i++)
  830         {
  831         fprintf(stderr, "[%d](%d,%d)",
  832             i, agg_disp_arr[i], agg_blk_arr[i]);
  833         if (i != agg_ol_ct - 1)
  834             fprintf(stderr, ",");
  835         }
  836     fprintf(stderr, "\n");
  837     }
  838 #endif
  839 
  840     assert(agg_ol_cur_ct == agg_ol_ct);
  841 
  842     /* Create all the client and aggregate MPI_Datatypes */
  843     for (i = 0; i < nprocs; i++)
  844     {
  845     if (client_comm_sz_arr[i] > 0)
  846     {
  847         MPI_Type_create_hindexed(client_ol_ct_arr[i], client_blk_arr[i],
  848                          client_disp_arr[i], MPI_BYTE, 
  849                          &(client_comm_dtype_arr[i]));
  850         MPI_Type_commit(&(client_comm_dtype_arr[i]));
  851     }
  852     else
  853     {
  854         client_comm_dtype_arr[i] = MPI_BYTE;
  855     }
  856     ADIOI_Free(client_blk_arr[i]);
  857     ADIOI_Free(client_disp_arr[i]);
  858     }
  859     ADIOI_Free(client_blk_arr);
  860     ADIOI_Free(client_disp_arr);
  861 
  862     if (agg_ol_ct > 0) {
  863     if (agg_ol_ct == 1)
  864         MPI_Type_contiguous (agg_blk_arr[0], MPI_BYTE, agg_dtype_p);
  865     else if (agg_ol_ct > 1)
  866         MPI_Type_create_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
  867                          agg_dtype_p);    
  868 
  869     MPI_Type_commit(agg_dtype_p);
  870 
  871     ADIOI_Free(agg_disp_arr);
  872     ADIOI_Free(agg_blk_arr);
  873     }
  874     ADIOI_Free(client_ol_ct_arr);
  875     ADIOI_Free(client_ol_cur_ct_arr);
  876     ADIOI_Free(client_comm_next_off_arr);
  877 #ifdef AGGREGATION_PROFILE
  878     MPE_Log_event (5017, 0, NULL);
  879 #endif
  880     return 0;
  881 }
  882 
  883 /* All sizes from all aggregators are gathered on the clients, which
  884  * then call this function, which will generate the comm datatypes for
  885  * each aggregator (agg_comm_dtype_arr) in the upcoming
  886  * MPI_Alltoallw() */
  887 int ADIOI_Build_client_reqs(ADIO_File fd, 
  888                 int nprocs,
  889                 view_state *my_mem_view_state_arr,
  890                 view_state *agg_file_view_state_arr,
  891                 ADIO_Offset *agg_comm_sz_arr,
  892                 MPI_Datatype *agg_comm_dtype_arr)
  893 {
  894     MPI_Aint **agg_disp_arr = NULL;
  895     int **agg_blk_arr = NULL;
  896     view_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
  897     ADIO_Offset total_agg_comm_sz = 0, cur_total_agg_comm_sz = 0;
  898     ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
  899     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
  900     ADIO_Offset tmp_cur_off = -1, tmp_cur_reg_max_len = -1;
  901     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
  902     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
  903     ADIO_Offset *agg_comm_cur_sz_arr = NULL;
  904     MPI_Datatype *fr_type_arr = fd->file_realm_types;
  905     int *agg_ol_ct_arr = NULL;
  906     int *agg_ol_cur_ct_arr = NULL;
  907     int tmp_agg_fr_idx = -1;
  908     int cur_off_proc = -1;
  909     int i = 0, j = 0;
  910     int agg_next_off_idx = -1;
  911     /* Used for coalescing ol pairs next to each other. */
  912     ADIO_Offset *agg_mem_next_off_arr = NULL;
  913 #ifdef AGGREGATION_PROFILE
  914     MPE_Log_event (5018, 0, NULL);
  915 #endif
  916 
  917 #ifdef DEBUG
  918     fprintf(stderr, "ADIOI_Build_client_reqs:(agg,size_req)=");
  919     for (i = 0; i < nprocs; i++)
  920     {
  921     int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
  922         if (tmp_agg_idx >= 0)
  923         {
  924         fprintf(stderr, "(%d,%Ld)", i, agg_comm_sz_arr[i]);
  925         if (i != fd->hints->cb_nodes - 1)
  926         fprintf(stderr, ",");
  927     }
  928     fprintf(stderr, "\n");
  929     }
  930 #endif
  931     
  932     if ((agg_mem_next_off_arr = (ADIO_Offset *) ADIOI_Malloc(
  933          nprocs*sizeof(ADIO_Offset))) == NULL)
  934     {
  935     fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_mem_next_off_arr"
  936         "failed\n");
  937     return -1;
  938     }
  939 
  940     if ((agg_comm_cur_sz_arr = (ADIO_Offset *) 
  941      ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
  942     {
  943     fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_comm_cur_sz_arr"
  944         " failed\n");
  945     return -1;
  946     }
  947     if ((agg_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
  948     == NULL)
  949     {
  950     fprintf(stderr, "ADIOI_Build_client_reqs: "
  951         "malloc agg_ol_ct_arr failed\n");
  952     return -1;
  953     }
  954     if ((agg_ol_cur_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
  955     == NULL)
  956     {
  957     fprintf(stderr, "ADIOI_Build_client_reqs: "
  958         "malloc agg_ol_cur_ct_arr failed\n");
  959     return -1;
  960     }
  961 
  962     for (i = 0; i < nprocs; i++)
  963     {
  964     if (agg_comm_sz_arr[i] > 0)
  965         total_agg_comm_sz += agg_comm_sz_arr[i];
  966     }
  967     
  968     /* On the first pass see how many offset-length pairs are
  969      * necessary for each aggregator.  Then allocate the correct
  970      * amount of offset-length pairs for handling each aggregator's
  971      * particular data size.  On the last pass, we actually create the
  972      * offset-length pairs. */
  973     for (i = 0; i < MAX_OFF_TYPE; i++)
  974     {
  975     cur_total_agg_comm_sz = 0;
  976     memset(agg_comm_cur_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
  977     memset(agg_mem_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
  978     while (total_agg_comm_sz > cur_total_agg_comm_sz)
  979     {
  980         /* Look for the next aggregator offset among all the
  981          * aggregators and their respective file realms. */
  982         cur_off = -1;
  983         for (j = 0; j < nprocs; j++)
  984         {
  985         tmp_agg_fr_idx = ADIOI_Agg_idx(j, fd);
  986                 assert(tmp_agg_fr_idx < fd->hints->cb_nodes);
  987         
  988         /* If this process is not an aggregator or we have
  989          * finished all the bytes for this aggregator, move
  990          * along. */
  991         if (tmp_agg_fr_idx < 0 || 
  992             agg_comm_cur_sz_arr[j] == agg_comm_sz_arr[j])
  993         {
  994             continue;
  995         }
  996 
  997         find_next_off(fd,
  998                   &(agg_file_view_state_arr[j]),
  999                   fr_st_off_arr[tmp_agg_fr_idx],
 1000                   &(fr_type_arr[tmp_agg_fr_idx]),
 1001                   i,
 1002                   &tmp_cur_off,
 1003                   &tmp_cur_reg_max_len);
 1004         if (tmp_cur_off == -1)
 1005             continue;          
 1006 
 1007         if ((cur_off == -1) || 
 1008             (cur_off > tmp_cur_off))
 1009         {
 1010             cur_off_proc = j;
 1011             cur_off = tmp_cur_off;
 1012             cur_reg_max_len = tmp_cur_reg_max_len;
 1013         }
 1014         }
 1015 
 1016         assert(cur_off_proc != -1);
 1017         
 1018         /* Add up to the end of the file realm or as many bytes
 1019          * are left for this particular aggregator in the client's
 1020          * filetype */
 1021         if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] - 
 1022         agg_comm_cur_sz_arr[cur_off_proc])
 1023         {
 1024         cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] - 
 1025             agg_comm_cur_sz_arr[cur_off_proc];
 1026         }
 1027         assert(cur_reg_max_len > 0);
 1028         
 1029         view_state_add_region(
 1030         cur_reg_max_len,
 1031         &(agg_file_view_state_arr[cur_off_proc]),
 1032         &st_reg, &act_reg_sz, i);
 1033         
 1034 #ifdef DEBUG2
 1035         fprintf(stderr, "ADIOI_Build_client_reqs: %s File region"
 1036             " (proc=%d,off=%Ld,sz=%Ld)\n",
 1037             off_type_name[i], cur_off_proc,
 1038             cur_off, act_reg_sz);
 1039 #endif
 1040 
 1041         /* Before translating the file regions to memory regions,
 1042          * we first must advance to the proper point in the
 1043          * mem_view_state for this aggregator to match the
 1044          * file_view_state. */
 1045         tmp_file_state_p = &(agg_file_view_state_arr[cur_off_proc]);
 1046         tmp_mem_state_p = &(my_mem_view_state_arr[cur_off_proc]);
 1047         assert(view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz >=
 1048            view_state_get_cur_sz(tmp_mem_state_p, i));
 1049         while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz != 
 1050            view_state_get_cur_sz(tmp_mem_state_p, i))
 1051         {
 1052         ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
 1053         view_state_add_region(
 1054             view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz -
 1055             view_state_get_cur_sz(tmp_mem_state_p, i),
 1056             tmp_mem_state_p,
 1057             &fill_st_reg,
 1058             &fill_reg_sz, i);
 1059         }
 1060         
 1061         /* Based on how large the act_reg_sz 1. Figure out how
 1062          * many memory offset-length pairs are necessary. 2. Set
 1063          * the offset-length pairs. */
 1064         tmp_reg_sz = 0;
 1065         while (tmp_reg_sz != act_reg_sz)
 1066         {
 1067         view_state_add_region(
 1068             act_reg_sz - tmp_reg_sz,
 1069             tmp_mem_state_p,
 1070             &agg_mem_st_reg, &agg_mem_act_reg_sz, 
 1071             i);
 1072         tmp_reg_sz += agg_mem_act_reg_sz;
 1073 
 1074 #ifdef DEBUG2
 1075         fprintf(stderr, "ADIOI_Build_client_reqs: Mem region %s"
 1076             "(proc=%d,off=%Ld,sz=%Ld)\n",
 1077             off_type_name[i], cur_off_proc,
 1078             agg_mem_st_reg, agg_mem_act_reg_sz);
 1079 #endif
 1080         agg_comm_cur_sz_arr[cur_off_proc] += agg_mem_act_reg_sz;
 1081         cur_total_agg_comm_sz += agg_mem_act_reg_sz;        
 1082         switch(i)
 1083         {
 1084             case TEMP_OFF:
 1085             /* Increment the ol list count a particular
 1086              * aggregator if next region is not adjacent
 1087              * to the previous region. */
 1088             if (agg_mem_next_off_arr[cur_off_proc] != 
 1089                 agg_mem_st_reg)
 1090             {
 1091                 agg_ol_ct_arr[cur_off_proc]++;
 1092             }
 1093             agg_mem_next_off_arr[cur_off_proc] = 
 1094                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1095             break;
 1096             case REAL_OFF:
 1097             /* Set the ol list for the memtypes that will
 1098              * map to each aggregator, coaslescing if
 1099              * possible. */
 1100             agg_next_off_idx = agg_ol_cur_ct_arr[cur_off_proc];
 1101             if (agg_mem_next_off_arr[cur_off_proc] != 
 1102                 agg_mem_st_reg)
 1103             {
 1104                 agg_disp_arr[cur_off_proc][agg_next_off_idx] = 
 1105                 agg_mem_st_reg;
 1106                 agg_blk_arr[cur_off_proc][agg_next_off_idx] = 
 1107                 agg_mem_act_reg_sz;
 1108                 (agg_ol_cur_ct_arr[cur_off_proc])++;
 1109             }
 1110             else
 1111             {
 1112                 agg_blk_arr[cur_off_proc][agg_next_off_idx - 1]
 1113                 += agg_mem_act_reg_sz;
 1114             }
 1115             agg_mem_next_off_arr[cur_off_proc] = 
 1116                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1117             break;
 1118             default:
 1119             fprintf(stderr, "ADIOI_Build_client_reqs: "
 1120                 "Impossible type\n");
 1121         }
 1122         }
 1123     }
 1124     
 1125     /* On the first pass, allocate the memory structures for
 1126      * creating the MPI_hindexed type. */
 1127     if (i == TEMP_OFF)
 1128     {       
 1129         /* Allocate offset-length pairs for creating hindexed
 1130          * MPI_Datatypes for each aggregator */
 1131         if ((agg_disp_arr = (MPI_Aint **) 
 1132          ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
 1133         {
 1134         fprintf(stderr, 
 1135             "ADIOI_Build_client_reqs: malloc agg_disp_arr failed\n");
 1136         return -1;
 1137         }
 1138         if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *))) 
 1139         == NULL)
 1140         {
 1141         ADIOI_Free(agg_disp_arr);
 1142         fprintf(stderr, 
 1143             "ADIOI_Build_client_reqs: malloc agg_blk_arr failed\n");
 1144         return -1;
 1145         }    
 1146         for (j = 0; j < nprocs; j++)
 1147         {
 1148         if ((agg_disp_arr[j] = (MPI_Aint *) 
 1149              ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
 1150         {
 1151             fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
 1152                 "agg_disp_arr[%d] failed\n", j);
 1153             return -1;
 1154         }
 1155         if ((agg_blk_arr[j] = (int *) 
 1156              ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(int))) == NULL)
 1157         {
 1158             ADIOI_Free(agg_disp_arr[j]);
 1159             fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
 1160                 "agg_blk_arr[%d] failed\n", j);
 1161             return -1;
 1162         }
 1163         }
 1164     }
 1165     }
 1166 
 1167 #ifdef DEBUG
 1168     fprintf(stderr, "ADIOI_Build_client_reqs:(agg,cur_ol_count=ol_count)=");
 1169     for (i = 0; i < nprocs; i++)
 1170     {
 1171     int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
 1172     if (tmp_agg_idx >= 0)
 1173     {
 1174         fprintf(stderr, "(%d,%d=%d)", i, agg_ol_cur_ct_arr[i],
 1175             agg_ol_ct_arr[i]);
 1176         assert(agg_ol_ct_arr[i] == agg_ol_cur_ct_arr[i]);
 1177         if (tmp_agg_idx != fd->hints->cb_nodes - 1)
 1178         fprintf(stderr, ",");
 1179     }
 1180     }
 1181     fprintf(stderr, "\n");
 1182 #endif
 1183 
 1184 #ifdef DEBUG2
 1185     for (i = 0; i < nprocs; i++)
 1186     {
 1187     if (agg_ol_ct_arr[i] > 0)
 1188     {
 1189         fprintf(stderr, "ADIOI_Build_client_reqs: p %d (off,len) = ", i);
 1190         for (j = 0; j < agg_ol_ct_arr[i]; j++)
 1191         {
 1192         fprintf(stderr, "[%d](%d,%d) ", j,
 1193             agg_disp_arr[i][j],
 1194             agg_blk_arr[i][j]);
 1195         }
 1196         fprintf(stderr, "\n");
 1197     }
 1198     }
 1199 #endif
 1200 
 1201     /* Create all the aggregator MPI_Datatypes */
 1202     for (i = 0; i < nprocs; i++)
 1203     {
 1204     if (agg_comm_sz_arr[i] > 0)
 1205     {
 1206         MPI_Type_create_hindexed(agg_ol_ct_arr[i], agg_blk_arr[i],
 1207                                      agg_disp_arr[i], MPI_BYTE,
 1208                                      &(agg_comm_dtype_arr[i]));
 1209             MPI_Type_commit(&(agg_comm_dtype_arr[i]));
 1210     }
 1211     else
 1212     {
 1213         agg_comm_dtype_arr[i] = MPI_BYTE;
 1214     }
 1215     ADIOI_Free(agg_blk_arr[i]);
 1216     ADIOI_Free(agg_disp_arr[i]);
 1217     }
 1218     ADIOI_Free(agg_blk_arr);
 1219     ADIOI_Free(agg_disp_arr);
 1220 
 1221     ADIOI_Free(agg_mem_next_off_arr);
 1222     ADIOI_Free(agg_comm_cur_sz_arr);
 1223     ADIOI_Free(agg_ol_ct_arr);
 1224     ADIOI_Free(agg_ol_cur_ct_arr);
 1225 #ifdef AGGREGATION_PROFILE
 1226     MPE_Log_event (5019, 0, NULL);
 1227 #endif    
 1228     return 0;
 1229 }
 1230 /* ADIOI_Build_client_pre_req allows a client to calculate the memtype
 1231  * offset-length pairs up (up to a limit - max_pre_req_sz or max
 1232  * ol_ct). It basically allows ADIOI_Build_client_req to do less work.
 1233  * If it called and there already exist some preprocessed memtype
 1234  * offset-length pairs, it will exit immediately if a limit has been
 1235  * reached or if will add on the old limites to reach the new
 1236  * limits. */
 1237 
 1238 int ADIOI_Build_client_pre_req(ADIO_File fd,
 1239                    int agg_rank, int agg_idx,
 1240                    view_state *my_mem_view_state_p,
 1241                    view_state *agg_file_view_state_p,
 1242                    ADIO_Offset max_pre_req_sz,
 1243                    int max_ol_ct)
 1244 {
 1245     ADIO_Offset act_reg_sz = 0, tmp_reg_sz = 0;
 1246     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
 1247     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
 1248     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
 1249     int i, agg_next_off_idx = -1;
 1250 
 1251     ADIO_Offset cur_sz = 0, max_sz = 0, agg_mem_next_off = -1;
 1252     ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
 1253     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
 1254     MPI_Datatype *fr_type_arr = fd->file_realm_types;
 1255     MPI_Aint *tmp_disp_arr = NULL;
 1256     int *tmp_blk_arr = NULL, exit_loop = -1;
 1257     flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
 1258 #ifdef DTYPE_SKIP
 1259     int skip_type_ct;
 1260 #endif
 1261     if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
 1262     {
 1263         fprintf(stderr, "ADIOI_Build_client_pre_req: Invalid agg_idx %d\n",
 1264         agg_idx);
 1265         return -1;
 1266     }
 1267 
 1268     if (agg_file_view_state_p->cur_state.cur_sz == 
 1269     agg_file_view_state_p->sz || max_pre_req_sz <= 0 ||
 1270     max_ol_ct <= 0)
 1271     {
 1272 #ifdef DEBUG1
 1273     fprintf(stderr, 
 1274         "ADIOI_Build_client_pre_req: Nothing to preprocess\n");
 1275 #endif
 1276     return 0;
 1277     }
 1278 
 1279     /* The new limits have already been surpassed by what already
 1280      * exists.  Otherwise we will use the next restrictions */
 1281     if ((my_mem_view_state_p->pre_sz >= max_pre_req_sz) ||
 1282     (my_mem_view_state_p->pre_ol_ct >= max_ol_ct))
 1283     {
 1284 #ifdef DEBUG1
 1285     fprintf(stderr, 
 1286         "ADIOI_Build_client_pre_req:  Old values surpass new "
 1287         "pre_req values\n");
 1288 #endif
 1289     return 0;
 1290     }
 1291     
 1292     /* General idea is to first advance the filetype to the file realm
 1293      * and then the memtype to the filetype.  The memtype is advanced
 1294      * further by peeking at the filetype and then the filetype is
 1295      * advanced. */
 1296     for (i = 0; i < MAX_OFF_TYPE; i++)
 1297     {
 1298     switch(i)
 1299     {
 1300         case TEMP_OFF:
 1301         tmp_mem_state_p  = &(my_mem_view_state_p->tmp_state);
 1302         tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
 1303         break;
 1304         case REAL_OFF:
 1305         tmp_mem_state_p  = &(my_mem_view_state_p->cur_state);
 1306         tmp_file_state_p = &(agg_file_view_state_p->cur_state);
 1307         break;
 1308         default:
 1309         fprintf(stderr, "ADIOI_Build_client_pre_req: "
 1310             "Invalid off type %d\n", i);
 1311     }
 1312 
 1313     if (i == TEMP_OFF && my_mem_view_state_p->pre_sz > 0)
 1314     {
 1315         cur_sz = my_mem_view_state_p->pre_sz;
 1316             agg_ol_ct = my_mem_view_state_p->pre_ol_ct;
 1317         /* Save the old arrays */
 1318         tmp_disp_arr = my_mem_view_state_p->pre_disp_arr;
 1319         tmp_blk_arr  = my_mem_view_state_p->pre_blk_arr;
 1320         my_mem_view_state_p->pre_disp_arr = NULL;
 1321         my_mem_view_state_p->pre_blk_arr  = NULL;
 1322             agg_mem_next_off =
 1323         tmp_disp_arr[agg_ol_ct - 1] + tmp_blk_arr[agg_ol_ct - 1];
 1324     }
 1325     else if (i == REAL_OFF && my_mem_view_state_p->pre_sz > 0)
 1326     {
 1327         cur_sz = my_mem_view_state_p->pre_sz;
 1328         agg_ol_cur_ct = my_mem_view_state_p->pre_ol_ct;
 1329         
 1330         /* Copy the old data to the new data, freeing the old
 1331          * arrays */
 1332         memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr, 
 1333            my_mem_view_state_p->pre_ol_ct * sizeof(MPI_Aint));
 1334         memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr, 
 1335            my_mem_view_state_p->pre_ol_ct * sizeof(int));
 1336 
 1337         ADIOI_Free(tmp_disp_arr);
 1338         ADIOI_Free(tmp_blk_arr);
 1339 
 1340         agg_mem_next_off = 
 1341         my_mem_view_state_p->pre_disp_arr[agg_ol_cur_ct - 1] +
 1342         my_mem_view_state_p->pre_blk_arr[agg_ol_cur_ct - 1];
 1343     }
 1344     else
 1345     {
 1346         cur_sz = 0;
 1347     }
 1348     
 1349     /* Max_pre_req_sz may be larger than the amount of data left
 1350      * to preprocess */
 1351     if (max_pre_req_sz - cur_sz > 
 1352         agg_file_view_state_p->sz - tmp_file_state_p->cur_sz)
 1353     {
 1354         max_sz = cur_sz +
 1355         agg_file_view_state_p->sz - tmp_file_state_p->cur_sz;
 1356     }
 1357     else
 1358         max_sz = max_pre_req_sz;
 1359     
 1360     assert(cur_sz != max_sz);
 1361 #ifdef DEBUG1
 1362     fprintf(stderr, 
 1363         "ADIOI_Build_client_pre_req: (cur_sz=%Ld,agg_ol_ct=%d,"
 1364         "agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n", 
 1365         cur_sz, agg_ol_ct, agg_mem_next_off, max_sz, max_ol_ct);
 1366 #endif
 1367     while (cur_sz < max_sz)
 1368     {
 1369         find_next_off(fd, agg_file_view_state_p,
 1370               fr_st_off_arr[agg_rank],
 1371               &(fr_type_arr[agg_rank]),
 1372               i,
 1373               &cur_off,
 1374               &cur_reg_max_len);
 1375         
 1376         /* find_next_off may show that the file_view_state is done
 1377          * even if cur_sz != max_sz since find_next_off may
 1378          * advance the file_view_state to the end here and realize
 1379          * that it is done. */
 1380         if (cur_off == -1)
 1381         break;
 1382 
 1383         assert(cur_off != -1);
 1384         
 1385         /* Before translating the file regions to memory regions,
 1386          * we first must advance to the proper point in the
 1387          * mem_view_state for this aggregator to match the
 1388          * file_view_state. */
 1389         while (tmp_file_state_p->cur_sz != tmp_mem_state_p->cur_sz)
 1390         {
 1391 #ifdef DTYPE_SKIP
 1392         if (my_mem_view_state_p->flat_type_p->count > 1) {
 1393             /* let's see if we can skip whole memory datatypes */
 1394             skip_type_ct =
 1395             (tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz) /
 1396             my_mem_view_state_p->type_sz;
 1397             if (skip_type_ct > 0) {
 1398             tmp_mem_state_p->cur_sz +=
 1399                 skip_type_ct * my_mem_view_state_p->type_sz;
 1400             tmp_mem_state_p->abs_off +=
 1401                 skip_type_ct * my_mem_view_state_p->ext;
 1402             if (tmp_mem_state_p->cur_sz ==
 1403                 tmp_file_state_p->cur_sz)
 1404                 break;
 1405             }
 1406         }
 1407 #endif
 1408         view_state_add_region(
 1409             tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz,
 1410             my_mem_view_state_p,
 1411             &fill_st_reg,
 1412             &fill_reg_sz, i);
 1413         }
 1414 
 1415         /* Now that the filetype and memtype are advanced to the
 1416          * same position, add memtype ol-pairs while we have not
 1417          * overstepped the min(end of the current piece in the
 1418          * file view, end of the file realm, data left in
 1419          * max_sz) */
 1420         
 1421         if (cur_reg_max_len >  
 1422         view_state_get_next_len(agg_file_view_state_p, i))
 1423         cur_reg_max_len =  
 1424             view_state_get_next_len(agg_file_view_state_p, i);
 1425 
 1426         if (cur_reg_max_len > max_sz - cur_sz)
 1427         cur_reg_max_len = max_sz - cur_sz;
 1428 
 1429         assert(cur_reg_max_len > 0);
 1430 
 1431         /* Add memtype ol pairs while we have not passed
 1432          * cur_reg_max_len or the max number of ol pairs
 1433          * allowed */
 1434         act_reg_sz = 0;
 1435         exit_loop = 0;
 1436         while ((act_reg_sz < cur_reg_max_len) && 
 1437            (exit_loop == 0))
 1438         {
 1439         view_state_add_region(
 1440             cur_reg_max_len - act_reg_sz,
 1441             my_mem_view_state_p,
 1442             &agg_mem_st_reg, &agg_mem_act_reg_sz, 
 1443             i);
 1444         act_reg_sz += agg_mem_act_reg_sz;
 1445         
 1446 #ifdef DEBUG2
 1447         fprintf(stderr, "ADIOI_Build_client_pre_req: %s Mem region"
 1448             "(proc=%d,off=%Ld,sz=%Ld)\n",
 1449             off_type_name[i], agg_rank, agg_mem_st_reg, 
 1450             agg_mem_act_reg_sz);
 1451 #endif
 1452         switch(i)
 1453         {
 1454             case TEMP_OFF:
 1455             /* Increment the ol list count if the next
 1456              * region is not adjacent to the previous
 1457              * region. */
 1458             if (agg_mem_next_off != agg_mem_st_reg)
 1459             {
 1460                 agg_ol_ct++;
 1461                 if (agg_ol_ct == max_ol_ct)
 1462                 exit_loop = 1;
 1463             }
 1464             agg_mem_next_off = 
 1465                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1466             break;
 1467             case REAL_OFF:
 1468             /* Set the ol list for the memtype that
 1469              * will map to our aggregator, coaslescing
 1470              * if possible. */
 1471             agg_next_off_idx = agg_ol_cur_ct;
 1472             if (agg_mem_next_off != agg_mem_st_reg)
 1473             {
 1474                 my_mem_view_state_p->
 1475                 pre_disp_arr[agg_next_off_idx] = 
 1476                 agg_mem_st_reg;
 1477                 my_mem_view_state_p->
 1478                 pre_blk_arr[agg_next_off_idx] = 
 1479                 agg_mem_act_reg_sz;
 1480                 agg_ol_cur_ct++;
 1481                 if (agg_ol_cur_ct == agg_ol_ct)
 1482                 exit_loop = 1;
 1483             }
 1484             else
 1485             {
 1486                 my_mem_view_state_p->
 1487                 pre_blk_arr[agg_next_off_idx - 1]
 1488                 += agg_mem_act_reg_sz;
 1489             }
 1490             agg_mem_next_off = 
 1491                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1492             break;
 1493             default:
 1494             fprintf(stderr, "ADIOI_Build_client_pre_req: "
 1495                 "Impossible type\n");
 1496         }
 1497         }
 1498 
 1499         /* Advance the filetype flatten state appropriately to
 1500          * match the data advanced in the memtype flatten state.
 1501          * Should only take at most a single view_state_add_region
 1502          * call since the memtype cannot proceed beyond the end of
 1503          * a contig piece in the file type. */
 1504         view_state_add_region(act_reg_sz - tmp_reg_sz,
 1505                   agg_file_view_state_p,
 1506                   &fill_st_reg, &fill_reg_sz, i);
 1507 #ifdef DEBUG2
 1508         fprintf(stderr, "ADIOI_Build_client_pre_req: %s File region"
 1509             " (proc=%d,off=%Ld,sz=%Ld)\n",
 1510             off_type_name[i], agg_rank, fill_st_reg, fill_reg_sz);
 1511 #endif
 1512         if (fill_reg_sz != act_reg_sz)
 1513         {
 1514         fprintf(stderr, "ADIOI_Build_client_pre_req: "
 1515             "view_state_add_region failed to match the memtype\n");
 1516         return -1;
 1517         }
 1518         
 1519         cur_sz += act_reg_sz;
 1520     }
 1521     
 1522     /* On the first pass, allocate the memory structures for
 1523      * storing the preprocessed information */
 1524     if (i == TEMP_OFF)
 1525     {
 1526         if ((my_mem_view_state_p->pre_disp_arr = (MPI_Aint *)
 1527          ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
 1528         {
 1529         fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
 1530                         "pre_disp_arr of size %ld failed\n",
 1531                         (long int)agg_ol_ct * sizeof(MPI_Aint));
 1532                 return -1;
 1533         }
 1534         if ((my_mem_view_state_p->pre_blk_arr = (int *) 
 1535          ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
 1536         {
 1537         ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
 1538         fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
 1539             "agg_blk_arr of size %ld failed\n",
 1540             (long int)agg_ol_ct * sizeof(int));
 1541         return -1;
 1542         }
 1543     }
 1544     }
 1545 
 1546     my_mem_view_state_p->pre_sz = cur_sz;
 1547     my_mem_view_state_p->pre_ol_ct = agg_ol_ct;
 1548 
 1549 #ifdef DEBUG1
 1550     fprintf(stderr, "ADIOI_Build_client_pre_req:(agg=%d,cur_ol_count=%d"
 1551         "=ol_count=%d)\n",
 1552         agg_rank, my_mem_view_state_p->pre_ol_ct, agg_ol_ct);
 1553 #endif
 1554 
 1555 #ifdef DEBUG2
 1556     if (agg_ol_ct > 0)
 1557     {
 1558     fprintf(stderr, "ADIOI_Build_client_pre_req: agg=%d,pre_sz=%Ld "
 1559         "(off,len) = \n", agg_rank, my_mem_view_state_p->pre_sz);
 1560     for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
 1561     {
 1562         fprintf(stderr, "[%d](%d,%d) ", i, 
 1563             my_mem_view_state_p->pre_disp_arr[i], 
 1564             my_mem_view_state_p->pre_blk_arr[i]);
 1565         if (i % 5 == 0 && i != 0)
 1566         fprintf(stderr, "\n");
 1567     }
 1568     fprintf(stderr, "\n");
 1569     }
 1570 #endif
 1571 
 1572     return 0;
 1573 }
 1574 
 1575 /* process_pre_req() allows ADIOI_Build_client_req to use the pre_req
 1576  * information. */
 1577 
 1578 static int process_pre_req(ADIO_File fd,
 1579                            int agg_rank,
 1580                            int agg_idx,
 1581                            view_state *my_mem_view_state_p,
 1582                            view_state *agg_file_view_state_p,
 1583                            ADIO_Offset agg_comm_sz,
 1584                int off_type,
 1585                MPI_Aint *agg_disp_arr,
 1586                int *agg_blk_arr,
 1587                ADIO_Offset *agg_comm_pre_sz_p,
 1588                ADIO_Offset *agg_comm_cur_sz_p,
 1589                ADIO_Offset *agg_comm_sz_p,
 1590                int *agg_ol_cur_ct_p,
 1591                int *agg_ol_ct_p,
 1592                ADIO_Offset *agg_mem_next_off_p)
 1593 {
 1594     int i, has_partial = 0;
 1595     MPI_Aint partial_disp = 0;
 1596     int partial_len = 0;
 1597     ADIO_Offset tmp_agg_comm_pre_sz = 0;
 1598 
 1599     assert (my_mem_view_state_p->pre_sz > 0);
 1600     switch(off_type)
 1601     {
 1602     case TEMP_OFF:
 1603         /* Use only some of the precalculated data */
 1604         if (my_mem_view_state_p->pre_sz > *agg_comm_sz_p)
 1605         {
 1606         for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
 1607         {
 1608             if ((my_mem_view_state_p->pre_blk_arr[i] + 
 1609              *agg_comm_pre_sz_p) > *agg_comm_sz_p)
 1610             {
 1611             has_partial = 1;
 1612             partial_len = *agg_comm_sz_p - *agg_comm_pre_sz_p;
 1613             *agg_comm_pre_sz_p = *agg_comm_sz_p;
 1614             i++;
 1615             break;
 1616             }
 1617             else if ((my_mem_view_state_p->pre_blk_arr[i] +
 1618                   *agg_comm_pre_sz_p) == *agg_comm_sz_p)
 1619             {
 1620             *agg_comm_pre_sz_p += 
 1621                 my_mem_view_state_p->pre_blk_arr[i];
 1622             i++;
 1623             break;
 1624             }
 1625             else
 1626             *agg_comm_pre_sz_p += 
 1627                 my_mem_view_state_p->pre_blk_arr[i];
 1628         }
 1629         
 1630         if (has_partial == 1)
 1631         {
 1632             *agg_mem_next_off_p = 
 1633             my_mem_view_state_p->pre_disp_arr[i - 1] + 
 1634             partial_len;
 1635         }
 1636         else
 1637         {
 1638             *agg_mem_next_off_p = 
 1639             my_mem_view_state_p->pre_disp_arr[i - 1] + 
 1640             my_mem_view_state_p->pre_blk_arr[i - 1];
 1641         }
 1642         
 1643         *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
 1644         *agg_ol_ct_p = i;
 1645         
 1646         }
 1647         else /* Use all the precalculated data */
 1648         {
 1649         *agg_comm_pre_sz_p = my_mem_view_state_p->pre_sz;
 1650         *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
 1651         *agg_ol_ct_p = my_mem_view_state_p->pre_ol_ct;
 1652         *agg_mem_next_off_p = 
 1653             my_mem_view_state_p->pre_disp_arr[
 1654             my_mem_view_state_p->pre_ol_ct - 1] +
 1655             my_mem_view_state_p->pre_blk_arr[
 1656             my_mem_view_state_p->pre_ol_ct - 1];
 1657         }
 1658 #ifdef DEBUG1
 1659         fprintf(stderr, "process_pre_req: TEMP_OFF "
 1660             "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d\n",
 1661             *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p);
 1662 #endif
 1663         assert(*agg_comm_cur_sz_p <= *agg_comm_sz_p);
 1664         break;
 1665     case REAL_OFF:
 1666         /* Set the ol list for the memtype that will map to our
 1667          * aggregator, coaslescing if possible. */
 1668         for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
 1669         {
 1670         agg_disp_arr[i] = my_mem_view_state_p->pre_disp_arr[i];
 1671         agg_blk_arr[i]  = my_mem_view_state_p->pre_blk_arr[i];
 1672         
 1673         if ((my_mem_view_state_p->pre_blk_arr[i] + 
 1674              tmp_agg_comm_pre_sz) > *agg_comm_pre_sz_p)
 1675         {
 1676             has_partial = 1;
 1677             agg_blk_arr[i] = *agg_comm_pre_sz_p - tmp_agg_comm_pre_sz;
 1678             tmp_agg_comm_pre_sz = *agg_comm_pre_sz_p;
 1679             partial_disp = my_mem_view_state_p->pre_disp_arr[i] +
 1680             agg_blk_arr[i];
 1681             partial_len  = my_mem_view_state_p->pre_blk_arr[i] - 
 1682             agg_blk_arr[i];
 1683             i++;
 1684             break;
 1685         }
 1686         else if ((my_mem_view_state_p->pre_blk_arr[i] +
 1687               tmp_agg_comm_pre_sz) == *agg_comm_pre_sz_p)
 1688         {
 1689             tmp_agg_comm_pre_sz +=  
 1690             my_mem_view_state_p->pre_blk_arr[i];
 1691             i++;
 1692             break;
 1693         }
 1694         else
 1695             tmp_agg_comm_pre_sz +=
 1696             my_mem_view_state_p->pre_blk_arr[i];
 1697         }
 1698         *agg_mem_next_off_p = agg_disp_arr[i - 1] + agg_blk_arr[i - 1];
 1699         *agg_ol_cur_ct_p = i;
 1700         *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
 1701         
 1702         /* Clean up the ol pairs we used */     
 1703         if ((i < my_mem_view_state_p->pre_ol_ct) || (has_partial == 1))
 1704         {
 1705         int remain_ol_ct = 
 1706             my_mem_view_state_p->pre_ol_ct - i + has_partial;
 1707         MPI_Aint *new_pre_disp_arr = NULL;
 1708         int *new_pre_blk_arr = NULL;
 1709         
 1710         if ((new_pre_disp_arr = (MPI_Aint *)
 1711              ADIOI_Malloc(remain_ol_ct * sizeof(MPI_Aint))) == NULL)
 1712         {
 1713             fprintf(stderr, "process_pre_req: malloc "
 1714                 "new_pre_disp_arr failed\n");
 1715             return -1;
 1716         }
 1717         if ((new_pre_blk_arr = (int *)
 1718              ADIOI_Malloc(remain_ol_ct * sizeof(int))) == NULL)
 1719                 {
 1720                     fprintf(stderr, "process_pre_req: malloc "
 1721                             "new_pre_blk_arr failed\n");
 1722                     return -1;
 1723                 }
 1724         
 1725         memcpy(new_pre_disp_arr, 
 1726                &(my_mem_view_state_p->pre_disp_arr[i - has_partial]),
 1727                remain_ol_ct * sizeof(MPI_Aint));
 1728         memcpy(new_pre_blk_arr, 
 1729                &(my_mem_view_state_p->pre_blk_arr[i - has_partial]),
 1730                remain_ol_ct * sizeof(int));
 1731         
 1732         /* Set the partial len of the first piece */
 1733         if (has_partial == 1)
 1734         {
 1735             /* new_pre_disp_arr[remain_ol_ct - 1] = partial_disp;
 1736                new_pre_blk_arr[remain_ol_ct - 1]  = partial_len; */
 1737             new_pre_disp_arr[0] = partial_disp;
 1738             new_pre_blk_arr[0]  = partial_len;
 1739         }
 1740         
 1741         ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
 1742         ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
 1743         
 1744         my_mem_view_state_p->pre_disp_arr = new_pre_disp_arr;
 1745         my_mem_view_state_p->pre_blk_arr  = new_pre_blk_arr;
 1746         my_mem_view_state_p->pre_ol_ct = remain_ol_ct;
 1747         my_mem_view_state_p->pre_sz -= *agg_comm_pre_sz_p;
 1748         }
 1749         else /* Used all the precalculated ol pairs */
 1750         {
 1751         ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
 1752         ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
 1753         
 1754         my_mem_view_state_p->pre_disp_arr = NULL;
 1755         my_mem_view_state_p->pre_blk_arr = NULL;
 1756         my_mem_view_state_p->pre_ol_ct = 0;
 1757         my_mem_view_state_p->pre_sz = 0;
 1758         }
 1759 #ifdef DEBUG1
 1760         fprintf(stderr, "process_pre_req: REAL_OFF "
 1761             "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d,"
 1762             "agg_ol_cur_ct=%d\n",
 1763             *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p, 
 1764             *agg_ol_cur_ct_p);
 1765 #endif
 1766         break;
 1767     default:
 1768         fprintf(stderr, "process_pre_req: Invalid off_type %d\n",
 1769             off_type);
 1770     }
 1771     return 0;
 1772 }
 1773 
 1774 /* ADIOI_Build_client_req() creates a memory datatype to transfer data
 1775  * to/from a particular aggregator. */
 1776 
 1777 int ADIOI_Build_client_req(ADIO_File fd,
 1778                int agg_rank,
 1779                int agg_idx,
 1780                view_state *my_mem_view_state_p,
 1781                view_state *agg_file_view_state_p,
 1782                ADIO_Offset agg_comm_sz,
 1783                MPI_Datatype *agg_comm_dtype_p)
 1784 {
 1785     MPI_Aint *agg_disp_arr = NULL;
 1786     int *agg_blk_arr = NULL;
 1787     ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
 1788     ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
 1789     ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
 1790     int agg_ol_ct = 0, agg_ol_cur_ct = 0;
 1791     int i = 0, agg_next_off_idx = -1;
 1792     ADIO_Offset agg_mem_next_off = 0, agg_comm_cur_sz = 0, agg_comm_pre_sz = 0;
 1793     ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
 1794     MPI_Datatype *fr_type_arr = fd->file_realm_types;
 1795     flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
 1796 #ifdef DTYPE_SKIP
 1797     int skip_type_ct;
 1798 #endif
 1799 
 1800     if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
 1801     {
 1802 #ifdef DEBUG1
 1803     fprintf(stderr, "ADIOI_Build_client_req: agg_rank %d does not map "
 1804         "to a valid node in cb_node\n", agg_rank);
 1805 #endif
 1806     return 0;
 1807     }
 1808 
 1809 #ifdef AGGREGATION_PROFILE
 1810     MPE_Log_event (5018, 0, NULL);
 1811 #endif
 1812 
 1813 #ifdef DEBUG1
 1814     fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,size_req=%Ld)\n",
 1815         agg_idx, agg_comm_sz);
 1816 #endif
 1817     
 1818     /* On the first pass see how many offset-length pairs are
 1819      * necessary for each aggregator.  Then allocate the correct
 1820      * amount of offset-length pairs for handling each aggregator's
 1821      * particular data size.  On the last pass, we actually create the
 1822      * offset-length pairs. */
 1823     for (i = 0; i < MAX_OFF_TYPE; i++)
 1824     {
 1825     switch(i)
 1826     {
 1827         case TEMP_OFF:
 1828         tmp_mem_state_p  = &(my_mem_view_state_p->tmp_state);
 1829         tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
 1830         break;
 1831         case REAL_OFF:
 1832         tmp_mem_state_p  = &(my_mem_view_state_p->cur_state);
 1833         tmp_file_state_p = &(agg_file_view_state_p->cur_state);
 1834         break;
 1835         default:
 1836         fprintf(stderr, "ADIOI_Build_client_pre_req: "
 1837             "Invalid off type %d\n", i);
 1838     }
 1839 
 1840     agg_comm_cur_sz = 0;
 1841     agg_mem_next_off = -1;
 1842 
 1843     /* First try to preprocess anything we can */
 1844     if (my_mem_view_state_p->pre_sz > 0)
 1845     {
 1846         process_pre_req(fd,
 1847                 agg_rank,
 1848                 agg_idx,
 1849                 my_mem_view_state_p,
 1850                 agg_file_view_state_p,
 1851                 agg_comm_sz,
 1852                 i,
 1853                 agg_disp_arr,
 1854                 agg_blk_arr,
 1855                 &agg_comm_pre_sz,
 1856                 &agg_comm_cur_sz,
 1857                 &agg_comm_sz,
 1858                 &agg_ol_cur_ct,
 1859                 &agg_ol_ct,
 1860                 &agg_mem_next_off);
 1861     }
 1862     
 1863     while (agg_comm_cur_sz < agg_comm_sz)
 1864     {   
 1865         find_next_off(fd, agg_file_view_state_p,
 1866               fr_st_off_arr[agg_idx],
 1867               &(fr_type_arr[agg_idx]),
 1868               i,
 1869               &cur_off,
 1870               &cur_reg_max_len);
 1871         
 1872         assert(cur_off != -1);
 1873         
 1874         /* Add up to the end of the file realm or as many bytes
 1875          * are left for this particular aggregator in the client's
 1876          * filetype */
 1877         if (cur_reg_max_len > (agg_comm_sz - agg_comm_cur_sz))
 1878         {
 1879         cur_reg_max_len = agg_comm_sz - agg_comm_cur_sz;
 1880         }
 1881         assert(cur_reg_max_len > 0);
 1882     
 1883         view_state_add_region(
 1884         cur_reg_max_len,
 1885         agg_file_view_state_p,
 1886         &st_reg, &act_reg_sz, i);
 1887         
 1888 #ifdef DEBUG2
 1889         fprintf(stderr, "ADIOI_Build_client_req: %s File region"
 1890             " (proc=%d,off=%Ld,sz=%Ld)\n",
 1891             off_type_name[i], agg_rank, cur_off, act_reg_sz);
 1892 #endif
 1893         
 1894         /* Before translating the file regions to memory regions,
 1895          * we first must advance to the proper point in the
 1896          * mem_view_state for this aggregator to match the
 1897          * file_view_state. */
 1898         
 1899         assert(tmp_file_state_p->cur_sz - act_reg_sz >= 
 1900            tmp_mem_state_p->cur_sz);
 1901         
 1902         while (tmp_file_state_p->cur_sz - act_reg_sz != 
 1903            tmp_mem_state_p->cur_sz)
 1904         {
 1905         ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
 1906 #ifdef DTYPE_SKIP
 1907         if (my_mem_view_state_p->flat_type_p->count > 1) {
 1908             /* let's see if we can skip whole memory datatypes */
 1909             skip_type_ct =
 1910             (tmp_file_state_p->cur_sz - act_reg_sz -
 1911              tmp_mem_state_p->cur_sz) /
 1912             my_mem_view_state_p->type_sz;
 1913             if (skip_type_ct > 0) {
 1914             tmp_mem_state_p->cur_sz +=
 1915                 skip_type_ct * my_mem_view_state_p->type_sz;
 1916             tmp_mem_state_p->abs_off +=
 1917                 skip_type_ct * my_mem_view_state_p->ext;
 1918             if ((tmp_mem_state_p->cur_sz - act_reg_sz) ==
 1919                 tmp_file_state_p->cur_sz)
 1920                 break;
 1921             }
 1922         }
 1923 #endif
 1924         view_state_add_region(
 1925             tmp_file_state_p->cur_sz - 
 1926             act_reg_sz - tmp_mem_state_p->cur_sz,
 1927             my_mem_view_state_p,
 1928             &fill_st_reg,
 1929             &fill_reg_sz, i);
 1930         }
 1931         
 1932         /* Based on how large the act_reg_sz is, first figure
 1933          * out how many memory offset-length pairs are
 1934          * necessary and then set the offset-length pairs. */
 1935         tmp_reg_sz = 0;
 1936         while (tmp_reg_sz != act_reg_sz)
 1937         {
 1938         view_state_add_region(
 1939             act_reg_sz - tmp_reg_sz,
 1940             my_mem_view_state_p,
 1941             &agg_mem_st_reg, &agg_mem_act_reg_sz, 
 1942             i);
 1943         tmp_reg_sz += agg_mem_act_reg_sz;
 1944         
 1945 #ifdef DEBUG2
 1946         fprintf(stderr, "ADIOI_Build_client_req: %s Mem region"
 1947             "(off=%Ld,sz=%Ld)\n",
 1948             off_type_name[i], agg_mem_st_reg, 
 1949             agg_mem_act_reg_sz);
 1950 #endif
 1951         agg_comm_cur_sz += agg_mem_act_reg_sz;
 1952         switch(i)
 1953         {
 1954             case TEMP_OFF:
 1955             /* Increment the ol list count if the next
 1956              * region is not adjacent to the previous
 1957              * region. */
 1958             if (agg_mem_next_off != agg_mem_st_reg)
 1959             {
 1960                 agg_ol_ct++;
 1961             }
 1962             agg_mem_next_off = 
 1963                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1964             break;
 1965             case REAL_OFF:
 1966             /* Set the ol list for the memtype that
 1967              * will map to our aggregator, coaslescing
 1968              * if possible. */
 1969             agg_next_off_idx = agg_ol_cur_ct;
 1970             if (agg_mem_next_off != agg_mem_st_reg)
 1971             {
 1972                 agg_disp_arr[agg_next_off_idx] = 
 1973                 agg_mem_st_reg;
 1974                 agg_blk_arr[agg_next_off_idx] = 
 1975                 agg_mem_act_reg_sz;
 1976                 agg_ol_cur_ct++;
 1977             }
 1978             else
 1979             {
 1980                 agg_blk_arr[agg_next_off_idx - 1]
 1981                 += agg_mem_act_reg_sz;
 1982             }
 1983             agg_mem_next_off = 
 1984                 agg_mem_st_reg + agg_mem_act_reg_sz;
 1985             break;
 1986             default:
 1987             fprintf(stderr, "ADIOI_Build_client_req: "
 1988                 "Impossible type\n");
 1989         }
 1990         }
 1991     }
 1992     
 1993     /* On the first pass, allocate the memory structures for
 1994      * creating the MPI_hindexed type. */
 1995     if (i == TEMP_OFF)
 1996     {       
 1997         /* Allocate offset-length pairs for creating hindexed
 1998          * MPI_Datatypes for each aggregator */
 1999         if ((agg_disp_arr = (MPI_Aint *) 
 2000          ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
 2001         {
 2002         fprintf(stderr, "ADIOI_Build_client_req: malloc "
 2003             "agg_disp_arr of size %ld failed\n",
 2004             (long int)agg_ol_ct * sizeof(MPI_Aint));
 2005         return -1;
 2006         }
 2007         if ((agg_blk_arr = (int *) 
 2008          ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
 2009         {
 2010         ADIOI_Free(agg_disp_arr);
 2011         fprintf(stderr, "ADIOI_Build_client_req: malloc "
 2012             "agg_blk_arr of size %ld failed\n",
 2013             (long int)agg_ol_ct * sizeof(int));
 2014         return -1;
 2015         }
 2016     }
 2017     }
 2018 
 2019     assert(agg_ol_ct == agg_ol_cur_ct);
 2020 #ifdef DEBUG1
 2021     fprintf(stderr, 
 2022         "ADIOI_Build_client_req:(agg=%d,cur_ol_count=%d=ol_count=%d)\n",
 2023         agg_rank, agg_ol_cur_ct, agg_ol_ct);
 2024 #endif
 2025 
 2026 #ifdef DEBUG2
 2027     if (agg_ol_ct > 0)
 2028     {
 2029     fprintf(stderr, "ADIOI_Build_client_req: p %d (off,len) = ", agg_rank);
 2030     for (i = 0; i < agg_ol_ct; i++)
 2031     {
 2032         fprintf(stderr, "[%d](%d,%d) ", i, 
 2033             agg_disp_arr[i], agg_blk_arr[i]);
 2034         if (i % 5 == 0 && i != 0)
 2035         fprintf(stderr, "\n");
 2036     }
 2037     fprintf(stderr, "\n");
 2038     }
 2039 #endif
 2040 #ifdef DEBUG1
 2041     fprintf(stderr, 
 2042         "ADIOI_Build_client_req:(agg=%d,pre_ol_count=%d)\n",
 2043         agg_idx, my_mem_view_state_p->pre_ol_ct);
 2044 #endif
 2045 
 2046 #ifdef DEBUG2
 2047     if (my_mem_view_state_p->pre_sz > 0)
 2048     {
 2049     fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ", 
 2050         agg_idx);
 2051     for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
 2052     {
 2053         fprintf(stderr, "[%d](%d,%d) ", i, 
 2054             my_mem_view_state_p->pre_disp_arr[i], 
 2055             my_mem_view_state_p->pre_blk_arr[i]);
 2056         if (i % 5 == 0 && i != 0)
 2057         fprintf(stderr, "\n");
 2058     }
 2059     fprintf(stderr, "\n");
 2060     }
 2061 #endif
 2062 
 2063     /* Create the aggregator MPI_Datatype */
 2064     if (agg_comm_sz > 0)
 2065     {
 2066     MPI_Type_create_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
 2067                      agg_comm_dtype_p);
 2068     MPI_Type_commit(agg_comm_dtype_p);
 2069     }
 2070     else
 2071     {
 2072     *agg_comm_dtype_p = MPI_BYTE;
 2073     }
 2074 
 2075     ADIOI_Free(agg_blk_arr);
 2076     ADIOI_Free(agg_disp_arr);
 2077 
 2078 #ifdef AGGREGATION_PROFILE
 2079     MPE_Log_event (5019, 0, NULL);
 2080 #endif    
 2081     return 0;
 2082 }
 2083 
 2084