"Fossies" - the Fresh Open Source Software Archive

Member "mvapich2-2.3.2/src/mpi/coll/red_scat_block_tuning.c" (8 Aug 2019, 13816 Bytes) of package /linux/misc/mvapich2-2.3.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "red_scat_block_tuning.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.3.1_vs_2.3.2.

    1 /* Copyright (c) 2001-2019, The Ohio State University. All rights
    2  * reserved.
    3  *
    4  * This file is part of the MVAPICH2 software package developed by the
    5  * team members of The Ohio State University's Network-Based Computing
    6  * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
    7  *
    8  * For detailed copyright and licensing information, please refer to the
    9  * copyright file COPYRIGHT in the top level MVAPICH2 directory.
   10  *
   11  */
   12 
   13 #include <regex.h>
   14 #include "red_scat_block_tuning.h"
   15 #include "mv2_arch_hca_detect.h"
   16 
   17 enum {
   18     RED_SCAT_BLOCK_RING = 1,
   19     RED_SCAT_BLOCK_RING_2LVL,
   20 };
   21 
/* Number of entries in mv2_red_scat_block_thresholds_table; 0 until one of
 * the setup functions in this file installs a table. */
int mv2_size_red_scat_block_tuning_table = 0;
/* Tuning table allocated with MPIU_Malloc by the setup functions in this
 * file and released by MV2_cleanup_red_scat_block_tuning_table(); NULL
 * until a table has been installed. */
mv2_red_scat_block_tuning_table *mv2_red_scat_block_thresholds_table = NULL;
   24 
   25 int MV2_set_red_scat_block_tuning_table(int heterogeneity, struct coll_info *colls_arch_hca)
   26 {
   27 #ifndef CHANNEL_PSM
   28     if (MV2_IS_ARCH_HCA_TYPE(MV2_get_arch_hca_type(),
   29         MV2_ARCH_INTEL_XEON_X5650_12, MV2_HCA_MLX_CX_EDR) && !heterogeneity){
   30         mv2_size_red_scat_block_tuning_table = 6;
   31         mv2_red_scat_block_thresholds_table = MPIU_Malloc(mv2_size_red_scat_block_tuning_table *
   32                                                   sizeof (mv2_red_scat_block_tuning_table));
   33         mv2_red_scat_block_tuning_table mv2_tmp_red_scat_block_thresholds_table[] = {
   34             {
   35                 12,
   36                 2,
   37                 {
   38                     {0, 65536, &MPIR_Reduce_scatter_block},
   39                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   40                 },
   41             },
   42             {
   43                 24,
   44                 2,
   45                 {
   46                     {0, 65536, &MPIR_Reduce_scatter_block},
   47                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   48                 },
   49             },
   50             {
   51                 48,
   52                 2,
   53                 {
   54                     {0, 65536, &MPIR_Reduce_scatter_block},
   55                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   56                 },
   57             },
   58             {
   59                 96,
   60                 2,
   61                 {
   62                     {0, 65536, &MPIR_Reduce_scatter_block},
   63                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   64                 },
   65             },
   66             {
   67                 192,
   68                 2,
   69                 {
   70                     {0, 65536, &MPIR_Reduce_scatter_block},
   71                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   72                 },
   73             },
   74             {
   75                 384,
   76                 2,
   77                 {
   78                     {0, 65536, &MPIR_Reduce_scatter_block},
   79                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   80                 },
   81             },
   82         }; 
   83         MPIU_Memcpy(mv2_red_scat_block_thresholds_table, mv2_tmp_red_scat_block_thresholds_table,
   84                   mv2_size_red_scat_block_tuning_table * sizeof (mv2_red_scat_block_tuning_table));
   85     } else if (MV2_IS_ARCH_HCA_TYPE(MV2_get_arch_hca_type(),
   86         MV2_ARCH_INTEL_XEON_E5_2680_16, MV2_HCA_MLX_CX_FDR) && !heterogeneity){
   87         mv2_size_red_scat_block_tuning_table = 6;
   88         mv2_red_scat_block_thresholds_table = MPIU_Malloc(mv2_size_red_scat_block_tuning_table *
   89                                                   sizeof (mv2_red_scat_block_tuning_table));
   90         mv2_red_scat_block_tuning_table mv2_tmp_red_scat_block_thresholds_table[] = {
   91             {
   92                 16,
   93                 2,
   94                 {
   95                     {0, 65536, &MPIR_Reduce_scatter_block},
   96                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
   97                 },
   98             },
   99             {
  100                 32,
  101                 2,
  102                 {
  103                     {0, 65536, &MPIR_Reduce_scatter_block},
  104                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  105                 },
  106             },
  107             {
  108                 64,
  109                 2,
  110                 {
  111                     {0, 65536, &MPIR_Reduce_scatter_block},
  112                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  113                 },
  114             },
  115             {
  116                 128,
  117                 2,
  118                 {
  119                     {0, 65536, &MPIR_Reduce_scatter_block},
  120                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  121                 },
  122             },
  123             {
  124                 256,
  125                 2,
  126                 {
  127                     {0, 65536, &MPIR_Reduce_scatter_block},
  128                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  129                 },
  130             },
  131             {
  132                 512,
  133                 2,
  134                 {
  135                     {0, 65536, &MPIR_Reduce_scatter_block},
  136                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  137                 },
  138             },
  139 
  140         }; 
  141         MPIU_Memcpy(mv2_red_scat_block_thresholds_table, mv2_tmp_red_scat_block_thresholds_table,
  142                   mv2_size_red_scat_block_tuning_table * sizeof (mv2_red_scat_block_tuning_table));
  143     }  else if (MV2_IS_ARCH_HCA_TYPE(MV2_get_arch_hca_type(),
  144         MV2_ARCH_AMD_OPTERON_6136_32, MV2_HCA_MLX_CX_QDR) && !heterogeneity){
  145         mv2_size_red_scat_block_tuning_table = 6;
  146         mv2_red_scat_block_thresholds_table = MPIU_Malloc(mv2_size_red_scat_block_tuning_table *
  147                                                   sizeof (mv2_red_scat_block_tuning_table));
  148         mv2_red_scat_block_tuning_table mv2_tmp_red_scat_block_thresholds_table[] = {
  149             {
  150                 32,
  151                 2,
  152                 {
  153                     {0, 65536, &MPIR_Reduce_scatter_block},
  154                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  155                 },
  156             },
  157             {
  158                 64,
  159                 2,
  160                 {
  161                     {0, 65536, &MPIR_Reduce_scatter_block},
  162                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  163                 },
  164             },
  165             {
  166                 128,
  167                 2,
  168                 {
  169                     {0, 65536, &MPIR_Reduce_scatter_block},
  170                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  171                 },
  172             },
  173             {
  174                 256,
  175                 2,
  176                 {
  177                     {0, 65536, &MPIR_Reduce_scatter_block},
  178                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  179                 },
  180             },
  181             {
  182                 512,
  183                 2,
  184                 {
  185                     {0, 65536, &MPIR_Reduce_scatter_block},
  186                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  187                 },
  188             },
  189             {
  190                 1024,
  191                 2,
  192                 {
  193                     {0, 65536, &MPIR_Reduce_scatter_block},
  194                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  195                 },
  196             },
  197         }; 
  198         MPIU_Memcpy(mv2_red_scat_block_thresholds_table, mv2_tmp_red_scat_block_thresholds_table,
  199                   mv2_size_red_scat_block_tuning_table * sizeof (mv2_red_scat_block_tuning_table));
  200     } else
  201 
  202 
  203 #endif /* !CHANNEL_PSM */
  204     {
  205         mv2_size_red_scat_block_tuning_table = 7;
  206         mv2_red_scat_block_thresholds_table = MPIU_Malloc(mv2_size_red_scat_block_tuning_table *
  207                                                   sizeof (mv2_red_scat_block_tuning_table));
  208         mv2_red_scat_block_tuning_table mv2_tmp_red_scat_block_thresholds_table[] = {
  209             {
  210                 8,
  211                 2,
  212                 {
  213                     {0, 65536, &MPIR_Reduce_scatter_block},
  214                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  215                 },
  216             },
  217             {
  218                 16,
  219                 2,
  220                 {
  221                     {0, 65536, &MPIR_Reduce_scatter_block},
  222                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  223                 },
  224             },
  225             {
  226                 32,
  227                 2,
  228                 {
  229                     {0, 65536, &MPIR_Reduce_scatter_block},
  230                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  231                 },
  232             },
  233             {
  234                 64,
  235                 2,
  236                 {
  237                     {0, 65536, &MPIR_Reduce_scatter_block},
  238                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  239                 },
  240             },
  241             {
  242                 128,
  243                 2,
  244                 {
  245                     {0, 65536, &MPIR_Reduce_scatter_block},
  246                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  247                 },
  248             },
  249             {
  250                 256,
  251                 2,
  252                 {
  253                     {0, 65536, &MPIR_Reduce_scatter_block},
  254                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  255                 },
  256             },
  257             {
  258                 512,
  259                 2,
  260                 {
  261                     {0, 65536, &MPIR_Reduce_scatter_block},
  262                     {65536, -1, &MPIR_Reduce_scatter_block_MV2},
  263                 },
  264             },
  265         };
  266         MPIU_Memcpy(mv2_red_scat_block_thresholds_table, mv2_tmp_red_scat_block_thresholds_table,
  267                   mv2_size_red_scat_block_tuning_table * sizeof (mv2_red_scat_block_tuning_table));
  268 
  269     }
  270     return 0;
  271 }
  272 
  273 void MV2_cleanup_red_scat_block_tuning_table()
  274 {
  275     if (mv2_red_scat_block_thresholds_table != NULL) {
  276         MPIU_Free(mv2_red_scat_block_thresholds_table);
  277     }
  278 
  279 }
  280 
  281 /* Return the number of separator inside a string */
  282 static int count_sep(char *string)
  283 {
  284     return *string == '\0' ? 0 : (count_sep(string + 1) + (*string == ','));
  285 }
  286 
  287 int MV2_internode_Red_scat_block_is_define(char *mv2_user_red_scat_block_inter)
  288 {
  289     int i = 0;
  290     int nb_element = count_sep(mv2_user_red_scat_block_inter) + 1;
  291 
  292     /* If one red_scat tuning table is already defined */
  293     if (mv2_red_scat_block_thresholds_table != NULL) {
  294         MPIU_Free(mv2_red_scat_block_thresholds_table);
  295     }
  296 
  297     mv2_red_scat_block_tuning_table mv2_tmp_red_scat_block_thresholds_table[1];
  298     mv2_size_red_scat_block_tuning_table = 1;
  299 
  300     /* We realloc the space for the new red_scat tuning table */
  301     mv2_red_scat_block_thresholds_table = MPIU_Malloc(mv2_size_red_scat_block_tuning_table *
  302                                              sizeof (mv2_red_scat_block_tuning_table));
  303 
  304     if (nb_element == 1) {
  305 
  306         mv2_tmp_red_scat_block_thresholds_table[0].numproc = 1;
  307         mv2_tmp_red_scat_block_thresholds_table[0].size_inter_table = 1;
  308         mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].min = 0;
  309         mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].max = -1;
  310     
  311         switch (atoi(mv2_user_red_scat_block_inter)) {
  312         case RED_SCAT_BLOCK_RING:
  313             mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  314                 &MPIR_Reduce_scatter_block_ring_MV2;
  315             break;
  316         case RED_SCAT_BLOCK_RING_2LVL:
  317             mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  318                 &MPIR_Reduce_scatter_block_ring_2lvl_MV2;
  319             break;
  320         default:
  321             mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  322                 &MPIR_Reduce_scatter_block_ring_MV2;
  323         }
  324         
  325     } else {
  326         char *dup, *p, *save_p;
  327         regmatch_t match[NMATCH];
  328         regex_t preg;
  329         const char *regexp = "([0-9]+):([0-9]+)-([0-9]+|\\+)";
  330 
  331         if (!(dup = MPIU_Strdup(mv2_user_red_scat_block_inter))) {
  332             fprintf(stderr, "failed to duplicate `%s'\n", mv2_user_red_scat_block_inter);
  333             return -1;
  334         }
  335 
  336         if (regcomp(&preg, regexp, REG_EXTENDED)) {
  337             fprintf(stderr, "failed to compile regexp `%s'\n", mv2_user_red_scat_block_inter);
  338             MPIU_Free(dup);
  339             return -1;
  340         }
  341 
  342         mv2_tmp_red_scat_block_thresholds_table[0].numproc = 1;
  343         mv2_tmp_red_scat_block_thresholds_table[0].size_inter_table = nb_element;
  344 
  345         i = 0;
  346         for (p = strtok_r(dup, ",", &save_p); p; p = strtok_r(NULL, ",", &save_p)) {
  347             if (regexec(&preg, p, NMATCH, match, 0)) {
  348                 fprintf(stderr, "failed to match on `%s'\n", p);
  349                 regfree(&preg);
  350                 MPIU_Free(dup);
  351                 return -1;
  352             }
  353             /* given () start at 1 */
  354             switch (atoi(p + match[1].rm_so)) {
  355                 case RED_SCAT_BLOCK_RING:
  356                     mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  357                         &MPIR_Reduce_scatter_block_ring_MV2;
  358                     break;
  359                 case RED_SCAT_BLOCK_RING_2LVL:
  360                     mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  361                         &MPIR_Reduce_scatter_block_ring_2lvl_MV2;
  362                     break;
  363                 default:
  364                     mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[0].MV2_Red_scat_block_function =
  365                         &MPIR_Reduce_scatter_block_ring_MV2;
  366             }
  367 
  368             mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[i].min = atoi(p +
  369                                                                          match[2].rm_so);
  370             if (p[match[3].rm_so] == '+') {
  371                 mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[i].max = -1;
  372             } else {
  373                 mv2_tmp_red_scat_block_thresholds_table[0].inter_leader[i].max =
  374                     atoi(p + match[3].rm_so);
  375                 }
  376             i++;
  377         }
  378         MPIU_Free(dup);
  379         regfree(&preg);
  380     }
  381     MPIU_Memcpy(mv2_red_scat_block_thresholds_table, mv2_tmp_red_scat_block_thresholds_table, sizeof
  382                 (mv2_red_scat_block_tuning_table));
  383     return 0;
  384 }