"Fossies" - the Fresh Open Source Software Archive

Member "mvapich2-2.3.2/src/mpid/ch3/channels/psm/src/psm_entry.c" (8 Aug 2019, 36401 Bytes) of package /linux/misc/mvapich2-2.3.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "psm_entry.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.3.1_vs_2.3.2.

    1 /* Copyright (c) 2001-2019, The Ohio State University. All rights
    2  * reserved.
    3  * Copyright (c) 2016, Intel, Inc. All rights reserved.
    4  *
    5  * This file is part of the MVAPICH2 software package developed by the
    6  * team members of The Ohio State University's Network-Based Computing
    7  * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
    8  *
    9  * For detailed copyright and licensing information, please refer to the
   10  * copyright file COPYRIGHT in the top level MVAPICH2 directory.
   11  *
   12  */
   13 #include <unistd.h>
   14 #include "psmpriv.h"
   15 #include "psm_vbuf.h"
   16 #include <dirent.h>
   17 #include "coll_shmem.h"
   18 #include "debug_utils.h"
   19 #include "mv2_utils.h"
   20 #include <mv2_arch_hca_detect.h>
   21 #include <upmi.h>
   22 
   23 extern int g_mv2_num_cpus;
   24 
   25 volatile unsigned int MPIDI_CH3I_progress_completion_count = 0; //ODOT: what is this ?
   26 volatile int MPIDI_CH3I_progress_blocked = FALSE;
   27 volatile int MPIDI_CH3I_progress_wakeup_signalled = FALSE;
   28 
   29 /* Globals */
   30 /* psm device instance */
   31 struct psmdev_info_t    psmdev_cw;
   32 uint32_t                ipath_rndv_thresh = DEFAULT_IPATH_RNDV_THRESH;
   33 uint32_t                mv2_hfi_rndv_thresh = DEFAULT_PSM_HFI_RNDV_THRESH;
   34 uint32_t                mv2_shm_rndv_thresh = DEFAULT_PSM_SHM_RNDV_THRESH;
   35 uint8_t                 ipath_debug_enable;
   36 uint32_t                ipath_dump_frequency;
   37 uint8_t                 ipath_enable_func_lock;
   38 uint32_t                ipath_progress_yield_count;
   39 size_t                  ipath_max_transfer_size = DEFAULT_IPATH_MAX_TRANSFER_SIZE;
   40 int g_mv2_show_env_info = 0;
   41 int mv2_psm_bcast_uuid  = 0;
   42 int mv2_use_pmi_ibarrier = 0;
   43 int mv2_use_on_demand_cm = 0;
   44 int mv2_homogeneous_cluster = 0;
   45 int mv2_on_demand_threshold = MPIDI_PSM_DEFAULT_ON_DEMAND_THRESHOLD;
   46 mv2_arch_hca_type g_mv2_arch_hca_type = 0;
   47 
   48 /* Number of retry attempts if psm_ep_open fails */
   49 static int mv2_psm_ep_open_retry_count = 10;
   50 /* Number of seconds to sleep between psm_ep_open retries */
   51 static int mv2_psm_ep_open_retry_secs  = 10;
   52 
   53 int mv2_pmi_max_keylen;
   54 int mv2_pmi_max_vallen;
   55 char *mv2_pmi_key;
   56 char *mv2_pmi_val;
   57 
   58 static char    scratch[WRBUFSZ];
   59 static char             *kvsid;
   60 static PSM_UUID_T       psm_uuid;
   61 
   62 static void psm_read_user_params(void);
   63 static int  psm_bcast_uuid(int pg_size, int pg_rank);
   64 static int  psm_create_uuid(void);
   65 static int  psm_start_epid_exchange(PSM_EPID_T myid, int pg_size, int pg_rank);
   66 static void psm_other_init(MPIDI_PG_t *pg);
   67 static void psm_preinit(MPIDI_PG_t *pg);
   68 static int  decode(unsigned s_len, char *src, unsigned d_len, char *dst);
   69 static int  encode(unsigned s_len, char *src, unsigned d_len, char *dst);
   70 static int psm_connect_alltoall(PSM_EPADDR_T *addrs, int pg_size, int pg_rank);
   71 static int psm_detect_heterogeneity(mv2_arch_hca_type myarch, int pg_size, int pg_rank);
   72 static unsigned int psm_hash_str(char *str);
   73 
   74 extern void MPIDI_CH3I_SHMEM_COLL_Cleanup();
   75 
   76 /* ensure that all procs have completed their call to psm_mq_init */
   77 static int psm_mq_init_barrier(PSM_MQ_T mq, int rank, int ranks, PSM_EPADDR_T* addrs)
   78 {
   79     int tmp_rc;
   80     int rc = PSM_OK;
   81 
   82     /* implement barrier dissemination algorithm */
   83     int dist = 1;
   84     while (dist < ranks) {
   85         /* compute rank of source for this phase */
   86         int src = rank - dist;
   87         if (src < 0) {
   88             src += ranks;
   89         }
   90 
   91         /* compute rank of destination for this phase */
   92         int dst = rank + dist;
   93         if (dst >= ranks) {
   94             dst -= ranks;
   95         }
   96 
   97         /* post non-blocking receive for message with tag equal to source rank plus one */
   98         #if PSM_VERNO >= PSM_2_1_VERSION
   99             psm2_mq_tag_t rtag, rtagsel;
  100             rtagsel.tag0 = MQ_TAGSEL_ALL;
  101             rtagsel.tag1 = MQ_TAGSEL_ALL;
  102             rtagsel.tag2 = MQ_TAGSEL_ALL;
  103 
  104         #else
  105             uint64_t rtag;
  106             uint64_t rtagsel = MQ_TAGSEL_ALL;
  107         #endif
  108 
  109         MAKE_PSM_SELECTOR(rtag, 0, 0, (src+1));
  110 
  111         PSM_MQ_REQ_T request;
  112 
  113         tmp_rc = PSM_IRECV(mq, rtag, rtagsel, MQ_FLAGS_NONE, NULL, 0, NULL, &request);
  114         if (tmp_rc != PSM_OK) {
  115             rc = tmp_rc;
  116         }
  117 
  118         /* post blocking send to destination, set tag to be our rank plus one */
  119         #if PSM_VERNO >= PSM_2_1_VERSION
  120             psm2_mq_tag_t stag;
  121         #else
  122             uint64_t stag;
  123         #endif
  124 
  125         MAKE_PSM_SELECTOR(stag, 0, 0, (rank+1));
  126 
  127         tmp_rc = PSM_SEND(mq, addrs[dst], MQ_FLAGS_NONE, stag, NULL, 0);
  128         if (tmp_rc != PSM_OK) {
  129             rc = tmp_rc;
  130         }
  131 
  132         /* wait on non-blocking receive to complete */
  133         tmp_rc = PSM_WAIT(&request, NULL);
  134 
  135         if (tmp_rc != PSM_OK) {
  136             rc = tmp_rc;
  137         }
  138 
  139         /* increase our distance by a factor of two */
  140         dist <<= 1;
  141     }
  142 
  143     return rc;
  144 }
  145 
  146 #define FUNCNAME split_type
  147 #undef FCNAME
  148 #define FCNAME MPL_QUOTE(FUNCNAME)
  149 static int split_type(MPID_Comm * comm_ptr, int stype, int key,
  150         MPID_Info *info_ptr, MPID_Comm ** newcomm_ptr)
  151 {
  152     MPID_Node_id_t id;
  153     MPIDI_Rank_t nid;
  154     int mpi_errno = MPI_SUCCESS;
  155 
  156     mpi_errno = MPID_Get_node_id(comm_ptr, comm_ptr->rank, &id);
  157     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
  158 
  159     nid = (stype == MPI_COMM_TYPE_SHARED) ? id : MPI_UNDEFINED;
  160     mpi_errno = MPIR_Comm_split_impl(comm_ptr, nid, key, newcomm_ptr);
  161     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
  162 
  163 fn_exit:
  164     return mpi_errno;
  165 
  166     /* --BEGIN ERROR HANDLING-- */
  167 fn_fail:
  168     goto fn_exit;
  169     /* --END ERROR HANDLING-- */
  170 }
  171 
  172 static MPID_CommOps comm_fns = {
  173     split_type
  174 };
  175 
  176 void mv2_print_env_info(struct coll_info *colls_arch_hca)
  177 {
  178     int i = 0;
  179 
  180     mv2_arch_type arch_type = MV2_GET_ARCH(g_mv2_arch_hca_type);
  181     mv2_hca_type hca_type = MV2_GET_HCA(g_mv2_arch_hca_type);
  182     mv2_cpu_family_type family_type = mv2_get_cpu_family();
  183 
  184     fprintf(stderr, "\n MVAPICH2-%s Parameters\n", MPIR_Version_string);
  185     fprintf(stderr,
  186             "---------------------------------------------------------------------\n");
  187     fprintf(stderr, "\tPROCESSOR ARCH NAME            : %s\n",
  188             mv2_get_arch_name(arch_type));
  189     fprintf(stderr, "\tPROCESSOR FAMILY NAME          : %s\n",
  190             mv2_get_cpu_family_name(family_type));
  191     fprintf(stderr, "\tPROCESSOR MODEL NUMBER         : %d\n",
  192             mv2_get_cpu_model());
  193     fprintf(stderr, "\tHCA NAME                       : %s\n",
  194             mv2_get_hca_name(hca_type));
  195     fprintf(stderr, "\tHeterogeneity                  : %s\n",
  196             (!mv2_homogeneous_cluster) ? "YES" : "NO");
  197     fprintf(stderr, "\tSMP Eagersize                  : %d\n",
  198             mv2_shm_rndv_thresh);
  199     fprintf(stderr, "\tHFI Eagersize                  : %d\n",
  200             mv2_hfi_rndv_thresh);
  201     fprintf(stderr, "\t%s                  : %s %s\n", "Tuning Table:", mv2_get_arch_name(colls_arch_hca[0].arch_type), mv2_get_hca_name(colls_arch_hca[0].hca_type));
  202     fprintf(stderr,
  203             "---------------------------------------------------------------------\n");
  204 
  205     if (atoi(getenv("MV2_SHOW_ENV_INFO")) >= 3) {
  206         fprintf(stderr, "\nCollective Tuning Tables\n");
  207         fprintf(stderr, "\t%-20s %-40s %-40s\n", "Collective", "Architecture", "Interconnect");
  208         for(i = 0; i < colls_max; i++) {
  209             fprintf(stderr, "\t%-20s %-40s %-40s\n", collective_names[i], mv2_get_arch_name(colls_arch_hca[i].arch_type), mv2_get_hca_name(colls_arch_hca[i].hca_type));
  210         }
  211         fprintf(stderr, "\n---------------------------------------------------------------------\n");
  212     }
  213 }
  214 
  215 #undef FUNCNAME
  216 #define FUNCNAME MV2_get_arch_hca_type
  217 #undef FCNAME
  218 #define FCNAME MPL_QUOTE(FUNCNAME)
  219 mv2_arch_hca_type MV2_get_arch_hca_type(void)
  220 {
  221     if(g_mv2_arch_hca_type)
  222         return g_mv2_arch_hca_type;
  223 
  224 #if defined(HAVE_LIBIBVERBS)
  225     int num_devices = 0, i;
  226     struct ibv_device **dev_list = NULL;
  227     mv2_hca_type hca_type = 0;
  228     mv2_arch_type arch_type = 0;
  229     dev_list = ibv_get_device_list(&num_devices);
  230 
  231     for(i=0; i<num_devices; i++){
  232         hca_type = mv2_get_hca_type(dev_list[i]);
  233         if(MV2_IS_INTEL_CARD(hca_type) || MV2_IS_QLE_CARD(hca_type))
  234             break;
  235     }
  236 
  237     if(i == num_devices)
  238         hca_type = MV2_HCA_ANY;
  239 
  240     arch_type = mv2_get_arch_type();
  241     g_mv2_arch_hca_type = arch_type;
  242     g_mv2_arch_hca_type <<= 16;
  243     g_mv2_arch_hca_type |= hca_type;
  244     g_mv2_arch_hca_type <<= 16;
  245     g_mv2_arch_hca_type |= g_mv2_num_cpus;
  246 
  247     if (dev_list) {
  248         ibv_free_device_list(dev_list);
  249     }
  250 #else
  251     g_mv2_arch_hca_type = mv2_get_arch_hca_type(NULL);
  252 #endif
  253     return g_mv2_arch_hca_type;
  254 }
  255 
  256 /* print error string to stderr, flush stderr, and return error */
  257 static PSM_ERROR_T mv2_psm_err_handler(PSM_EP_T ep, const PSM_ERROR_T error,
  258         const char* error_string, PSM_ERROR_TOKEN_T token)
  259 {
  260     /* print error and flush stderr */
  261     PRINT_ERROR("PSM error handler: %s : %s\n",
  262                 PSM_ERROR_GET_STRING(error), error_string);
  263     return error;
  264 }
  265 
  266 #undef FUNCNAME
  267 #define FUNCNAME psm_doinit
  268 #undef FCNAME
  269 #define FCNAME MPL_QUOTE(FUNCNAME)
  270 int psm_doinit(int has_parent, MPIDI_PG_t *pg, int pg_rank)
  271 {
  272     char *flag = NULL;
  273     int verno_major, verno_minor;
  274     int pg_size, mpi_errno;
  275     int heterogeneity, i; 
  276     PSM_ERROR_T err;
  277     struct PSM_EP_OPEN_OPTS psm_opts;
  278 
  279     /* Override split_type */
  280     MPID_Comm_fns = &comm_fns;
  281 
  282     pg_size = MPIDI_PG_Get_size(pg);
  283     MPIU_Assert(pg_rank < pg_size);
  284     MPIDI_PG_GetConnKVSname(&kvsid);
  285     psmdev_cw.pg_rank = pg_rank;
  286     psmdev_cw.pg_size = pg_size;
  287     verno_major = PSM_VERNO_MAJOR;
  288     verno_minor = PSM_VERNO_MINOR;
  289 
  290     mv2_allocate_pmi_keyval();
  291     psm_read_user_params();
  292     if(pg_size > mv2_on_demand_threshold) {
  293         mv2_use_on_demand_cm = 1;
  294     }
  295 
  296     mpi_errno = MPIDI_CH3U_Comm_register_create_hook(MPIDI_CH3I_comm_create, NULL);
  297     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
  298 
  299     /* detect architecture and hca type */
  300     g_mv2_arch_hca_type = MV2_get_arch_hca_type();
  301 
  302     /* Detect heterogeneity if not overriden by user */
  303     psm_detect_heterogeneity(g_mv2_arch_hca_type, pg_size, pg_rank);
  304 
  305     /* initialize tuning-table for collectives. 
  306      * Its ok to pass heterogeneity as 0. We anyway fall-back to the 
  307      * basic case for PSM */ 
  308     heterogeneity = !mv2_homogeneous_cluster;
  309     struct coll_info colls_arch_hca[colls_max];
  310     mpi_errno = MV2_collectives_arch_init(heterogeneity, colls_arch_hca); 
  311     if (mpi_errno != MPI_SUCCESS) {
  312         MPIR_ERR_POP(mpi_errno);
  313     }
  314 
  315     /* initialize shared memory for collectives */
  316     if (mv2_enable_shmem_collectives) {
  317         if ((mpi_errno = MPIDI_CH3I_SHMEM_COLL_init(pg, pg->ch.local_process_id)) != MPI_SUCCESS)
  318         {
  319             mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPI_ERR_OTHER,
  320                    FCNAME, __LINE__, MPI_ERR_OTHER, "**fail",
  321                    "%s", "SHMEM_COLL_init failed");
  322             goto cleanup_files;
  323         }
  324 
  325         UPMI_BARRIER();
  326 
  327         /* Memory Mapping shared files for collectives*/
  328         if ((mpi_errno = MPIDI_CH3I_SHMEM_COLL_Mmap(pg, pg->ch.local_process_id)) != MPI_SUCCESS)
  329         {
  330            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPI_ERR_OTHER,
  331                  FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", "%s",
  332                  "SHMEM_COLL_Mmap failed");
  333            goto cleanup_files;
  334         }
  335 
  336         MPIDI_CH3I_SHMEM_COLL_Unlink();
  337     }  
  338 
  339     if (mv2_psm_bcast_uuid) {
  340         mpi_errno = psm_bcast_uuid(pg_size, pg_rank);
  341     } else {
  342         mpi_errno = psm_create_uuid();
  343     }
  344     if(mpi_errno != MPI_SUCCESS) {
  345         goto fn_fail;
  346     }
  347 
  348     psm_preinit(pg);
  349 
  350     /* override global error handler so we can print error messages */
  351     PSM_ERROR_REGISTER_HANDLER(NULL, mv2_psm_err_handler);
  352 
  353     err = PSM_INIT(&verno_major, &verno_minor);
  354     if(err != PSM_OK) {
  355         #if PSM_VERNO >= PSM_2_1_VERSION
  356             fprintf(stderr, "psm2_init failed with error: %s\n", PSM_ERROR_GET_STRING(err));
  357         #else
  358             fprintf(stderr, "psm_init failed with error: %s\n", PSM_ERROR_GET_STRING(err));
  359         #endif
  360 
  361         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**psminit");
  362     }
  363 
  364     /* By default, PSM sets cpu affinity on a process if it's not
  365      * already set.  We disable cpu affinity in PSM here.  MVAPICH
  366      * or the process launcher will set affinity, unless the user
  367      * disabled it, but in that case, he probably doesn't want
  368      * PSM to set it either.
  369      */
  370     PSM_EP_OPEN_OPTS_GET_DEFAULTS(&psm_opts);
  371     psm_opts.affinity = PSM_EP_OPEN_AFFINITY_SKIP;
  372 
  373     /* number of times to retry psm_ep_open upon failure */
  374     if ((flag = getenv("MV2_PSM_EP_OPEN_RETRY_COUNT")) != NULL) {
  375         int value = atoi(flag);
  376 
  377         if (value >= 0) {
  378             mv2_psm_ep_open_retry_count = value;
  379         } else {
  380             PRINT_ERROR("MV2_WARNING: Attempted to set "
  381                     "MV2_PSM_EP_OPEN_RETRY_COUNT to invalid value [%s]\n",
  382                     flag);
  383             PRINT_ERROR("MV2_WARNING: Using default value of `%d' instead\n",
  384                     mv2_psm_ep_open_retry_count);
  385         }
  386     }
  387 
  388     /* sleep time in seconds between open retries */
  389     if ((flag = getenv("MV2_PSM_EP_OPEN_RETRY_SECS")) != NULL) {
  390         int value = atoi(flag);
  391 
  392         if (value > 0) {
  393             mv2_psm_ep_open_retry_secs = value;
  394         } else {
  395             PRINT_ERROR("MV2_WARNING: Attempted to set "
  396                     "MV2_PSM_EP_OPEN_RETRY_SECS to invalid value [%s]\n",
  397                     flag);
  398             PRINT_ERROR("MV2_WARNING: Using default value of `%d' instead\n",
  399                     mv2_psm_ep_open_retry_secs);
  400         }
  401     }
  402 
  403     int attempts = 0;
  404     do {
  405         if (err != PSM_OK) {
  406             PRINT_ERROR("MV2_WARNING: Failed to open an end-point: %s,"
  407                         " retry attempt %d of %d in %d seconds\n",
  408                         PSM_ERROR_GET_STRING(err), attempts,
  409                         mv2_psm_ep_open_retry_count, mv2_psm_ep_open_retry_secs);
  410             sleep(mv2_psm_ep_open_retry_secs);
  411         }
  412         err = PSM_EP_OPEN(psm_uuid, &psm_opts, &psmdev_cw.ep, &psmdev_cw.epid);
  413         attempts++;
  414     } while ((err != PSM_OK) && (attempts <= mv2_psm_ep_open_retry_count));
  415     if (err != PSM_OK) {
  416         fprintf(stderr, "psm_ep_open failed with error %s\n",
  417                 PSM_ERROR_GET_STRING(err));
  418         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**psmepopen");
  419     }
  420 
  421     mpi_errno = psm_start_epid_exchange(psmdev_cw.epid, pg_size, pg_rank);
  422     if(mpi_errno != MPI_SUCCESS) {
  423         goto fn_fail;
  424     }
  425 
  426     psmdev_cw.epaddrs = (PSM_EPADDR_T *) MPIU_Malloc(pg_size * sizeof(PSM_EPADDR_T));
  427     if(psmdev_cw.epaddrs == NULL) {
  428         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**psmnomem");
  429     }
  430     MPIU_Memset(psmdev_cw.epaddrs, 0, pg_size * sizeof(PSM_EPADDR_T));
  431 
  432     if((err = PSM_MQ_INIT(psmdev_cw.ep, PSM_MQ_ORDERMASK_ALL, NULL, 0,
  433                 &psmdev_cw.mq)) != PSM_OK) {
  434         PRINT_ERROR("psm_mq_init failed\n");
  435         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**psm_mqinitfailed");
  436     }
  437 
  438     if (!mv2_use_on_demand_cm) {
  439         psm_connect_alltoall(psmdev_cw.epaddrs, pg_size, pg_rank);
  440 
  441         /* execute barrier to ensure all tasks have returned from psm_ep_connect */
  442         if((err = psm_mq_init_barrier(psmdev_cw.mq, pg_rank, pg_size, psmdev_cw.epaddrs)) != PSM_OK) {
  443             PRINT_ERROR("psm_mq_init_barrier failed\n");
  444             MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**fail");
  445         }
  446     }
  447 
  448     /* initialize VC state, eager size value, queues etc */
  449     psm_other_init(pg);
  450 
  451     if(0==pg_rank && g_mv2_show_env_info){
  452         mv2_print_env_info(colls_arch_hca);
  453     }
  454 
  455     mpi_errno = MPIDI_CH3U_Comm_register_destroy_hook(MPIDI_CH3I_comm_destroy, NULL);
  456     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
  457 
  458     /* Initialize progress hook slots */
  459     for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
  460         progress_hooks[i].func_ptr = NULL;
  461         progress_hooks[i].active = FALSE;
  462     }
  463 
  464     return MPI_SUCCESS;
  465 
  466 cleanup_files:
  467     MPIDI_CH3I_SHMEM_COLL_Cleanup();
  468 fn_fail:
  469     return MPI_ERR_INTERN;
  470 }
  471 
  472 /*  handle special psm init. PSM_DEVICES init, version test for setting
  473  *  MPI_LOCALRANKS, MPI_LOCALRANKID 
  474  *  Updated on Fed 2 2010 based on patch provided by Ben Truscott. Refer to 
  475  *  TRAC Ticket #457 i
  476  *  Updated on Jan 3 2018 to remove unnecessary barriers */
  477 static void psm_preinit(MPIDI_PG_t *pg)
  478 {
  479     int id, n;
  480     int pg_size, universesize;
  481 
  482     pg_size = MPIDI_PG_Get_size(pg);
  483     id = pg->ch.local_process_id;
  484     n = pg->ch.num_local_processes;
  485 
  486     if(pg_size > 0)
  487         universesize = pg_size;
  488     else
  489         universesize = 1; /*May be started without mpiexec.*/
  490 
  491     PRINT_DEBUG(DEBUG_CM_verbose, "localid %d localranks %d\n", id, n);
  492 
  493     /* We should not override user settings for these parameters. 
  494      * This might cause problems with the new greedy context acquisition 
  495      * when multiple jobs share the same node. Refer to TRAC Ticket #457
  496      * putenv("PSM_SHAREDCONTEXTS=1");
  497      * putenv("PSM_SHAREDCONTEXTS_MAX=16");*/
  498 
  499     /* for psm versions 2.0 or later, hints are needed for context sharing */
  500     if(PSM_VERNO_MAJOR >= PSM_2_VERSION_MAJOR) {
  501         snprintf(scratch, sizeof(scratch), "%d", n);
  502         setenv("MPI_LOCALNRANKS", scratch, 1);
  503         snprintf(scratch, sizeof(scratch), "%d", id);
  504         setenv("MPI_LOCALRANKID", scratch, 1);
  505 
  506         /* Should not override user settings. Updating to handle all 
  507          * possible scenarios. Refer to TRAC Ticket #457 */
  508         #if PSM_VERNO >= PSM_2_1_VERSION
  509             /* PSM2 renamed PSM_* env vars to equivalent PSM2_* vars */
  510             if ( getenv("PSM2_DEVICES") == NULL ) {
  511         #else
  512             if ( getenv("PSM_DEVICES") == NULL ) {
  513         #endif
  514             if (universesize > n && n > 1) {
  515                 /* There are both local and remote ranks present;
  516                  * we require both the shm and ipath devices in
  517                  * this case. */
  518                 #if PSM_VERNO >= PSM_2_1_VERSION
  519                     putenv("PSM2_DEVICES=self,shm,hfi");
  520                 #else
  521                     putenv("PSM_DEVICES=self,shm,ipath");
  522                 #endif
  523             }
  524             else if (universesize > n && n == 1) {
  525                 /* There are only remote ranks; we do not require
  526                  * the shm device. */
  527                 #if PSM_VERNO >= PSM_2_1_VERSION
  528                     putenv("PSM2_DEVICES=self,hfi");
  529                 #else
  530                     putenv("PSM_DEVICES=self,ipath");
  531                 #endif
  532             }
  533             else if (universesize == n && n > 1) {
  534                 /* There are only local ranks; we do not require the
  535                  * ipath device. */
  536                 #if PSM_VERNO >= PSM_2_1_VERSION
  537                     putenv("PSM2_DEVICES=self,shm");
  538                 #else
  539                     putenv("PSM_DEVICES=self,shm");
  540                 #endif
  541             }
  542             else if (universesize == 1 && n == 1) {
  543                 /* This is the only rank; we do not need either the
  544                    shm or the ipath device. */
  545                 #if PSM_VERNO >= PSM_2_1_VERSION
  546                     putenv("PSM2_DEVICES=self");
  547                 #else
  548                     putenv("PSM_DEVICES=self");
  549                 #endif
  550             }
  551             else {
  552                 /* Impossible situation? Leave PSM_DEVICES as it
  553                  * previously was. */
  554             }
  555         }
  556     }
  557 
  558 }
  559 
  560 /* detect if arch and hca type is same for all processes */
  561 static int psm_detect_heterogeneity(mv2_arch_hca_type myarch, int pg_size, int pg_rank)
  562 {
  563     int i, mpi_errno = MPI_SUCCESS;
  564     mv2_arch_hca_type arch = 0;
  565     mv2_homogeneous_cluster = 1;
  566     char *flag;
  567 
  568     if ((flag = getenv("MV2_HOMOGENEOUS_CLUSTER")) != NULL) {
  569         mv2_homogeneous_cluster = !!atoi(flag);
  570         goto fn_exit;
  571     }
  572 
  573     if (pg_size == 1) {
  574         mv2_homogeneous_cluster = 1;
  575         goto fn_exit;
  576     }
  577 
  578     PRINT_DEBUG(DEBUG_CM_verbose>1, "my arch_hca_type = %016lx\n", myarch);
  579     MPL_snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "pmi_ahkey_%d", pg_rank);
  580     MPL_snprintf(mv2_pmi_val, mv2_pmi_max_vallen, "%016lx", myarch);
  581 
  582     if(UPMI_KVS_PUT(kvsid, mv2_pmi_key, mv2_pmi_val) != UPMI_SUCCESS) {
  583         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ahtype_putfailed");
  584     }
  585     if(UPMI_KVS_COMMIT(kvsid) != UPMI_SUCCESS) {
  586         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ahtype_putcommit");
  587     }
  588     if(UPMI_BARRIER() != UPMI_SUCCESS) {
  589         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ahtype_putcommit");
  590     }
  591 
  592     for (i = 0; i < pg_size; i++) {
  593         if (i != pg_rank) {
  594             MPL_snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "pmi_ahkey_%d", i);
  595             mpi_errno = UPMI_KVS_GET(kvsid, mv2_pmi_key, mv2_pmi_val, mv2_pmi_max_vallen);
  596             if(mpi_errno != UPMI_SUCCESS) {
  597                 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_getfailed");
  598             }
  599 
  600             sscanf(mv2_pmi_val, "%016lx", &arch);
  601             PRINT_DEBUG(DEBUG_CM_verbose>1,
  602                     "peer: %d, val: %s, arch: %016lx\n", i, mv2_pmi_val, arch);
  603             if (arch != myarch) {
  604                 mv2_homogeneous_cluster = 0;
  605                 break;
  606             }
  607         }
  608     }
  609 
  610 fn_exit:
  611     PRINT_DEBUG(DEBUG_CM_verbose>1, "mv2_homogeneous_cluster = %d\n", mv2_homogeneous_cluster);
  612     return mpi_errno;
  613 
  614 fn_fail:
  615     PRINT_ERROR("ahtype put/commit/get failed\n");
  616     goto fn_exit;
  617 }
  618 
  619 /* all ranks provide their epid via PMI put/get */
  620 static int psm_start_epid_exchange(PSM_EPID_T myid, int pg_size, int pg_rank)
  621 {
  622     int mpi_errno = MPI_SUCCESS;
  623 
  624     if (pg_size == 1) {
  625         goto fn_exit;
  626     }
  627 
  628     PRINT_DEBUG(DEBUG_CM_verbose>1, "[%d] my epid = %lu\n", pg_rank, myid);
  629     MPL_snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "pmi_epidkey_%d", pg_rank);
  630     MPL_snprintf(mv2_pmi_val, mv2_pmi_max_vallen, "%lu", myid);
  631     if(UPMI_KVS_PUT(kvsid, mv2_pmi_key, mv2_pmi_val) != UPMI_SUCCESS) {
  632         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_putfailed");
  633     }
  634     if(UPMI_KVS_COMMIT(kvsid) != UPMI_SUCCESS) {
  635         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_putcommit");
  636     }
  637     if (mv2_use_pmi_ibarrier) {
  638         mpi_errno = UPMI_IBARRIER();
  639     } else {
  640         mpi_errno = UPMI_BARRIER();
  641     }
  642     if(mpi_errno != UPMI_SUCCESS) {
  643         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_putcommit");
  644     }
  645 
  646 fn_exit:
  647     return mpi_errno;
  648 
  649 fn_fail:
  650     PRINT_ERROR("epid put/commit/get failed\n");
  651     goto fn_exit;
  652 }
  653 
  654 #undef FUNCNAME
  655 #define FUNCNAME psm_connect_peer
  656 #undef FCNAME
  657 #define FCNAME MPL_QUOTE(FUNCNAME)
  658 int psm_connect_peer(int peer)
  659 {
  660     int err, mpi_errno = MPI_SUCCESS;
  661     PSM_EPID_T epidlist[1];
  662     PSM_ERROR_T errs[1];
  663 
  664     /* Should it fail if connection is already established? */
  665     assert(!PSM_ADDR_RESOLVED(peer));
  666     if (mv2_use_pmi_ibarrier) {
  667         UPMI_WAIT();
  668     }
  669 
  670     PRINT_DEBUG(DEBUG_CM_verbose>0, "Connecting to peer %d\n", peer);
  671 
  672     if (unlikely(psmdev_cw.pg_rank == peer)) {
  673         epidlist[0] = psmdev_cw.epid;
  674     } else {
  675         MPL_snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "pmi_epidkey_%d", peer);
  676         if(UPMI_KVS_GET(kvsid, mv2_pmi_key, mv2_pmi_val, mv2_pmi_max_vallen) != UPMI_SUCCESS) {
  677             MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_getfailed");
  678         }
  679         PRINT_DEBUG(DEBUG_CM_verbose>1, "peer: %d, got epid: %s\n", peer, mv2_pmi_val);
  680         sscanf(mv2_pmi_val, "%lu", &epidlist[0]);
  681     }
  682 
  683     if((err = PSM_EP_CONNECT(psmdev_cw.ep, 1, epidlist, NULL, errs,
  684                 &psmdev_cw.epaddrs[peer], TIMEOUT * SEC_IN_NS)) != PSM_OK) {
  685         fprintf(stderr, "psm_ep_connect failed with error %s\n", PSM_ERROR_GET_STRING(err));
  686         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**psmconnectfailed");
  687     }
  688 
  689     PRINT_DEBUG(DEBUG_CM_verbose>0, "Connection established with peer %d\n", peer);
  690 
  691     return MPI_SUCCESS;
  692 
  693 fn_fail:
  694     PRINT_ERROR("psm_connect_peer failed\n");
  695     return MPI_ERR_INTERN;
  696 }
  697 
  698 #undef FUNCNAME
  699 #define FUNCNAME psm_connect_alltoall
  700 #undef FCNAME
  701 #define FCNAME MPL_QUOTE(FUNCNAME)
  702 static int psm_connect_alltoall(PSM_EPADDR_T *addrs, int pg_size, int pg_rank)
  703 {
  704     int i;
  705     int err, mpi_errno = MPI_SUCCESS;
  706     PSM_EPID_T *epidlist = NULL;
  707     PSM_ERROR_T *errlist = NULL;
  708 
  709     if (pg_size == 1) {
  710         goto fn_exit;
  711     }
  712 
  713     PRINT_DEBUG(DEBUG_CM_verbose>0, "Establishing alltoall connectivity\n");
  714     epidlist = (PSM_EPID_T*) MPIU_Malloc (pg_size * sizeof(PSM_EPID_T));
  715     errlist = (PSM_ERROR_T*) MPIU_Malloc (pg_size * sizeof(PSM_ERROR_T));
  716 
  717     if (epidlist == NULL || errlist == NULL) {
  718         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**psmnomem");
  719     }
  720 
  721     if (mv2_use_pmi_ibarrier) {
  722         UPMI_WAIT();
  723     }
  724 
  725     PRINT_DEBUG(DEBUG_CM_verbose>0, "Looking up epids\n");
  726     for (i=0; i<pg_size; i++) {
  727         MPL_snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "pmi_epidkey_%d", i);
  728         if(UPMI_KVS_GET(kvsid, mv2_pmi_key, mv2_pmi_val, mv2_pmi_max_vallen) != UPMI_SUCCESS) {
  729             MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**epid_getfailed");
  730         }
  731         PRINT_DEBUG(DEBUG_CM_verbose>1, "peer: %d, got epid: %s\n", i, mv2_pmi_val);
  732         sscanf(mv2_pmi_val, "%lu", &epidlist[i]);
  733     }
  734 
  735     PRINT_DEBUG(DEBUG_CM_verbose>0, "Connecting to peers\n");
  736     if((err = PSM_EP_CONNECT(psmdev_cw.ep, pg_size, epidlist, NULL, errlist,
  737                     addrs, TIMEOUT * SEC_IN_NS)) != PSM_OK) {
  738         fprintf(stderr, "psm_ep_connect failed with error %s\n", PSM_ERROR_GET_STRING(err));
  739         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**psmconnectfailed");
  740     }
  741 
  742     PRINT_DEBUG(DEBUG_CM_verbose>0, "Successfully established alltoall connectivity\n");
  743 fn_exit:
  744     MPIU_Free(epidlist);
  745     MPIU_Free(errlist);
  746     return MPI_SUCCESS;
  747 
  748 fn_fail:
  749     PRINT_ERROR("psm_connect_alltoall failed\n");
  750     return MPI_ERR_INTERN;
  751 }
  752 
  753 static int psm_create_uuid(void)
  754 {
  755     int mpi_errno = MPI_SUCCESS;
  756     int i, len = sizeof(PSM_UUID_T);
  757     char *uuid_str = NULL;
  758     unsigned int kvs_hash = 0;
  759     
  760     uuid_str = MPIU_Malloc(sizeof(char) * (len+1));
  761     MPIU_Memset(uuid_str, 0, len+1);
  762 
  763     kvs_hash = psm_hash_str(kvsid);
  764     srand(kvs_hash);
  765 
  766     for (i=0; i<len; i++) {
  767         uuid_str[i] = psm_uuid[i] = rand() % UCHAR_MAX;
  768     }
  769     uuid_str[i] = '\0';
  770 
  771     PRINT_DEBUG(DEBUG_CM_verbose, "kvsid: %s, kvs_hash: %u\n", kvsid, kvs_hash);
  772 
  773     MPIU_Free(uuid_str);
  774     return mpi_errno;
  775 }
  776 
  777 /* broadcast the uuid to all ranks via PMI put/get */
  778 static int psm_bcast_uuid(int pg_size, int pg_rank)
  779 {
  780     int mpi_errno = MPI_SUCCESS;
  781     int srclen = sizeof(PSM_UUID_T);
  782     int dstlen = mv2_pmi_max_vallen;
  783 
  784     if(pg_rank == ROOT) {
  785         PSM_UUID_GENERATE(psm_uuid);
  786     }
  787 
  788     if(pg_size == 1)
  789         return MPI_SUCCESS;
  790 
  791     MPIU_Memset(mv2_pmi_key, 0, mv2_pmi_max_keylen);
  792     MPIU_Memset(mv2_pmi_val, 0, mv2_pmi_max_vallen);
  793 
  794     if(pg_rank == ROOT) {
  795         snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "%s", MPID_PSM_UUID);
  796         encode(srclen, (char *)&psm_uuid, dstlen, mv2_pmi_val);
  797         PRINT_DEBUG(DEBUG_CM_verbose>1, "uuid key: %s, value: %s\n", mv2_pmi_key, mv2_pmi_val);
  798     } else {
  799         snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "dummy-key");
  800         strcpy(mv2_pmi_val, "dummy-value");
  801     }
  802 
  803     if(UPMI_KVS_PUT(kvsid, mv2_pmi_key, mv2_pmi_val) != UPMI_SUCCESS) {
  804         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmiputuuid");
  805     }
  806     if(UPMI_KVS_COMMIT(kvsid) != UPMI_SUCCESS) {
  807         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmicommituuid");
  808     }
  809     UPMI_BARRIER();
  810 
  811     if(pg_rank != ROOT) {
  812         snprintf(mv2_pmi_key, mv2_pmi_max_keylen, "%s", MPID_PSM_UUID);
  813         if(UPMI_KVS_GET(kvsid, mv2_pmi_key, mv2_pmi_val, mv2_pmi_max_vallen) != UPMI_SUCCESS) {
  814             MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**pmigetuuid");
  815         }
  816 
  817         PRINT_DEBUG(DEBUG_CM_verbose>1, "uuid key: %s, value: %s\n", mv2_pmi_key, mv2_pmi_val);
  818         strcat(mv2_pmi_val, "==");
  819         srclen = strlen(mv2_pmi_val);
  820         if(decode(srclen, mv2_pmi_val, sizeof(PSM_UUID_T), (char *)&psm_uuid)) {
  821             fprintf(stderr, "base-64 decode failed of UUID\n");
  822             goto fn_fail;
  823         }
  824     }
  825 
  826     return MPI_SUCCESS;
  827 
  828 fn_fail:
  829     PRINT_ERROR("uuid bcast failed\n");
  830     return MPI_ERR_INTERN;
  831 }
  832 
  833 static void psm_read_user_params(void)
  834 {
  835     char *flag;
  836     ipath_debug_enable = 0;
  837     if((flag = getenv("MV2_PSM_DEBUG")) != NULL) {
  838         ipath_debug_enable = !!atoi(flag);
  839     }
  840     ipath_dump_frequency = 10;
  841     if((flag = getenv("MV2_PSM_DUMP_FREQUENCY")) != NULL) {
  842         ipath_dump_frequency = atoi(flag);
  843     }
  844     ipath_enable_func_lock = 1;
  845     if((flag = getenv("MV2_PSM_ENABLE_FUNC_LOCK")) != NULL) {
  846         ipath_enable_func_lock = atoi(flag);
  847     }
  848     ipath_progress_yield_count = 3;
  849     if((flag = getenv("MV2_PSM_YIELD_COUNT")) != NULL) {
  850         ipath_progress_yield_count = atoi(flag);
  851     }
  852     if ((flag = getenv("MV2_PSM_BCAST_UUID")) != NULL) {
  853         mv2_psm_bcast_uuid = !!atoi(flag);
  854     }
  855     if ((flag = getenv("MV2_SHOW_ENV_INFO")) != NULL) {
  856         g_mv2_show_env_info = atoi(flag);
  857     }
  858     if ((flag = getenv("MV2_ON_DEMAND_THRESHOLD")) != NULL) {
  859         mv2_on_demand_threshold = atoi(flag);
  860     }
  861 #if (defined(HAVE_PMI2_KVS_IFENCE) && defined(HAVE_PMI2_KVS_WAIT)) \
  862     || (defined(HAVE_PMI_IBARRIER) && defined(HAVE_PMI_WAIT))
  863     mv2_use_pmi_ibarrier = 1; /* enable by default if available */
  864 
  865     if ((flag = getenv("MV2_USE_PMI_IBARRIER")) != NULL) {
  866         mv2_use_pmi_ibarrier = !!atoi(flag);
  867     }
  868 #endif
  869 }
  870 
  871 /* Ch3 expects channel to initialize VC fields.
  872    force_eager is used because psm internally manages eager/rndv so
  873    we can just force one code-path for all message sizes */
  874 
  875 static void psm_other_init(MPIDI_PG_t *pg)
  876 {
  877     MPIDI_VC_t *vc;
  878     int i;
  879     char *flag;
  880     uint32_t value = 0;
  881 
  882     for(i = 0; i < MPIDI_PG_Get_size(pg); i++) {
  883         MPIDI_PG_Get_vc(pg, i, &vc);
  884 
  885         vc->state = MPIDI_VC_STATE_ACTIVE;
  886         vc->force_eager = 1;
  887         vc->eager_max_msg_sz = PSM_VBUFSZ;
  888         vc->rndvSend_fn = NULL;
  889         vc->rndvRecv_fn = NULL;
  890     }
  891 
  892     if ((flag = getenv("MV2_IBA_EAGER_THRESHOLD")) != NULL) {
  893         mv2_hfi_rndv_thresh = user_val_to_bytes(flag, "MV2_IBA_EAGER_THRESHOLD");
  894     } else {
  895         /* Check if default PSM2 threshold is higher and if so, use it */
  896         PSM_MQ_GETOPT(psmdev_cw.mq, PSM_MQ_RNDV_IPATH_SZ, &value);
  897         if (value > mv2_hfi_rndv_thresh) {
  898             mv2_hfi_rndv_thresh = value;
  899         }
  900     }
  901     /* Set the value of HFI rendezvous threshold */
  902     PSM_MQ_SETOPT(psmdev_cw.mq, PSM_MQ_RNDV_IPATH_SZ, &mv2_hfi_rndv_thresh);
  903     /* Validate that the desired values were set */
  904     PSM_MQ_GETOPT(psmdev_cw.mq, PSM_MQ_RNDV_IPATH_SZ, &value);
  905     if (value != mv2_hfi_rndv_thresh) {
  906         PRINT_ERROR("Specified HFI rendezvous threshold was not set correctly by PSM.\n");
  907         PRINT_ERROR("Requested: %d, Set: %d\n", mv2_hfi_rndv_thresh, value);
  908     }
  909 
  910     if ((flag = getenv("MV2_SMP_EAGERSIZE")) != NULL) {
  911         mv2_shm_rndv_thresh = user_val_to_bytes(flag, "MV2_SMP_EAGERSIZE");
  912     } else {
  913         /* Check if default PSM2 threshold is higher and if so, use it */
  914         PSM_MQ_GETOPT(psmdev_cw.mq, PSM_MQ_RNDV_SHM_SZ, &value);
  915         if (value > mv2_shm_rndv_thresh) {
  916             mv2_shm_rndv_thresh = value;
  917         }
  918     }
  919     /* Set the value of SHM rendezvous threshold */
  920     PSM_MQ_SETOPT(psmdev_cw.mq, PSM_MQ_RNDV_SHM_SZ, &mv2_shm_rndv_thresh);
  921     /* Validate that the desired values were set */
  922     PSM_MQ_GETOPT(psmdev_cw.mq, PSM_MQ_RNDV_SHM_SZ, &value);
  923     if (value != mv2_shm_rndv_thresh) {
  924         PRINT_ERROR("Specified SHM rendezvous threshold was not set correctly by PSM\n");
  925         PRINT_ERROR("Requested: %d, Set: %d\n", mv2_shm_rndv_thresh, value);
  926     }
  927 
  928     /* Select the smaller threshold */
  929     if (mv2_shm_rndv_thresh < mv2_hfi_rndv_thresh) {
  930         ipath_rndv_thresh = mv2_shm_rndv_thresh;
  931     } else {
  932         ipath_rndv_thresh = mv2_hfi_rndv_thresh;
  933     }
  934     PRINT_DEBUG(DEBUG_CM_verbose>0,
  935             "hfi threshold: %d, shm threshold: %d, blocking threshold %d\n",
  936             mv2_hfi_rndv_thresh, mv2_shm_rndv_thresh, ipath_rndv_thresh);
  937 
  938     psm_queue_init();
  939     psm_init_vbuf_lock();
  940     psm_allocate_vbufs(PSM_INITIAL_POOL_SZ);
  941     psm_init_1sided();
  942 }
  943 
  944 static char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  945                        "abcdefghijklmnopqrstuvwxyz"
  946                        "0123456789"
  947                        "+/";
  948 /*
  949 ** ENCODE RAW into BASE64
  950 */
  951 
  952 /* Encode source from raw data into Base64 encoded string */
  953 static int encode(unsigned s_len, char *src, unsigned d_len, char *dst)
  954 {
  955     unsigned triad;
  956 
  957     for (triad = 0; triad < s_len; triad += 3) {
  958         unsigned long int sr;
  959         unsigned byte;
  960 
  961         for (byte = 0; (byte<3)&&(triad+byte<s_len); ++byte) {
  962             sr <<= 8;
  963             sr |= (*(src+triad+byte) & 0xff);
  964         }
  965 
  966         sr <<= (6-((8*byte)%6))%6; /*shift left to next 6bit alignment*/
  967 
  968         if (d_len < 4) 
  969             return 1; /* error - dest too short */
  970 
  971         *(dst+0) = *(dst+1) = *(dst+2) = *(dst+3) = '=';
  972         switch(byte) {
  973         case 3:
  974             *(dst+3) = base64[sr&0x3f];
  975             sr >>= 6;
  976         case 2:
  977             *(dst+2) = base64[sr&0x3f];
  978             sr >>= 6;
  979         case 1:
  980             *(dst+1) = base64[sr&0x3f];
  981             sr >>= 6;
  982             *(dst+0) = base64[sr&0x3f];
  983         }
  984         dst += 4; d_len -= 4;
  985     }
  986     return 0;
  987 }
  988 
  989 /*
  990 ** DECODE BASE64 into RAW
  991 */
  992 
  993 /* determine which sextet value a Base64 character represents */
  994 static int tlu(int byte)
  995 {
  996     int index;
  997 
  998     for (index = 0; index < 64; ++index)
  999         if (base64[index] == byte)
 1000             break;
 1001         if (index > 63) index = -1;
 1002             return index;
 1003 }
 1004 
 1005 /*
 1006 ** Decode source from Base64 encoded string into raw data
 1007 **
 1008 ** Returns: 0 - Success
 1009 ** 1 - Error - Source underflow - need more base64 data
 1010 ** 2 - Error - Chunk contains half a byte of data
 1011 ** 3 - Error - Decoded results will overflow output buffer
 1012 */
 1013 static int decode(unsigned s_len, char *src, unsigned d_len, char *dst)
 1014 {
 1015     unsigned six, dix;
 1016 
 1017     dix = 0;
 1018 
 1019     for (six = 0; six < s_len; six += 4) {
 1020         unsigned long sr;
 1021         unsigned ix;
 1022 
 1023         sr = 0;
 1024         for (ix = 0; ix < 4; ++ix) {
 1025             int sextet;
 1026 
 1027             if (six+ix >= s_len)
 1028                 return 1;
 1029             if ((sextet = tlu(*(src+six+ix))) < 0)
 1030                 break;
 1031             sr <<= 6;
 1032             sr |= (sextet & 0x3f);
 1033         }
 1034 
 1035         switch (ix) {
 1036         case 0: /* end of data, no padding */
 1037             return 0;
 1038 
 1039         case 1: /* can't happen */
 1040             return 2;
 1041 
 1042         case 2: /* 1 result byte */
 1043             sr >>= 4;
 1044             if (dix > d_len) 
 1045                 return 3;
 1046             *(dst+dix) = (sr & 0xff);
 1047             ++dix;
 1048             break;
 1049 
 1050         case 3: /* 2 result bytes */
 1051             sr >>= 2;
 1052             if (dix+1 > d_len) 
 1053                 return 3;
 1054             *(dst+dix+1) = (sr & 0xff);
 1055             sr >>= 8;
 1056             *(dst+dix) = (sr & 0xff);
 1057             dix += 2;
 1058             break;
 1059 
 1060         case 4: /* 3 result bytes */
 1061             if (dix+2 > d_len) 
 1062                 return 3;
 1063             *(dst+dix+2) = (sr & 0xff);
 1064             sr >>= 8;
 1065             *(dst+dix+1) = (sr & 0xff);
 1066             sr >>= 8;
 1067             *(dst+dix) = (sr & 0xff);
 1068             dix += 3;
 1069             break;
 1070         }
 1071     }
 1072     return 0;
 1073 }
 1074 
 1075 /* djb2 hash function */
 1076 static unsigned int psm_hash_str(char *str)
 1077 {
 1078     unsigned int hash = 5381;
 1079     int c;
 1080 
 1081     while ((c = *str++))
 1082         hash = ((hash << 5) + hash) + c;
 1083 
 1084     return hash;
 1085 }
 1086 
 1087 int mv2_allocate_pmi_keyval(void)
 1088 {
 1089     if (!mv2_pmi_max_keylen) {
 1090         UPMI_KVS_GET_KEY_LENGTH_MAX(&mv2_pmi_max_keylen);
 1091     }
 1092     if (!mv2_pmi_max_vallen) {
 1093         UPMI_KVS_GET_VALUE_LENGTH_MAX(&mv2_pmi_max_vallen);
 1094     }
 1095 
 1096     mv2_pmi_key = MPIU_Malloc(mv2_pmi_max_keylen+1);
 1097     mv2_pmi_val = MPIU_Malloc(mv2_pmi_max_vallen+1);
 1098 
 1099     if (mv2_pmi_key==NULL || mv2_pmi_val==NULL) {
 1100         mv2_free_pmi_keyval();
 1101         return -1; 
 1102     }
 1103     return 0;
 1104 }
 1105 
 1106 void mv2_free_pmi_keyval(void)
 1107 {
 1108     if (mv2_pmi_key!=NULL) {
 1109         MPIU_Free(mv2_pmi_key);
 1110         mv2_pmi_key = NULL;
 1111     }
 1112 
 1113     if (mv2_pmi_val!=NULL) {
 1114         MPIU_Free(mv2_pmi_val);
 1115         mv2_pmi_val = NULL;
 1116     }
 1117 }
 1118