"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/mpid/ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c" between
mvapich2-2.3.1.tar.gz and mvapich2-2.3.2.tar.gz

About: MVAPICH2 offers "MPI over InfiniBand, 10GigE/iWARP and RDMA over Converged Ethernet (RoCE)" to achieve best performance, scalability and fault tolerance for high-end computing systems and servers.

ch3_rndvtransfer.c  (mvapich2-2.3.1):ch3_rndvtransfer.c  (mvapich2-2.3.2)
skipping to change at line 30 skipping to change at line 30
#include "vbuf.h" #include "vbuf.h"
#include "upmi.h" #include "upmi.h"
#include "mpiutil.h" #include "mpiutil.h"
#include "rdma_impl.h" #include "rdma_impl.h"
#include "smp_smpi.h" #include "smp_smpi.h"
#include "dreg.h" #include "dreg.h"
static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t *, MPID_Request *); static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t *, MPID_Request *);
#if defined(_SMP_CMA_)
static int MPIDI_CH3_CMA_Rendezvous_push(MPIDI_VC_t *, MPID_Request *);
extern int MPIDI_CH3I_SMP_do_cma_put(MPIDI_VC_t * vc, const void *src, void *dst
, ssize_t len);
extern int MPIDI_CH3I_SMP_do_cma_get(MPIDI_VC_t * vc, const void *src, void *dst
, ssize_t len);
#endif
MPIDI_VC_t *flowlist; MPIDI_VC_t *flowlist;
#undef DEBUG_PRINT #undef DEBUG_PRINT
#ifdef DEBUG #ifdef DEBUG
#define DEBUG_PRINT(args...) \ #define DEBUG_PRINT(args...) \
do { \ do { \
int rank; \ int rank; \
UPMI_GET_RANK(&rank); \ UPMI_GET_RANK(&rank); \
fprintf(stderr, "[%d][%s:%d] ", rank, __FILE__, __LINE__);\ fprintf(stderr, "[%d][%s:%d] ", rank, __FILE__, __LINE__);\
fprintf(stderr, args); \ fprintf(stderr, args); \
skipping to change at line 97 skipping to change at line 103
#endif #endif
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
if(rdma_enable_cuda && rreq->mrail.cuda_transfer_mode != NONE) { if(rdma_enable_cuda && rreq->mrail.cuda_transfer_mode != NONE) {
cts_pkt->rndv.cuda_transfer_mode = DEVICE_TO_DEVICE; cts_pkt->rndv.cuda_transfer_mode = DEVICE_TO_DEVICE;
} else { } else {
cts_pkt->rndv.cuda_transfer_mode = NONE; cts_pkt->rndv.cuda_transfer_mode = NONE;
} }
#endif #endif
if (IS_VC_SMP(vc) && cts_pkt->type == MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND) {
rreq->mrail.protocol = MV2_RNDV_PROTOCOL_R3;
cts_pkt->rndv.protocol = MV2_RNDV_PROTOCOL_R3;
}
switch (rreq->mrail.protocol) { switch (rreq->mrail.protocol) {
case MV2_RNDV_PROTOCOL_R3: case MV2_RNDV_PROTOCOL_R3:
{ {
cts_pkt->rndv.protocol = MV2_RNDV_PROTOCOL_R3; cts_pkt->rndv.protocol = MV2_RNDV_PROTOCOL_R3;
/*MRAILI_Prepost_R3(); */ /*MRAILI_Prepost_R3(); */
break; break;
} }
case MV2_RNDV_PROTOCOL_RPUT: case MV2_RNDV_PROTOCOL_RPUT:
{ {
MPIDI_CH3I_MRAIL_Prepare_rndv(vc, rreq); MPIDI_CH3I_MRAIL_Prepare_rndv(vc, rreq);
skipping to change at line 120 skipping to change at line 131
} }
case MV2_RNDV_PROTOCOL_RGET: case MV2_RNDV_PROTOCOL_RGET:
{ {
int rank; int rank;
UPMI_GET_RANK(&rank); UPMI_GET_RANK(&rank);
fprintf(stderr, "[%d][%s:%d] ", rank, __FILE__, __LINE__); fprintf(stderr, "[%d][%s:%d] ", rank, __FILE__, __LINE__);
fprintf(stderr, "RGET preparing CTS?\n"); fprintf(stderr, "RGET preparing CTS?\n");
mpi_errno = -1; mpi_errno = -1;
break; break;
} }
break;
#ifdef _ENABLE_UD_ #ifdef _ENABLE_UD_
case MV2_RNDV_PROTOCOL_UD_ZCOPY: case MV2_RNDV_PROTOCOL_UD_ZCOPY:
{ {
MPIDI_CH3I_MRAIL_Prepare_rndv_zcopy(vc, rreq); MPIDI_CH3I_MRAIL_Prepare_rndv_zcopy(vc, rreq);
MPIDI_CH3I_MRAIL_SET_PKT_RNDV(cts_pkt, rreq); MPIDI_CH3I_MRAIL_SET_PKT_RNDV(cts_pkt, rreq);
if (rreq->mrail.protocol == MV2_RNDV_PROTOCOL_UD_ZCOPY) { if (rreq->mrail.protocol == MV2_RNDV_PROTOCOL_UD_ZCOPY) {
cts_pkt->rndv.rndv_qpn = ((mv2_rndv_qp_t *) cts_pkt->rndv.rndv_qpn = ((mv2_rndv_qp_t *)
rreq->mrail.rndv_qp_entry)->ud_qp->qp_num; rreq->mrail.rndv_qp_entry)->ud_qp->qp_num;
cts_pkt->rndv.hca_index = ((mv2_rndv_qp_t *) cts_pkt->rndv.hca_index = ((mv2_rndv_qp_t *)
rreq->mrail.rndv_qp_entry)->hca_num; rreq->mrail.rndv_qp_entry)->hca_num;
skipping to change at line 375 skipping to change at line 385
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_PUSH); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_PUSH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
if (SMP_INIT if (SMP_INIT
&& vc->smp.local_nodes >= 0 && vc->smp.local_nodes >= 0
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
&& sreq->mrail.protocol != MV2_RNDV_PROTOCOL_CUDAIPC && sreq->mrail.protocol != MV2_RNDV_PROTOCOL_CUDAIPC
#endif #endif
&& vc->smp.local_nodes != g_smpi.my_local_id) && vc->smp.local_nodes != g_smpi.my_local_id)
{ {
MPIU_Assert(sreq->mrail.protocol == MV2_RNDV_PROTOCOL_R3);
MPIDI_CH3_SMP_Rendezvous_push(vc, sreq); MPIDI_CH3_SMP_Rendezvous_push(vc, sreq);
return MPI_SUCCESS; return MPI_SUCCESS;
} }
switch (sreq->mrail.protocol) switch (sreq->mrail.protocol)
{ {
case MV2_RNDV_PROTOCOL_RPUT: case MV2_RNDV_PROTOCOL_RPUT:
MPIDI_CH3I_MRAILI_Rendezvous_rput_push(vc, sreq); MPIDI_CH3I_MRAILI_Rendezvous_rput_push(vc, sreq);
break; break;
case MV2_RNDV_PROTOCOL_RGET: case MV2_RNDV_PROTOCOL_RGET:
skipping to change at line 408 skipping to change at line 417
#endif #endif
default: default:
MPIDI_CH3_Rendezvous_r3_push(vc, sreq); MPIDI_CH3_Rendezvous_r3_push(vc, sreq);
break; break;
} }
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_PUSH); MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
return MPI_SUCCESS; return MPI_SUCCESS;
} }
#if defined(_SMP_CMA_)
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_CMA_Rendezvous_push
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
static int MPIDI_CH3_CMA_Rendezvous_push(MPIDI_VC_t * vc,
MPID_Request * sreq)
{
int mpi_errno = MPI_SUCCESS;
int complete = 0, rail = -1;
void *src, *dst;
ssize_t len, offset;
int type = MPIDI_Request_get_type(sreq);
int my_lrank = MPIDI_Process.my_pg->ch.local_process_id;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_CMA_RNDV_PUSH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_CMA_RNDV_PUSH);
PRINT_DEBUG(DEBUG_RNDV_verbose>2,
"req type: %d, protocol: %d, partner_id: %08x, iov count: %d, offset
%llu, len: %llu\n",
type, sreq->mrail.protocol, sreq->mrail.partner_id, sreq->dev.iov_co
unt,
sreq->dev.iov_offset, sreq->dev.iov[0].MPL_IOV_LEN);
/* Non-contig sends are handled using the R3 protocol */
MPIU_Assert(sreq->dev.iov_count == 1);
MPIU_Assert(sreq->mrail.protocol == MV2_RNDV_PROTOCOL_RPUT ||
sreq->mrail.protocol == MV2_RNDV_PROTOCOL_RGET );
if (sreq->mrail.protocol == MV2_RNDV_PROTOCOL_RPUT) {
src = sreq->dev.iov[0].MPL_IOV_BUF;
len = sreq->dev.iov[0].MPL_IOV_LEN;
dst = sreq->mrail.remote_addr;
mpi_errno = MPIDI_CH3I_SMP_do_cma_put(vc, src, dst, len);
if (MPI_SUCCESS != mpi_errno) {
vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
sreq->status.MPI_ERROR = MPI_ERR_INTERN;
MPID_Request_complete(sreq);
return mpi_errno;
}
MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
MRAILI_RDMA_Put_finish(vc, sreq, rail);
sreq->mrail.nearly_complete = 1;
} else if (sreq->mrail.protocol == MV2_RNDV_PROTOCOL_RGET) {
dst = sreq->dev.iov[0].MPL_IOV_BUF;
len = sreq->dev.iov[0].MPL_IOV_LEN;
src = sreq->mrail.remote_addr;
mpi_errno = MPIDI_CH3I_SMP_do_cma_get(vc, src, dst, len);
if (MPI_SUCCESS != mpi_errno) {
vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
sreq->status.MPI_ERROR = MPI_ERR_INTERN;
MPID_Request_complete(sreq);
return mpi_errno;
}
sreq->mrail.nearly_complete = 1;
sreq->mrail.num_rdma_read_completions = 1;
MRAILI_RDMA_Get_finish(vc, sreq, rail);
} else {
mpi_errno =
MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME,
__LINE__, MPI_ERR_OTHER, "**notimpl", 0);
return mpi_errno;
}
fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_CMA_RNDV_PUSH);
return mpi_errno;
}
#endif
#undef FUNCNAME #undef FUNCNAME
#define FUNCNAME MPIDI_CH3_SMP_Rendezvous_push #define FUNCNAME MPIDI_CH3_SMP_Rendezvous_push
#undef FCNAME #undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME) #define FCNAME MPL_QUOTE(FUNCNAME)
static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t * vc, static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t * vc,
MPID_Request * sreq) MPID_Request * sreq)
{ {
int nb; int nb;
int complete = 0; int complete = 0;
int seqnum; int seqnum;
int mpi_errno; int mpi_errno;
MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head; MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head;
MPID_Request * send_req; MPID_Request * send_req;
#if defined (_ENABLE_CUDA_) #if defined (_ENABLE_CUDA_)
int iov_isdev = 0; int iov_isdev = 0;
#endif #endif
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
if (sreq->mrail.protocol != MV2_RNDV_PROTOCOL_R3) {
#if defined(_SMP_CMA_)
if(g_smp_use_cma) {
return MPIDI_CH3_CMA_Rendezvous_push(vc, sreq);
}
#endif
}
MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA); MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA);
pkt_head.receiver_req_id = sreq->mrail.partner_id; pkt_head.receiver_req_id = sreq->mrail.partner_id;
MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_VC_FAI_send_seqnum(vc, seqnum);
MPIDI_Pkt_set_seqnum(&pkt_head, seqnum); MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
MPIDI_Request_set_seqnum(sreq, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum);
#if defined(_SMP_CMA_) || defined(_SMP_LIMIC_) #if defined(_SMP_CMA_) || defined(_SMP_LIMIC_)
#if defined(_SMP_CMA_) #if defined(_SMP_CMA_)
int use_cma = g_smp_use_cma; int use_cma = g_smp_use_cma;
#else #else
skipping to change at line 482 skipping to change at line 572
#endif #endif
) )
{ {
pkt_head.csend_req_id = NULL; pkt_head.csend_req_id = NULL;
pkt_head.send_req_id = sreq; pkt_head.send_req_id = sreq;
} }
#endif #endif
#endif #endif
PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"Sending R3 Data to %d, sreq: %08x, partner: %08x, niov: %d, cma: %p
, limic: %p\n",
vc->pg_rank, sreq, sreq->mrail.partner_id, sreq->dev.iov_count,
pkt_head.csend_req_id, pkt_head.send_req_id);
mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt_head, mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt_head,
sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t), sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t),
&send_req); &send_req);
if (mpi_errno != MPI_SUCCESS) { if (mpi_errno != MPI_SUCCESS) {
MPIU_Object_set_ref(sreq, 0); MPIU_Object_set_ref(sreq, 0);
MPIDI_CH3_Request_destroy(sreq); MPIDI_CH3_Request_destroy(sreq);
sreq = NULL; sreq = NULL;
mpi_errno = mpi_errno =
MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME,
skipping to change at line 509 skipping to change at line 604
MPID_Request_release(send_req); MPID_Request_release(send_req);
} }
#if defined(_SMP_LIMIC_) || defined(_SMP_CMA_) #if defined(_SMP_LIMIC_) || defined(_SMP_CMA_)
if (pkt_head.send_req_id || pkt_head.csend_req_id) { if (pkt_head.send_req_id || pkt_head.csend_req_id) {
sreq->mrail.nearly_complete = 1; sreq->mrail.nearly_complete = 1;
return MPI_SUCCESS; return MPI_SUCCESS;
} }
#endif #endif
DEBUG_PRINT("r3 sent req is %p\n", sreq); PRINT_DEBUG(DEBUG_RNDV_verbose>1, "R3 Data sent to %d, sreq: %08x\n", vc->pg _rank, sreq);
if (MPIDI_CH3I_SMP_SendQ_empty(vc)) { if (MPIDI_CH3I_SMP_SendQ_empty(vc)) {
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
if (rdma_enable_cuda && s_smp_cuda_pipeline) { if (rdma_enable_cuda && s_smp_cuda_pipeline) {
iov_isdev = is_device_buffer((void *) sreq->dev.iov[sreq->dev.iov_of fset].MPL_IOV_BUF); iov_isdev = is_device_buffer((void *) sreq->dev.iov[sreq->dev.iov_of fset].MPL_IOV_BUF);
} }
#endif #endif
vc->smp.send_current_pkt_type = SMP_RNDV_MSG; vc->smp.send_current_pkt_type = SMP_RNDV_MSG;
for (;;) { for (;;) {
DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n", PRINT_DEBUG(DEBUG_RNDV_verbose>1,
sreq->dev.iov_count, sreq->dev.iov_offset, "sreq: %08x, iov count: %d, offset %d, len[0]: %d\n",
sreq->dev.iov[0].MPL_IOV_LEN); sreq, sreq->dev.iov_count, sreq->dev.iov_offset,
sreq->dev.iov[0].MPL_IOV_LEN);
if (vc->smp.send_current_pkt_type == SMP_RNDV_MSG) { if (vc->smp.send_current_pkt_type == SMP_RNDV_MSG) {
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
if (iov_isdev) { if (iov_isdev) {
mpi_errno = MPIDI_CH3I_SMP_writev_rndv_data_cuda(vc, mpi_errno = MPIDI_CH3I_SMP_writev_rndv_data_cuda(vc,
sreq, sreq,
&sreq->dev.iov[sreq->dev.iov_offset], &sreq->dev.iov[sreq->dev.iov_offset],
sreq->dev.iov_count - sreq->dev.iov_offset, sreq->dev.iov_count - sreq->dev.iov_offset,
&nb, &nb,
0/*is_cont*/); 0/*is_cont*/);
skipping to change at line 570 skipping to change at line 666
} }
if (MPI_SUCCESS != mpi_errno) { if (MPI_SUCCESS != mpi_errno) {
vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED; vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
sreq->status.MPI_ERROR = MPI_ERR_INTERN; sreq->status.MPI_ERROR = MPI_ERR_INTERN;
MPID_Request_complete(sreq); MPID_Request_complete(sreq);
return mpi_errno; return mpi_errno;
} }
if (nb > 0) { if (nb > 0) {
PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"Wrote R3 data, dest: %d, sreq: %08x, bytes: %d\n",
vc->pg_rank, sreq, nb);
if (MPIDI_CH3I_Request_adjust_iov(sreq, nb)) { if (MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
if (iov_isdev) { if (iov_isdev) {
/* the request is completed when the device to /* the request is completed when the device to
* host copy is complete*/ * host copy is complete*/
sreq->mrail.nearly_complete = 1; sreq->mrail.nearly_complete = 1;
break; break;
} }
#endif #endif
skipping to change at line 594 skipping to change at line 693
} else { } else {
vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT; vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
} }
} else { } else {
sreq->ch.reqtype = REQUEST_RNDV_R3_DATA; sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq); MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
vc->smp.send_active = sreq; vc->smp.send_active = sreq;
sreq->mrail.nearly_complete = 1; sreq->mrail.nearly_complete = 1;
vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT; vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
MV2_INC_NUM_POSTED_SEND(); MV2_INC_NUM_POSTED_SEND();
PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"Enqueue next R3 data, dest: %d, sreq: %08x\n",
vc->pg_rank, sreq);
break; break;
} }
} else { } else {
sreq->ch.reqtype = REQUEST_RNDV_R3_DATA; sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq); MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
vc->smp.send_active = sreq; vc->smp.send_active = sreq;
sreq->mrail.nearly_complete = 1; sreq->mrail.nearly_complete = 1;
PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"Enqueue R3 data, dest: %d, sreq: %08x\n",
vc->pg_rank, sreq);
break; break;
} }
} }
} else { } else {
sreq->ch.reqtype = REQUEST_RNDV_R3_DATA; sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
MPIDI_CH3I_SMP_SendQ_enqueue(vc, sreq); MPIDI_CH3I_SMP_SendQ_enqueue(vc, sreq);
sreq->mrail.nearly_complete = 1; sreq->mrail.nearly_complete = 1;
DEBUG_PRINT("Enqueue sreq %p", sreq); PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"Enqueue R3 data, dest: %d, sreq: %08x\n",
vc->pg_rank, sreq);
} }
MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH); MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
return MPI_SUCCESS; return MPI_SUCCESS;
} }
#undef FUNCNAME #undef FUNCNAME
#define FUNCNAME MPIDI_CH3_Rendezvous_r3_push #define FUNCNAME MPIDI_CH3_Rendezvous_r3_push
#undef FCNAME #undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME) #define FCNAME MPL_QUOTE(FUNCNAME)
void MPIDI_CH3_Rendezvous_r3_push(MPIDI_VC_t * vc, MPID_Request * sreq) void MPIDI_CH3_Rendezvous_r3_push(MPIDI_VC_t * vc, MPID_Request * sreq)
{ {
vbuf *buf; vbuf *buf;
MPL_IOV iov[MPL_IOV_LIMIT + 1]; MPL_IOV iov[MPL_IOV_LIMIT + 1] = {0};
int n_iov; int n_iov;
int msg_buffered = 0; int msg_buffered = 0;
int nb = 0; int nb = 0;
int complete = 0; int complete = 0;
int seqnum; int seqnum;
int finished = 0; int finished = 0;
int mpi_errno; int mpi_errno;
int wait_for_rndv_r3_ack = 0; int wait_for_rndv_r3_ack = 0;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH);
skipping to change at line 741 skipping to change at line 848
#undef FUNCNAME #undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_MRAILI_Process_rndv #define FUNCNAME MPIDI_CH3I_MRAILI_Process_rndv
#undef FCNAME #undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME) #define FCNAME MPL_QUOTE(FUNCNAME)
void MPIDI_CH3I_MRAILI_Process_rndv() void MPIDI_CH3I_MRAILI_Process_rndv()
{ {
MPID_Request *sreq; MPID_Request *sreq;
MPIDI_VC_t *pending_flowlist = NULL, *temp_vc = NULL; MPIDI_VC_t *pending_flowlist = NULL, *temp_vc = NULL;
int need_vc_enqueue = 0; int need_vc_enqueue = 0;
int complete = 0;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROCESS_RNDV); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROCESS_RNDV);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROCESS_RNDV); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROCESS_RNDV);
while (flowlist) { while (flowlist) {
/* Push on the the first ongoing receive with /* Push on the the first ongoing receive with
* MPIDI_CH3_Rendezvous_push. If the receive * MPIDI_CH3_Rendezvous_push. If the receive
* finishes, it will advance the shandle_head * finishes, it will advance the shandle_head
* pointer on the connection. * pointer on the connection.
* *
* xxx the side effect of MPIDI_CH3_Rendezvous_push is * xxx the side effect of MPIDI_CH3_Rendezvous_push is
skipping to change at line 780 skipping to change at line 888
#endif #endif
sreq = flowlist->mrail.sreq_head; sreq = flowlist->mrail.sreq_head;
while (sreq != NULL) { while (sreq != NULL) {
#ifdef CKPT #ifdef CKPT
if (flowlist->ch.rput_stop if (flowlist->ch.rput_stop
&& MV2_RNDV_PROTOCOL_RPUT == sreq->mrail.protocol) { && MV2_RNDV_PROTOCOL_RPUT == sreq->mrail.protocol) {
break; /*VC will be push back when the rput_stop becomes 0*/ break; /*VC will be push back when the rput_stop becomes 0*/
} }
#endif #endif
MPIDI_CH3_Rendezvous_push(flowlist, sreq); MPIDI_CH3_Rendezvous_push(flowlist, sreq);
DEBUG_PRINT("[process rndv] after rndv push\n"); PRINT_DEBUG(DEBUG_RNDV_verbose>1,
"after rndv push, sreq: %08x, nearly_complete: %d, local_com
plete: %d, remote_complete: %d\n",
sreq, sreq->mrail.nearly_complete, sreq->mrail.local_complet
e, sreq->mrail.remote_complete);
if (1 != sreq->mrail.nearly_complete) { if (1 != sreq->mrail.nearly_complete) {
break; break;
} }
DEBUG_PRINT PRINT_DEBUG(DEBUG_RNDV_verbose, "sreq: %08x, protocol: %d, "
("[process rndv] nearly complete, remove from list\n"); "nearly_complete: %d, local_complete: %d, remote_complete: %
d\n",
sreq, sreq->mrail.protocol, sreq->mrail.nearly_complete,
sreq->mrail.local_complete, sreq->mrail.remote_complete);
RENDEZVOUS_DONE(flowlist); RENDEZVOUS_DONE(flowlist);
sreq = flowlist->mrail.sreq_head; sreq = flowlist->mrail.sreq_head;
} }
temp_vc = flowlist; temp_vc = flowlist;
need_vc_enqueue = 0; need_vc_enqueue = 0;
if (sreq && 1 != sreq->mrail.nearly_complete) { if (sreq && 1 != sreq->mrail.nearly_complete) {
need_vc_enqueue = 1; need_vc_enqueue = 1;
} }
skipping to change at line 851 skipping to change at line 963
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_RCV_DATA); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_RCV_DATA);
MPID_Request_get_ptr(((MPIDI_CH3_Pkt_rndv_r3_data_t *) (buffer-> MPID_Request_get_ptr(((MPIDI_CH3_Pkt_rndv_r3_data_t *) (buffer->
pheader))-> pheader))->
receiver_req_id, rreq); receiver_req_id, rreq);
if (!(MV2_RNDV_PROTOCOL_R3 == rreq->mrail.protocol || if (!(MV2_RNDV_PROTOCOL_R3 == rreq->mrail.protocol ||
MV2_RNDV_PROTOCOL_RPUT == rreq->mrail.protocol)) { MV2_RNDV_PROTOCOL_RPUT == rreq->mrail.protocol)) {
int rank; int rank;
UPMI_GET_RANK(&rank); UPMI_GET_RANK(&rank);
DEBUG_PRINT( "[rank %d]get wrong req protocol, req %p, protocol %d\n", r ank, DEBUG_PRINT( "[rank %d]get wrong req protocol, req %08x, protocol %d\n", rank,
rreq, rreq->mrail.protocol); rreq, rreq->mrail.protocol);
MPIU_Assert(MV2_RNDV_PROTOCOL_R3 == rreq->mrail.protocol || MPIU_Assert(MV2_RNDV_PROTOCOL_R3 == rreq->mrail.protocol ||
MV2_RNDV_PROTOCOL_RPUT == rreq->mrail.protocol); MV2_RNDV_PROTOCOL_RPUT == rreq->mrail.protocol);
} }
rreq->mrail.protocol = MV2_RNDV_PROTOCOL_R3; rreq->mrail.protocol = MV2_RNDV_PROTOCOL_R3;
mpi_errno = MPIDI_CH3I_MRAIL_Fill_Request(rreq, buffer, skipsize, &nb); mpi_errno = MPIDI_CH3I_MRAIL_Fill_Request(rreq, buffer, skipsize, &nb);
if (mpi_errno != MPI_SUCCESS) if (mpi_errno != MPI_SUCCESS)
{ {
skipping to change at line 972 skipping to change at line 1084
int MPIDI_CH3_Rendezvous_rget_send_finish(MPIDI_VC_t * vc, int MPIDI_CH3_Rendezvous_rget_send_finish(MPIDI_VC_t * vc,
MPIDI_CH3_Pkt_rget_finish_t *rget_pkt) MPIDI_CH3_Pkt_rget_finish_t *rget_pkt)
{ {
int mpi_errno = MPI_SUCCESS; int mpi_errno = MPI_SUCCESS;
int complete; int complete;
MPID_Request *sreq; MPID_Request *sreq;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_RGET_SEND_FINISH); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_RGET_SEND_FINISH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_RGET_SEND_FINISH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_RGET_SEND_FINISH);
MPID_Request_get_ptr(rget_pkt->sender_req_id, sreq); MPID_Request_get_ptr(rget_pkt->sender_req_id, sreq);
PRINT_DEBUG(DEBUG_RNDV_verbose,
"Received RGET finish, sreq: %08x, protocol: %d, local: %d, remote:
%d\n",
sreq, sreq->mrail.protocol, sreq->mrail.local_complete, sreq->mrail.
remote_complete);
#if defined (_ENABLE_CUDA_) && defined(HAVE_CUDA_IPC) #if defined (_ENABLE_CUDA_) && defined(HAVE_CUDA_IPC)
cudaError_t cudaerr = cudaSuccess; cudaError_t cudaerr = cudaSuccess;
if (rdma_enable_cuda && sreq->mrail.ipc_cuda_event) { if (rdma_enable_cuda && sreq->mrail.ipc_cuda_event) {
cudaerr = cudaStreamWaitEvent(0, sreq->mrail.ipc_cuda_event->event, 0); cudaerr = cudaStreamWaitEvent(0, sreq->mrail.ipc_cuda_event->event, 0);
if (cudaerr != cudaSuccess) { if (cudaerr != cudaSuccess) {
ibv_error_abort(IBV_RETURN_ERR,"cudaStreamWaitEvent failed\n"); ibv_error_abort(IBV_RETURN_ERR,"cudaStreamWaitEvent failed\n");
} }
if (sreq->mrail.ipc_cuda_event) { if (sreq->mrail.ipc_cuda_event) {
release_cudaipc_event(sreq->mrail.ipc_cuda_event); release_cudaipc_event(sreq->mrail.ipc_cuda_event);
} }
sreq->mrail.ipc_cuda_event = NULL; sreq->mrail.ipc_cuda_event = NULL;
} }
#endif #endif
sreq->mrail.remote_complete = UINT32_MAX;
if (!MPIDI_CH3I_MRAIL_Finish_request(sreq)) { if (!MPIDI_CH3I_MRAIL_Finish_request(sreq)) {
return MPI_SUCCESS; return MPI_SUCCESS;
} }
MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(sreq); MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(sreq);
#if 0 #if 0
if(mv2_MPIDI_CH3I_RDMA_Process.has_hsam && if(mv2_MPIDI_CH3I_RDMA_Process.has_hsam &&
((req->mrail.rndv_buf_sz > rdma_large_msg_rail_sharing_threshold))) { ((req->mrail.rndv_buf_sz > rdma_large_msg_rail_sharing_threshold))) {
skipping to change at line 1215 skipping to change at line 1331
int MPIDI_CH3_Rendezvous_rput_finish(MPIDI_VC_t * vc, int MPIDI_CH3_Rendezvous_rput_finish(MPIDI_VC_t * vc,
MPIDI_CH3_Pkt_rput_finish_t * rf_pkt) MPIDI_CH3_Pkt_rput_finish_t * rf_pkt)
{ {
int mpi_errno = MPI_SUCCESS; int mpi_errno = MPI_SUCCESS;
MPID_Request *rreq; MPID_Request *rreq;
int complete; int complete;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_RPUT_FINISH); MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_RPUT_FINISH);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_RPUT_FINISH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_RPUT_FINISH);
MPID_Request_get_ptr(rf_pkt->receiver_req_id, rreq); MPID_Request_get_ptr(rf_pkt->receiver_req_id, rreq);
PRINT_DEBUG(DEBUG_RNDV_verbose, "Received RPUT finish, rreq: %08x, protocol:
%d, local: %d, remote: %d\n",
rreq, rreq->mrail.protocol, rreq->mrail.local_complete, rreq->mrail.
remote_complete);
#if defined(_ENABLE_CUDA_) #if defined(_ENABLE_CUDA_)
if (rdma_enable_cuda && rreq->mrail.cuda_transfer_mode != NONE if (rdma_enable_cuda && rreq->mrail.cuda_transfer_mode != NONE
&& rreq->mrail.cuda_transfer_mode != DEVICE_TO_HOST) { && rreq->mrail.cuda_transfer_mode != DEVICE_TO_HOST) {
if (MPIDI_CH3I_MRAILI_Process_cuda_finish(vc, rreq, rf_pkt) != 1) { if (MPIDI_CH3I_MRAILI_Process_cuda_finish(vc, rreq, rf_pkt) != 1) {
goto fn_exit; goto fn_exit;
} }
} else } else
#endif #endif
{ {
if (IS_VC_SMP(vc)) {
rreq->mrail.remote_complete = UINT32_MAX;
} else {
rreq->mrail.remote_complete++;
if (rreq->mrail.remote_complete == rdma_num_rails) {
rreq->mrail.remote_complete = UINT32_MAX;
}
}
if (!MPIDI_CH3I_MRAIL_Finish_request(rreq)) if (!MPIDI_CH3I_MRAIL_Finish_request(rreq))
{ {
return MPI_SUCCESS; return MPI_SUCCESS;
} }
} }
if (rreq->mrail.rndv_buf_alloc == 1){ if (rreq->mrail.rndv_buf_alloc == 1){
MPIDI_CH3_Rendezvous_unpack_data(vc, rreq); MPIDI_CH3_Rendezvous_unpack_data(vc, rreq);
} else { } else {
rreq->mrail.rndv_buf = NULL; rreq->mrail.rndv_buf = NULL;
skipping to change at line 1254 skipping to change at line 1380
mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete); mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete);
if (mpi_errno != MPI_SUCCESS) if (mpi_errno != MPI_SUCCESS)
{ {
mpi_errno = mpi_errno =
MPIR_Err_create_code(mpi_errno, MPIR_Err_create_code(mpi_errno,
MPIR_ERR_RECOVERABLE, FCNAME, MPIR_ERR_RECOVERABLE, FCNAME,
__LINE__, MPI_ERR_OTHER, "**fail", 0); __LINE__, MPI_ERR_OTHER, "**fail", 0);
} }
PRINT_DEBUG(DEBUG_RNDV_verbose, "rreq: %08x, complete: %d\n", rreq, complete );
if (complete) if (complete)
{ {
vc->ch.recv_active = NULL; vc->ch.recv_active = NULL;
} }
else else
{ {
mpi_errno = mpi_errno =
MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
FCNAME, __LINE__, FCNAME, __LINE__,
MPI_ERR_OTHER, "**fail", 0); MPI_ERR_OTHER, "**fail", 0);
 End of changes. 23 change blocks. 
12 lines changed or deleted 151 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)