"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/mpid/ch3/channels/mrail/src/gen2/ibv_channel_manager.c" between
mvapich2-2.3.1.tar.gz and mvapich2-2.3.2.tar.gz

About: MVAPICH2 offers "MPI over InfiniBand, 10GigE/iWARP and RDMA over Converged Ethernet (RoCE)" to achieve the best performance, scalability, and fault tolerance for high-end computing systems and servers.

ibv_channel_manager.c (mvapich2-2.3.1) → ibv_channel_manager.c (mvapich2-2.3.2)

(unchanged lines shown once; "-" = removed in 2.3.2, "+" = added in 2.3.2)
skipping to change at line 45 (both versions)

         fprintf(stderr, args); \
         fflush(stderr); \
     } while (0)
 #else
 #define DEBUG_PRINT(args...)
 #endif
 static pthread_spinlock_t g_apm_lock;
 static int num_cqes[MAX_NUM_HCAS] = { 0 };
 static int curr_cqe[MAX_NUM_HCAS] = { 0 };
-static struct ibv_wc wc[MAX_NUM_HCAS][RDMA_MAX_CQE_ENTRIES_PER_POLL];
+static struct ibv_wc wc[MAX_NUM_HCAS][RDMA_MAX_CQE_ENTRIES_PER_POLL] = {0};
 static unsigned long nspin = 0;
 MPIR_T_PVAR_ULONG_COUNTER_DECL_EXTERN(MV2, mv2_vbuf_allocated);
 MPIR_T_PVAR_ULONG_COUNTER_DECL_EXTERN(MV2, mv2_vbuf_freed);
 MPIR_T_PVAR_ULONG_LEVEL_DECL_EXTERN(MV2, mv2_vbuf_available);
 MPIR_T_PVAR_ULONG_COUNTER_DECL_EXTERN(MV2, mv2_ud_vbuf_allocated);
 MPIR_T_PVAR_ULONG_COUNTER_DECL_EXTERN(MV2, mv2_ud_vbuf_freed);
 MPIR_T_PVAR_ULONG_LEVEL_DECL_EXTERN(MV2, mv2_ud_vbuf_available);
 MPIR_T_PVAR_ULONG_COUNTER_DECL_EXTERN(MV2, mv2_rdmafp_ctrl_packet_count);
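The only change in this block adds an explicit `= {0}` initializer to the static completion-entry array. A minimal compile-and-run sketch of what that initializer guarantees, using stand-in constants and a simplified struct in place of the real verbs.h and MVAPICH2 definitions:

    #include <stdio.h>

    /* Stand-ins for the real MVAPICH2/verbs definitions (hypothetical values). */
    #define MAX_NUM_HCAS 4
    #define RDMA_MAX_CQE_ENTRIES_PER_POLL 96
    struct wc_stub { int status; unsigned long wr_id; }; /* simplified ibv_wc */

    /* "= {0}" value-initializes the whole aggregate: every field of every
     * element starts as zero. A static object is zero-initialized anyway in C,
     * so the initializer changes nothing at runtime but makes the invariant
     * "unpolled entries are zero" explicit in the source. */
    static struct wc_stub wc[MAX_NUM_HCAS][RDMA_MAX_CQE_ENTRIES_PER_POLL] = {0};

    int main(void) {
        printf("%d %lu\n", wc[3][95].status, wc[3][95].wr_id); /* prints: 0 0 */
        return 0;
    }

That invariant matters for the memset change later in this diff, which stops wiping the full row and clears only the prefix that the previous poll actually filled.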
skipping to change at line 604 (both versions)

     /* This is a receive completion */
     if (mv2_MPIDI_CH3I_RDMA_Process.has_srq
 #ifdef _ENABLE_UD_
         || v->transport == IB_TRANSPORT_UD
 #endif
         ) {
         SET_PKT_LEN_HEADER(v, wc);
         SET_PKT_HEADER_OFFSET(v);
         p = v->pheader;
 #ifdef _ENABLE_UD_
-        MPIDI_PG_Get_vc(MPIDI_Process.my_pg, p->src.rank, &vc);
-#else
-        vc = (MPIDI_VC_t *)p->src.vc_addr;
+        if(rdma_enable_hybrid)
+        {
+            MPIDI_PG_Get_vc(MPIDI_Process.my_pg, p->src.rank, &vc);
+        } else
 #endif
+        {
+            vc = (MPIDI_VC_t *)p->src.vc_addr;
+        }
         v->vc = vc;
         v->rail = p->rail;
     }
     if (likely(NULL == vc_req || vc_req == vc)) {
         /* In this case, we should return the vbuf
          * any way if it is next expected*/
         int seqnum = GetSeqNumVbuf(v);
         *vbuf_handle = v;
         SET_PKT_LEN_HEADER(v, wc);
         SET_PKT_HEADER_OFFSET(v);
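The change above replaces a compile-time either/or with a runtime test: with UD compiled in, 2.3.1 always resolved the sender by rank, while 2.3.2 does so only when hybrid UD/RC transport is actually enabled, and otherwise uses the VC pointer carried in the packet header. A self-contained sketch of that selection pattern (every stub below is a hypothetical stand-in for the MVAPICH2 types and helpers):

    #include <stdio.h>
    #include <stdint.h>

    typedef struct { int rank; } MPIDI_VC_t;             /* stub VC */
    static MPIDI_VC_t vc_table[2] = { {0}, {1} };
    static int rdma_enable_hybrid = 1;                   /* runtime knob */

    /* hypothetical stand-in for MPIDI_PG_Get_vc(): resolve a rank to its VC */
    static void get_vc_by_rank(int rank, MPIDI_VC_t **vc) { *vc = &vc_table[rank]; }

    int main(void) {
        MPIDI_VC_t *vc = NULL;
        int src_rank = 0;                                /* carried in UD headers */
        uintptr_t vc_addr = (uintptr_t)&vc_table[1];     /* carried in RC headers */

    #ifdef _ENABLE_UD_
        if (rdma_enable_hybrid) {
            /* UD packets identify the sender only by rank: look the VC up */
            get_vc_by_rank(src_rank, &vc);
        } else
    #endif
        {
            /* RC packets carry the peer's VC pointer directly */
            vc = (MPIDI_VC_t *)vc_addr;
        }
        printf("resolved vc rank = %d\n", vc->rank);
        return 0;
    }

Compile with -D_ENABLE_UD_ to exercise the hybrid branch; without it, the fallback path runs unconditionally, matching the pre-2.3.2 non-UD build.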
skipping to change at line 939 (2.3.1) / line 944 (2.3.2)

                     num_cqs, cq_choice, i);
             curr_cqe[i]++;
             /* Drain till we get in-order recv or run out of polled CQEs */
         } while ((*vbuf_handle == NULL) && (curr_cqe[i] < num_cqes[i]));
         if (*vbuf_handle != NULL) {
             /* We got in-order data, deliver it to higher level */
             goto fn_exit;
         }
     } else {
-        memset(wc[i], 0, sizeof(struct ibv_wc)*RDMA_MAX_CQE_ENTRIES_PER_POLL);
+        memset(wc[i], 0, sizeof(struct ibv_wc) * num_cqes[i]);
         ne = ibv_poll_cq(chosen_cq, rdma_num_cqes_per_poll, wc[i]);
         if (unlikely(ne < 0)) {
             ibv_error_abort(IBV_RETURN_ERR, "Fail to poll cq\n");
         } else if (ne) {
             curr_cqe[i] = 0;
             num_cqes[i] = ne;
             do {
                 type = handle_cqe(vbuf_handle, vc_req, receiving, wc[i][curr_cqe[i]],
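Both polling paths in this diff narrow the memset in the same way: instead of wiping all RDMA_MAX_CQE_ENTRIES_PER_POLL entries before every repoll, 2.3.2 clears only the num_cqes[i] entries the previous ibv_poll_cq() round actually wrote. Plausibly this is also why the `= {0}` initializer was added at the top of the file: entries past the last fill count then provably stay zero. A small sketch of that invariant, with stand-in types:

    #include <stdio.h>
    #include <string.h>

    #define MAX_ENTRIES 96            /* stands in for RDMA_MAX_CQE_ENTRIES_PER_POLL */
    struct wc_stub { int status; };

    static struct wc_stub row[MAX_ENTRIES] = {0};  /* zero at start, like the 2.3.2 array */
    static int num_cqes = 0;                       /* entries filled by the last poll */

    /* stand-in for one ibv_poll_cq() round that returns `ne` completions */
    static void fake_poll(int ne) {
        /* clear only what the previous round dirtied (the 2.3.2 memset) */
        memset(row, 0, sizeof(struct wc_stub) * num_cqes);
        for (int j = 0; j < ne; j++) row[j].status = 1;
        num_cqes = ne;
    }

    int main(void) {
        fake_poll(50);
        fake_poll(10);
        /* indices >= num_cqes are still zero, as if the whole row were wiped */
        printf("row[9]=%d row[10]=%d row[49]=%d\n",
               row[9].status, row[10].status, row[49].status); /* 1 0 0 */
        return 0;
    }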
skipping to change at line 1018 (2.3.1) / line 1023 (2.3.2)

             curr_cqe[i]++;
             /* Drain till we get in-order recv or run out of polled CQEs */
         } while ((*vbuf_handle == NULL) && (curr_cqe[i] < num_cqes[i]));
         if (*vbuf_handle != NULL) {
             /* We got in-order data, deliver it to higher level */
             goto fn_exit;
         }
     } else {
 get_blocking_message:
-        memset(wc[i], 0, sizeof(struct ibv_wc)*RDMA_MAX_CQE_ENTRIES_PER_POLL);
+        memset(wc[i], 0, sizeof(struct ibv_wc) * num_cqes[i]);
         ne = ibv_poll_cq(chosen_cq, rdma_num_cqes_per_poll, wc[i]);
         if (unlikely(ne < 0)) {
             ibv_error_abort(IBV_RETURN_ERR, "Fail to poll cq\n");
         } else if (ne) {
             curr_cqe[i] = 0;
             num_cqes[i] = ne;
             do {
                 type = handle_cqe(vbuf_handle, vc_req, receiving, wc[i][curr_cqe[i]],
skipping to change at line 1108 (2.3.1) / line 1113 (2.3.2)

 #ifdef _ENABLE_XRC_
     if (event.event_type & IBV_XRC_QP_EVENT_FLAG) {
         event.event_type ^= IBV_XRC_QP_EVENT_FLAG;
         xrc_event = 1;
     }
 #endif
     switch (event.event_type) {
         /* Fatal */
         case IBV_EVENT_CQ_ERR:
+            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on CQ %d\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.cq);
+            break;
+        case IBV_EVENT_COMM_EST:
+        case IBV_EVENT_SQ_DRAINED:
+            PRINT_DEBUG(DEBUG_CHM_verbose, "Async event %s on QP 0x%x\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.qp->qp_num);
+            break;
         case IBV_EVENT_QP_FATAL:
         case IBV_EVENT_QP_REQ_ERR:
         case IBV_EVENT_QP_ACCESS_ERR:
-            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %d\n",
-                    event.event_type);
+            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on QP 0x%x\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.qp->qp_num);
             break;
         case IBV_EVENT_PATH_MIG_ERR:
 #ifdef DEBUG
             if(mv2_MPIDI_CH3I_RDMA_Process.has_apm) {
                 DEBUG_PRINT("Path Migration Failed\n");
             }
 #endif /* ifdef DEBUG */
-            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %d\n",
-                    event.event_type);
+            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on QP 0x%x\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.qp->qp_num);
             break;
         case IBV_EVENT_PATH_MIG:
             if(mv2_MPIDI_CH3I_RDMA_Process.has_apm && !apm_tester){
                 DEBUG_PRINT("Path Migration Successful\n");
                 reload_alternate_path((&event)->element.qp);
             }
             if(!mv2_MPIDI_CH3I_RDMA_Process.has_apm) {
-                ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %d\n",
-                        event.event_type);
+                ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on QP 0x%x\n",
+                        ibv_event_type_str(event.event_type),
+                        event.element.qp->qp_num);
             }
             break;
         case IBV_EVENT_DEVICE_FATAL:
+        case IBV_EVENT_PORT_ERR:
+            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on port %d\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.port_num);
+            break;
         case IBV_EVENT_SRQ_ERR:
-            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %d\n",
-                    event.event_type);
+            ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %s on SRQ %p\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.srq);
+            break;
+        case IBV_EVENT_QP_LAST_WQE_REACHED:
+            PRINT_DEBUG(DEBUG_CHM_verbose, "Async event %s on SRQ %p\n",
+                    ibv_event_type_str(event.event_type),
+                    event.element.srq);
             break;
-        case IBV_EVENT_COMM_EST:
         case IBV_EVENT_PORT_ACTIVE:
-        case IBV_EVENT_SQ_DRAINED:
-        case IBV_EVENT_PORT_ERR:
         case IBV_EVENT_LID_CHANGE:
         case IBV_EVENT_PKEY_CHANGE:
         case IBV_EVENT_SM_CHANGE:
-        case IBV_EVENT_QP_LAST_WQE_REACHED:
+            PRINT_DEBUG(DEBUG_CHM_verbose, "Async event %s on port %d",
+                    ibv_event_type_str(event.event_type),
+                    event.element.port_num);
             break;
         case IBV_EVENT_SRQ_LIMIT_REACHED:
             pthread_spin_lock(&mv2_MPIDI_CH3I_RDMA_Process.srq_post_spin_lock);
             if(-1 == hca_num) {
                 /* Was not able to find the context,
                  * error condition */
                 ibv_error_abort(GEN_EXIT_ERR,
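The async-event handler rewrite above replaces the old "Got FATAL event %d" messages with named events and the affected object, using libibverbs' ibv_event_type_str() and the element union inside struct ibv_async_event. A trimmed sketch of the same reporting pattern (requires libibverbs; the case grouping mirrors the diff, and error paths are omitted):

    #include <stdio.h>
    #include <infiniband/verbs.h>

    static void report_async_event(const struct ibv_async_event *event)
    {
        switch (event->event_type) {
        case IBV_EVENT_QP_FATAL:
        case IBV_EVENT_QP_REQ_ERR:
        case IBV_EVENT_QP_ACCESS_ERR:
            /* QP-affiliated events: element.qp is valid */
            fprintf(stderr, "Got FATAL event %s on QP 0x%x\n",
                    ibv_event_type_str(event->event_type),
                    event->element.qp->qp_num);
            break;
        case IBV_EVENT_DEVICE_FATAL:
        case IBV_EVENT_PORT_ERR:
            /* grouped as in the diff; element.port_num is meaningful for PORT_ERR */
            fprintf(stderr, "Got FATAL event %s on port %d\n",
                    ibv_event_type_str(event->event_type),
                    event->element.port_num);
            break;
        case IBV_EVENT_SRQ_ERR:
            /* SRQ-affiliated events: element.srq is valid */
            fprintf(stderr, "Got FATAL event %s on SRQ %p\n",
                    ibv_event_type_str(event->event_type),
                    (void *)event->element.srq);
            break;
        default:
            fprintf(stderr, "Async event %s\n",
                    ibv_event_type_str(event->event_type));
            break;
        }
    }

Which member of the element union is valid depends on the event class (CQ, QP, SRQ, or port), so each case group must read the matching member, exactly as the 2.3.2 code does.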
End of changes. 15 change blocks, 20 lines changed or deleted, 52 lines changed or added.
