openmpi  3.1.6
About: Open MPI is a high performance Message Passing Interface (MPI) library project combining technologies and resources from several other projects (FT-MPI, LA-MPI, LAM/MPI, and PACX-MPI) in order to build the best MPI library available. 3.x series.
  Fossies Dox: openmpi-3.1.6.tar.bz2  ("unofficial" and yet experimental doxygen-generated source code documentation)  

opal_cr.h File Reference
#include "opal_config.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "opal/prefetch.h"
Include dependency graph for opal_cr.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define OPAL_CR_DONE   ((char) 0)
 
#define OPAL_CR_ACK   ((char) 1)
 
#define OPAL_CR_CHECKPOINT   ((char) 2)
 
#define OPAL_CR_NAMED_PROG_R   ("opal_cr_prog_read")
 
#define OPAL_CR_NAMED_PROG_W   ("opal_cr_prog_write")
 
#define OPAL_CR_BASE_ENV_NAME   ("opal_cr_restart-env")
 
#define OPAL_CR_TEST_CHECKPOINT_READY()   ;
 
#define OPAL_CR_TEST_CHECKPOINT_READY_STALL()   ;
 
#define OPAL_CR_INIT_LIBRARY()   ;
 
#define OPAL_CR_FINALIZE_LIBRARY()   ;
 
#define OPAL_CR_ABORT_LIBRARY()   ;
 
#define OPAL_CR_ENTER_LIBRARY()   ;
 
#define OPAL_CR_EXIT_LIBRARY()   ;
 
#define OPAL_CR_NOOP_PROGRESS()   ;
 
#define OPAL_CR_TIMER_ENTRY0   0
 
#define OPAL_CR_TIMER_ENTRY1   1
 
#define OPAL_CR_TIMER_ENTRY2   2
 
#define OPAL_CR_TIMER_CRCPBR0   3
 
#define OPAL_CR_TIMER_CRCP0   4
 
#define OPAL_CR_TIMER_CRCPBR1   5
 
#define OPAL_CR_TIMER_P2P0   6
 
#define OPAL_CR_TIMER_P2P1   7
 
#define OPAL_CR_TIMER_P2PBR0   8
 
#define OPAL_CR_TIMER_CORE0   9
 
#define OPAL_CR_TIMER_CORE1   10
 
#define OPAL_CR_TIMER_COREBR0   11
 
#define OPAL_CR_TIMER_P2P2   12
 
#define OPAL_CR_TIMER_P2PBR1   13
 
#define OPAL_CR_TIMER_P2P3   14
 
#define OPAL_CR_TIMER_P2PBR2   15
 
#define OPAL_CR_TIMER_CRCP1   16
 
#define OPAL_CR_TIMER_COREBR1   17
 
#define OPAL_CR_TIMER_CORE2   18
 
#define OPAL_CR_TIMER_ENTRY3   19
 
#define OPAL_CR_TIMER_ENTRY4   20
 
#define OPAL_CR_TIMER_MAX   21
 
#define OPAL_CR_CLEAR_TIMERS()
 
#define OPAL_CR_SET_TIMER(idx)
 
#define OPAL_CR_DISPLAY_ALL_TIMERS()
 

Typedefs

typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t
 
typedef int(* opal_cr_notify_callback_fn_t) (opal_cr_ckpt_cmd_state_t)
 Notification Routines. More...
 
typedef int(* opal_cr_user_inc_callback_fn_t) (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state)
 User coordination callback routine. More...
 
typedef int(* opal_cr_coord_callback_fn_t) (int)
 Coordination Routines. More...
 

Enumerations

enum  opal_cr_ckpt_cmd_state_t {
  OPAL_CHECKPOINT_CMD_START, OPAL_CHECKPOINT_CMD_IN_PROGRESS, OPAL_CHECKPOINT_CMD_NULL, OPAL_CHECKPOINT_CMD_ERROR,
  OPAL_CR_STATUS_NONE, OPAL_CR_STATUS_REQUESTED, OPAL_CR_STATUS_RUNNING, OPAL_CR_STATUS_TERM,
  OPAL_CR_STATUS_CONTINUE, OPAL_CR_STATUS_RESTART_PRE, OPAL_CR_STATUS_RESTART_POST
}
 
enum  opal_cr_user_inc_callback_event_t {
  OPAL_CR_INC_PRE_CRS_PRE_MPI = 0, OPAL_CR_INC_PRE_CRS_POST_MPI = 1, OPAL_CR_INC_CRS_PRE_CKPT = 2, OPAL_CR_INC_CRS_POST_CKPT = 3,
  OPAL_CR_INC_POST_CRS_PRE_MPI = 4, OPAL_CR_INC_POST_CRS_POST_MPI = 5, OPAL_CR_INC_MAX = 6
}
 User Coordination Routines. More...
 
enum  opal_cr_user_inc_callback_state_t { OPAL_CR_INC_STATE_PREPARE = 0, OPAL_CR_INC_STATE_CONTINUE = 1, OPAL_CR_INC_STATE_RESTART = 2, OPAL_CR_INC_STATE_ERROR = 3 }
 

Functions

OPAL_DECLSPEC int opal_cr_refresh_environ (int prev_pid)
 
OPAL_DECLSPEC int opal_cr_set_enabled (bool)
 
OPAL_DECLSPEC int opal_cr_init (void)
 Initialize the notification and coordination elements. More...
 
OPAL_DECLSPEC int opal_cr_finalize (void)
 Finalize the notification and coordination elements. More...
 
OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready (void)
 Check to see if a checkpoint has been requested. More...
 
OPAL_DECLSPEC int opal_cr_reg_notify_callback (opal_cr_notify_callback_fn_t new_func, opal_cr_notify_callback_fn_t *prev_func)
 
OPAL_DECLSPEC int opal_cr_inc_core (pid_t pid, opal_crs_base_snapshot_t *snapshot, opal_crs_base_ckpt_options_t *options, int *state)
 Function to go through the INC. More...
 
OPAL_DECLSPEC int opal_cr_inc_core_prep (void)
 Notification Routines. More...
 
OPAL_DECLSPEC int opal_cr_inc_core_ckpt (pid_t pid, opal_crs_base_snapshot_t *snapshot, opal_crs_base_ckpt_options_t *options, int *state)
 
OPAL_DECLSPEC int opal_cr_inc_core_recover (int state)
 
OPAL_DECLSPEC int opal_cr_user_inc_register_callback (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_fn_t function, opal_cr_user_inc_callback_fn_t *prev_function)
 
OPAL_DECLSPEC int ompi_trigger_user_inc_callback (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state)
 
OPAL_DECLSPEC int opal_cr_reg_coord_callback (opal_cr_coord_callback_fn_t new_func, opal_cr_coord_callback_fn_t *prev_func)
 Register a checkpoint coordination routine for a higher level. More...
 
OPAL_DECLSPEC int opal_cr_coord (int state)
 OPAL Checkpoint Coordination Routine. More...
 
OPAL_DECLSPEC void opal_cr_set_time (int idx)
 Checkpoint life-cycle timing. More...
 
OPAL_DECLSPEC void opal_cr_display_all_timers (void)
 
OPAL_DECLSPEC void opal_cr_clear_timers (void)
 

Variables

OPAL_DECLSPEC int opal_cr_output
 
OPAL_DECLSPEC char * opal_cr_pipe_dir
 Interface Functions & Vars. More...
 
OPAL_DECLSPEC int opal_cr_entry_point_signal
 
OPAL_DECLSPEC bool opal_cr_is_enabled
 
OPAL_DECLSPEC bool opal_cr_is_tool
 
OPAL_DECLSPEC int opal_cr_checkpoint_request
 
OPAL_DECLSPEC int opal_cr_checkpointing_state
 
OPAL_DECLSPEC bool opal_cr_continue_like_restart
 
OPAL_DECLSPEC bool opal_cr_stall_check
 Global Var Decls. More...
 
OPAL_DECLSPEC bool opal_cr_currently_stalled
 
OPAL_DECLSPEC bool opal_cr_timing_enabled
 
OPAL_DECLSPEC bool opal_cr_timing_barrier_enabled
 
OPAL_DECLSPEC int opal_cr_timing_my_rank
 
OPAL_DECLSPEC int opal_cr_timing_target_rank
 

Detailed Description

Checkpoint functionality for Open MPI

Definition in file opal_cr.h.

Macro Definition Documentation

◆ OPAL_CR_ABORT_LIBRARY

#define OPAL_CR_ABORT_LIBRARY ( )    ;

Definition at line 203 of file opal_cr.h.

◆ OPAL_CR_ACK

#define OPAL_CR_ACK   ((char) 1)

Definition at line 44 of file opal_cr.h.

◆ OPAL_CR_BASE_ENV_NAME

#define OPAL_CR_BASE_ENV_NAME   ("opal_cr_restart-env")

Definition at line 48 of file opal_cr.h.

◆ OPAL_CR_CHECKPOINT

#define OPAL_CR_CHECKPOINT   ((char) 2)

Definition at line 45 of file opal_cr.h.

◆ OPAL_CR_CLEAR_TIMERS

#define OPAL_CR_CLEAR_TIMERS ( )
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_clear_timers(); \
} \
}

Definition at line 396 of file opal_cr.h.

◆ OPAL_CR_DISPLAY_ALL_TIMERS

#define OPAL_CR_DISPLAY_ALL_TIMERS ( )
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_display_all_timers(); \
} \
}

Definition at line 410 of file opal_cr.h.

◆ OPAL_CR_DONE

#define OPAL_CR_DONE   ((char) 0)

Definition at line 43 of file opal_cr.h.

◆ OPAL_CR_ENTER_LIBRARY

#define OPAL_CR_ENTER_LIBRARY ( )    ;

Definition at line 204 of file opal_cr.h.

◆ OPAL_CR_EXIT_LIBRARY

#define OPAL_CR_EXIT_LIBRARY ( )    ;

Definition at line 205 of file opal_cr.h.

◆ OPAL_CR_FINALIZE_LIBRARY

#define OPAL_CR_FINALIZE_LIBRARY ( )    ;

Definition at line 202 of file opal_cr.h.

◆ OPAL_CR_INIT_LIBRARY

#define OPAL_CR_INIT_LIBRARY ( )    ;

Definition at line 201 of file opal_cr.h.

◆ OPAL_CR_NAMED_PROG_R

#define OPAL_CR_NAMED_PROG_R   ("opal_cr_prog_read")

Definition at line 46 of file opal_cr.h.

◆ OPAL_CR_NAMED_PROG_W

#define OPAL_CR_NAMED_PROG_W   ("opal_cr_prog_write")

Definition at line 47 of file opal_cr.h.

◆ OPAL_CR_NOOP_PROGRESS

#define OPAL_CR_NOOP_PROGRESS ( )    ;

Definition at line 206 of file opal_cr.h.

◆ OPAL_CR_SET_TIMER

#define OPAL_CR_SET_TIMER (   idx)
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_set_time(idx); \
} \
}

Definition at line 403 of file opal_cr.h.

◆ OPAL_CR_TEST_CHECKPOINT_READY

#define OPAL_CR_TEST_CHECKPOINT_READY ( )    ;

Definition at line 199 of file opal_cr.h.

◆ OPAL_CR_TEST_CHECKPOINT_READY_STALL

#define OPAL_CR_TEST_CHECKPOINT_READY_STALL ( )    ;

Definition at line 200 of file opal_cr.h.

◆ OPAL_CR_TIMER_CORE0

#define OPAL_CR_TIMER_CORE0   9

Definition at line 381 of file opal_cr.h.

◆ OPAL_CR_TIMER_CORE1

#define OPAL_CR_TIMER_CORE1   10

Definition at line 382 of file opal_cr.h.

◆ OPAL_CR_TIMER_CORE2

#define OPAL_CR_TIMER_CORE2   18

Definition at line 390 of file opal_cr.h.

◆ OPAL_CR_TIMER_COREBR0

#define OPAL_CR_TIMER_COREBR0   11

Definition at line 383 of file opal_cr.h.

◆ OPAL_CR_TIMER_COREBR1

#define OPAL_CR_TIMER_COREBR1   17

Definition at line 389 of file opal_cr.h.

◆ OPAL_CR_TIMER_CRCP0

#define OPAL_CR_TIMER_CRCP0   4

Definition at line 376 of file opal_cr.h.

◆ OPAL_CR_TIMER_CRCP1

#define OPAL_CR_TIMER_CRCP1   16

Definition at line 388 of file opal_cr.h.

◆ OPAL_CR_TIMER_CRCPBR0

#define OPAL_CR_TIMER_CRCPBR0   3

Definition at line 375 of file opal_cr.h.

◆ OPAL_CR_TIMER_CRCPBR1

#define OPAL_CR_TIMER_CRCPBR1   5

Definition at line 377 of file opal_cr.h.

◆ OPAL_CR_TIMER_ENTRY0

#define OPAL_CR_TIMER_ENTRY0   0

Definition at line 372 of file opal_cr.h.

◆ OPAL_CR_TIMER_ENTRY1

#define OPAL_CR_TIMER_ENTRY1   1

Definition at line 373 of file opal_cr.h.

◆ OPAL_CR_TIMER_ENTRY2

#define OPAL_CR_TIMER_ENTRY2   2

Definition at line 374 of file opal_cr.h.

◆ OPAL_CR_TIMER_ENTRY3

#define OPAL_CR_TIMER_ENTRY3   19

Definition at line 391 of file opal_cr.h.

◆ OPAL_CR_TIMER_ENTRY4

#define OPAL_CR_TIMER_ENTRY4   20

Definition at line 392 of file opal_cr.h.

◆ OPAL_CR_TIMER_MAX

#define OPAL_CR_TIMER_MAX   21

Definition at line 393 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2P0

#define OPAL_CR_TIMER_P2P0   6

Definition at line 378 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2P1

#define OPAL_CR_TIMER_P2P1   7

Definition at line 379 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2P2

#define OPAL_CR_TIMER_P2P2   12

Definition at line 384 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2P3

#define OPAL_CR_TIMER_P2P3   14

Definition at line 386 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2PBR0

#define OPAL_CR_TIMER_P2PBR0   8

Definition at line 380 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2PBR1

#define OPAL_CR_TIMER_P2PBR1   13

Definition at line 385 of file opal_cr.h.

◆ OPAL_CR_TIMER_P2PBR2

#define OPAL_CR_TIMER_P2PBR2   15

Definition at line 387 of file opal_cr.h.

Typedef Documentation

◆ opal_cr_ckpt_cmd_state_t

Definition at line 37 of file opal_cr.h.

◆ opal_cr_coord_callback_fn_t

typedef int(* opal_cr_coord_callback_fn_t) (int)

Coordination Routines.

Coordination callback routine signature

Definition at line 343 of file opal_cr.h.

◆ opal_cr_notify_callback_fn_t

typedef int(* opal_cr_notify_callback_fn_t) (opal_cr_ckpt_cmd_state_t)

Notification Routines.

Notification routines: a function to respond to the asynchronous checkpoint request. This is useful when figuring out who should respond when stalling.

Definition at line 277 of file opal_cr.h.

◆ opal_cr_user_inc_callback_fn_t

typedef int(* opal_cr_user_inc_callback_fn_t) (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state)

User coordination callback routine.

Definition at line 325 of file opal_cr.h.

Enumeration Type Documentation

◆ opal_cr_ckpt_cmd_state_t

Enumerator
OPAL_CHECKPOINT_CMD_START 
OPAL_CHECKPOINT_CMD_IN_PROGRESS 
OPAL_CHECKPOINT_CMD_NULL 
OPAL_CHECKPOINT_CMD_ERROR 
OPAL_CR_STATUS_NONE 
OPAL_CR_STATUS_REQUESTED 
OPAL_CR_STATUS_RUNNING 
OPAL_CR_STATUS_TERM 
OPAL_CR_STATUS_CONTINUE 
OPAL_CR_STATUS_RESTART_PRE 
OPAL_CR_STATUS_RESTART_POST 

Definition at line 52 of file opal_cr.h.

◆ opal_cr_user_inc_callback_event_t

User Coordination Routines.

Enumerator
OPAL_CR_INC_PRE_CRS_PRE_MPI 
OPAL_CR_INC_PRE_CRS_POST_MPI 
OPAL_CR_INC_CRS_PRE_CKPT 
OPAL_CR_INC_CRS_POST_CKPT 
OPAL_CR_INC_POST_CRS_PRE_MPI 
OPAL_CR_INC_POST_CRS_POST_MPI 
OPAL_CR_INC_MAX 

Definition at line 305 of file opal_cr.h.

◆ opal_cr_user_inc_callback_state_t

Enumerator
OPAL_CR_INC_STATE_PREPARE 
OPAL_CR_INC_STATE_CONTINUE 
OPAL_CR_INC_STATE_RESTART 
OPAL_CR_INC_STATE_ERROR 

Definition at line 315 of file opal_cr.h.

Function Documentation

◆ ompi_trigger_user_inc_callback()

OPAL_DECLSPEC int ompi_trigger_user_inc_callback ( opal_cr_user_inc_callback_event_t  event,
opal_cr_user_inc_callback_state_t  state 
)

◆ opal_cr_clear_timers()

OPAL_DECLSPEC void opal_cr_clear_timers ( void  )

Definition at line 1228 of file opal_cr.c.

References OPAL_CR_TIMER_MAX, and timer_start.

◆ opal_cr_coord()

OPAL_DECLSPEC int opal_cr_coord ( int  state)

OPAL Checkpoint Coordination Routine.

OPAL Checkpoint Coordination Routine.

Current Coordination callback routines

Definition at line 799 of file opal_cr.c.

References mca_base_framework_close(), opal_cr_checkpointing_state, OPAL_CR_STATUS_RESTART_POST, OPAL_CRS_CHECKPOINT, OPAL_CRS_CONTINUE, OPAL_CRS_RESTART, OPAL_CRS_TERM, opal_event_reinit, opal_if_base_framework, opal_output_reopen_all(), OPAL_SUCCESS, and opal_sync_event_base.

Referenced by opal_cr_init().

◆ opal_cr_display_all_timers()

◆ opal_cr_finalize()

◆ opal_cr_inc_core()

OPAL_DECLSPEC int opal_cr_inc_core ( pid_t  pid,
opal_crs_base_snapshot_t *  snapshot,
opal_crs_base_ckpt_options_t *  options,
int *  state 
)

Function to go through the INC.

  • Call Registered INC_Coord(CHECKPOINT)
  • Call the CRS.checkpoint()
  • Call Registered INC_Coord(state)

Definition at line 761 of file opal_cr.c.

References opal_cr_inc_core_ckpt(), opal_cr_inc_core_prep(), opal_cr_inc_core_recover(), OPAL_SUCCESS, and pid.

◆ opal_cr_inc_core_ckpt()

◆ opal_cr_inc_core_prep()

◆ opal_cr_inc_core_recover()

◆ opal_cr_init()

◆ opal_cr_refresh_environ()

◆ opal_cr_reg_coord_callback()

OPAL_DECLSPEC int opal_cr_reg_coord_callback ( opal_cr_coord_callback_fn_t  new_func,
opal_cr_coord_callback_fn_t *  prev_func 
)

Register a checkpoint coordination routine for a higher level.

Definition at line 899 of file opal_cr.c.

References cur_coord_callback, NULL, and OPAL_SUCCESS.

Referenced by ompi_cr_init(), opal_cr_init(), and orte_cr_init().

◆ opal_cr_reg_notify_callback()

OPAL_DECLSPEC int opal_cr_reg_notify_callback ( opal_cr_notify_callback_fn_t  new_func,
opal_cr_notify_callback_fn_t *  prev_func 
)

Definition at line 845 of file opal_cr.c.

References cur_notify_callback, NULL, and OPAL_SUCCESS.

Referenced by app_coord_init().

◆ opal_cr_set_enabled()

OPAL_DECLSPEC int opal_cr_set_enabled ( bool  )

◆ opal_cr_set_time()

OPAL_DECLSPEC void opal_cr_set_time ( int  idx)

Checkpoint life-cycle timing.

Definition at line 1219 of file opal_cr.c.

References opal_cr_get_time(), OPAL_CR_TIMER_MAX, and timer_start.

◆ opal_cr_test_if_checkpoint_ready()

OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready ( void  )

Check to see if a checkpoint has been requested.

When the checkpoint thread is disabled: this is checked whenever the application enters the MPI library, and the application is stopped for the duration of the entire checkpoint. When the checkpoint thread is enabled: the request is handled in that thread, in parallel with the execution of the program, regardless of where the program is in its execution. The problem with this method is that it requires support for progress threads, which is currently not working properly.

Definition at line 552 of file opal_cr.c.

References opal_crs_base_module_1_0_0_t::crs_checkpoint, cur_notify_callback, NULL, OPAL_CHECKPOINT_CMD_IN_PROGRESS, OPAL_CHECKPOINT_CMD_NULL, OPAL_CHECKPOINT_CMD_START, opal_cr_checkpoint_request, opal_cr_checkpointing_state, opal_cr_currently_stalled, opal_cr_output, OPAL_CR_STATUS_NONE, OPAL_CR_STATUS_REQUESTED, OPAL_CR_STATUS_RUNNING, opal_crs, opal_output(), opal_output_verbose(), and OPAL_SUCCESS.

◆ opal_cr_user_inc_register_callback()

OPAL_DECLSPEC int opal_cr_user_inc_register_callback ( opal_cr_user_inc_callback_event_t  event,
opal_cr_user_inc_callback_fn_t  function,
opal_cr_user_inc_callback_fn_t *  prev_function 
)

Definition at line 866 of file opal_cr.c.

References cur_user_coord_callback, NULL, OPAL_CR_INC_MAX, OPAL_ERROR, and OPAL_SUCCESS.

Referenced by OMPI_CR_INC_register_callback().

Variable Documentation

◆ opal_cr_checkpoint_request

OPAL_DECLSPEC int opal_cr_checkpoint_request
extern

◆ opal_cr_checkpointing_state

◆ opal_cr_continue_like_restart

OPAL_DECLSPEC bool opal_cr_continue_like_restart
extern

◆ opal_cr_currently_stalled

OPAL_DECLSPEC bool opal_cr_currently_stalled
extern

◆ opal_cr_entry_point_signal

◆ opal_cr_is_enabled

◆ opal_cr_is_tool

◆ opal_cr_output

◆ opal_cr_pipe_dir

OPAL_DECLSPEC char* opal_cr_pipe_dir
extern

◆ opal_cr_stall_check

◆ opal_cr_timing_barrier_enabled

OPAL_DECLSPEC bool opal_cr_timing_barrier_enabled
extern

◆ opal_cr_timing_enabled

OPAL_DECLSPEC bool opal_cr_timing_enabled
extern

Definition at line 98 of file opal_cr.c.

Referenced by opal_cr_register().

◆ opal_cr_timing_my_rank

OPAL_DECLSPEC int opal_cr_timing_my_rank
extern

Definition at line 99 of file opal_cr.c.

Referenced by app_notify_resp_stage_1(), and opal_cr_display_all_timers().

◆ opal_cr_timing_target_rank

OPAL_DECLSPEC int opal_cr_timing_target_rank
extern

Definition at line 100 of file opal_cr.c.

Referenced by opal_cr_display_all_timers(), and opal_cr_register().

OPAL_UNLIKELY
#define OPAL_UNLIKELY(expression)
Definition: prefetch.h:47
opal_cr_timing_enabled
OPAL_DECLSPEC bool opal_cr_timing_enabled
Definition: opal_cr.c:98