"Fossies" - the Fresh Open Source Software Archive

Member "redis-6.2.5/tests/integration/failover.tcl" (21 Jul 2021, 10050 Bytes) of package /linux/misc/redis-6.2.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Tcl/Tk source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "failover.tcl": 6.2.4_vs_6.2.5.

    1 start_server {tags {"failover"}} {
    2 start_server {} {
    3 start_server {} {
    4     set node_0 [srv 0 client]
    5     set node_0_host [srv 0 host]
    6     set node_0_port [srv 0 port]
    7     set node_0_pid [srv 0 pid]
          # node_0*: client handle, host, port and pid read from [srv 0 ...].
          # Presumably level 0 is the innermost (most recently started) of the three
          # nested servers and negative levels walk outward - confirm against the
          # test framework's srv helper.
          # node_0_pid is used below to pause/resume the process with kill -SIGSTOP/-SIGCONT.
    8 
          # node_1*: the same four properties for server level -1.
    9     set node_1 [srv -1 client]
   10     set node_1_host [srv -1 host]
   11     set node_1_port [srv -1 port]
   12     set node_1_pid [srv -1 pid]
   13 
          # node_2*: the same four properties for server level -2.
   14     set node_2 [srv -2 client]
   15     set node_2_host [srv -2 host]
   16     set node_2_port [srv -2 port]
   17     set node_2_pid [srv -2 pid]
   18 
   19     proc assert_digests_match {n1 n2 n3} {
              # Assert that the three clients report the same DEBUG DIGEST by comparing
              # them pairwise: n1 against n2, then n2 against n3.
   20         foreach {lhs rhs} [list $n1 $n2 $n2 $n3] {
   21             assert_equal [$lhs debug digest] [$rhs debug digest]
              }
   22     }
   23 
   24     test {failover command fails without connected replica} {
              # Replication is only configured by a later test, so at this point the
              # requested target is not a connected replica and an error reply is expected.
   25         catch {$node_0 failover to $node_1_host $node_1_port} reply
   26         if {[string match {ERR*} $reply] == 0} {
   27             fail "failover command succeeded when replica not connected"
   28         }
   29     }
   30 
   31     test {setup replication for following tests} {
              # First point both replicas at node 0, then wait for each replication
              # link in turn (node 1 before node 2).
   32         foreach replica [list $node_1 $node_2] {
   33             $replica replicaof $node_0_host $node_0_port
              }
   34         foreach replica [list $node_1 $node_2] {
   35             wait_for_sync $replica
              }
   36     }
   37 
   38     test {failover command fails with invalid host} {
              # A placeholder host name is passed in place of the replica's real host;
              # the reply must be an error.
   39         catch {$node_0 failover to invalidhost $node_1_port} reply
   40         assert_match {ERR*} $reply
   41     }
   42 
   43     test {failover command fails with invalid port} {
              # A non-numeric placeholder is passed in place of the replica's port;
              # the reply must be an error.
   44         catch {$node_0 failover to $node_1_host invalidport} reply
   45         assert_match {ERR*} $reply
   46     }
   47 
   48     test {failover command fails with just force and timeout} {
              # FORCE and TIMEOUT are supplied without a "TO <host> <port>" target;
              # the reply must be an error.
   49         catch {$node_0 FAILOVER FORCE TIMEOUT 100} reply
   50         assert_match {ERR*} $reply
   51     }
   52 
   53     test {failover command fails when sent to a replica} {
              # The command is issued on node 1 (configured as a replica of node 0 by the
              # replication setup test) instead of on the master; the reply must be an error.
   54         catch {$node_1 failover to $node_1_host $node_1_port} reply
   55         assert_match {ERR*} $reply
   56     }
   57 
   58     test {failover command fails with force without timeout} {
              # FORCE is supplied without a TIMEOUT value; the reply must be an error.
   59         catch {$node_0 failover to $node_1_host $node_1_port FORCE} reply
   60         assert_match {ERR*} $reply
   61     }
   62 
   63     test {failover command to specific replica works} {
              # Fails node 0 over to node 1 while a write load is running, then checks
              # that node 0 and node 2 both reattach to node 1 without a full resync.
              # Baselines are read from server -1 (node 1), the node expected to become master.
   64         set initial_psyncs [s -1 sync_partial_ok]
   65         set initial_syncs [s -1 sync_full]
   66 
   67         # Generate a delta between primary and replica
   68         set load_handler [start_write_load $node_0_host $node_0_port 5]
   69         exec kill -SIGSTOP [srv -1 pid]
   70         wait_for_condition 50 100 {
   71             [s 0 total_commands_processed] > 100
   72         } else {
   73             fail "Node 0 did not accept writes"
   74         }
   75         exec kill -SIGCONT [srv -1 pid]
   76 
   77         # Execute the failover
   78         $node_0 failover to $node_1_host $node_1_port
   79 
   80         # Wait for failover to end
   81         wait_for_condition 50 100 {
   82             [s 0 master_failover_state] == "no-failover"
   83         } else {
   84             fail "Failover from node 0 to node 1 did not finish"
   85         }
   86 
   87         # stop the write load and make sure no more commands processed
   88         stop_write_load $load_handler
   89         wait_load_handlers_disconnected
   90 
              # Node 2 still points at the old master, so re-point it at node 1.
   91         $node_2 replicaof $node_1_host $node_1_port
   92         wait_for_sync $node_0
   93         wait_for_sync $node_2
   94 
   95         assert_match *slave* [$node_0 role]
   96         assert_match *master* [$node_1 role]
   97         assert_match *slave* [$node_2 role]
   98 
   99         # We should accept psyncs from both nodes
  100         assert_equal [expr {[s -1 sync_partial_ok] - $initial_psyncs}] 2
              # No full resync should have been needed: compare sync_full against its
              # own baseline captured at the start of the test.
  101         assert_equal [expr {[s -1 sync_full] - $initial_syncs}] 0
  102         assert_digests_match $node_0 $node_1 $node_2
  103     }
  104 
  105     test {failover command to any replica works} {
              # Issues FAILOVER on node 1 without naming a target while node 0 is paused,
              # and expects node 2 to end up as master with both other nodes attached via
              # partial resync. Baselines are read from server -2 (node 2).
  106         set initial_psyncs [s -2 sync_partial_ok]
  107         set initial_syncs [s -2 sync_full]
  108 
  109         wait_for_ofs_sync $node_1 $node_2
  110         # We stop node 0 to make sure node 2 is selected
  111         exec kill -SIGSTOP $node_0_pid
  112         $node_1 set CASE 1
  113         $node_1 FAILOVER
  114 
  115         # Wait for failover to end
  116         wait_for_condition 50 100 {
  117             [s -1 master_failover_state] == "no-failover"
  118         } else {
  119             fail "Failover from node 1 to node 2 did not finish"
  120         }
  121         exec kill -SIGCONT $node_0_pid
              # Node 0 was paused during the failover, so re-point it at node 2.
  122         $node_0 replicaof $node_2_host $node_2_port
  123 
  124         wait_for_sync $node_0
  125         wait_for_sync $node_1
  126 
  127         assert_match *slave* [$node_0 role]
  128         assert_match *slave* [$node_1 role]
  129         assert_match *master* [$node_2 role]
  130 
  131         # We should accept psyncs from both nodes
  132         assert_equal [expr {[s -2 sync_partial_ok] - $initial_psyncs}] 2
              # No full resync should have been needed on node 2: read sync_full from the
              # same server (-2) and compare it against its own baseline.
  133         assert_equal [expr {[s -2 sync_full] - $initial_syncs}] 0
  134         assert_digests_match $node_0 $node_1 $node_2
  135     }
  136 
  137     test {failover to a replica with force works} {
              # Node 2 (the current master) fails over to a paused node 0 using
              # TIMEOUT 100 FORCE, and the test expects node 0 to end up as master.
              # Baselines are read from server 0 (node 0).
  138         set initial_psyncs [s 0 sync_partial_ok]
  139         set initial_syncs [s 0 sync_full]
  140 
  141         exec kill -SIGSTOP $node_0_pid
  142         # node 0 will never acknowledge this write
  143         $node_2 set case 2
  144         $node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE
  145 
  146         # Wait for node 2 to give up waiting for node 0 to sync and start the failover
  147         wait_for_condition 50 100 {
  148             [s -2 master_failover_state] == "failover-in-progress"
  149         } else {
  150             fail "Failover from node 2 to node 0 did not timeout"
  151         }
  152 
  153         # Quick check that everyone is a replica, we never want a 
  154         # state where there are two masters.
  155         assert_match *slave* [$node_1 role]
  156         assert_match *slave* [$node_2 role]
  157 
              # Resume node 0 so that the failover can complete.
  158         exec kill -SIGCONT $node_0_pid
  159 
  160         # Wait for failover to end
  161         wait_for_condition 50 100 {
  162             [s -2 master_failover_state] == "no-failover"
  163         } else {
  164             fail "Failover from node 2 to node 0 did not finish"
  165         }
  166         $node_1 replicaof $node_0_host $node_0_port
  167 
  168         wait_for_sync $node_1
  169         wait_for_sync $node_2
  170 
  171         assert_match *master* [$node_0 role]
  172         assert_match *slave* [$node_1 role]
  173         assert_match *slave* [$node_2 role]
  174 
              # Node 2's log must contain the forced-timeout message exactly once.
  175         assert_equal [count_log_message -2 "time out exceeded, failing over."] 1
  176 
  177         # Both replicas should still be accepted via psync, although this is the one
  178         # case where a psync might be refused, since the target had not caught up.
  179         assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
  180         assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
  181         assert_digests_match $node_0 $node_1 $node_2
  182     }
  183 
  184     test {failover with timeout aborts if replica never catches up} {
              # Node 0 requests a failover to a paused node 1 with TIMEOUT 500 and no FORCE.
              # The test expects node 0 to return to "no-failover" and remain master, with
              # no additional partial or full syncs recorded on node 0.
  185         set initial_psyncs [s 0 sync_partial_ok]
  186         set initial_syncs [s 0 sync_full]
  187 
  188         # Stop replica so it never catches up
  189         exec kill -SIGSTOP [srv -1 pid]
  190         $node_0 SET CASE 1
  191         
  192         $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500
  193         # Wait for failover to end
              # NOTE(review): presumably 50 polls spaced 20 ms apart, i.e. about twice the
              # 500 ms failover timeout - confirm against wait_for_condition.
  194         wait_for_condition 50 20 {
  195             [s 0 master_failover_state] == "no-failover"
  196         } else {
  197             fail "Failover from node_0 to replica did not finish"
  198         }
  199 
  200         exec kill -SIGCONT [srv -1 pid]
  201 
  202         # We need to make sure the nodes actually sync back up
  203         wait_for_ofs_sync $node_0 $node_1
  204         wait_for_ofs_sync $node_0 $node_2
  205 
  206         assert_match *master* [$node_0 role]
  207         assert_match *slave* [$node_1 role]
  208         assert_match *slave* [$node_2 role]
  209 
  210         # Since we never caught up, there should be no syncs
  211         assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
  212         assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
  213         assert_digests_match $node_0 $node_1 $node_2
  214     }
  215 
  216     test {failovers can be aborted} {
              # Starts a failover from node 0 to a paused node 1 with a long timeout, checks
              # that node 0 sits in "waiting-for-sync" and still serves reads, then aborts
              # the failover and verifies that the topology and sync counters are unchanged.
  217         set initial_psyncs [s 0 sync_partial_ok]
  218         set initial_syncs [s 0 sync_full]
  219 
  220         # Stop replica so it never catches up
  221         exec kill -SIGSTOP [srv -1 pid]
  222         $node_0 SET CASE 2
  223 
  224         $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000
              # Exact comparison: the reported state is the value under test, not a glob pattern.
  225         assert_equal [s 0 master_failover_state] "waiting-for-sync"
  226 
  227         # Sanity check that read commands are still accepted
  228         $node_0 GET CASE
  229 
  230         $node_0 failover abort
  231         assert_equal [s 0 master_failover_state] "no-failover"
  232 
  233         exec kill -SIGCONT [srv -1 pid]
  234 
  235         # Just make sure everything is still synced
  236         wait_for_ofs_sync $node_0 $node_1
  237         wait_for_ofs_sync $node_0 $node_2
  238 
  239         assert_match *master* [$node_0 role]
  240         assert_match *slave* [$node_1 role]
  241         assert_match *slave* [$node_2 role]
  242 
  243         # Since the failover was aborted, there should be no syncs
  244         assert_equal [expr {[s 0 sync_partial_ok] - $initial_psyncs}] 0
  245         assert_equal [expr {[s 0 sync_full] - $initial_syncs}] 0
  246         assert_digests_match $node_0 $node_1 $node_2
  247     }
  248 
  249     test {failover aborts if target rejects sync request} {
              # Node 1 is made to refuse PSYNC through its ACL, so node 0's failover to it is
              # expected to end back in "no-failover" with node 0 still master. The test also
              # checks that a write sent just before the failover request still gets its reply.
  250         set initial_psyncs [s 0 sync_partial_ok]
  251         set initial_syncs [s 0 sync_full]
  252 
  253         # We block psync, so the failover will fail
  254         $node_1 acl setuser default -psync
  255 
  256         # We pause the target long enough to send a write command
  257         # during the pause. This write will not be interrupted.
  258         exec kill -SIGSTOP [srv -1 pid]
              # NOTE(review): presumably connects to server level 0 (node 0) by default -
              # confirm against redis_deferring_client.
  259         set rd [redis_deferring_client]
  260         $rd SET FOO BAR
  261         $node_0 failover to $node_1_host $node_1_port
  262         exec kill -SIGCONT [srv -1 pid]
  263 
  264         # Wait for failover to end
  265         wait_for_condition 50 100 {
  266             [s 0 master_failover_state] == "no-failover"
  267         } else {
  268             fail "Failover from node_0 to replica did not finish"
  269         }
  270 
              # The deferred SET's reply is only read now; it must be a successful "OK".
  271         assert_equal [$rd read] "OK"
  272         $rd close
  273 
  274         # restore access to psync
  275         $node_1 acl setuser default +psync
  276 
  277         # We need to make sure the nodes actually sync back up
  278         wait_for_sync $node_1
  279         wait_for_sync $node_2
  280 
  281         assert_match *master* [$node_0 role]
  282         assert_match *slave* [$node_1 role]
  283         assert_match *slave* [$node_2 role]
  284 
  285         # We will cycle all of our replicas here and force a psync.
  286         assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
  287         assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
  288 
              # Node 0's log must contain the rejection message exactly once.
  289         assert_equal [count_log_message 0 "Failover target rejected psync request"] 1
  290         assert_digests_match $node_0 $node_1 $node_2
  291     }
  292 }
  293 }
  294 }