"Fossies" - the Fresh Open Source Software Archive

Member "redis-6.2.5/tests/integration/replication.tcl" (21 Jul 2021, 39758 Bytes) of package /linux/misc/redis-6.2.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Tcl/Tk source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "replication.tcl": 6.2.4_vs_6.2.5.

    1 proc log_file_matches {log pattern} {
          # Return 1 when the entire content of the file $log matches the
          # glob-style $pattern (string match rules), 0 otherwise.
    2     set chan [open $log r]
    3     set text [read $chan]
    4     close $chan
    5     return [string match $pattern $text]
    6 }
    7 
    8 start_server {tags {"repl network"}} {
          # The outer server acts as the replica and the inner one as its
          # master. The replica's stdout path is saved so that the second
          # test can search its log for the timeout message.
    9     set slave [srv 0 client]
   10     set slave_host [srv 0 host]
   11     set slave_port [srv 0 port]
   12     set slave_log [srv 0 stdout]
   13     start_server {} {
   14         set master [srv 0 client]
   15         set master_host [srv 0 host]
   16         set master_port [srv 0 port]
   17 
   18         # Configure the master in order to hang waiting for the BGSAVE
   19         # operation, so that the slave remains in the handshake state.
   20         $master config set repl-diskless-sync yes
   21         $master config set repl-diskless-sync-delay 1000
   22 
   23         # Use a short replication timeout on the slave, so that if there
   24         # are no bugs the timeout is triggered in a reasonable amount
   25         # of time.
   26         $slave config set repl-timeout 5
   27 
   28         # Start the replication process...
   29         $slave slaveof $master_host $master_port
   30 
   31         test {Slave enters handshake} {
                  # Poll ROLE on the replica until its reply mentions "handshake".
   32             wait_for_condition 50 1000 {
   33                 [string match *handshake* [$slave role]]
   34             } else {
   35                 fail "Replica does not enter handshake state"
   36             }
   37         }
   38 
   39         # Now block the master (DEBUG SLEEP) so that it is unable to send
   40         # the periodic newlines to refresh the connection. The slave
   41         # should detect the timeout.
   42         $master debug sleep 10
   43 
   44         test {Slave is able to detect timeout during handshake} {
                  # The timeout is detected by looking for the message in the
                  # replica's log file rather than by querying the replica.
   45             wait_for_condition 50 1000 {
   46                 [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
   47             } else {
   48                 fail "Replica is not able to detect timeout"
   49             }
   50         }
   51     }
   52 }
   53 
   54 start_server {tags {"repl"}} {
          # Two servers: A (outer) and B (inner). A is first made a replica of
          # B; the tests then issue commands through `r` and compare A and B.
          # The last test swaps the roles. Every deferring client created by a
          # test is closed at the end of that test so that connections do not
          # pile up across tests.
   55     set A [srv 0 client]
   56     set A_host [srv 0 host]
   57     set A_port [srv 0 port]
   58     start_server {} {
   59         set B [srv 0 client]
   60         set B_host [srv 0 host]
   61         set B_port [srv 0 port]
   62 
   63         test {Set instance A as slave of B} {
   64             $A slaveof $B_host $B_port
                  # Wait until A reports the slave role and an established link.
   65             wait_for_condition 50 100 {
   66                 [lindex [$A role] 0] eq {slave} &&
   67                 [string match {*master_link_status:up*} [$A info replication]]
   68             } else {
   69                 fail "Can't turn the instance into a replica"
   70             }
   71         }
   72 
   73         test {INCRBYFLOAT replication, should not remove expire} {
   74             r set test 1 EX 100
   75             r incrbyfloat test 0.1
                  # Give the write time to reach the replica before comparing.
   76             after 1000
   77             assert_equal [$A debug digest] [$B debug digest]
   78         }
   79 
   80         test {GETSET replication} {
   81             $A config resetstat
   82             $A config set loglevel debug
   83             $B config set loglevel debug
   84             r set test foo
   85             assert_equal [r getset test bar] foo
   86             wait_for_condition 500 10 {
   87                 [$A get test] eq "bar"
   88             } else {
   89                 fail "getset wasn't propagated"
   90             }
   91             assert_equal [r set test vaz get] bar
   92             wait_for_condition 500 10 {
   93                 [$A get test] eq "vaz"
   94             } else {
   95                 fail "set get wasn't propagated"
   96             }
                  # On the replica all three writes must be accounted as SET
                  # and none as GETSET.
   97             assert_match {*calls=3,*} [cmdrstat set $A]
   98             assert_match {} [cmdrstat getset $A]
   99         }
  100 
  101         test {BRPOPLPUSH replication, when blocking against empty list} {
  102             $A config resetstat
  103             set rd [redis_deferring_client]
  104             $rd brpoplpush a b 5
  105             r lpush a foo
  106             wait_for_condition 50 100 {
  107                 [$A debug digest] eq [$B debug digest]
  108             } else {
  109                 fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
  110             }
                  # The replica must have seen one RPOPLPUSH and no LMOVE.
  111             assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
  112             assert_match {} [cmdrstat lmove $A]
                  # Release the deferring client's connection.
                  $rd close
  113         }
  114 
  115         test {BRPOPLPUSH replication, list exists} {
  116             $A config resetstat
  117             set rd [redis_deferring_client]
  118             r lpush c 1
  119             r lpush c 2
  120             r lpush c 3
  121             $rd brpoplpush c d 5
  122             after 1000
  123             assert_equal [$A debug digest] [$B debug digest]
  124             assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
  125             assert_match {} [cmdrstat lmove $A]
                  # Release the deferring client's connection.
                  $rd close
  126         }
  127 
              # Same two scenarios as above, for every BLMOVE direction pair.
  128         foreach wherefrom {left right} {
  129             foreach whereto {left right} {
  130                 test "BLMOVE ($wherefrom, $whereto) replication, when blocking against empty list" {
  131                     $A config resetstat
  132                     set rd [redis_deferring_client]
  133                     $rd blmove a b $wherefrom $whereto 5
  134                     r lpush a foo
  135                     wait_for_condition 50 100 {
  136                         [$A debug digest] eq [$B debug digest]
  137                     } else {
  138                         fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
  139                     }
                          # The replica must have seen one LMOVE and no RPOPLPUSH.
  140                     assert_match {*calls=1,*} [cmdrstat lmove $A]
  141                     assert_match {} [cmdrstat rpoplpush $A]
                          # Release the deferring client's connection.
                          $rd close
  142                 }
  143 
  144                 test "BLMOVE ($wherefrom, $whereto) replication, list exists" {
  145                     $A config resetstat
  146                     set rd [redis_deferring_client]
  147                     r lpush c 1
  148                     r lpush c 2
  149                     r lpush c 3
  150                     $rd blmove c d $wherefrom $whereto 5
  151                     after 1000
  152                     assert_equal [$A debug digest] [$B debug digest]
  153                     assert_match {*calls=1,*} [cmdrstat lmove $A]
  154                     assert_match {} [cmdrstat rpoplpush $A]
                          # Release the deferring client's connection.
                          $rd close
  155                 }
  156             }
  157         }
  158 
  159         test {BLPOP followed by role change, issue #2473} {
  160             set rd [redis_deferring_client]
  161             $rd blpop foo 0 ; # Block while B is a master
  162 
  163             # Turn B into master of A
  164             $A slaveof no one
  165             $B slaveof $A_host $A_port
  166             wait_for_condition 50 100 {
  167                 [lindex [$B role] 0] eq {slave} &&
  168                 [string match {*master_link_status:up*} [$B info replication]]
  169             } else {
  170                 fail "Can't turn the instance into a replica"
  171             }
  172 
  173             # Push elements into the "foo" list of the new replica.
  174             # If the client is still attached to the instance, we'll get
  175             # a desync between the two instances.
  176             $A rpush foo a b c
  177             after 100
  178 
  179             wait_for_condition 50 100 {
  180                 [$A debug digest] eq [$B debug digest] &&
  181                 [$A lrange foo 0 -1] eq {a b c} &&
  182                 [$B lrange foo 0 -1] eq {a b c}
  183             } else {
  184                 fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
  185             }
                  # Release the deferring client's connection.
                  $rd close
  186         }
  187     }
  188 }
  189 
  190 start_server {tags {"repl"}} {
          # The outer server is the master and gets one key before the inner
          # server (the future replica) is started. Inside the inner block the
          # outer server is addressed with index -1.
  191     r set mykey foo
  192 
  193     start_server {} {
  194         test {Second server should have role master at first} {
  195             s role
  196         } {master}
  197 
  198         test {SLAVEOF should start with link status "down"} {
                  # SLAVEOF and INFO are sent in one MULTI/EXEC so that INFO is
                  # taken right after the SLAVEOF command.
  199             r multi
  200             r slaveof [srv -1 host] [srv -1 port]
  201             r info replication
  202             r exec
  203         } {*master_link_status:down*}
  204 
  205         test {The role should immediately be changed to "replica"} {
  206             s role
  207         } {slave}
  208 
  209         wait_for_sync r
  210         test {Sync should have transferred keys from master} {
  211             r get mykey
  212         } {foo}
  213 
  214         test {The link status should be up} {
  215             s master_link_status
  216         } {up}
  217 
  218         test {SET on the master should immediately propagate} {
  219             r -1 set mykey bar
  220 
  221             wait_for_condition 500 100 {
  222                 [r  0 get mykey] eq {bar}
  223             } else {
  224                 fail "SET on master did not propagated on replica"
  225             }
  226         }
  227 
  228         test {FLUSHALL should replicate} {
  229             r -1 flushall
                  # NOTE(review): outside valgrind runs there is no wait between
                  # FLUSHALL and the DBSIZE check; presumably this relies on the
                  # propagation being fast enough -- confirm.
  230             if {$::valgrind} {after 2000}
  231             list [r -1 dbsize] [r 0 dbsize]
  232         } {0 0}
  233 
  234         test {ROLE in master reports master with a slave} {
  235             set res [r -1 role]
  236             lassign $res role offset slaves
  237             assert {$role eq {master}}
  238             assert {$offset > 0}
  239             assert {[llength $slaves] == 1}
                  # Each replica entry is unpacked as {host port offset}.
  240             lassign [lindex $slaves 0] master_host master_port slave_offset
  241             assert {$slave_offset <= $offset}
  242         }
  243 
  244         test {ROLE in slave reports slave in connected state} {
  245             set res [r role]
  246             lassign $res role master_host master_port slave_state slave_offset
  247             assert {$role eq {slave}}
  248             assert {$slave_state eq {connected}}
  249         }
  250     }
  251 }
  252 
      # Run the "multiple replicas at the same time" scenario for every
      # combination of master repl-diskless-sync ($mdl) and replica
      # repl-diskless-load ($sdl).
  253 foreach mdl {no yes} {
  254     foreach sdl {disabled swapdb} {
  255         start_server {tags {"repl"}} {
  256             set master [srv 0 client]
  257             $master config set repl-diskless-sync $mdl
  258             $master config set repl-diskless-sync-delay 1
  259             set master_host [srv 0 host]
  260             set master_port [srv 0 port]
  261             set slaves {}
                  # Start three more servers, nested, and collect their clients.
  262             start_server {} {
  263                 lappend slaves [srv 0 client]
  264                 start_server {} {
  265                     lappend slaves [srv 0 client]
  266                     start_server {} {
  267                         lappend slaves [srv 0 client]
  268                         test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
  269                             # start load handles only inside the test, so that the test can be skipped
  270                             set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
  271                             set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
  272                             set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000]
  273                             set load_handle3 [start_write_load $master_host $master_port 8]
  274                             set load_handle4 [start_write_load $master_host $master_port 4]
  275                             after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork
  276 
  277                             # Send SLAVEOF commands to slaves
  278                             [lindex $slaves 0] config set repl-diskless-load $sdl
  279                             [lindex $slaves 1] config set repl-diskless-load $sdl
  280                             [lindex $slaves 2] config set repl-diskless-load $sdl
  281                             [lindex $slaves 0] slaveof $master_host $master_port
  282                             [lindex $slaves 1] slaveof $master_host $master_port
  283                             [lindex $slaves 2] slaveof $master_host $master_port
  284 
  285                             # Wait for all the three slaves to reach the "online"
  286                             # state from the POV of the master.
                                  # (`r -3` addresses the server three nesting levels
                                  # up, i.e. the master; up to 500 polls, 100 ms apart.)
  287                             set retry 500
  288                             while {$retry} {
  289                                 set info [r -3 info]
  290                                 if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} {
  291                                     break
  292                                 } else {
  293                                     incr retry -1
  294                                     after 100
  295                                 }
  296                             }
  297                             if {$retry == 0} {
  298                                 error "assertion:Slaves not correctly synchronized"
  299                             }
  300 
  301                             # Wait that slaves acknowledge they are online so
  302                             # we are sure that DBSIZE and DEBUG DIGEST will not
  303                             # fail because of timing issues.
  304                             wait_for_condition 500 100 {
  305                                 [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
  306                                 [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
  307                                 [lindex [[lindex $slaves 2] role] 3] eq {connected}
  308                             } else {
  309                                 fail "Slaves still not connected after some time"
  310                             }
  311 
  312                             # Stop the write load
  313                             stop_bg_complex_data $load_handle0
  314                             stop_bg_complex_data $load_handle1
  315                             stop_bg_complex_data $load_handle2
  316                             stop_write_load $load_handle3
  317                             stop_write_load $load_handle4
  318 
  319                             # Make sure no more commands processed
  320                             wait_load_handlers_disconnected
  321 
  322                             wait_for_ofs_sync $master [lindex $slaves 0]
  323                             wait_for_ofs_sync $master [lindex $slaves 1]
  324                             wait_for_ofs_sync $master [lindex $slaves 2]
  325 
  326                             # Check digests
                                  # The master digest must differ from the all-zero
                                  # value and be equal to each replica's digest.
  327                             set digest [$master debug digest]
  328                             set digest0 [[lindex $slaves 0] debug digest]
  329                             set digest1 [[lindex $slaves 1] debug digest]
  330                             set digest2 [[lindex $slaves 2] debug digest]
  331                             assert {$digest ne 0000000000000000000000000000000000000000}
  332                             assert {$digest eq $digest0}
  333                             assert {$digest eq $digest1}
  334                             assert {$digest eq $digest2}
  335                         }
  336                    }
  337                 }
  338             }
  339         }
  340     }
  341 }
  342 
  343 start_server {tags {"repl"}} {
          # Outer server: master under write load. Inner server: replica that
          # is kept busy by a long-running Lua script while the master keeps
          # streaming writes to it.
  344     set master [srv 0 client]
  345     set master_host [srv 0 host]
  346     set master_port [srv 0 port]
  347     start_server {} {
  348         test "Master stream is correctly processed while the replica has a script in -BUSY state" {
  349             set load_handle0 [start_write_load $master_host $master_port 3]
  350             set slave [srv 0 client]
  351             $slave config set lua-time-limit 500
  352             $slave slaveof $master_host $master_port
  353 
  354             # Wait for the slave to be online
  355             wait_for_condition 500 100 {
  356                 [lindex [$slave role] 3] eq {connected}
  357             } else {
  358                 fail "Replica still not connected after some time"
  359             }
  360 
  361             # Wait some time to make sure the master is sending data
  362             # to the slave.
  363             after 5000
  364 
  365             # Stop the ability of the slave to process data by sending
  366             # a script that will put it in BUSY state.
  367             $slave eval {for i=1,3000000000 do end} 0
  368 
  369             # Wait some time again so that more master stream will
  370             # be processed.
  371             after 2000
  372 
  373             # Stop the write load
  374             stop_write_load $load_handle0
  375 
  376             # Wait until master and replica report the same dataset digest
  377             wait_for_condition 500 100 {
  378                 [$master debug digest] eq [$slave debug digest]
  379             } else {
  380                 fail "Different datasets between replica and master"
  381             }
  382         }
  383     }
  384 }
  385 
  386 test {slave fails full sync and diskless load swapdb recovers it} {
          # Outer server: replica (pre-populated with 2000 keys). Inner server:
          # master. The sync is interrupted while the replica is loading, and
          # the replica must then still hold its original 2000 keys.
  387     start_server {tags {"repl"}} {
  388         set slave [srv 0 client]
  389         set slave_host [srv 0 host]
  390         set slave_port [srv 0 port]
  391         set slave_log [srv 0 stdout]
  392         start_server {} {
  393             set master [srv 0 client]
  394             set master_host [srv 0 host]
  395             set master_port [srv 0 port]
  396 
  397             # Put different data sets on the master and slave
  398             # we need to put large keys on the master since the slave replies to info only once in 2mb
  399             $slave debug populate 2000 slave 10
  400             $master debug populate 200 master 100000
  401             $master config set rdbcompression no
  402 
  403             # Set master and slave to use diskless replication
  404             $master config set repl-diskless-sync yes
  405             $master config set repl-diskless-sync-delay 0
  406             $slave config set repl-diskless-load swapdb
  407 
  408             # Set master with a slow rdb generation, so that we can easily disconnect it mid sync
  409             # 10ms per key, with 200 keys is 2 seconds
  410             $master config set rdb-key-save-delay 10000
  411 
  412             # Start the replication process...
  413             $slave slaveof $master_host $master_port
  414 
  415             # wait for the slave to start reading the rdb
                  # (`s -1` reads the status field of the outer server, the slave)
  416             wait_for_condition 50 100 {
  417                 [s -1 loading] eq 1
  418             } else {
  419                 fail "Replica didn't get into loading mode"
  420             }
  421 
  422             # make sure that next sync will not start immediately so that we can catch the slave in between syncs
  423             $master config set repl-diskless-sync-delay 5
  424             # for faster server shutdown, make rdb saving fast again (the fork is already using the slow one)
  425             $master config set rdb-key-save-delay 0
  426 
  427             # waiting slave to do flushdb (key count drop)
                  # The key count is extracted from the "keys=N" part of INFO keyspace.
  428             wait_for_condition 50 100 {
  429                 2000 != [scan [regexp -inline {keys\=([\d]*)} [$slave info keyspace]] keys=%d]
  430             } else {
  431                 fail "Replica didn't flush"
  432             }
  433 
  434             # make sure we're still loading
  435             assert_equal [s -1 loading] 1
  436 
  437             # kill the slave connection on the master
                  # (the value stored in $killed is not used afterwards)
  438             set killed [$master client kill type slave]
  439 
  440             # wait for loading to stop (fail)
  441             wait_for_condition 50 100 {
  442                 [s -1 loading] eq 0
  443             } else {
  444                 fail "Replica didn't disconnect"
  445             }
  446 
  447             # make sure the original keys were restored
  448             assert_equal [$slave dbsize] 2000
  449         }
  450     }
  451 }
  452 
  453 test {diskless loading short read} {
          # Outer server: replica. Inner server: master. The master is filled
          # with keys of several types, then the replication link is killed
          # repeatedly at random points while the replica is loading the RDB.
  454     start_server {tags {"repl"}} {
  455         set replica [srv 0 client]
  456         set replica_host [srv 0 host]
  457         set replica_port [srv 0 port]
  458         start_server {} {
  459             set master [srv 0 client]
  460             set master_host [srv 0 host]
  461             set master_port [srv 0 port]
  462 
  463             # Set master and replica to use diskless replication
  464             $master config set repl-diskless-sync yes
  465             $master config set rdbcompression no
  466             $replica config set repl-diskless-load swapdb
  467             $master config set hz 500
  468             $replica config set hz 500
  469             $master config set dynamic-hz no
  470             $replica config set dynamic-hz no
  471             # Try to fill the master with all types of data types / encodings
  472             set start [clock clicks -milliseconds]
                  # Key names are prefixed with $k; value sizes are random.
  473             for {set k 0} {$k < 3} {incr k} {
  474                 for {set i 0} {$i < 10} {incr i} {
  475                     r set "$k int_$i" [expr {int(rand()*10000)}]
  476                     r expire "$k int_$i" [expr {int(rand()*10000)}]
  477                     r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]]
  478                     r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]]  0[string repeat A [expr {int(rand()*10)}]]
  479                     r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]]
  480                     r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]]
  481                     r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]]
  482                     r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]]
  483                     r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]]
  484                     r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]]
  485                     r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]]
  486                     for {set j 0} {$j < 10} {incr j} {
  487                         r xadd "$k stream" * foo "asdf" bar "1234"
  488                     }
  489                     r xgroup create "$k stream" "mygroup_$i" 0
  490                     r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" >
  491                 }
  492             }
  493 
  494             if {$::verbose} {
  495                 set end [clock clicks -milliseconds]
  496                 set duration [expr $end - $start]
  497                 puts "filling took $duration ms (TODO: use pipeline)"
  498                 set start [clock clicks -milliseconds]
  499             }
  500 
  501             # Start the replication process...
                  # Remember the current length of the replica's log (index -1) so
                  # that only messages logged from now on are matched.
  502             set loglines [count_log_lines -1]
  503             $master config set repl-diskless-sync-delay 0
  504             $replica replicaof $master_host $master_port
  505 
  506             # kill the replication at various points
  507             set attempts 100
  508             if {$::accurate} { set attempts 500 }
  509             for {set i 0} {$i < $attempts} {incr i} {
  510                 # wait for the replica to start reading the rdb
  511                 # using the log file since the replica only responds to INFO once in 2mb
  512                 set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
  513                 set loglines [lindex $res 1]
  514 
  515                 # add some additional random sleep so that we kill the master on a different place each time
  516                 after [expr {int(rand()*50)}]
  517 
  518                 # kill the replica connection on the master
  519                 set killed [$master client kill type replica]
  520 
                      # Wait for one of the three possible outcomes in the replica
                      # log; element 0 of the result is used as the matched text and
                      # element 1 as the new log position.
  521                 set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
  522                 if {$::verbose} { puts $res }
  523                 set log_text [lindex $res 0]
  524                 set loglines [lindex $res 1]
  525                 if {![string match "*Internal error in RDB*" $log_text]} {
  526                     # force the replica to try another full sync
  527                     $master multi
  528                     $master client kill type replica
  529                     $master set asdf asdf
  530                     # the side effect of resizing the backlog is that it is flushed (16k is the min size)
  531                     $master config set repl-backlog-size [expr {16384 + $i}]
  532                     $master exec
  533                 }
  534                 # wait for loading to stop (fail)
  535                 wait_for_condition 1000 1 {
  536                     [s -1 loading] eq 0
  537                 } else {
  538                     fail "Replica didn't disconnect"
  539                 }
  540             }
  541             if {$::verbose} {
  542                 set end [clock clicks -milliseconds]
  543                 set duration [expr $end - $start]
  544                 puts "test took $duration ms"
  545             }
  546             # enable fast shutdown
  547             $master config set rdb-key-save-delay 0
  548         }
  549     }
  550 }
  551 
  552 # get current stime and utime metrics for a thread (since its creation)
      #
      # statfile - path of a file in the Linux /proc/<pid>/stat format.
      #
      # Returns the list {mstime utime stime}: the current wall clock in
      # milliseconds followed by the two jiffies counters read from the file.
      # Raises "assertion:can't parse /proc: ..." when the file cannot be read
      # or does not contain the two counters.
  553 proc get_cpu_metrics { statfile } {
  554     if { [ catch {
  555         set fid   [ open $statfile r ]
              # Close the channel even when the read itself fails, then
              # re-raise the read error.
  556         set failed [ catch { read $fid 1024 } data ]
  557         ::close $fid
              if { $failed } { error $data }
      
              # The second field (comm) is enclosed in parentheses and may
              # contain blanks, so only tokenize what follows the last ')'.
              set comm_end [ string last ")" $data ]
              if { $comm_end < 0 } { error "no comm field found in $statfile" }
  558         set data  [ regexp -all -inline {\S+} [ string range $data [ expr { $comm_end + 1 } ] end ] ]
  559 
  560         ;## number of jiffies it has been scheduled...
              # $data now starts at stat field 3 (state), hence utime (field 14)
              # and stime (field 15) are found at indexes 11 and 12.
  561         set utime [ lindex $data 11 ]
  562         set stime [ lindex $data 12 ]
              if { ![ string is digit -strict $utime ] || ![ string is digit -strict $stime ] } {
                  error "utime/stime missing in $statfile"
              }
  563     } err ] } {
  564         error "assertion:can't parse /proc: $err"
  565     }
  566     set mstime [clock milliseconds]
  567     return [ list $mstime $utime $stime ]
  568 }
  569 
  570 # compute %utime and %stime of a thread between two measurements
      # start, end - {mstime utime stime} lists as built by get_cpu_metrics.
      # Returns the list {user_percent system_percent}.
  571 proc compute_cpu_usage {start end} {
  572     set hz [exec getconf CLK_TCK]
  573     lassign $start ms0 user0 sys0
  574     lassign $end ms1 user1 sys1
  575     # elapsed wall-clock time, converted from milliseconds to jiffies
  576     set elapsed [expr {($ms1 - $ms0) * double($hz) / 1000}]
  577     set user_pct [expr {(($user1 - $user0) / $elapsed) * 100}]
  578     set sys_pct [expr {(($sys1 - $sys0) / $elapsed) * 100}]
  579     return [list $user_pct $sys_pct]
  580 }
  581 
  582 
  583 # test diskless rdb pipe with multiple replicas, which may drop half way
  584 start_server {tags {"repl"}} {
  585     set master [srv 0 client]
  586     $master config set repl-diskless-sync yes
  587     $master config set repl-diskless-sync-delay 1
  588     set master_host [srv 0 host]
  589     set master_port [srv 0 port]
  590     set master_pid [srv 0 pid]
  591     # put enough data in the db that the rdb file will be bigger than the socket buffers
  592     # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds
  593     # we also need the replica to process requests during transfer (which it does only once in 2mb)
  594     $master debug populate 20000 test 10000
  595     $master config set rdbcompression no
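  596     # If running on Linux, we also measure utime/stime to detect possible I/O handling issues
          # NOTE(review): `catch` returns a status code (0 or 1), not the output of `uname`, so as written $os never equals "Linux" and measure_time is always 0.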
  597     set os [catch {exec uname}]
  598     set measure_time [expr {$os == "Linux"} ? 1 : 0]
  599     foreach all_drop {no slow fast all timeout} {
  600         test "diskless $all_drop replicas drop during rdb pipe" {
  601             set replicas {}
  602             set replicas_alive {}
  603             # start one replica that will read the rdb fast, and one that will be slow
  604             start_server {} {
  605                 lappend replicas [srv 0 client]
  606                 lappend replicas_alive [srv 0 client]
  607                 start_server {} {
  608                     lappend replicas [srv 0 client]
  609                     lappend replicas_alive [srv 0 client]
  610 
  611                     # start replication
  612                     # it's enough for just one replica to be slow, and have its write handler enabled
  613                     # so that the whole rdb generation process is bound to that
  614                     set loglines [count_log_lines -1]
  615                     [lindex $replicas 0] config set repl-diskless-load swapdb
  616                     [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds
  617                     [lindex $replicas 0] replicaof $master_host $master_port
  618                     [lindex $replicas 1] replicaof $master_host $master_port
  619 
  620                     # wait for the replicas to start reading the rdb
  621                     # using the log file since the replica only responds to INFO once in 2mb
  622                     wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 800 10
  623 
  624                     if {$measure_time} {
  625                         set master_statfile "/proc/$master_pid/stat"
  626                         set master_start_metrics [get_cpu_metrics $master_statfile]
  627                         set start_time [clock seconds]
  628                     }
  629 
  630                     # wait a while so that the pipe socket writer will be
  631                     # blocked on write (since replica 0 is slow to read from the socket)
  632                     after 500
  633 
  634                     # add some command to be present in the command stream after the rdb.
  635                     $master incr $all_drop
  636 
  637                     # disconnect replicas depending on the current test
  638                     set loglines [count_log_lines -2]
  639                     if {$all_drop == "all" || $all_drop == "fast"} {
  640                         exec kill [srv 0 pid]
  641                         set replicas_alive [lreplace $replicas_alive 1 1]
  642                     }
  643                     if {$all_drop == "all" || $all_drop == "slow"} {
  644                         exec kill [srv -1 pid]
  645                         set replicas_alive [lreplace $replicas_alive 0 0]
  646                     }
  647                     if {$all_drop == "timeout"} {
  648                         $master config set repl-timeout 2
  649                         # we want the slow replica to hang on a key for very long so it'll reach repl-timeout
  650                         exec kill -SIGSTOP [srv -1 pid]
  651                         after 2000
  652                     }
  653 
  654                     # wait for rdb child to exit
  655                     wait_for_condition 500 100 {
  656                         [s -2 rdb_bgsave_in_progress] == 0
  657                     } else {
  658                         fail "rdb child didn't terminate"
  659                     }
  660 
  661                     # make sure we got what we were aiming for, by looking for the message in the log file
  662                     if {$all_drop == "all"} {
  663                         wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
  664                     }
  665                     if {$all_drop == "no"} {
  666                         wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
  667                     }
  668                     if {$all_drop == "slow" || $all_drop == "fast"} {
  669                         wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
  670                     }
  671                     if {$all_drop == "timeout"} {
  672                         wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1
  673                         wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
  674                         # master disconnected the slow replica, remove from array
  675                         set replicas_alive [lreplace $replicas_alive 0 0]
  676                         # release it
  677                         exec kill -SIGCONT [srv -1 pid]
  678                     }
  679 
  680                     # make sure we don't have a busy loop going through epoll_wait
  681                     if {$measure_time} {
  682                         set master_end_metrics [get_cpu_metrics $master_statfile]
  683                         set time_elapsed [expr {[clock seconds]-$start_time}]
  684                         set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
  685                         set master_utime [lindex $master_cpu 0]
  686                         set master_stime [lindex $master_cpu 1]
  687                         if {$::verbose} {
  688                             puts "elapsed: $time_elapsed"
  689                             puts "master utime: $master_utime"
  690                             puts "master stime: $master_stime"
  691                         }
  692                         if {!$::no_latency && ($all_drop == "all" || $all_drop == "slow" || $all_drop == "timeout")} {
  693                             assert {$master_utime < 70}
  694                             assert {$master_stime < 70}
  695                         }
  696                         if {!$::no_latency && ($all_drop == "none" || $all_drop == "fast")} {
  697                             assert {$master_utime < 15}
  698                             assert {$master_stime < 15}
  699                         }
  700                     }
  701 
  702                     # verify the data integrity
  703                     foreach replica $replicas_alive {
  704                         # Wait that replicas acknowledge they are online so
  705                         # we are sure that DBSIZE and DEBUG DIGEST will not
  706                         # fail because of timing issues.
  707                         wait_for_condition 150 100 {
  708                             [lindex [$replica role] 3] eq {connected}
  709                         } else {
  710                             fail "replicas still not connected after some time"
  711                         }
  712 
  713                         # Make sure that replicas and master have same
  714                         # number of keys
  715                         wait_for_condition 50 100 {
  716                             [$master dbsize] == [$replica dbsize]
  717                         } else {
  718                             fail "Different number of keys between master and replicas after too long time."
  719                         }
  720 
  721                         # Check digests
  722                         set digest [$master debug digest]
  723                         set digest0 [$replica debug digest]
  724                         assert {$digest ne 0000000000000000000000000000000000000000}
  725                         assert {$digest eq $digest0}
  726                     }
  727                 }
  728             }
  729         }
  730     }
  731 }
  732 
  733 test "diskless replication child being killed is collected" {
  734     # When a diskless master is waiting for the replica to become writable,
  735     # it removes the read event from the rdb pipe, so if the child gets killed
  736     # the replica will hang, and the master may not collect the pid with waitpid.
  737     start_server {tags {"repl"}} {
  738         set master [srv 0 client]
  739         set master_host [srv 0 host]
  740         set master_port [srv 0 port]
  741         set master_pid [srv 0 pid]
  742         $master config set repl-diskless-sync yes
  743         $master config set repl-diskless-sync-delay 0
  744         # put enough data in the db so that the rdb file will be bigger than the socket buffers
  745         $master debug populate 20000 test 10000
  746         $master config set rdbcompression no
  747         start_server {} {
  748             set replica [srv 0 client]
  749             set loglines [count_log_lines 0]
  750             $replica config set repl-diskless-load swapdb
  751             $replica config set key-load-delay 1000000
  752             $replica replicaof $master_host $master_port
  753
  754             # wait for the replica to start reading the rdb
  755             wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
  756
  757             # wait to be sure the replica is hung and the master is blocked on write
  758             after 500
  759
  760             # simulate the OOM killer (or anyone else) killing the child
  761             set fork_child_pid [get_child_pid -1]
  762             exec kill -9 $fork_child_pid
  763
  764             # wait for the parent to notice that the child has exited
  765             wait_for_condition 50 100 {
  766                 [s -1 rdb_bgsave_in_progress] == 0
  767             } else {
  768                 fail "rdb child didn't terminate"
  769             }
  770         }
  771     }
  772 }
  773 
  774 test "diskless replication read pipe cleanup" {
  775     # In diskless replication, we create a read pipe for the RDB, between the child and the parent.
  776     # When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it is still registered).
  777     # Otherwise, the next time the same fd is used, the registration will fail (panic), because
  778     # we will use EPOLL_CTL_MOD (the fd is still registered in the event loop) on an fd that was already removed from epoll.
  779     start_server {tags {"repl"}} {
  780         set master [srv 0 client]
  781         set master_host [srv 0 host]
  782         set master_port [srv 0 port]
  783         set master_pid [srv 0 pid]
  784         $master config set repl-diskless-sync yes
  785         $master config set repl-diskless-sync-delay 0
  786
  787         # put enough data in the db, and slow down the save, to keep the parent busy reading from the pipe
  788         $master config set rdb-key-save-delay 100000
  789         $master debug populate 20000 test 10000
  790         $master config set rdbcompression no
  791         start_server {} {
  792             set replica [srv 0 client]
  793             set loglines [count_log_lines 0]
  794             $replica config set repl-diskless-load swapdb
  795             $replica replicaof $master_host $master_port
  796
  797             # wait for the replica to start reading the rdb (checked in the replica's own log, server 0)
  798             wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
  799
  800             set loglines [count_log_lines -1]
  801             # send FLUSHALL so the RDB child will be killed; the offset above is taken from the master's log (server -1)
  802             $master flushall
  803
  804             # wait for another RDB child process to be started (looked up in the master's log, from that offset)
  805             wait_for_log_messages -1 {"*Background RDB transfer started by pid*"} $loglines 800 10
  806
  807             # make sure master is alive
  808             $master ping
  809         }
  810     }
  811 }
  812 
  813 test {replicaof right after disconnection} {
  814     # This is a rare race condition that was reproduced sporadically by the psync2 unit.
  815     # See details in #7205.
  816     start_server {tags {"repl"}} {
  817         set replica1 [srv 0 client]
  818         set replica1_host [srv 0 host]
  819         set replica1_port [srv 0 port]
  820         set replica1_log [srv 0 stdout]
  821         start_server {} {
  822             set replica2 [srv 0 client]
  823             set replica2_host [srv 0 host]
  824             set replica2_port [srv 0 port]
  825             set replica2_log [srv 0 stdout]
  826             start_server {} {
  827                 set master [srv 0 client]
  828                 set master_host [srv 0 host]
  829                 set master_port [srv 0 port]
  830                 $replica1 replicaof $master_host $master_port
  831                 $replica2 replicaof $master_host $master_port
  832
  833                 wait_for_condition 50 100 {
  834                     [string match {*master_link_status:up*} [$replica1 info replication]] &&
  835                     [string match {*master_link_status:up*} [$replica2 info replication]]
  836                 } else {
  837                     fail "Can't turn the instance into a replica"
  838                 }
  839
  840                 set rd [redis_deferring_client -1]
  841                 $rd debug sleep 1
  842                 after 100
  843
  844                 # When replica2 wakes up from the sleep, it will find both a disconnection
  845                 # from its master and a replicaof command in the same event loop iteration.
  846                 $master client kill type replica
  847                 $replica2 replicaof $replica1_host $replica1_port
  848                 $rd read
  849
  850                 wait_for_condition 50 100 {
  851                     [string match {*master_link_status:up*} [$replica2 info replication]]
  852                 } else {
  853                     fail "role change failed."
  854                 }
  855
  856                 # Make sure psync succeeded, and there were no unexpected full syncs.
  857                 assert_equal [status $master sync_full] 2
  858                 assert_equal [status $replica1 sync_full] 0
  859                 assert_equal [status $replica2 sync_full] 0
  860             }
  861         }
  862     }
  863 }
  864 
  865 test {Kill rdb child process if its dumping RDB is not useful} {
  866     start_server {tags {"repl"}} {
  867         set slave1 [srv 0 client]
  868         start_server {} {
  869             set slave2 [srv 0 client]
  870             start_server {} {
  871                 set master [srv 0 client]
  872                 set master_host [srv 0 host]
  873                 set master_port [srv 0 port]
  874                 for {set i 0} {$i < 10} {incr i} {
  875                     $master set $i $i
  876                 }
  877                 # Generating the RDB will take 10s (10 keys * 1s)
  878                 $master config set rdb-key-save-delay 1000000
  879                 $master config set repl-diskless-sync no
  880                 $master config set save ""
  881
  882                 $slave1 slaveof $master_host $master_port
  883                 $slave2 slaveof $master_host $master_port
  884
  885                 # Wait for the rdb child to start
  886                 wait_for_condition 50 100 {
  887                     ([s 0 rdb_bgsave_in_progress] == 1) &&
  888                     ([string match "*wait_bgsave*" [s 0 slave0]]) &&
  889                     ([string match "*wait_bgsave*" [s 0 slave1]])
  890                 } else {
  891                     fail "rdb child didn't start"
  892                 }
  893
  894                 # Slave1 disconnects from master
  895                 $slave1 slaveof no one
  896                 # Shouldn't kill the child since another slave is still waiting for the rdb
  897                 after 100
  898                 assert {[s 0 rdb_bgsave_in_progress] == 1}
  899
  900                 # Slave2 disconnects from master
  901                 $slave2 slaveof no one
  902                 # Should kill the child
  903                 wait_for_condition 100 10 {
  904                     [s 0 rdb_bgsave_in_progress] eq 0
  905                 } else {
  906                     fail "can't kill rdb child"
  907                 }
  908
  909                 # If save parameters are configured, the child won't be killed
  910                 $master config set save "900 1"
  911                 $slave1 slaveof $master_host $master_port
  912                 $slave2 slaveof $master_host $master_port
  913                 wait_for_condition 50 100 {
  914                     ([s 0 rdb_bgsave_in_progress] == 1) &&
  915                     ([string match "*wait_bgsave*" [s 0 slave0]]) &&
  916                     ([string match "*wait_bgsave*" [s 0 slave1]])
  917                 } else {
  918                     fail "rdb child didn't start"
  919                 }
  920                 $slave1 slaveof no one
  921                 $slave2 slaveof no one
  922                 after 200
  923                 assert {[s 0 rdb_bgsave_in_progress] == 1}
  924                 catch {$master shutdown nosave}
  925             }
  926         }
  927     }
  928 }