From 7caf26ce94231ac0add0f879d3780a95bad2805c Mon Sep 17 00:00:00 2001 From: Andras Banki-Horvath Date: Thu, 11 Mar 2021 17:23:18 +0100 Subject: [PATCH] itest: add itest for failover after forceful shutdown --- lntest/harness.go | 5 +++ lntest/itest/lnd_etcd_failover_test.go | 54 ++++++++++++++++++++++++-- lntest/node.go | 5 +++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/lntest/harness.go b/lntest/harness.go index af747891..0d8bf520 100644 --- a/lntest/harness.go +++ b/lntest/harness.go @@ -788,6 +788,11 @@ func (n *NetworkHarness) ShutdownNode(node *HarnessNode) error { return nil } +// KillNode kills the node (but won't wait for the node process to stop). +func (n *NetworkHarness) KillNode(node *HarnessNode) error { + return node.kill() +} + // StopNode stops the target node, but doesn't yet clean up its directories. // This can be used to temporarily bring a node down during a test, to be later // started up again. diff --git a/lntest/itest/lnd_etcd_failover_test.go b/lntest/itest/lnd_etcd_failover_test.go index e76502e7..8fcf8d7f 100644 --- a/lntest/itest/lnd_etcd_failover_test.go +++ b/lntest/itest/lnd_etcd_failover_test.go @@ -5,6 +5,7 @@ package itest import ( "context" "io/ioutil" + "testing" "time" "github.com/btcsuite/btcutil" @@ -29,7 +30,41 @@ func assertLeader(ht *harnessTest, observer cluster.LeaderElector, } } +// testEtcdFailover tests that in a cluster setup where two LND nodes form a +// single cluster (sharing the same identity) one can hand over the leader role +// to the other (failing over after graceful shutdown or forceful abort). 
func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) { + testCases := []struct { + name string + kill bool + }{{ + name: "failover after shutdown", + kill: false, + }, { + name: "failover after abort", + kill: true, + }} + + for _, test := range testCases { + test := test + + ht.t.Run(test.name, func(t1 *testing.T) { + ht1 := newHarnessTest(t1, ht.lndHarness) + ht1.RunTestCase(&testCase{ + name: test.name, + test: func(_ *lntest.NetworkHarness, + tt *harnessTest) { + + testEtcdFailoverCase(net, tt, test.kill) + }, + }) + }) + } +} + +func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest, + kill bool) { + ctxb := context.Background() tmpDir, err := ioutil.TempDir("", "etcd") @@ -104,9 +139,22 @@ func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) { FeeLimitSat: noFeeLimitMsat, }) - // Shut down Carol-1 and wait for Carol-2 to become the leader. - shutdownAndAssert(net, ht, carol1) - err = carol2.WaitUntilLeader(30 * time.Second) + // Shut down or kill Carol-1 and wait for Carol-2 to become the leader. + var failoverTimeout time.Duration + if kill { + err = net.KillNode(carol1) + if err != nil { + ht.Fatalf("Can't kill Carol-1: %v", err) + } + + failoverTimeout = 2 * time.Minute + + } else { + shutdownAndAssert(net, ht, carol1) + failoverTimeout = 30 * time.Second + } + + err = carol2.WaitUntilLeader(failoverTimeout) if err != nil { ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v", err) diff --git a/lntest/node.go b/lntest/node.go index 988b4249..e81ff696 100644 --- a/lntest/node.go +++ b/lntest/node.go @@ -1147,6 +1147,11 @@ func (hn *HarnessNode) shutdown() error { return nil } +// kill kills the lnd process +func (hn *HarnessNode) kill() error { + return hn.cmd.Process.Kill() +} + // closeChanWatchRequest is a request to the lightningNetworkWatcher to be // notified once it's detected within the test Lightning Network, that a // channel has either been added or closed.