From cb927e89b02a5d33511b93626d3c2607bdcaabc2 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:36 +0200 Subject: [PATCH 01/13] routing/test: add check that sendpayment completes As is, we don't check that our SendPayment call in TestRouterPaymentStateMachine completes. This makes it easier to create malformed tests that just run through steps but leave the SendPayment call hanging. This commit adds a check that we have completed our payment to help catch tests like this. We also remove an unused quit channel. --- routing/payment_lifecycle_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index e83ac17f..70346966 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -564,9 +564,6 @@ func TestRouterPaymentStateMachine(t *testing.T) { control.failPayment = make(chan failPaymentArgs, 20) control.fetchInFlight = make(chan struct{}, 20) - quit := make(chan struct{}) - defer close(quit) - // setupRouter is a helper method that creates and starts the router in // the desired configuration for this test. setupRouter := func() (*ChannelRouter, chan error, @@ -672,9 +669,11 @@ func TestRouterPaymentStateMachine(t *testing.T) { // Send the payment. Since this is new payment hash, the // information should be registered with the ControlTower. paymentResult := make(chan error) + done := make(chan struct{}) go func() { _, _, err := router.SendPayment(&payment) paymentResult <- err + close(done) }() var resendResult chan error @@ -894,5 +893,11 @@ func TestRouterPaymentStateMachine(t *testing.T) { t.Fatalf("unknown step %v", step) } } + + select { + case <-done: + case <-time.After(testTimeout): + t.Fatalf("SendPayment didn't exit") + } } } From 806c4cbd577ec0e55398b40432fa4f66f6f67f11 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:37 +0200 Subject: [PATCH 02/13] routing/test: run each test case individually, add names Update our payment lifecycle test to run each test case with a fresh router. This prevents test cases from interacting with each other. Names are also added for easy debugging. --- routing/payment_lifecycle_test.go | 717 ++++++++++++++++-------------- 1 file changed, 378 insertions(+), 339 deletions(-) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 70346966..d30ca02c 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -48,6 +48,101 @@ func createTestRoute(amt lnwire.MilliSatoshi, ) } +// paymentLifecycleTestCase contains the steps that we expect for a payment +// lifecycle test, and the routes that pathfinding should deliver. +type paymentLifecycleTestCase struct { + name string + + // steps is a list of steps to perform during the testcase. + steps []string + + // routes is the sequence of routes we will provide to the + // router when it requests a new route. + routes []*route.Route +} + +const ( + // routerInitPayment is a test step where we expect the router + // to call the InitPayment method on the control tower. + routerInitPayment = "Router:init-payment" + + // routerRegisterAttempt is a test step where we expect the + // router to call the RegisterAttempt method on the control + // tower. + routerRegisterAttempt = "Router:register-attempt" + + // routerSettleAttempt is a test step where we expect the + // router to call the SettleAttempt method on the control + // tower. + routerSettleAttempt = "Router:settle-attempt" + + // routerFailAttempt is a test step where we expect the router + // to call the FailAttempt method on the control tower. + routerFailAttempt = "Router:fail-attempt" + + // routerFailPayment is a test step where we expect the router + // to call the Fail method on the control tower. + routerFailPayment = "Router:fail-payment" + + // sendToSwitchSuccess is a step where we expect the router to + // call send the payment attempt to the switch, and we will + // respond with a non-error, indicating that the payment + // attempt was successfully forwarded. + sendToSwitchSuccess = "SendToSwitch:success" + + // sendToSwitchResultFailure is a step where we expect the + // router to send the payment attempt to the switch, and we + // will respond with a forwarding error. This can happen when + // forwarding fail on our local links. + sendToSwitchResultFailure = "SendToSwitch:failure" + + // getPaymentResultSuccess is a test step where we expect the + // router to call the GetPaymentResult method, and we will + // respond with a successful payment result. + getPaymentResultSuccess = "GetPaymentResult:success" + + // getPaymentResultTempFailure is a test step where we expect the + // router to call the GetPaymentResult method, and we will + // respond with a forwarding error, expecting the router to retry. + getPaymentResultTempFailure = "GetPaymentResult:temp-failure" + + // getPaymentResultTerminalFailure is a test step where we + // expect the router to call the GetPaymentResult method, and + // we will respond with a terminal error, expecting the router + // to stop making payment attempts. + getPaymentResultTerminalFailure = "GetPaymentResult:terminal-failure" + + // resendPayment is a test step where we manually try to resend + // the same payment, making sure the router responds with an + // error indicating that it is already in flight. + resendPayment = "ResendPayment" + + // startRouter is a step where we manually start the router, + // used to test that it automatically will resume payments at + // startup. + startRouter = "StartRouter" + + // stopRouter is a test step where we manually make the router + // shut down. + stopRouter = "StopRouter" + + // paymentSuccess is a step where assert that we receive a + // successful result for the original payment made. + paymentSuccess = "PaymentSuccess" + + // paymentError is a step where assert that we receive an error + // for the original payment made. + paymentError = "PaymentError" + + // resentPaymentSuccess is a step where assert that we receive + // a successful result for a payment that was resent. + resentPaymentSuccess = "ResentPaymentSuccess" + + // resentPaymentError is a step where assert that we receive an + // error for a payment that was resent. + resentPaymentError = "ResentPaymentError" +) + // TestRouterPaymentStateMachine tests that the router interacts as expected // with the ControlTower during a payment lifecycle, such that it payment // attempts are not sent twice to the switch, and results are handled after a @@ -95,102 +190,11 @@ func TestRouterPaymentStateMachine(t *testing.T) { t.Fatalf("unable to create route: %v", err) } - // A payment state machine test case consists of several ordered steps, - // that we use for driving the scenario. - type testCase struct { - // steps is a list of steps to perform during the testcase. - steps []string - - // routes is the sequence of routes we will provide to the - // router when it requests a new route. - routes []*route.Route - } - - const ( - // routerInitPayment is a test step where we expect the router - // to call the InitPayment method on the control tower. - routerInitPayment = "Router:init-payment" - - // routerRegisterAttempt is a test step where we expect the - // router to call the RegisterAttempt method on the control - // tower. - routerRegisterAttempt = "Router:register-attempt" - - // routerSettleAttempt is a test step where we expect the - // router to call the SettleAttempt method on the control - // tower. - routerSettleAttempt = "Router:settle-attempt" - - // routerFailAttempt is a test step where we expect the router - // to call the FailAttempt method on the control tower. - routerFailAttempt = "Router:fail-attempt" - - // routerFailPayment is a test step where we expect the router - // to call the Fail method on the control tower. - routerFailPayment = "Router:fail-payment" - - // sendToSwitchSuccess is a step where we expect the router to - // call send the payment attempt to the switch, and we will - // respond with a non-error, indicating that the payment - // attempt was successfully forwarded. - sendToSwitchSuccess = "SendToSwitch:success" - - // sendToSwitchResultFailure is a step where we expect the - // router to send the payment attempt to the switch, and we - // will respond with a forwarding error. This can happen when - // forwarding fail on our local links. - sendToSwitchResultFailure = "SendToSwitch:failure" - - // getPaymentResultSuccess is a test step where we expect the - // router to call the GetPaymentResult method, and we will - // respond with a successful payment result. - getPaymentResultSuccess = "GetPaymentResult:success" - - // getPaymentResultTempFailure is a test step where we expect the - // router to call the GetPaymentResult method, and we will - // respond with a forwarding error, expecting the router to retry. - getPaymentResultTempFailure = "GetPaymentResult:temp-failure" - - // getPaymentResultTerminalFailure is a test step where we - // expect the router to call the GetPaymentResult method, and - // we will respond with a terminal error, expecting the router - // to stop making payment attempts. - getPaymentResultTerminalFailure = "GetPaymentResult:terminal-failure" - - // resendPayment is a test step where we manually try to resend - // the same payment, making sure the router responds with an - // error indicating that it is already in flight. - resendPayment = "ResendPayment" - - // startRouter is a step where we manually start the router, - // used to test that it automatically will resume payments at - // startup. - startRouter = "StartRouter" - - // stopRouter is a test step where we manually make the router - // shut down. - stopRouter = "StopRouter" - - // paymentSuccess is a step where assert that we receive a - // successful result for the original payment made. - paymentSuccess = "PaymentSuccess" - - // paymentError is a step where assert that we receive an error - // for the original payment made. - paymentError = "PaymentError" - - // resentPaymentSuccess is a step where assert that we receive - // a successful result for a payment that was resent. - resentPaymentSuccess = "ResentPaymentSuccess" - - // resentPaymentError is a step where assert that we receive an - // error for a payment that was resent. - resentPaymentError = "ResentPaymentError" - ) - - tests := []testCase{ + tests := []paymentLifecycleTestCase{ { // Tests a normal payment flow that succeeds. + name: "single shot success", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -204,6 +208,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // A payment flow with a failure on the first attempt, // but that succeeds on the second attempt. + name: "single shot retry", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -228,6 +234,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { // A payment flow with a forwarding failure first time // sending to the switch, but that succeeds on the // second attempt. + name: "single shot switch failure", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -251,6 +259,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { // A payment that fails on the first attempt, and has // only one route available to try. It will therefore // fail permanently. + name: "single shot route fails", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -270,6 +280,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // We expect the payment to fail immediately if we have // no routes to try. + name: "single shot no route", + steps: []string{ routerInitPayment, routerFailPayment, @@ -282,6 +294,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { // the same payment after each step. This ensures that // the router don't attempt to resend a payment already // in flight. + name: "single shot resend", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -322,6 +336,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // Tests that the router is able to handle the // receieved payment result after a restart. + name: "single shot restart", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -344,6 +360,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // Tests that we are allowed to resend a payment after // it has permanently failed. + name: "single shot resend fail", + steps: []string{ routerInitPayment, routerRegisterAttempt, @@ -382,6 +400,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { // ===================================== { // Tests a simple successful MP payment of 4 shards. + name: "MP success", + steps: []string{ routerInitPayment, @@ -422,6 +442,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // An MP payment scenario where we need several extra // attempts before the payment finally settle. + name: "MP failed shards", + steps: []string{ routerInitPayment, @@ -474,6 +496,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { // However the last shard settle, which means we get // the preimage and should consider the overall payment // a success. + name: "MP one shard success", + steps: []string{ routerInitPayment, @@ -513,6 +537,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // An MP payment scenario a shard fail with a terminal // error, causing the router to stop attempting. + name: "MP terminal", + steps: []string{ routerInitPayment, @@ -554,6 +580,21 @@ func TestRouterPaymentStateMachine(t *testing.T) { }, } + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + testPaymentLifecycle( + t, test, paymentAmt, startingBlockHeight, + testGraph, + ) + }) + } +} + +func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, + paymentAmt lnwire.MilliSatoshi, startingBlockHeight uint32, + testGraph *testGraphInstance) { + // Create a mock control tower with channels set up, that we use to // synchronize and listen for events. control := makeMockControlTower() @@ -644,260 +685,258 @@ func TestRouterPaymentStateMachine(t *testing.T) { } }() - for _, test := range tests { - // Craft a LightningPayment struct. - var preImage lntypes.Preimage - if _, err := rand.Read(preImage[:]); err != nil { - t.Fatalf("unable to generate preimage") - } + // Craft a LightningPayment struct. + var preImage lntypes.Preimage + if _, err := rand.Read(preImage[:]); err != nil { + t.Fatalf("unable to generate preimage") + } - payHash := preImage.Hash() + payHash := preImage.Hash() - payment := LightningPayment{ - Target: testGraph.aliasMap["c"], - Amount: paymentAmt, - FeeLimit: noFeeLimit, - PaymentHash: payHash, - } + payment := LightningPayment{ + Target: testGraph.aliasMap["c"], + Amount: paymentAmt, + FeeLimit: noFeeLimit, + PaymentHash: payHash, + } - router.cfg.SessionSource = &mockPaymentSessionSource{ - routes: test.routes, - } + router.cfg.SessionSource = &mockPaymentSessionSource{ + routes: test.routes, + } - router.cfg.MissionControl = &mockMissionControl{} + router.cfg.MissionControl = &mockMissionControl{} - // Send the payment. Since this is new payment hash, the - // information should be registered with the ControlTower. - paymentResult := make(chan error) - done := make(chan struct{}) - go func() { - _, _, err := router.SendPayment(&payment) - paymentResult <- err - close(done) - }() + // Send the payment. Since this is new payment hash, the + // information should be registered with the ControlTower. + paymentResult := make(chan error) + done := make(chan struct{}) + go func() { + _, _, err := router.SendPayment(&payment) + paymentResult <- err + close(done) + }() - var resendResult chan error - for _, step := range test.steps { - switch step { + var resendResult chan error + for _, step := range test.steps { + switch step { - case routerInitPayment: - var args initArgs - select { - case args = <-control.init: - case <-time.After(stepTimeout): - t.Fatalf("no init payment with control") - } - - if args.c == nil { - t.Fatalf("expected non-nil CreationInfo") - } - - // In this step we expect the router to make a call to - // register a new attempt with the ControlTower. - case routerRegisterAttempt: - var args registerAttemptArgs - select { - case args = <-control.registerAttempt: - case <-time.After(stepTimeout): - t.Fatalf("attempt not registered " + - "with control") - } - - if args.a == nil { - t.Fatalf("expected non-nil AttemptInfo") - } - - // In this step we expect the router to call the - // ControlTower's SettleAttempt method with the preimage. - case routerSettleAttempt: - select { - case <-control.settleAttempt: - case <-time.After(stepTimeout): - t.Fatalf("attempt settle not " + - "registered with control") - } - - // In this step we expect the router to call the - // ControlTower's FailAttempt method with a HTLC fail - // info. - case routerFailAttempt: - select { - case <-control.failAttempt: - case <-time.After(stepTimeout): - t.Fatalf("attempt fail not " + - "registered with control") - } - - // In this step we expect the router to call the - // ControlTower's Fail method, to indicate that the - // payment failed. - case routerFailPayment: - select { - case <-control.failPayment: - case <-time.After(stepTimeout): - t.Fatalf("payment fail not " + - "registered with control") - } - - // In this step we expect the SendToSwitch method to be - // called, and we respond with a nil-error. - case sendToSwitchSuccess: - select { - case sendResult <- nil: - case <-time.After(stepTimeout): - t.Fatalf("unable to send result") - } - - // In this step we expect the SendToSwitch method to be - // called, and we respond with a forwarding error - case sendToSwitchResultFailure: - select { - case sendResult <- htlcswitch.NewForwardingError( - &lnwire.FailTemporaryChannelFailure{}, - 1, - ): - case <-time.After(stepTimeout): - t.Fatalf("unable to send result") - } - - // In this step we expect the GetPaymentResult method - // to be called, and we respond with the preimage to - // complete the payment. - case getPaymentResultSuccess: - select { - case getPaymentResult <- &htlcswitch.PaymentResult{ - Preimage: preImage, - }: - case <-time.After(stepTimeout): - t.Fatalf("unable to send result") - } - - // In this state we expect the GetPaymentResult method - // to be called, and we respond with a forwarding - // error, indicating that the router should retry. - case getPaymentResultTempFailure: - failure := htlcswitch.NewForwardingError( - &lnwire.FailTemporaryChannelFailure{}, - 1, - ) - - select { - case getPaymentResult <- &htlcswitch.PaymentResult{ - Error: failure, - }: - case <-time.After(stepTimeout): - t.Fatalf("unable to get result") - } - - // In this state we expect the router to call the - // GetPaymentResult method, and we will respond with a - // terminal error, indiating the router should stop - // making payment attempts. - case getPaymentResultTerminalFailure: - failure := htlcswitch.NewForwardingError( - &lnwire.FailIncorrectDetails{}, - 1, - ) - - select { - case getPaymentResult <- &htlcswitch.PaymentResult{ - Error: failure, - }: - case <-time.After(stepTimeout): - t.Fatalf("unable to get result") - } - - // In this step we manually try to resend the same - // payment, making sure the router responds with an - // error indicating that it is already in flight. - case resendPayment: - resendResult = make(chan error) - go func() { - _, _, err := router.SendPayment(&payment) - resendResult <- err - }() - - // In this step we manually stop the router. - case stopRouter: - select { - case getPaymentResultErr <- fmt.Errorf( - "shutting down"): - case <-time.After(stepTimeout): - t.Fatalf("unable to send payment " + - "result error") - } - - if err := router.Stop(); err != nil { - t.Fatalf("unable to restart: %v", err) - } - - // In this step we manually start the router. - case startRouter: - router, sendResult, getPaymentResult, - getPaymentResultErr = setupRouter() - - // In this state we expect to receive an error for the - // original payment made. - case paymentError: - select { - case err := <-paymentResult: - if err == nil { - t.Fatalf("expected error") - } - - case <-time.After(stepTimeout): - t.Fatalf("got no payment result") - } - - // In this state we expect the original payment to - // succeed. - case paymentSuccess: - select { - case err := <-paymentResult: - if err != nil { - t.Fatalf("did not expect "+ - "error %v", err) - } - - case <-time.After(stepTimeout): - t.Fatalf("got no payment result") - } - - // In this state we expect to receive an error for the - // resent payment made. - case resentPaymentError: - select { - case err := <-resendResult: - if err == nil { - t.Fatalf("expected error") - } - - case <-time.After(stepTimeout): - t.Fatalf("got no payment result") - } - - // In this state we expect the resent payment to - // succeed. - case resentPaymentSuccess: - select { - case err := <-resendResult: - if err != nil { - t.Fatalf("did not expect error %v", err) - } - - case <-time.After(stepTimeout): - t.Fatalf("got no payment result") - } - - default: - t.Fatalf("unknown step %v", step) + case routerInitPayment: + var args initArgs + select { + case args = <-control.init: + case <-time.After(stepTimeout): + t.Fatalf("no init payment with control") } - } - select { - case <-done: - case <-time.After(testTimeout): - t.Fatalf("SendPayment didn't exit") + if args.c == nil { + t.Fatalf("expected non-nil CreationInfo") + } + + // In this step we expect the router to make a call to + // register a new attempt with the ControlTower. + case routerRegisterAttempt: + var args registerAttemptArgs + select { + case args = <-control.registerAttempt: + case <-time.After(stepTimeout): + t.Fatalf("attempt not registered " + + "with control") + } + + if args.a == nil { + t.Fatalf("expected non-nil AttemptInfo") + } + + // In this step we expect the router to call the + // ControlTower's SettleAttempt method with the preimage. + case routerSettleAttempt: + select { + case <-control.settleAttempt: + case <-time.After(stepTimeout): + t.Fatalf("attempt settle not " + + "registered with control") + } + + // In this step we expect the router to call the + // ControlTower's FailAttempt method with a HTLC fail + // info. + case routerFailAttempt: + select { + case <-control.failAttempt: + case <-time.After(stepTimeout): + t.Fatalf("attempt fail not " + + "registered with control") + } + + // In this step we expect the router to call the + // ControlTower's Fail method, to indicate that the + // payment failed. + case routerFailPayment: + select { + case <-control.failPayment: + case <-time.After(stepTimeout): + t.Fatalf("payment fail not " + + "registered with control") + } + + // In this step we expect the SendToSwitch method to be + // called, and we respond with a nil-error. + case sendToSwitchSuccess: + select { + case sendResult <- nil: + case <-time.After(stepTimeout): + t.Fatalf("unable to send result") + } + + // In this step we expect the SendToSwitch method to be + // called, and we respond with a forwarding error + case sendToSwitchResultFailure: + select { + case sendResult <- htlcswitch.NewForwardingError( + &lnwire.FailTemporaryChannelFailure{}, + 1, + ): + case <-time.After(stepTimeout): + t.Fatalf("unable to send result") + } + + // In this step we expect the GetPaymentResult method + // to be called, and we respond with the preimage to + // complete the payment. + case getPaymentResultSuccess: + select { + case getPaymentResult <- &htlcswitch.PaymentResult{ + Preimage: preImage, + }: + case <-time.After(stepTimeout): + t.Fatalf("unable to send result") + } + + // In this state we expect the GetPaymentResult method + // to be called, and we respond with a forwarding + // error, indicating that the router should retry. + case getPaymentResultTempFailure: + failure := htlcswitch.NewForwardingError( + &lnwire.FailTemporaryChannelFailure{}, + 1, + ) + + select { + case getPaymentResult <- &htlcswitch.PaymentResult{ + Error: failure, + }: + case <-time.After(stepTimeout): + t.Fatalf("unable to get result") + } + + // In this state we expect the router to call the + // GetPaymentResult method, and we will respond with a + // terminal error, indiating the router should stop + // making payment attempts. + case getPaymentResultTerminalFailure: + failure := htlcswitch.NewForwardingError( + &lnwire.FailIncorrectDetails{}, + 1, + ) + + select { + case getPaymentResult <- &htlcswitch.PaymentResult{ + Error: failure, + }: + case <-time.After(stepTimeout): + t.Fatalf("unable to get result") + } + + // In this step we manually try to resend the same + // payment, making sure the router responds with an + // error indicating that it is already in flight. + case resendPayment: + resendResult = make(chan error) + go func() { + _, _, err := router.SendPayment(&payment) + resendResult <- err + }() + + // In this step we manually stop the router. + case stopRouter: + select { + case getPaymentResultErr <- fmt.Errorf( + "shutting down"): + case <-time.After(stepTimeout): + t.Fatalf("unable to send payment " + + "result error") + } + + if err := router.Stop(); err != nil { + t.Fatalf("unable to restart: %v", err) + } + + // In this step we manually start the router. + case startRouter: + router, sendResult, getPaymentResult, + getPaymentResultErr = setupRouter() + + // In this state we expect to receive an error for the + // original payment made. + case paymentError: + select { + case err := <-paymentResult: + if err == nil { + t.Fatalf("expected error") + } + + case <-time.After(stepTimeout): + t.Fatalf("got no payment result") + } + + // In this state we expect the original payment to + // succeed. + case paymentSuccess: + select { + case err := <-paymentResult: + if err != nil { + t.Fatalf("did not expect "+ + "error %v", err) + } + + case <-time.After(stepTimeout): + t.Fatalf("got no payment result") + } + + // In this state we expect to receive an error for the + // resent payment made. + case resentPaymentError: + select { + case err := <-resendResult: + if err == nil { + t.Fatalf("expected error") + } + + case <-time.After(stepTimeout): + t.Fatalf("got no payment result") + } + + // In this state we expect the resent payment to + // succeed. + case resentPaymentSuccess: + select { + case err := <-resendResult: + if err != nil { + t.Fatalf("did not expect error %v", err) + } + + case <-time.After(stepTimeout): + t.Fatalf("got no payment result") + } + + default: + t.Fatalf("unknown step %v", step) } } + + select { + case <-done: + case <-time.After(testTimeout): + t.Fatalf("SendPayment didn't exit") + } } From b2d941ebfb6965fc180afe95c323b1ee90400f27 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:38 +0200 Subject: [PATCH 03/13] routing/test: remove test channel buffers Now that we run each test individually, we don't need to buffer our mock's channels anymore. This helps to tighten our test loop, which currently can move on from a step before it's actually been processed by the mock. This removal ensures that our payment loop processes each of the test's steps before moving on to the next once. --- routing/payment_lifecycle_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index d30ca02c..62c85c36 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -598,12 +598,12 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // Create a mock control tower with channels set up, that we use to // synchronize and listen for events. control := makeMockControlTower() - control.init = make(chan initArgs, 20) - control.registerAttempt = make(chan registerAttemptArgs, 20) - control.settleAttempt = make(chan settleAttemptArgs, 20) - control.failAttempt = make(chan failAttemptArgs, 20) - control.failPayment = make(chan failPaymentArgs, 20) - control.fetchInFlight = make(chan struct{}, 20) + control.init = make(chan initArgs) + control.registerAttempt = make(chan registerAttemptArgs) + control.settleAttempt = make(chan settleAttemptArgs) + control.failAttempt = make(chan failAttemptArgs) + control.failPayment = make(chan failPaymentArgs) + control.fetchInFlight = make(chan struct{}) // setupRouter is a helper method that creates and starts the router in // the desired configuration for this test. From 9a78e9da7353e642844c1a4ddc3f64bfa066a04b Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:39 +0200 Subject: [PATCH 04/13] routing/tests: move lock acquisition after state driving channels In our payment lifecycle tests, we have two goroutines that compete for the lock in our mock control tower: the resumePayment loop which tries to call RegisterAttempt, and the collectResult handler which is launched in a goroutine by collectResultAsync and is responsible for various settle/fail calls. The order that the lock is acquired by these goroutines is arbitrary, and can lead to flakes in our tests if the step that we do not intend to execute first gets the lock (eg, we want to fail a payment in collectResult, but RegisterAttempt gets there first). This commit moves contention for this lock after our mock's various "state driving" channels, so that the lock will be acquired in the order that the test intends it. --- routing/mock_test.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/routing/mock_test.go b/routing/mock_test.go index 9c4f79e5..ae67075d 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -248,13 +248,13 @@ func makeMockControlTower() *mockControlTower { func (m *mockControlTower) InitPayment(phash lntypes.Hash, c *channeldb.PaymentCreationInfo) error { - m.Lock() - defer m.Unlock() - if m.init != nil { m.init <- initArgs{c} } + m.Lock() + defer m.Unlock() + // Don't allow re-init a successful payment. if _, ok := m.successful[phash]; ok { return channeldb.ErrAlreadyPaid @@ -279,13 +279,13 @@ func (m *mockControlTower) InitPayment(phash lntypes.Hash, func (m *mockControlTower) RegisterAttempt(phash lntypes.Hash, a *channeldb.HTLCAttemptInfo) error { - m.Lock() - defer m.Unlock() - if m.registerAttempt != nil { m.registerAttempt <- registerAttemptArgs{a} } + m.Lock() + defer m.Unlock() + // Cannot register attempts for successful or failed payments. if _, ok := m.successful[phash]; ok { return channeldb.ErrPaymentAlreadySucceeded @@ -312,13 +312,13 @@ func (m *mockControlTower) SettleAttempt(phash lntypes.Hash, pid uint64, settleInfo *channeldb.HTLCSettleInfo) ( *channeldb.HTLCAttempt, error) { - m.Lock() - defer m.Unlock() - if m.settleAttempt != nil { m.settleAttempt <- settleAttemptArgs{settleInfo.Preimage} } + m.Lock() + defer m.Unlock() + // Only allow setting attempts if the payment is known. p, ok := m.payments[phash] if !ok { @@ -353,13 +353,13 @@ func (m *mockControlTower) SettleAttempt(phash lntypes.Hash, func (m *mockControlTower) FailAttempt(phash lntypes.Hash, pid uint64, failInfo *channeldb.HTLCFailInfo) (*channeldb.HTLCAttempt, error) { - m.Lock() - defer m.Unlock() - if m.failAttempt != nil { m.failAttempt <- failAttemptArgs{failInfo} } + m.Lock() + defer m.Unlock() + // Only allow failing attempts if the payment is known. p, ok := m.payments[phash] if !ok { @@ -437,13 +437,13 @@ func (m *mockControlTower) FetchPayment(phash lntypes.Hash) ( func (m *mockControlTower) FetchInFlightPayments() ( []*channeldb.InFlightPayment, error) { - m.Lock() - defer m.Unlock() - if m.fetchInFlight != nil { m.fetchInFlight <- struct{}{} } + m.Lock() + defer m.Unlock() + // In flight are all payments not successful or failed. var fl []*channeldb.InFlightPayment for hash, p := range m.payments { From e0c52e447339311c2a1e09825c9a48bd7fa2c71e Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:40 +0200 Subject: [PATCH 05/13] routing/test: close payment result channel on shutdown, mimicking switch This commit updates our mock to more closely follow the behavior of the switch for mocked calls to GetPaymentResult. As it stands, our tests send a test-created error from the switch when we want to mock shutdown. In reality, the switch will close its result channel, so we update this test to follow that behavior. This matters for the commit that follows, because we start checking the error our payments return. If we have an error from the switch, our tests will fail with an error that we do not encounter in practice. --- routing/mock_test.go | 19 +++++++++++-------- routing/payment_lifecycle_test.go | 26 +++++++++----------------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/routing/mock_test.go b/routing/mock_test.go index ae67075d..2caf9336 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -155,10 +155,9 @@ func (m *mockPaymentSession) RequestRoute(_, _ lnwire.MilliSatoshi, } type mockPayer struct { - sendResult chan error - paymentResultErr chan error - paymentResult chan *htlcswitch.PaymentResult - quit chan struct{} + sendResult chan error + paymentResult chan *htlcswitch.PaymentResult + quit chan struct{} } var _ PaymentAttemptDispatcher = (*mockPayer)(nil) @@ -180,12 +179,16 @@ func (m *mockPayer) GetPaymentResult(paymentID uint64, _ lntypes.Hash, _ htlcswitch.ErrorDecrypter) (<-chan *htlcswitch.PaymentResult, error) { select { - case res := <-m.paymentResult: + case res, ok := <-m.paymentResult: resChan := make(chan *htlcswitch.PaymentResult, 1) - resChan <- res + if !ok { + close(resChan) + } else { + resChan <- res + } + return resChan, nil - case err := <-m.paymentResultErr: - return nil, err + case <-m.quit: return nil, fmt.Errorf("test quitting") } diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 62c85c36..a122f870 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -2,7 +2,6 @@ package routing import ( "crypto/rand" - "fmt" "sync/atomic" "testing" "time" @@ -608,7 +607,7 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // setupRouter is a helper method that creates and starts the router in // the desired configuration for this test. setupRouter := func() (*ChannelRouter, chan error, - chan *htlcswitch.PaymentResult, chan error) { + chan *htlcswitch.PaymentResult) { chain := newMockChain(startingBlockHeight) chainView := newMockChainView(chain) @@ -616,13 +615,11 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // We set uo the use the following channels and a mock Payer to // synchonize with the interaction to the Switch. sendResult := make(chan error) - paymentResultErr := make(chan error) paymentResult := make(chan *htlcswitch.PaymentResult) payer := &mockPayer{ - sendResult: sendResult, - paymentResult: paymentResult, - paymentResultErr: paymentResultErr, + sendResult: sendResult, + paymentResult: paymentResult, } router, err := New(Config{ @@ -675,10 +672,10 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, t.Fatalf("did not fetch in flight payments at startup") } - return router, sendResult, paymentResult, paymentResultErr + return router, sendResult, paymentResult } - router, sendResult, getPaymentResult, getPaymentResultErr := setupRouter() + router, sendResult, getPaymentResult := setupRouter() defer func() { if err := router.Stop(); err != nil { t.Fatal(err) @@ -859,13 +856,9 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // In this step we manually stop the router. case stopRouter: - select { - case getPaymentResultErr <- fmt.Errorf( - "shutting down"): - case <-time.After(stepTimeout): - t.Fatalf("unable to send payment " + - "result error") - } + // On shutdown, the switch closes our result channel. + // Mimic this behavior in our mock. + close(getPaymentResult) if err := router.Stop(); err != nil { t.Fatalf("unable to restart: %v", err) @@ -873,8 +866,7 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // In this step we manually start the router. case startRouter: - router, sendResult, getPaymentResult, - getPaymentResultErr = setupRouter() + router, sendResult, getPaymentResult = setupRouter() // In this state we expect to receive an error for the // original payment made. From 198d567cb2436a7023a2af9c206359d88028e7e0 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:41 +0200 Subject: [PATCH 06/13] routing/test: assert error value for payment failures --- routing/payment_lifecycle_test.go | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index a122f870..80c4c281 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -14,6 +14,7 @@ import ( "github.com/lightningnetwork/lnd/lntypes" "github.com/lightningnetwork/lnd/lnwire" "github.com/lightningnetwork/lnd/routing/route" + "github.com/stretchr/testify/require" ) const stepTimeout = 5 * time.Second @@ -58,6 +59,11 @@ type paymentLifecycleTestCase struct { // routes is the sequence of routes we will provide to the // router when it requests a new route. routes []*route.Route + + // paymentErr is the error we expect our payment to fail with. This + // should be nil for tests with paymentSuccess steps and non-nil for + // payments with paymentError steps. + paymentErr error } const ( @@ -274,7 +280,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerFailPayment, paymentError, }, - routes: []*route.Route{rt}, + routes: []*route.Route{rt}, + paymentErr: channeldb.FailureReasonNoRoute, }, { // We expect the payment to fail immediately if we have @@ -286,7 +293,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerFailPayment, paymentError, }, - routes: []*route.Route{}, + routes: []*route.Route{}, + paymentErr: channeldb.FailureReasonNoRoute, }, { // A normal payment flow, where we attempt to resend @@ -354,7 +362,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { getPaymentResultSuccess, routerSettleAttempt, }, - routes: []*route.Route{rt}, + routes: []*route.Route{rt}, + paymentErr: ErrRouterShuttingDown, }, { // Tests that we are allowed to resend a payment after @@ -391,7 +400,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerSettleAttempt, resentPaymentSuccess, }, - routes: []*route.Route{rt}, + routes: []*route.Route{rt}, + paymentErr: channeldb.FailureReasonNoRoute, }, // ===================================== @@ -576,6 +586,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routes: []*route.Route{ shard, shard, shard, shard, shard, shard, }, + paymentErr: channeldb.FailureReasonPaymentDetails, }, } @@ -871,11 +882,12 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // In this state we expect to receive an error for the // original payment made. case paymentError: + require.Error(t, test.paymentErr, + "paymentError not set") + select { case err := <-paymentResult: - if err == nil { - t.Fatalf("expected error") - } + require.Equal(t, test.paymentErr, err) case <-time.After(stepTimeout): t.Fatalf("got no payment result") @@ -884,6 +896,8 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, // In this state we expect the original payment to // succeed. case paymentSuccess: + require.Nil(t, test.paymentErr) + select { case err := <-paymentResult: if err != nil { From a68155545c9a001d990dba35ae4139bfa6064ffe Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:42 +0200 Subject: [PATCH 07/13] routing/test: use half shard for single success case Update our single shard success case to use a route which splits the payment amount in half. This change still tests the case where reveal of the preimage counts as a success, even if we don't have the full amount. This change is made to cut down on potential races in this test case. While we are waiting for collectResultAsync to report a success, the payment lifecycle will continue trying to dispatch shards. In the case where we send 1/4 of the payment amount, we send 1 or 2 more shards, depending on how long collectAsync takes. Reducing this test to send 1/2 of the payment amount means that we will always only try one more shard before waiting for our shard. --- routing/payment_lifecycle_test.go | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 80c4c281..4fc5a015 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -190,6 +190,9 @@ func TestRouterPaymentStateMachine(t *testing.T) { t.Fatalf("unable to create route: %v", err) } + halfShard, err := createTestRoute(paymentAmt/2, testGraph.aliasMap) + require.NoError(t, err, "unable to create half route") + shard, err := createTestRoute(paymentAmt/4, testGraph.aliasMap) if err != nil { t.Fatalf("unable to create route: %v", err) @@ -501,10 +504,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { }, }, { - // An MP payment scenario where 3 of the shards fail. - // However the last shard settle, which means we get - // the preimage and should consider the overall payment - // a success. + // An MP payment scenario where one of the shards fails, + // but we still receive a single success shard. name: "MP one shard success", steps: []string{ @@ -518,30 +519,18 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerRegisterAttempt, sendToSwitchSuccess, - // shard 2 - routerRegisterAttempt, - sendToSwitchSuccess, - - // shard 3 - routerRegisterAttempt, - sendToSwitchSuccess, - - // 3 shards fail, and should be failed by the + // shard 0 fails, and should be failed by the // router. getPaymentResultTempFailure, - getPaymentResultTempFailure, - getPaymentResultTempFailure, - routerFailAttempt, - routerFailAttempt, routerFailAttempt, - // The fourth shard succeed against all odds, + // The second shard succeed against all odds, // making the overall payment succeed. getPaymentResultSuccess, routerSettleAttempt, paymentSuccess, }, - routes: []*route.Route{shard, shard, shard, shard}, + routes: []*route.Route{halfShard, halfShard}, }, { // An MP payment scenario a shard fail with a terminal From 125980afb7c8ef551831ebbf41a2f1b10f1935fe Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:43 +0200 Subject: [PATCH 08/13] routing/test: block on pathfinding in tests This commit adds a step to our payment lifecycle test to add control over when we find a path for our payment, This is required for testing race conditions around pathfinding completing and payment failures being reported. --- routing/mock_test.go | 17 ++++++++-- routing/payment_lifecycle_test.go | 52 ++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/routing/mock_test.go b/routing/mock_test.go index 2caf9336..30d5dedc 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -83,7 +83,8 @@ func (m *mockPaymentAttemptDispatcher) setPaymentResult( } type mockPaymentSessionSource struct { - routes []*route.Route + routes []*route.Route + routeRelease chan struct{} } var _ PaymentSessionSource = (*mockPaymentSessionSource)(nil) @@ -91,7 +92,10 @@ var _ PaymentSessionSource = (*mockPaymentSessionSource)(nil) func (m *mockPaymentSessionSource) NewPaymentSession( _ *LightningPayment) (PaymentSession, error) { - return &mockPaymentSession{m.routes}, nil + return &mockPaymentSession{ + routes: m.routes, + release: m.routeRelease, + }, nil } func (m *mockPaymentSessionSource) NewPaymentSessionForRoute( @@ -137,6 +141,11 @@ func (m *mockMissionControl) GetProbability(fromNode, toNode route.Vertex, type mockPaymentSession struct { routes []*route.Route + + // release is a channel that optionally blocks requesting a route + // from our mock payment channel. If this value is nil, we will just + // release the route automatically. + release chan struct{} } var _ PaymentSession = (*mockPaymentSession)(nil) @@ -144,6 +153,10 @@ var _ PaymentSession = (*mockPaymentSession)(nil) func (m *mockPaymentSession) RequestRoute(_, _ lnwire.MilliSatoshi, _, height uint32) (*route.Route, error) { + if m.release != nil { + m.release <- struct{}{} + } + if len(m.routes) == 0 { return nil, errNoPathFound } diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 4fc5a015..036dc8ff 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -89,6 +89,10 @@ const ( // to call the Fail method on the control tower. routerFailPayment = "Router:fail-payment" + // routeRelease is a test step where we unblock pathfinding and + // allow it to respond to our test with a route. + routeRelease = "PaymentSession:release" + // sendToSwitchSuccess is a step where we expect the router to // call send the payment attempt to the switch, and we will // respond with a non-error, indicating that the payment @@ -205,6 +209,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, getPaymentResultSuccess, @@ -220,6 +225,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -228,6 +234,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerFailAttempt, // The router should retry. + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -246,6 +253,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, // Make the first sent attempt fail. @@ -253,6 +261,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerFailAttempt, // The router should retry. + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -271,6 +280,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -278,6 +288,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { getPaymentResultTempFailure, routerFailAttempt, + routeRelease, + // Since there are no more routes to try, the // payment should fail. routerFailPayment, @@ -293,6 +305,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerFailPayment, paymentError, }, @@ -308,6 +321,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, // Manually resend the payment, the router @@ -350,6 +364,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -375,6 +390,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { steps: []string{ routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -390,6 +406,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { // Since we have no more routes to try, the // original payment should fail. + routeRelease, routerFailPayment, paymentError, @@ -397,6 +414,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { // allowed, since the payment has failed. resendPayment, routerInitPayment, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, getPaymentResultSuccess, @@ -418,18 +436,22 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerInitPayment, // shard 0 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 1 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 2 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 3 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -460,18 +482,22 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerInitPayment, // shard 0 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 1 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 2 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 3 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -481,8 +507,10 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerFailAttempt, routerFailAttempt, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -512,10 +540,12 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerInitPayment, // shard 0 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 1 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -524,6 +554,10 @@ func TestRouterPaymentStateMachine(t *testing.T) { getPaymentResultTempFailure, routerFailAttempt, + // We will try one more shard because we haven't + // sent the full payment amount. + routeRelease, + // The second shard succeed against all odds, // making the overall payment succeed. getPaymentResultSuccess, @@ -541,18 +575,22 @@ func TestRouterPaymentStateMachine(t *testing.T) { routerInitPayment, // shard 0 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 1 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 2 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, // shard 3 + routeRelease, routerRegisterAttempt, sendToSwitchSuccess, @@ -697,8 +735,12 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, PaymentHash: payHash, } + // Setup our payment session source to block on release of + // routes. + routeChan := make(chan struct{}) router.cfg.SessionSource = &mockPaymentSessionSource{ - routes: test.routes, + routes: test.routes, + routeRelease: routeChan, } router.cfg.MissionControl = &mockMissionControl{} @@ -729,6 +771,14 @@ func testPaymentLifecycle(t *testing.T, test paymentLifecycleTestCase, t.Fatalf("expected non-nil CreationInfo") } + case routeRelease: + select { + case <-routeChan: + + case <-time.After(stepTimeout): + t.Fatalf("no route requested") + } + // In this step we expect the router to make a call to // register a new attempt with the ControlTower. case routerRegisterAttempt: From 80451afe48337b44d43bd0bfac7ee52babd0f1a2 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:44 +0200 Subject: [PATCH 09/13] routing/test: add test to demonstrate stuck payment, single shard This commit adds a test which demonstrates that payments can get stuck if we receive a payment failure while we're pathfinding for another shard, then try to dispatch a shard after we've recorded a permanent failure. It also updates our mock to only consider payments with no in-flight htlcs as in-flight, to more closely represent our actual RegisterAttempt. --- routing/mock_test.go | 36 +++++++++++++++++++++++-------- routing/payment_lifecycle_test.go | 35 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/routing/mock_test.go b/routing/mock_test.go index 30d5dedc..cf828baf 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -302,20 +302,38 @@ func (m *mockControlTower) RegisterAttempt(phash lntypes.Hash, m.Lock() defer m.Unlock() - // Cannot register attempts for successful or failed payments. - if _, ok := m.successful[phash]; ok { - return channeldb.ErrPaymentAlreadySucceeded - } - - if _, ok := m.failed[phash]; ok { - return channeldb.ErrPaymentAlreadyFailed - } - + // Lookup payment. p, ok := m.payments[phash] if !ok { return channeldb.ErrPaymentNotInitiated } + var inFlight bool + for _, a := range p.attempts { + if a.Settle != nil { + continue + } + + if a.Failure != nil { + continue + } + + inFlight = true + } + + // Cannot register attempts for successful or failed payments. + _, settled := m.successful[phash] + _, failed := m.failed[phash] + + if settled && !inFlight { + return channeldb.ErrPaymentAlreadySucceeded + } + + if failed && !inFlight { + return channeldb.ErrPaymentAlreadyFailed + } + + // Add attempt to payment. p.attempts = append(p.attempts, channeldb.HTLCAttempt{ HTLCAttemptInfo: *a, }) diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 036dc8ff..e8aa8c51 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -615,6 +615,41 @@ func TestRouterPaymentStateMachine(t *testing.T) { }, paymentErr: channeldb.FailureReasonPaymentDetails, }, + { + // A MP payment scenario when our path finding returns + // after we've just received a terminal failure, and + // demonstrates a bug where the payment will return with + // and "unexpected" ErrPaymentAlreadyFailed rather than + // failing with a permanent error. This results in the + // payment getting stuck. + name: "MP path found after failure", + + steps: []string{ + routerInitPayment, + + // shard 0 + routeRelease, + routerRegisterAttempt, + sendToSwitchSuccess, + + // The first shard fail with a terminal error. + getPaymentResultTerminalFailure, + routerFailAttempt, + routerFailPayment, + + // shard 1 fails because we've had a terminal + // failure. + routeRelease, + routerRegisterAttempt, + + // Payment fails. + paymentError, + }, + routes: []*route.Route{ + shard, shard, + }, + paymentErr: channeldb.ErrPaymentAlreadyFailed, + }, } for _, test := range tests { From 12136a97a905ab57e9de4a8c6a1c5662af2513f4 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:45 +0200 Subject: [PATCH 10/13] routing/test: add test for stuck payment with in-flight htlcs Add an additional stuck-payment case, where our payment gets a terminal error while it has other htlcs in-flight, and a shard fails with ErrTerminalPayment. This payment also falls in our class of expected errors, but is not currently handled. The mock is updated accordingly, using the same ordering as in our real RegisterAttempt implementation. --- routing/mock_test.go | 4 +++ routing/payment_lifecycle_test.go | 50 +++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/routing/mock_test.go b/routing/mock_test.go index cf828baf..0d0b7a5b 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -333,6 +333,10 @@ func (m *mockControlTower) RegisterAttempt(phash lntypes.Hash, return channeldb.ErrPaymentAlreadyFailed } + if settled || failed { + return channeldb.ErrPaymentTerminal + } + // Add attempt to payment. p.attempts = append(p.attempts, channeldb.HTLCAttempt{ HTLCAttemptInfo: *a, diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index e8aa8c51..01383d21 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -650,6 +650,56 @@ func TestRouterPaymentStateMachine(t *testing.T) { }, paymentErr: channeldb.ErrPaymentAlreadyFailed, }, + { + // A MP payment scenario when our path finding returns + // after we've just received a terminal failure, and + // we have another shard still in flight. + name: "MP shard in flight after terminal", + + steps: []string{ + routerInitPayment, + + // shard 0 + routeRelease, + routerRegisterAttempt, + sendToSwitchSuccess, + + // shard 1 + routeRelease, + routerRegisterAttempt, + sendToSwitchSuccess, + + // shard 2 + routeRelease, + routerRegisterAttempt, + sendToSwitchSuccess, + + // We find a path for another shard. + routeRelease, + + // shard 0 fails with a terminal error. + getPaymentResultTerminalFailure, + routerFailAttempt, + routerFailPayment, + + // We try to register our final shard after + // processing a terminal failure. + routerRegisterAttempt, + + // Our in-flight shards fail. + getPaymentResultTempFailure, + getPaymentResultTempFailure, + routerFailAttempt, + routerFailAttempt, + + // Payment fails. + paymentError, + }, + routes: []*route.Route{ + shard, shard, shard, shard, + }, + paymentErr: channeldb.ErrPaymentTerminal, + }, } for _, test := range tests { From 58d95be4dd7e2b39e79f675bda4c14bd225fc477 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:45 +0200 Subject: [PATCH 11/13] multi: change RegisterAttempt error checking order Move our more generic terminal check forward so that we only need to handle a single class of expected errors. This change is mirrored in our mock, and our reproducing tests are updated to assert that this move catches both classes of errors we get. --- channeldb/payment_control.go | 13 +++++++------ channeldb/payment_control_test.go | 6 +----- routing/mock_test.go | 8 ++++---- routing/payment_lifecycle_test.go | 4 ++-- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/channeldb/payment_control.go b/channeldb/payment_control.go index 9ed5e639..c820778c 100644 --- a/channeldb/payment_control.go +++ b/channeldb/payment_control.go @@ -290,18 +290,19 @@ func (p *PaymentControl) RegisterAttempt(paymentHash lntypes.Hash, return err } - // Ensure the payment is in-flight. - if err := ensureInFlight(p); err != nil { - return err - } - // We cannot register a new attempt if the payment already has - // reached a terminal condition: + // reached a terminal condition. We check this before + // ensureInFlight because it is a more general check. settle, fail := p.TerminalInfo() if settle != nil || fail != nil { return ErrPaymentTerminal } + // Ensure the payment is in-flight. + if err := ensureInFlight(p); err != nil { + return err + } + // Make sure any existing shards match the new one with regards // to MPP options. mpp := attempt.Route.FinalHop().MPP diff --git a/channeldb/payment_control_test.go b/channeldb/payment_control_test.go index e13f9d72..5bf6a05d 100644 --- a/channeldb/payment_control_test.go +++ b/channeldb/payment_control_test.go @@ -1013,19 +1013,15 @@ func TestPaymentControlMultiShard(t *testing.T) { // up in the Succeeded state. If both failed the payment should // also be Failed at this poinnt. finalStatus := StatusFailed - expRegErr := ErrPaymentAlreadyFailed if test.settleFirst || test.settleLast { finalStatus = StatusSucceeded - expRegErr = ErrPaymentAlreadySucceeded } assertPaymentStatus(t, pControl, info.PaymentHash, finalStatus) // Finally assert we cannot register more attempts. _, err = pControl.RegisterAttempt(info.PaymentHash, &b) - if err != expRegErr { - t.Fatalf("expected error %v, got: %v", expRegErr, err) - } + require.Equal(t, ErrPaymentTerminal, err) } for _, test := range tests { diff --git a/routing/mock_test.go b/routing/mock_test.go index 0d0b7a5b..a477fb10 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -325,6 +325,10 @@ func (m *mockControlTower) RegisterAttempt(phash lntypes.Hash, _, settled := m.successful[phash] _, failed := m.failed[phash] + if settled || failed { + return channeldb.ErrPaymentTerminal + } + if settled && !inFlight { return channeldb.ErrPaymentAlreadySucceeded } @@ -333,10 +337,6 @@ func (m *mockControlTower) RegisterAttempt(phash lntypes.Hash, return channeldb.ErrPaymentAlreadyFailed } - if settled || failed { - return channeldb.ErrPaymentTerminal - } - // Add attempt to payment. p.attempts = append(p.attempts, channeldb.HTLCAttempt{ HTLCAttemptInfo: *a, diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index 01383d21..f9b6bbb6 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -619,7 +619,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { // A MP payment scenario when our path finding returns // after we've just received a terminal failure, and // demonstrates a bug where the payment will return with - // and "unexpected" ErrPaymentAlreadyFailed rather than + // and "unexpected" ErrPaymentTerminal rather than // failing with a permanent error. This results in the // payment getting stuck. name: "MP path found after failure", @@ -648,7 +648,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routes: []*route.Route{ shard, shard, }, - paymentErr: channeldb.ErrPaymentAlreadyFailed, + paymentErr: channeldb.ErrPaymentTerminal, }, { // A MP payment scenario when our path finding returns From a63640c488d226aebca03262efb05830971c10e3 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:39:46 +0200 Subject: [PATCH 12/13] routing: account for payment terminal errors If we have processed a terminal state while we're pathfinding for another shard, the payment loop should not error out on ErrPaymentTerminal. Instead, it would wait for our shards to complete then cleanly exit. --- routing/payment_lifecycle.go | 13 ++++++++++++- routing/payment_lifecycle_test.go | 10 ++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/routing/payment_lifecycle.go b/routing/payment_lifecycle.go index 0e61dec1..766d3bc7 100644 --- a/routing/payment_lifecycle.go +++ b/routing/payment_lifecycle.go @@ -247,7 +247,18 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { // We found a route to try, launch a new shard. attempt, outcome, err := shardHandler.launchShard(rt) - if err != nil { + switch { + // We may get a terminal error if we've processed a shard with + // a terminal state (settled or permanent failure), while we + // were pathfinding. We know we're in a terminal state here, + // so we can continue and wait for our last shards to return. + case err == channeldb.ErrPaymentTerminal: + log.Infof("Payment: %v in terminal state, abandoning "+ + "shard", p.paymentHash) + + continue + + case err != nil: return [32]byte{}, nil, err } diff --git a/routing/payment_lifecycle_test.go b/routing/payment_lifecycle_test.go index f9b6bbb6..4f928057 100644 --- a/routing/payment_lifecycle_test.go +++ b/routing/payment_lifecycle_test.go @@ -618,10 +618,8 @@ func TestRouterPaymentStateMachine(t *testing.T) { { // A MP payment scenario when our path finding returns // after we've just received a terminal failure, and - // demonstrates a bug where the payment will return with - // and "unexpected" ErrPaymentTerminal rather than - // failing with a permanent error. This results in the - // payment getting stuck. + // attempts to dispatch a new shard. Testing that we + // correctly abandon the shard and conclude the payment. name: "MP path found after failure", steps: []string{ @@ -648,7 +646,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routes: []*route.Route{ shard, shard, }, - paymentErr: channeldb.ErrPaymentTerminal, + paymentErr: channeldb.FailureReasonPaymentDetails, }, { // A MP payment scenario when our path finding returns @@ -698,7 +696,7 @@ func TestRouterPaymentStateMachine(t *testing.T) { routes: []*route.Route{ shard, shard, shard, shard, }, - paymentErr: channeldb.ErrPaymentTerminal, + paymentErr: channeldb.FailureReasonPaymentDetails, }, } From b43ddfdb1192811911d405af9202a33653020854 Mon Sep 17 00:00:00 2001 From: carla Date: Fri, 23 Apr 2021 08:51:07 +0200 Subject: [PATCH 13/13] routing: label payment lifecycle loop --- routing/payment_lifecycle.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/routing/payment_lifecycle.go b/routing/payment_lifecycle.go index 766d3bc7..be9ade0d 100644 --- a/routing/payment_lifecycle.go +++ b/routing/payment_lifecycle.go @@ -115,6 +115,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { // We'll continue until either our payment succeeds, or we encounter a // critical error during path finding. +lifecycle: for { // Start by quickly checking if there are any outcomes already // available to handle before we reevaluate our state. @@ -171,7 +172,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { if err := shardHandler.waitForShard(); err != nil { return [32]byte{}, nil, err } - continue + continue lifecycle } // Before we attempt any new shard, we'll check to see if @@ -195,7 +196,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { return [32]byte{}, nil, saveErr } - continue + continue lifecycle case <-p.router.quit: return [32]byte{}, nil, ErrRouterShuttingDown @@ -234,7 +235,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { return [32]byte{}, nil, saveErr } - continue + continue lifecycle } // We still have active shards, we'll wait for an @@ -242,7 +243,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { if err := shardHandler.waitForShard(); err != nil { return [32]byte{}, nil, err } - continue + continue lifecycle } // We found a route to try, launch a new shard. @@ -256,7 +257,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { log.Infof("Payment: %v in terminal state, abandoning "+ "shard", p.paymentHash) - continue + continue lifecycle case err != nil: return [32]byte{}, nil, err @@ -281,7 +282,7 @@ func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { // Error was handled successfully, continue to make a // new attempt. - continue + continue lifecycle } // Now that the shard was successfully sent, launch a go