From 22e21da370e46a5d25fa7086c63ead9137700ba3 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:51 +0200 Subject: [PATCH 01/18] htlcswitch tests: add missing OnChannelFailure to test link configs --- htlcswitch/link_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/htlcswitch/link_test.go b/htlcswitch/link_test.go index c518e78c..76fd4c50 100644 --- a/htlcswitch/link_test.go +++ b/htlcswitch/link_test.go @@ -1489,7 +1489,8 @@ func newSingleLinkTestHarness(chanAmt, chanReserve btcutil.Amount) ( }, FetchLastChannelUpdate: mockGetChanUpdateMessage, PreimageCache: pCache, - OnChannelFailure: func(lnwire.ChannelID, lnwire.ShortChannelID, LinkFailureError) { + OnChannelFailure: func(lnwire.ChannelID, + lnwire.ShortChannelID, LinkFailureError) { }, UpdateContractSignals: func(*contractcourt.ContractSignals) error { return nil }, @@ -3879,6 +3880,9 @@ func restartLink(aliceChannel *lnwallet.LightningChannel, aliceSwitch *Switch, }, FetchLastChannelUpdate: mockGetChanUpdateMessage, PreimageCache: pCache, + OnChannelFailure: func(lnwire.ChannelID, + lnwire.ShortChannelID, LinkFailureError) { + }, UpdateContractSignals: func(*contractcourt.ContractSignals) error { return nil }, From f8751350bc3af4a4479c0e0329f9094bf1e9783a Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:51 +0200 Subject: [PATCH 02/18] lnd_test: set --nolisten for node being cheated In this commit we modify the integration tests slightly, by setting the parties that get breached during the breach tests to --nolisten. We do this to ensure that once the data protection logic is in place, the nodes won't automatically connect, detect the state desync and recover before we are able to trigger the breach. 
--- lnd_test.go | 157 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 103 insertions(+), 54 deletions(-) diff --git a/lnd_test.go b/lnd_test.go index dae99f56..d1f76b5b 100644 --- a/lnd_test.go +++ b/lnd_test.go @@ -5084,7 +5084,7 @@ func testGarbageCollectLinkNodes(net *lntest.NetworkHarness, t *harnessTest) { closeChannelAndAssert(ctxt, t, net, net.Alice, persistentChanPoint, false) } -// testRevokedCloseRetribution tests that Alice is able carry out +// testRevokedCloseRetribution tests that Carol is able carry out // retribution in the event that she fails immediately after detecting Bob's // breach txn in the mempool. func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { @@ -5096,16 +5096,41 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { numInvoices = 6 ) - // In order to test Alice's response to an uncooperative channel + // Carol will be the breached party. We set --nolisten to ensure Bob + // won't be able to connect to her and trigger the channel data + // protection logic automatically. + carol, err := net.NewNode( + "Carol", + []string{"--debughtlc", "--hodl.exit-settle", "--nolisten"}, + ) + if err != nil { + t.Fatalf("unable to create new carol node: %v", err) + } + defer shutdownAndAssert(net, t, carol) + + // We must let Bob communicate with Carol before they are able to open + // channel, so we connect Bob and Carol, + if err := net.ConnectNodes(ctxb, carol, net.Bob); err != nil { + t.Fatalf("unable to connect dave to carol: %v", err) + } + + // Before we make a channel, we'll load up Carol with some coins sent + // directly from the miner. + err = net.SendCoins(ctxb, btcutil.SatoshiPerBitcoin, carol) + if err != nil { + t.Fatalf("unable to send coins to carol: %v", err) + } + + // In order to test Carol's response to an uncooperative channel // closure by Bob, we'll first open up a channel between them with a // 0.5 BTC value. 
ctxt, _ := context.WithTimeout(ctxb, timeout) chanPoint := openChannelAndAssert( - ctxt, t, net, net.Alice, net.Bob, chanAmt, 0, false, + ctxt, t, net, carol, net.Bob, chanAmt, 0, false, ) // With the channel open, we'll create a few invoices for Bob that - // Alice will pay to in order to advance the state of the channel. + // Carol will pay to in order to advance the state of the channel. bobPayReqs := make([]string, numInvoices) for i := 0; i < numInvoices; i++ { preimage := bytes.Repeat([]byte{byte(255 - i)}, 32) @@ -5138,18 +5163,18 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { return bobChannelInfo.Channels[0], nil } - // Wait for Alice to receive the channel edge from the funding manager. + // Wait for Carol to receive the channel edge from the funding manager. ctxt, _ = context.WithTimeout(ctxb, timeout) - err := net.Alice.WaitForNetworkChannelOpen(ctxt, chanPoint) + err = carol.WaitForNetworkChannelOpen(ctxt, chanPoint) if err != nil { - t.Fatalf("alice didn't see the alice->bob channel before "+ + t.Fatalf("carol didn't see the carol->bob channel before "+ "timeout: %v", err) } - // Send payments from Alice to Bob using 3 of Bob's payment hashes + // Send payments from Carol to Bob using 3 of Bob's payment hashes // generated above. ctxt, _ = context.WithTimeout(ctxb, timeout) - err = completePaymentRequests(ctxt, net.Alice, bobPayReqs[:numInvoices/2], + err = completePaymentRequests(ctxt, carol, bobPayReqs[:numInvoices/2], true) if err != nil { t.Fatalf("unable to send payments: %v", err) @@ -5199,10 +5224,10 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { t.Fatalf("unable to copy database files: %v", err) } - // Finally, send payments from Alice to Bob, consuming Bob's remaining + // Finally, send payments from Carol to Bob, consuming Bob's remaining // payment hashes. 
ctxt, _ = context.WithTimeout(ctxb, timeout) - err = completePaymentRequests(ctxt, net.Alice, bobPayReqs[numInvoices/2:], + err = completePaymentRequests(ctxt, carol, bobPayReqs[numInvoices/2:], true) if err != nil { t.Fatalf("unable to send payments: %v", err) @@ -5236,7 +5261,7 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { // Now force Bob to execute a *force* channel closure by unilaterally // broadcasting his current channel state. This is actually the // commitment transaction of a prior *revoked* state, so he'll soon - // feel the wrath of Alice's retribution. + // feel the wrath of Carol's retribution. var closeUpdates lnrpc.Lightning_CloseChannelClient force := true err = lntest.WaitPredicate(func() bool { @@ -5253,19 +5278,19 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { } // Wait for Bob's breach transaction to show up in the mempool to ensure - // that Alice's node has started waiting for confirmations. + // that Carol's node has started waiting for confirmations. _, err = waitForTxInMempool(net.Miner.Node, 5*time.Second) if err != nil { t.Fatalf("unable to find Bob's breach tx in mempool: %v", err) } - // Here, Alice sees Bob's breach transaction in the mempool, but is waiting - // for it to confirm before continuing her retribution. We restart Alice to + // Here, Carol sees Bob's breach transaction in the mempool, but is waiting + // for it to confirm before continuing her retribution. We restart Carol to // ensure that she is persisting her retribution state and continues // watching for the breach transaction to confirm even after her node // restarts. 
- if err := net.RestartNode(net.Alice, nil); err != nil { - t.Fatalf("unable to restart Alice's node: %v", err) + if err := net.RestartNode(carol, nil); err != nil { + t.Fatalf("unable to restart Carol's node: %v", err) } // Finally, generate a single block, wait for the final close status @@ -5279,12 +5304,12 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { } assertTxInBlock(t, block, breachTXID) - // Query the mempool for Alice's justice transaction, this should be + // Query the mempool for Carol's justice transaction, this should be // broadcast as Bob's contract breaching transaction gets confirmed // above. justiceTXID, err := waitForTxInMempool(net.Miner.Node, 5*time.Second) if err != nil { - t.Fatalf("unable to find Alice's justice tx in mempool: %v", err) + t.Fatalf("unable to find Carol's justice tx in mempool: %v", err) } time.Sleep(100 * time.Millisecond) @@ -5302,16 +5327,16 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { } } - // We restart Alice here to ensure that she persists her retribution state + // We restart Carol here to ensure that she persists her retribution state // and successfully continues exacting retribution after restarting. At - // this point, Alice has broadcast the justice transaction, but it hasn't - // been confirmed yet; when Alice restarts, she should start waiting for + // this point, Carol has broadcast the justice transaction, but it hasn't + // been confirmed yet; when Carol restarts, she should start waiting for // the justice transaction to confirm again. 
- if err := net.RestartNode(net.Alice, nil); err != nil { - t.Fatalf("unable to restart Alice's node: %v", err) + if err := net.RestartNode(carol, nil); err != nil { + t.Fatalf("unable to restart Carol's node: %v", err) } - // Now mine a block, this transaction should include Alice's justice + // Now mine a block, this transaction should include Carol's justice // transaction which was just accepted into the mempool. block = mineBlocks(t, net, 1)[0] @@ -5325,10 +5350,10 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) { t.Fatalf("justice tx wasn't mined") } - assertNodeNumChannels(t, ctxb, net.Alice, 0) + assertNodeNumChannels(t, ctxb, carol, 0) } -// testRevokedCloseRetributionZeroValueRemoteOutput tests that Alice is able +// testRevokedCloseRetributionZeroValueRemoteOutput tests that Dave is able // carry out retribution in the event that she fails in state where the remote // commitment output has zero-value. func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness, @@ -5350,22 +5375,41 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness } defer shutdownAndAssert(net, t, carol) - // We must let Alice have an open channel before she can send a node + // Dave will be the breached party. We set --nolisten to ensure Carol + // won't be able to connect to him and trigger the channel data + // protection logic automatically. 
+ dave, err := net.NewNode( + "Dave", + []string{"--debughtlc", "--hodl.exit-settle", "--nolisten"}, + ) + if err != nil { + t.Fatalf("unable to create new node: %v", err) + } + defer shutdownAndAssert(net, t, dave) + + // We must let Dave have an open channel before she can send a node // announcement, so we open a channel with Carol, - if err := net.ConnectNodes(ctxb, net.Alice, carol); err != nil { - t.Fatalf("unable to connect alice to carol: %v", err) + if err := net.ConnectNodes(ctxb, dave, carol); err != nil { + t.Fatalf("unable to connect dave to carol: %v", err) } - // In order to test Alice's response to an uncooperative channel + // Before we make a channel, we'll load up Dave with some coins sent + // directly from the miner. + err = net.SendCoins(ctxb, btcutil.SatoshiPerBitcoin, dave) + if err != nil { + t.Fatalf("unable to send coins to dave: %v", err) + } + + // In order to test Dave's response to an uncooperative channel // closure by Carol, we'll first open up a channel between them with a // 0.5 BTC value. ctxt, _ := context.WithTimeout(ctxb, timeout) chanPoint := openChannelAndAssert( - ctxt, t, net, net.Alice, carol, chanAmt, 0, false, + ctxt, t, net, dave, carol, chanAmt, 0, false, ) // With the channel open, we'll create a few invoices for Carol that - // Alice will pay to in order to advance the state of the channel. + // Dave will pay to in order to advance the state of the channel. carolPayReqs := make([]string, numInvoices) for i := 0; i < numInvoices; i++ { preimage := bytes.Repeat([]byte{byte(192 - i)}, 32) @@ -5398,11 +5442,11 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness return carolChannelInfo.Channels[0], nil } - // Wait for Alice to receive the channel edge from the funding manager. + // Wait for Dave to receive the channel edge from the funding manager. 
ctxt, _ = context.WithTimeout(ctxb, timeout) - err = net.Alice.WaitForNetworkChannelOpen(ctxt, chanPoint) + err = dave.WaitForNetworkChannelOpen(ctxt, chanPoint) if err != nil { - t.Fatalf("alice didn't see the alice->carol channel before "+ + t.Fatalf("dave didn't see the dave->carol channel before "+ "timeout: %v", err) } @@ -5438,9 +5482,9 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness t.Fatalf("unable to copy database files: %v", err) } - // Finally, send payments from Alice to Carol, consuming Carol's remaining + // Finally, send payments from Dave to Carol, consuming Carol's remaining // payment hashes. - err = completePaymentRequests(ctxb, net.Alice, carolPayReqs, false) + err = completePaymentRequests(ctxb, dave, carolPayReqs, false) if err != nil { t.Fatalf("unable to send payments: %v", err) } @@ -5473,7 +5517,7 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness // Now force Carol to execute a *force* channel closure by unilaterally // broadcasting his current channel state. This is actually the // commitment transaction of a prior *revoked* state, so he'll soon - // feel the wrath of Alice's retribution. + // feel the wrath of Dave's retribution. var ( closeUpdates lnrpc.Lightning_CloseChannelClient closeTxId *chainhash.Hash @@ -5507,11 +5551,11 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness // block. block := mineBlocks(t, net, 1)[0] - // Here, Alice receives a confirmation of Carol's breach transaction. - // We restart Alice to ensure that she is persisting her retribution + // Here, Dave receives a confirmation of Carol's breach transaction. + // We restart Dave to ensure that she is persisting her retribution // state and continues exacting justice after her node restarts. 
- if err := net.RestartNode(net.Alice, nil); err != nil { - t.Fatalf("unable to stop Alice's node: %v", err) + if err := net.RestartNode(dave, nil); err != nil { + t.Fatalf("unable to stop Dave's node: %v", err) } breachTXID, err := net.WaitForChannelClose(ctxb, closeUpdates) @@ -5520,12 +5564,12 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness } assertTxInBlock(t, block, breachTXID) - // Query the mempool for Alice's justice transaction, this should be + // Query the mempool for Dave's justice transaction, this should be // broadcast as Carol's contract breaching transaction gets confirmed // above. justiceTXID, err := waitForTxInMempool(net.Miner.Node, 15*time.Second) if err != nil { - t.Fatalf("unable to find Alice's justice tx in mempool: %v", + t.Fatalf("unable to find Dave's justice tx in mempool: %v", err) } time.Sleep(100 * time.Millisecond) @@ -5544,16 +5588,16 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness } } - // We restart Alice here to ensure that she persists her retribution state + // We restart Dave here to ensure that he persists her retribution state // and successfully continues exacting retribution after restarting. At - // this point, Alice has broadcast the justice transaction, but it hasn't - // been confirmed yet; when Alice restarts, she should start waiting for + // this point, Dave has broadcast the justice transaction, but it hasn't + // been confirmed yet; when Dave restarts, she should start waiting for // the justice transaction to confirm again. - if err := net.RestartNode(net.Alice, nil); err != nil { - t.Fatalf("unable to restart Alice's node: %v", err) + if err := net.RestartNode(dave, nil); err != nil { + t.Fatalf("unable to restart Dave's node: %v", err) } - // Now mine a block, this transaction should include Alice's justice + // Now mine a block, this transaction should include Dave's justice // transaction which was just accepted into the mempool. 
block = mineBlocks(t, net, 1)[0] @@ -5567,7 +5611,7 @@ func testRevokedCloseRetributionZeroValueRemoteOutput(net *lntest.NetworkHarness t.Fatalf("justice tx wasn't mined") } - assertNodeNumChannels(t, ctxb, net.Alice, 0) + assertNodeNumChannels(t, ctxb, dave, 0) } // testRevokedCloseRetributionRemoteHodl tests that Dave properly responds to a @@ -5596,8 +5640,13 @@ func testRevokedCloseRetributionRemoteHodl(net *lntest.NetworkHarness, // We'll also create a new node Dave, who will have a channel with // Carol, and also use similar settings so we can broadcast a commit - // with active HTLCs. - dave, err := net.NewNode("Dave", []string{"--debughtlc", "--hodl.exit-settle"}) + // with active HTLCs. Dave will be the breached party. We set + // --nolisten to ensure Carol won't be able to connect to him and + // trigger the channel data protection logic automatically. + dave, err := net.NewNode( + "Dave", + []string{"--debughtlc", "--hodl.exit-settle", "--nolisten"}, + ) if err != nil { t.Fatalf("unable to create new dave node: %v", err) } From 06ceba429f3377d3bc6eadeab137d096603e0909 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 03/18] lnwallet/channel: make NewUnilateralCloseSummary take commitPoint --- lnwallet/channel.go | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index 4f41166e..657eb03a 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -4850,24 +4850,16 @@ type UnilateralCloseSummary struct { // NewUnilateralCloseSummary creates a new summary that provides the caller // with all the information required to claim all funds on chain in the event -// that the remote party broadcasts their commitment. If the -// remotePendingCommit value is set to true, then we'll use the next (second) -// unrevoked commitment point to construct the summary. 
Otherwise, we assume -// that the remote party broadcast the lower of their two possible commits. +// that the remote party broadcasts their commitment. The commitPoint argument +// should be set to the per_commitment_point corresponding to the spending +// commitment. func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, pCache PreimageCache, commitSpend *chainntnfs.SpendDetail, remoteCommit channeldb.ChannelCommitment, - remotePendingCommit bool) (*UnilateralCloseSummary, error) { + commitPoint *btcec.PublicKey) (*UnilateralCloseSummary, error) { // First, we'll generate the commitment point and the revocation point - // so we can re-construct the HTLC state and also our payment key. If - // this is the pending remote commitment, then we'll use the second - // unrevoked commit point in order to properly reconstruct the scripts - // we need to locate. - commitPoint := chanState.RemoteCurrentRevocation - if remotePendingCommit { - commitPoint = chanState.RemoteNextRevocation - } + // so we can re-construct the HTLC state and also our payment key. keyRing := deriveCommitmentKeys( commitPoint, false, &chanState.LocalChanCfg, &chanState.RemoteChanCfg, From d9e9b6197c04098c92f01c35cbd35a535bf141c5 Mon Sep 17 00:00:00 2001 From: "Johan T. 
Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 04/18] lnwallet/channel test: take commitPoint in NewUnilateralCloseSummary --- lnwallet/channel_test.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lnwallet/channel_test.go b/lnwallet/channel_test.go index 7d9af7d6..0370049e 100644 --- a/lnwallet/channel_test.go +++ b/lnwallet/channel_test.go @@ -4397,8 +4397,10 @@ func TestChannelUnilateralCloseHtlcResolution(t *testing.T) { SpenderTxHash: &commitTxHash, } aliceCloseSummary, err := NewUnilateralCloseSummary( - aliceChannel.channelState, aliceChannel.Signer, aliceChannel.pCache, - spendDetail, aliceChannel.channelState.RemoteCommitment, false, + aliceChannel.channelState, aliceChannel.Signer, + aliceChannel.pCache, spendDetail, + aliceChannel.channelState.RemoteCommitment, + aliceChannel.channelState.RemoteCurrentRevocation, ) if err != nil { t.Fatalf("unable to create alice close summary: %v", err) @@ -4545,8 +4547,10 @@ func TestChannelUnilateralClosePendingCommit(t *testing.T) { // using this commitment, but with the wrong state, we should find that // our output wasn't picked up. 
aliceWrongCloseSummary, err := NewUnilateralCloseSummary( - aliceChannel.channelState, aliceChannel.Signer, aliceChannel.pCache, - spendDetail, aliceChannel.channelState.RemoteCommitment, false, + aliceChannel.channelState, aliceChannel.Signer, + aliceChannel.pCache, spendDetail, + aliceChannel.channelState.RemoteCommitment, + aliceChannel.channelState.RemoteCurrentRevocation, ) if err != nil { t.Fatalf("unable to create alice close summary: %v", err) @@ -4564,8 +4568,10 @@ func TestChannelUnilateralClosePendingCommit(t *testing.T) { t.Fatalf("unable to fetch remote chain tip: %v", err) } aliceCloseSummary, err := NewUnilateralCloseSummary( - aliceChannel.channelState, aliceChannel.Signer, aliceChannel.pCache, - spendDetail, aliceRemoteChainTip.Commitment, true, + aliceChannel.channelState, aliceChannel.Signer, + aliceChannel.pCache, spendDetail, + aliceRemoteChainTip.Commitment, + aliceChannel.channelState.RemoteNextRevocation, ) if err != nil { t.Fatalf("unable to create alice close summary: %v", err) From 2626bba105ccf01d2913b3410b2c2c5bf09cc04a Mon Sep 17 00:00:00 2001 From: "Johan T. 
Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 05/18] contractcourt/chain_watcher: use commitPoint directly instead of isPendingCommit --- contractcourt/chain_watcher.go | 35 +++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/contractcourt/chain_watcher.go b/contractcourt/chain_watcher.go index 7cda8f85..f0fdb830 100644 --- a/contractcourt/chain_watcher.go +++ b/contractcourt/chain_watcher.go @@ -5,14 +5,15 @@ import ( "sync" "sync/atomic" - "github.com/davecgh/go-spew/spew" - "github.com/lightningnetwork/lnd/chainntnfs" - "github.com/lightningnetwork/lnd/channeldb" - "github.com/lightningnetwork/lnd/lnwallet" + "github.com/btcsuite/btcd/btcec" "github.com/btcsuite/btcd/chaincfg" "github.com/btcsuite/btcd/txscript" "github.com/btcsuite/btcd/wire" "github.com/btcsuite/btcutil" + "github.com/davecgh/go-spew/spew" + "github.com/lightningnetwork/lnd/chainntnfs" + "github.com/lightningnetwork/lnd/channeldb" + "github.com/lightningnetwork/lnd/lnwallet" ) // LocalUnilateralCloseInfo encapsulates all the informnation we need to act @@ -343,7 +344,8 @@ func (c *chainWatcher) closeObserver(spendNtfn *chainntnfs.SpendEvent) { // as necessary. case broadcastStateNum == remoteStateNum: err := c.dispatchRemoteForceClose( - commitSpend, *remoteCommit, false, + commitSpend, *remoteCommit, + c.cfg.chanState.RemoteCurrentRevocation, ) if err != nil { log.Errorf("unable to handle remote "+ @@ -362,7 +364,7 @@ func (c *chainWatcher) closeObserver(spendNtfn *chainntnfs.SpendEvent) { err := c.dispatchRemoteForceClose( commitSpend, remoteChainTip.Commitment, - true, + c.cfg.chanState.RemoteNextRevocation, ) if err != nil { log.Errorf("unable to handle remote "+ @@ -557,12 +559,19 @@ func (c *chainWatcher) dispatchLocalForceClose( // the remote party. This function will prepare a UnilateralCloseSummary which // will then be sent to any subscribers allowing them to resolve all our funds // in the channel on chain. 
Once this close summary is prepared, all registered -// subscribers will receive a notification of this event. The -// isRemotePendingCommit argument should be set to true if the remote node -// broadcast their pending commitment (w/o revoking their current settled -// commitment). -func (c *chainWatcher) dispatchRemoteForceClose(commitSpend *chainntnfs.SpendDetail, - remoteCommit channeldb.ChannelCommitment, isRemotePendingCommit bool) error { +// subscribers will receive a notification of this event. The commitPoint +// argument should be set to the per_commitment_point corresponding to the +// spending commitment. +// +// NOTE: The remoteCommit argument should be set to the stored commitment for +// this particular state. If we don't have the commitment stored (should only +// happen in case we have lost state) it should be set to an empty struct, in +// which case we will attempt to sweep the non-HTLC output using the passed +// commitPoint. +func (c *chainWatcher) dispatchRemoteForceClose( + commitSpend *chainntnfs.SpendDetail, + remoteCommit channeldb.ChannelCommitment, + commitPoint *btcec.PublicKey) error { log.Infof("Unilateral close of ChannelPoint(%v) "+ "detected", c.cfg.chanState.FundingOutpoint) @@ -572,7 +581,7 @@ func (c *chainWatcher) dispatchRemoteForceClose(commitSpend *chainntnfs.SpendDet // channel on-chain. uniClose, err := lnwallet.NewUnilateralCloseSummary( c.cfg.chanState, c.cfg.signer, c.cfg.pCache, commitSpend, - remoteCommit, isRemotePendingCommit, + remoteCommit, commitPoint, ) if err != nil { return err From eed052eba57f30f677913171db844f9194bd6dfd Mon Sep 17 00:00:00 2001 From: "Johan T. 
Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 06/18] lnwallet/channel: extract local balance from spend instead of stored commit --- lnwallet/channel.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index 657eb03a..f8a182bb 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -4853,6 +4853,12 @@ type UnilateralCloseSummary struct { // that the remote party broadcasts their commitment. The commitPoint argument // should be set to the per_commitment_point corresponding to the spending // commitment. +// +// NOTE: The remoteCommit argument should be set to the stored commitment for +// this particular state. If we don't have the commitment stored (should only +// happen in case we have lost state) it should be set to an empty struct, in +// which case we will attempt to sweep the non-HTLC output using the passed +// commitPoint. func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, pCache PreimageCache, commitSpend *chainntnfs.SpendDetail, remoteCommit channeldb.ChannelCommitment, @@ -4885,13 +4891,19 @@ func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, if err != nil { return nil, fmt.Errorf("unable to create self commit script: %v", err) } - var selfPoint *wire.OutPoint + + var ( + selfPoint *wire.OutPoint + localBalance int64 + ) + for outputIndex, txOut := range commitTxBroadcast.TxOut { if bytes.Equal(txOut.PkScript, selfP2WKH) { selfPoint = &wire.OutPoint{ Hash: *commitSpend.SpenderTxHash, Index: uint32(outputIndex), } + localBalance = txOut.Value break } } @@ -4902,7 +4914,6 @@ func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, var commitResolution *CommitOutputResolution if selfPoint != nil { localPayBase := chanState.LocalChanCfg.PaymentBasePoint - localBalance := remoteCommit.LocalBalance.ToSatoshis() commitResolution = &CommitOutputResolution{ SelfOutPoint: 
*selfPoint, SelfOutputSignDesc: SignDescriptor{ @@ -4910,7 +4921,7 @@ func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, SingleTweak: keyRing.LocalCommitKeyTweak, WitnessScript: selfP2WKH, Output: &wire.TxOut{ - Value: int64(localBalance), + Value: localBalance, PkScript: selfP2WKH, }, HashType: txscript.SigHashAll, @@ -4919,7 +4930,6 @@ func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, } } - localBalance := remoteCommit.LocalBalance.ToSatoshis() closeSummary := channeldb.ChannelCloseSummary{ ChanPoint: chanState.FundingOutpoint, ChainHash: chanState.ChainHash, @@ -4927,7 +4937,7 @@ func NewUnilateralCloseSummary(chanState *channeldb.OpenChannel, signer Signer, CloseHeight: uint32(commitSpend.SpendingHeight), RemotePub: chanState.IdentityPub, Capacity: chanState.Capacity, - SettledBalance: localBalance, + SettledBalance: btcutil.Amount(localBalance), CloseType: channeldb.RemoteForceClose, IsPending: true, } From ea6aca26a5cbea5264388ba13f0f6b0e694638aa Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Tue, 31 Jul 2018 11:31:28 +0200 Subject: [PATCH 07/18] channeldb: make chanStatus unexported Since the ChanStatus field can be changed from concurrent callers, we make it unexported and add the method ChanStatus() for safe retrieval. 
--- channeldb/channel.go | 28 ++++++++++++++++++---------- channeldb/db.go | 2 +- peer.go | 2 +- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/channeldb/channel.go b/channeldb/channel.go index bc954761..7cdfb73e 100644 --- a/channeldb/channel.go +++ b/channeldb/channel.go @@ -8,14 +8,14 @@ import ( "net" "sync" - "github.com/coreos/bbolt" - "github.com/lightningnetwork/lnd/keychain" - "github.com/lightningnetwork/lnd/lnwire" - "github.com/lightningnetwork/lnd/shachain" "github.com/btcsuite/btcd/btcec" "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" "github.com/btcsuite/btcutil" + "github.com/coreos/bbolt" + "github.com/lightningnetwork/lnd/keychain" + "github.com/lightningnetwork/lnd/lnwire" + "github.com/lightningnetwork/lnd/shachain" ) var ( @@ -354,9 +354,9 @@ type OpenChannel struct { // negotiate fees, or close the channel. IsInitiator bool - // ChanStatus is the current status of this channel. If it is not in + // chanStatus is the current status of this channel. If it is not in // the state Default, it should not be used for forwarding payments. - ChanStatus ChannelStatus + chanStatus ChannelStatus // FundingBroadcastHeight is the height in which the funding // transaction was broadcast. This value can be used by higher level @@ -468,6 +468,14 @@ func (c *OpenChannel) ShortChanID() lnwire.ShortChannelID { return c.ShortChannelID } +// ChanStatus returns the current ChannelStatus of this channel. +func (c *OpenChannel) ChanStatus() ChannelStatus { + c.RLock() + defer c.RUnlock() + + return c.chanStatus +} + // RefreshShortChanID updates the in-memory short channel ID using the latest // value observed on disk. 
func (c *OpenChannel) RefreshShortChanID() error { @@ -705,7 +713,7 @@ func (c *OpenChannel) putChanStatus(status ChannelStatus) error { return err } - channel.ChanStatus = status + channel.chanStatus = status return putOpenChannel(chanBucket, channel) }); err != nil { @@ -713,7 +721,7 @@ func (c *OpenChannel) putChanStatus(status ChannelStatus) error { } // Update the in-memory representation to keep it in sync with the DB. - c.ChanStatus = status + c.chanStatus = status return nil } @@ -2067,7 +2075,7 @@ func putChanInfo(chanBucket *bolt.Bucket, channel *OpenChannel) error { if err := WriteElements(&w, channel.ChanType, channel.ChainHash, channel.FundingOutpoint, channel.ShortChannelID, channel.IsPending, channel.IsInitiator, - channel.ChanStatus, channel.FundingBroadcastHeight, + channel.chanStatus, channel.FundingBroadcastHeight, channel.NumConfsRequired, channel.ChannelFlags, channel.IdentityPub, channel.Capacity, channel.TotalMSatSent, channel.TotalMSatReceived, @@ -2177,7 +2185,7 @@ func fetchChanInfo(chanBucket *bolt.Bucket, channel *OpenChannel) error { if err := ReadElements(r, &channel.ChanType, &channel.ChainHash, &channel.FundingOutpoint, &channel.ShortChannelID, &channel.IsPending, &channel.IsInitiator, - &channel.ChanStatus, &channel.FundingBroadcastHeight, + &channel.chanStatus, &channel.FundingBroadcastHeight, &channel.NumConfsRequired, &channel.ChannelFlags, &channel.IdentityPub, &channel.Capacity, &channel.TotalMSatSent, &channel.TotalMSatReceived, diff --git a/channeldb/db.go b/channeldb/db.go index 4d6958a3..f3b6ede1 100644 --- a/channeldb/db.go +++ b/channeldb/db.go @@ -456,7 +456,7 @@ func fetchChannels(d *DB, pending, waitingClose bool) ([]*OpenChannel, error) { // than Default, then it means it is // waiting to be closed. 
channelWaitingClose := - channel.ChanStatus != Default + channel.ChanStatus() != Default // Only include it if we requested // channels with the same waitingClose diff --git a/peer.go b/peer.go index 822425b3..f97a1c52 100644 --- a/peer.go +++ b/peer.go @@ -333,7 +333,7 @@ func (p *peer) loadActiveChannels(chans []*channeldb.OpenChannel) error { // Skip adding any permanently irreconcilable channels to the // htlcswitch. - if dbChan.ChanStatus != channeldb.Default { + if dbChan.ChanStatus() != channeldb.Default { peerLog.Warnf("ChannelPoint(%v) has status %v, won't "+ "start.", chanPoint, dbChan.ChanStatus) lnChan.Stop() From 6cdf0e2d6eda6de7a9a8cded76ccac221bdc0298 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 08/18] channeldb/channel: methods for marking borked+dataloss commitPoint in db --- channeldb/channel.go | 106 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 4 deletions(-) diff --git a/channeldb/channel.go b/channeldb/channel.go index 7cdfb73e..4810b5e6 100644 --- a/channeldb/channel.go +++ b/channeldb/channel.go @@ -51,6 +51,10 @@ var ( // preimage producer and their preimage store. revocationStateKey = []byte("revocation-state-key") + // dataLossCommitPointKey stores the commitment point received from the + // remote peer during a channel sync in case we have lost channel state. + dataLossCommitPointKey = []byte("data-loss-commit-point-key") + // commitDiffKey stores the current pending commitment state we've // extended to the remote party (if any). Each time we propose a new // state, we store the information necessary to reconstruct this state @@ -97,6 +101,10 @@ var ( // decoded because the byte slice is of an invalid length. ErrInvalidCircuitKeyLen = fmt.Errorf( "length of serialized circuit key must be 16 bytes") + + // ErrNoCommitPoint is returned when no data loss commit point is found + // in the database. 
+ ErrNoCommitPoint = fmt.Errorf("no commit point found") ) // ChannelType is an enum-like type that describes one of several possible @@ -285,8 +293,8 @@ type ChannelCommitment struct { // * lets just walk through } -// ChannelStatus is used to indicate whether an OpenChannel is in the default -// usable state, or a state where it shouldn't be used. +// ChannelStatus is a bit vector used to indicate whether an OpenChannel is in +// the default usable state, or a state where it shouldn't be used. type ChannelStatus uint8 var ( @@ -300,7 +308,14 @@ var ( // CommitmentBroadcasted indicates that a commitment for this channel // has been broadcasted. - CommitmentBroadcasted ChannelStatus = 2 + CommitmentBroadcasted ChannelStatus = 1 << 1 + + // LocalDataLoss indicates that we have lost channel state for this + // channel, and broadcasting our latest commitment might be considered + // a breach. + // TODO(halseth): actually enforce that we are not force closing such a + // channel. + LocalDataLoss ChannelStatus = 1 << 2 ) // String returns a human-readable representation of the ChannelStatus. @@ -312,8 +327,10 @@ func (c ChannelStatus) String() string { return "Borked" case CommitmentBroadcasted: return "CommitmentBroadcasted" + case LocalDataLoss: + return "LocalDataLoss" default: - return "Unknown" + return fmt.Sprintf("Unknown(%08b)", c) } } @@ -679,6 +696,85 @@ func (c *OpenChannel) MarkAsOpen(openLoc lnwire.ShortChannelID) error { return nil } +// MarkDataLoss sets the channel status to LocalDataLoss and stores the +// passed commitPoint for use to retrieve funds in case the remote force closes +// the channel.
+func (c *OpenChannel) MarkDataLoss(commitPoint *btcec.PublicKey) error { + c.Lock() + defer c.Unlock() + + var status ChannelStatus + if err := c.Db.Update(func(tx *bolt.Tx) error { + chanBucket, err := updateChanBucket( + tx, c.IdentityPub, &c.FundingOutpoint, c.ChainHash, + ) + if err != nil { + return err + } + + channel, err := fetchOpenChannel(chanBucket, &c.FundingOutpoint) + if err != nil { + return err + } + + // Add status LocalDataLoss to the existing bitvector found in + // the DB. + status = channel.chanStatus | LocalDataLoss + channel.chanStatus = status + + var b bytes.Buffer + if err := WriteElement(&b, commitPoint); err != nil { + return err + } + + err = chanBucket.Put(dataLossCommitPointKey, b.Bytes()) + if err != nil { + return err + } + + return putOpenChannel(chanBucket, channel) + }); err != nil { + return err + } + + // Update the in-memory representation to keep it in sync with the DB. + c.chanStatus = status + + return nil +} + +// DataLossCommitPoint retrieves the stored commit point set during +// MarkDataLoss. If not found ErrNoCommitPoint is returned. +func (c *OpenChannel) DataLossCommitPoint() (*btcec.PublicKey, error) { + var commitPoint *btcec.PublicKey + + err := c.Db.View(func(tx *bolt.Tx) error { + chanBucket, err := readChanBucket(tx, c.IdentityPub, + &c.FundingOutpoint, c.ChainHash) + if err == ErrNoActiveChannels || err == ErrNoChanDBExists { + return ErrNoCommitPoint + } else if err != nil { + return err + } + + bs := chanBucket.Get(dataLossCommitPointKey) + if bs == nil { + return ErrNoCommitPoint + } + r := bytes.NewReader(bs) + if err := ReadElements(r, &commitPoint); err != nil { + return err + } + + return nil + }) + if err != nil { + return nil, err + } + + return commitPoint, nil +} + // MarkBorked marks the event when the channel as reached an irreconcilable // state, such as a channel breach or state desynchronization. Borked channels // should never be added to the switch. 
@@ -713,6 +809,8 @@ func (c *OpenChannel) putChanStatus(status ChannelStatus) error { return err } + // Add this status to the existing bitvector found in the DB. + status = channel.chanStatus | status channel.chanStatus = status return putOpenChannel(chanBucket, channel) From 3825ca71dd7ee72944c584f1092747148d94b365 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:52 +0200 Subject: [PATCH 09/18] lnwallet/channel: reduce scope of commitSecretCorrect --- lnwallet/channel.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index f8a182bb..8ece0533 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -3138,7 +3138,6 @@ func (lc *LightningChannel) ProcessChanSyncMsg( // If the remote party included the optional fields, then we'll verify // their correctness first, as it will influence our decisions below. hasRecoveryOptions := msg.LocalUnrevokedCommitPoint != nil - commitSecretCorrect := true if hasRecoveryOptions && msg.RemoteCommitTailHeight != 0 { // We'll check that they've really sent a valid commit // secret from our shachain for our prior height, but only if @@ -3149,22 +3148,25 @@ func (lc *LightningChannel) ProcessChanSyncMsg( if err != nil { return nil, nil, nil, err } - commitSecretCorrect = bytes.Equal( + commitSecretCorrect := bytes.Equal( heightSecret[:], msg.LastRemoteCommitSecret[:], ) + + // If the commit secret they sent is incorrect then we'll fail + // the channel as the remote node has an inconsistent state. + if !commitSecretCorrect { + // In this case, we'll return an error to indicate the + // remote node sent us the wrong values. This will let + // the caller act accordingly. 
+ walletLog.Errorf("ChannelPoint(%v), sync failed: "+ + "remote provided invalid commit secret!", + lc.channelState.FundingOutpoint) + return nil, nil, nil, ErrInvalidLastCommitSecret + } } // TODO(roasbeef): check validity of commitment point after the fact - // If the commit secret they sent is incorrect then we'll fail the - // channel as the remote node has an inconsistent state. - if !commitSecretCorrect { - // In this case, we'll return an error to indicate the remote - // node sent us the wrong values. This will let the caller act - // accordingly. - return nil, nil, nil, ErrInvalidLastCommitSecret - } - switch { // If we owe the remote party a revocation message, then we'll re-send // the last revocation message that we sent. This will be the @@ -3213,9 +3215,9 @@ func (lc *LightningChannel) ProcessChanSyncMsg( // If we don't owe the remote party a revocation, but their value for // what our remote chain tail should be doesn't match up, and their - // purported commitment secrete matches up, then we'll behind! - case (msg.RemoteCommitTailHeight > localChainTail.height && - hasRecoveryOptions && commitSecretCorrect): + // purported commitment secret matches up, then we're behind! + case msg.RemoteCommitTailHeight > localChainTail.height && + hasRecoveryOptions: // In this case, we've likely lost data and shouldn't proceed // with channel updates. So we'll return the appropriate error From 48f1458ae5225e8e44506083edea5e1f54909416 Mon Sep 17 00:00:00 2001 From: "Johan T.
Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 10/18] lnwallet/channel: define channel sync errors This commit defines a few new errors that we can potentially encounter during channel reestablishment: * ErrInvalidLocalUnrevokedCommitPoint * ErrCommitSyncLocalDataLoss * ErrCommitSyncRemoteDataLoss in addition to the already defined errors * ErrInvalidLastCommitSecret * ErrCannotSyncCommitChains --- lnwallet/channel.go | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index 8ece0533..d5982e72 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -66,7 +66,8 @@ var ( // ErrCannotSyncCommitChains is returned if, upon receiving a ChanSync // message, the state machine deems that is unable to properly - // synchronize states with the remote peer. + // synchronize states with the remote peer. In this case we should fail + // the channel, but we won't automatically force close. ErrCannotSyncCommitChains = fmt.Errorf("unable to sync commit chains") // ErrInvalidLastCommitSecret is returned in the case that the @@ -74,12 +75,31 @@ var ( // ChannelReestablish message doesn't match the last secret we sent. ErrInvalidLastCommitSecret = fmt.Errorf("commit secret is incorrect") - // ErrCommitSyncDataLoss is returned in the case that we receive a + // ErrInvalidLocalUnrevokedCommitPoint is returned in the case that the + // commitment point sent by the remote party in their + // ChannelReestablish message doesn't match the last unrevoked commit + // point they sent us. + ErrInvalidLocalUnrevokedCommitPoint = fmt.Errorf("unrevoked commit " + + "point is invalid") + + // ErrCommitSyncLocalDataLoss is returned in the case that we receive a // valid commit secret within the ChannelReestablish message from the // remote node AND they advertise a RemoteCommitTailHeight higher than - // our current known height. 
- ErrCommitSyncDataLoss = fmt.Errorf("possible commitment state data " + - "loss") + // our current known height. This means we have lost some critical + // data, and must fail the channel and MUST NOT force close it. Instead + // we should wait for the remote to force close it, such that we can + // attempt to sweep our funds. + ErrCommitSyncLocalDataLoss = fmt.Errorf("possible local commitment " + + "state data loss") + + // ErrCommitSyncRemoteDataLoss is returned in the case that we receive + // a ChannelReestablish message from the remote that advertises a + // NextLocalCommitHeight that is lower than what they have already + // ACKed, or a RemoteCommitTailHeight that is lower than our revoked + // height. In this case we should force close the channel such that + // both parties can retrieve their funds. + ErrCommitSyncRemoteDataLoss = fmt.Errorf("possible remote commitment " + + "state data loss") ) // channelState is an enum like type which represents the current state of a @@ -3222,7 +3242,7 @@ func (lc *LightningChannel) ProcessChanSyncMsg( // In this case, we've likely lost data and shouldn't proceed // with channel updates. So we'll return the appropriate error // to signal to the caller the current state. - return nil, nil, nil, ErrCommitSyncDataLoss + return nil, nil, nil, ErrCommitSyncLocalDataLoss // If we don't owe them a revocation, and the height of our commitment // chain reported by the remote party is not equal to our chain tail, From 7fb3be84dffca2e224213f504fc706f1fe7c481a Mon Sep 17 00:00:00 2001 From: "Johan T. 
Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 11/18] lnwallet/channel test: rename ErrCommitSyncDataLoss->ErrCommitSyncLocalDataLoss --- lnwallet/channel_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lnwallet/channel_test.go b/lnwallet/channel_test.go index 0370049e..b53fe6e7 100644 --- a/lnwallet/channel_test.go +++ b/lnwallet/channel_test.go @@ -3809,8 +3809,8 @@ func TestChanSyncInvalidLastSecret(t *testing.T) { // Alice's former self should conclude that she possibly lost data as // Bob is sending a valid commit secret for the latest state. _, _, _, err = aliceOld.ProcessChanSyncMsg(bobChanSync) - if err != ErrCommitSyncDataLoss { - t.Fatalf("wrong error, expected ErrCommitSyncDataLoss "+ + if err != ErrCommitSyncLocalDataLoss { + t.Fatalf("wrong error, expected ErrCommitSyncLocalDataLoss "+ "instead got: %v", err) } From f1757d6da471b91b687b2ee80dc4b0f8ce9c0b12 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 12/18] lnwallet/channel: enumerate error cases from local chain desync This commit enumerates the various error cases we can encounter when we compare our local commit chain to the view the remote communicates to us via msg.RemoteCommitTailHeight. We now compare this height to our local tail height (note that there's never a local "tip" at this point), returning relevant error in case of a unrecoverable desync, and re-send a revocation in case we owe one. 
--- lnwallet/channel.go | 111 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 25 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index d5982e72..d11202de 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -3140,11 +3140,6 @@ func (lc *LightningChannel) ProcessChanSyncMsg( oweCommitment := (lc.remoteCommitChain.hasUnackedCommitment() && msg.NextLocalCommitHeight == remoteChainTip.height) - // We owe them a revocation if the tail of our current commitment is - // one greater than what they _think_ our commitment tail is. - localChainTail := lc.localCommitChain.tail() - oweRevocation := localChainTail.height == msg.RemoteCommitTailHeight+1 - // Now we'll examine the state we have, vs what was contained in the // chain sync message. If we're de-synchronized, then we'll send a // batch of messages which when applied will kick start the chain @@ -3187,13 +3182,88 @@ func (lc *LightningChannel) ProcessChanSyncMsg( // TODO(roasbeef): check validity of commitment point after the fact + // Take note of our current commit chain heights before we begin adding + // more to them. + var ( + localTailHeight = lc.localCommitChain.tail().height + ) + + // We'll now check that their view of our local chain is up-to-date. + // This means checking that what their view of our local chain tail + // height is what they believe. Note that the tail and tip height will + // always be the same for the local chain at this stage, as we won't + // store any received commitment to disk before it is ACKed. switch { - // If we owe the remote party a revocation message, then we'll re-send - // the last revocation message that we sent. This will be the - // revocation message for our prior chain tail. - case oweRevocation: + + // If their reported height for our local chain tail is ahead of our + // view, then we're behind! 
+ case msg.RemoteCommitTailHeight > localTailHeight: + walletLog.Errorf("ChannelPoint(%v), sync failed with local "+ + "data loss: remote believes our tail height is %v, "+ + "while we have %v!", lc.channelState.FundingOutpoint, + msg.RemoteCommitTailHeight, localTailHeight) + + // We must check that we had recovery options to ensure the + // commitment secret matched up, and the remote is just not + // lying about its height. + if !hasRecoveryOptions { + // At this point we the remote is either lying about + // its height, or we are actually behind but the remote + // doesn't support data loss protection. In either case + // it is not safe for us to keep using the channel, so + // we mark it borked and fail the channel. + if err := lc.channelState.MarkBorked(); err != nil { + return nil, nil, nil, err + } + + walletLog.Errorf("ChannelPoint(%v), sync failed: "+ + "local data loss, but no recovery option.", + lc.channelState.FundingOutpoint) + return nil, nil, nil, ErrCannotSyncCommitChains + } + + // In this case, we've likely lost data and shouldn't proceed + // with channel updates. So we'll store the commit point we + // were given in the database, such that we can attempt to + // recover the funds if the remote force closes the channel. + err := lc.channelState.MarkDataLoss( + msg.LocalUnrevokedCommitPoint, + ) + if err != nil { + return nil, nil, nil, err + } + return nil, nil, nil, ErrCommitSyncLocalDataLoss + + // If the height of our commitment chain reported by the remote party + // is behind our view of the chain, then they probably lost some state, + // and we'll force close the channel. 
+ case msg.RemoteCommitTailHeight+1 < localTailHeight: + walletLog.Errorf("ChannelPoint(%v), sync failed: remote "+ + "believes our tail height is %v, while we have %v!", + lc.channelState.FundingOutpoint, + msg.RemoteCommitTailHeight, localTailHeight) + + if err := lc.channelState.MarkBorked(); err != nil { + return nil, nil, nil, err + } + return nil, nil, nil, ErrCommitSyncRemoteDataLoss + + // Their view of our commit chain is consistent with our view. + case msg.RemoteCommitTailHeight == localTailHeight: + // In sync, don't have to do anything. + + // We owe them a revocation if the tail of our current commitment chain + // is one greater than what they _think_ our commitment tail is. In + // this case we'll re-send the last revocation message that we sent. + // This will be the revocation message for our prior chain tail. + case msg.RemoteCommitTailHeight+1 == localTailHeight: + walletLog.Debugf("ChannelPoint(%v), sync: remote believes "+ + "our tail height is %v, while we have %v, we owe "+ + "them a revocation", lc.channelState.FundingOutpoint, + msg.RemoteCommitTailHeight, localTailHeight) + revocationMsg, err := lc.generateRevocation( - localChainTail.height - 1, + localTailHeight - 1, ) if err != nil { return nil, nil, nil, err @@ -3233,25 +3303,16 @@ func (lc *LightningChannel) ProcessChanSyncMsg( } } - // If we don't owe the remote party a revocation, but their value for - // what our remote chain tail should be doesn't match up, and their - // purported commitment secrete matches up, then we're behind! - case msg.RemoteCommitTailHeight > localChainTail.height && - hasRecoveryOptions: + // There should be no other possible states. + default: + walletLog.Errorf("ChannelPoint(%v), sync failed: remote "+ + "believes our tail height is %v, while we have %v!", + lc.channelState.FundingOutpoint, + msg.RemoteCommitTailHeight, localTailHeight) - // In this case, we've likely lost data and shouldn't proceed - // with channel updates. 
So we'll return the appropriate error - // to signal to the caller the current state. - return nil, nil, nil, ErrCommitSyncLocalDataLoss - - // If we don't owe them a revocation, and the height of our commitment - // chain reported by the remote party is not equal to our chain tail, - // then we cannot sync. - case !oweRevocation && localChainTail.height != msg.RemoteCommitTailHeight: if err := lc.channelState.MarkBorked(); err != nil { return nil, nil, nil, err } - return nil, nil, nil, ErrCannotSyncCommitChains } From a2f2d28d0b4ea564e50ee442980315602ec1649b Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 13/18] lnwallet/channel: enumerate error cases from remote chain desync This commit enumerates the various error cases we can encounter when we compare our remote commit chain to the view the remote communicates to us via msg.NextLocalCommitHeight. We now compare this height to our remote tail and tip height, returning relevant error in case of a unrecoverable desync, and re-send a commitment signature (including log updates) in case we owe one. 
--- lnwallet/channel.go | 79 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index d11202de..d929e2d9 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -10,12 +10,12 @@ import ( "sync" "sync/atomic" + "github.com/btcsuite/btcd/blockchain" + "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/davecgh/go-spew/spew" "github.com/lightningnetwork/lnd/chainntnfs" "github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/lnwire" - "github.com/btcsuite/btcd/blockchain" - "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/btcec" "github.com/btcsuite/btcd/txscript" @@ -3133,13 +3133,6 @@ func (lc *LightningChannel) ProcessChanSyncMsg( msg *lnwire.ChannelReestablish) ([]lnwire.Message, []channeldb.CircuitKey, []channeldb.CircuitKey, error) { - // We owe them a commitment if they have an un-acked commitment and the - // tip of their chain (from our Pov) is equal to what they think their - // next commit height should be. - remoteChainTip := lc.remoteCommitChain.tip() - oweCommitment := (lc.remoteCommitChain.hasUnackedCommitment() && - msg.NextLocalCommitHeight == remoteChainTip.height) - // Now we'll examine the state we have, vs what was contained in the // chain sync message. If we're de-synchronized, then we'll send a // batch of messages which when applied will kick start the chain @@ -3186,6 +3179,8 @@ func (lc *LightningChannel) ProcessChanSyncMsg( // more to them. var ( localTailHeight = lc.localCommitChain.tail().height + remoteTailHeight = lc.remoteCommitChain.tail().height + remoteTipHeight = lc.remoteCommitChain.tip().height ) // We'll now check that their view of our local chain is up-to-date. 
@@ -3316,10 +3311,54 @@ func (lc *LightningChannel) ProcessChanSyncMsg( return nil, nil, nil, ErrCannotSyncCommitChains } - // If we owe them a commitment, then we'll read from disk our - // commitment diff, so we can re-send them to the remote party. - if oweCommitment { - // Grab the current remote chain tip from the database. This + // Now check if our view of the remote chain is consistent with what + // they tell us. + switch { + + // The remote's view of what their next commit height is 2+ states + // ahead of us, we most likely lost data, or the remote is trying to + // trick us. Since we have no way of verifying whether they are lying + // or not, we will fail the channel, but should not force close it + // automatically. + case msg.NextLocalCommitHeight > remoteTipHeight+1: + walletLog.Errorf("ChannelPoint(%v), sync failed: remote's "+ + "next commit height is %v, while we believe it is %v!", + lc.channelState.FundingOutpoint, + msg.NextLocalCommitHeight, remoteTipHeight) + + if err := lc.channelState.MarkBorked(); err != nil { + return nil, nil, nil, err + } + return nil, nil, nil, ErrCannotSyncCommitChains + + // They are waiting for a state they have already ACKed. + case msg.NextLocalCommitHeight <= remoteTailHeight: + walletLog.Errorf("ChannelPoint(%v), sync failed: remote's "+ + "next commit height is %v, while we believe it is %v!", + lc.channelState.FundingOutpoint, + msg.NextLocalCommitHeight, remoteTipHeight) + + // They previously ACKed our current tail, and now they are + // waiting for it. They probably lost state. + if err := lc.channelState.MarkBorked(); err != nil { + return nil, nil, nil, err + } + return nil, nil, nil, ErrCommitSyncRemoteDataLoss + + // They have received our latest commitment, life is good. + case msg.NextLocalCommitHeight == remoteTipHeight+1: + + // We owe them a commitment if the tip of their chain (from our Pov) is + // equal to what they think their next commit height should be. 
We'll + // re-send all the updates neccessary to recreate this state, along + // with the commit sig. + case msg.NextLocalCommitHeight == remoteTipHeight: + walletLog.Debugf("ChannelPoint(%v), sync: remote's next "+ + "commit height is %v, while we believe it is %v, we "+ + "owe them a commitment", lc.channelState.FundingOutpoint, + msg.NextLocalCommitHeight, remoteTipHeight) + + // Grab the current remote chain tip from the database. This // commit diff contains all the information required to re-sync // our states. commitDiff, err := lc.channelState.RemoteCommitChainTip() @@ -3341,14 +3380,18 @@ func (lc *LightningChannel) ProcessChanSyncMsg( openedCircuits = commitDiff.OpenedCircuitKeys closedCircuits = commitDiff.ClosedCircuitKeys - } else if remoteChainTip.height+1 != msg.NextLocalCommitHeight { + // There should be no other possible states as long as the commit chain + // can have at most two elements. If that's the case, something is + // wrong. + default: + walletLog.Errorf("ChannelPoint(%v), sync failed: remote's "+ + "next commit height is %v, while we believe it is %v!", + lc.channelState.FundingOutpoint, + msg.NextLocalCommitHeight, remoteTipHeight) + if err := lc.channelState.MarkBorked(); err != nil { return nil, nil, nil, err } - - // If we don't owe them a commitment, yet the tip of their - // chain isn't one more than the next local commit height they - // report, we'll fail the channel. return nil, nil, nil, ErrCannotSyncCommitChains } From 78a4a15bb4221759fe89176a7f72a3fb7d92bc64 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 14/18] lnwallet/channel: check validity of received commitPoint This commit adds a check for the LocalUnrevokedCommitPoint sent to us by the remote during channel reestablishment, ensuring it is the same point as they have previously sent us. 
--- lnwallet/channel.go | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index d929e2d9..8cfd60c5 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -3173,8 +3173,6 @@ func (lc *LightningChannel) ProcessChanSyncMsg( } } - // TODO(roasbeef): check validity of commitment point after the fact - // Take note of our current commit chain heights before we begin adding // more to them. var ( @@ -3395,6 +3393,39 @@ func (lc *LightningChannel) ProcessChanSyncMsg( return nil, nil, nil, ErrCannotSyncCommitChains } + // If we didn't have recovery options, then the final check cannot be + // performed, and we'll return early. + if !hasRecoveryOptions { + return updates, openedCircuits, closedCircuits, nil + } + + // At this point we have determined that either the commit heights are + // in sync, or that we are in a state we can recover from. As a final + // check, we ensure that the commitment point sent to us by the remote + // is valid. + var commitPoint *btcec.PublicKey + switch { + case msg.NextLocalCommitHeight == remoteTailHeight+2: + commitPoint = lc.channelState.RemoteNextRevocation + + case msg.NextLocalCommitHeight == remoteTailHeight+1: + commitPoint = lc.channelState.RemoteCurrentRevocation + } + if commitPoint != nil && + !commitPoint.IsEqual(msg.LocalUnrevokedCommitPoint) { + + walletLog.Errorf("ChannelPoint(%v), sync failed: remote "+ + "sent invalid commit point for height %v!", + lc.channelState.FundingOutpoint, + msg.NextLocalCommitHeight) + + if err := lc.channelState.MarkBorked(); err != nil { + return nil, nil, nil, err + } + // TODO(halseth): force close? + return nil, nil, nil, ErrInvalidLocalUnrevokedCommitPoint + } + return updates, openedCircuits, closedCircuits, nil } From 410b7307784297e8cb82590538a7f91cf298a2db Mon Sep 17 00:00:00 2001 From: "Johan T. 
Halseth" Date: Thu, 12 Jul 2018 11:02:53 +0200 Subject: [PATCH 15/18] lnwallet/channel test: add TestChanSyncFailure --- lnwallet/channel_test.go | 231 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 4 deletions(-) diff --git a/lnwallet/channel_test.go b/lnwallet/channel_test.go index b53fe6e7..9da2ee91 100644 --- a/lnwallet/channel_test.go +++ b/lnwallet/channel_test.go @@ -10,13 +10,14 @@ import ( "runtime" "testing" - "github.com/davecgh/go-spew/spew" - "github.com/lightningnetwork/lnd/chainntnfs" - "github.com/lightningnetwork/lnd/lnwire" "github.com/btcsuite/btcd/blockchain" + "github.com/btcsuite/btcd/btcec" "github.com/btcsuite/btcd/txscript" "github.com/btcsuite/btcd/wire" "github.com/btcsuite/btcutil" + "github.com/davecgh/go-spew/spew" + "github.com/lightningnetwork/lnd/chainntnfs" + "github.com/lightningnetwork/lnd/lnwire" ) // forceStateTransition executes the necessary interaction between the two @@ -2562,7 +2563,7 @@ func TestChanSyncFullySynced(t *testing.T) { assertNoChanSyncNeeded(t, aliceChannelNew, bobChannelNew) } -// restartChannel reads the passe channel from disk, and returns a newly +// restartChannel reads the passed channel from disk, and returns a newly // initialized instance. This simulates one party restarting and losing their // in memory state. func restartChannel(channelOld *LightningChannel) (*LightningChannel, error) { @@ -3486,6 +3487,228 @@ func TestChanSyncOweRevocationAndCommitForceTransition(t *testing.T) { } } +// TestChanSyncFailure tests the various scenarios during channel sync where we +// should be able to detect that the channels cannot be synced because of +// invalid state. +func TestChanSyncFailure(t *testing.T) { + t.Parallel() + + // Create a test channel which will be used for the duration of this + // unittest. The channel will be funded evenly with Alice having 5 BTC, + // and Bob having 5 BTC. 
+ aliceChannel, bobChannel, cleanUp, err := CreateTestChannels() + if err != nil { + t.Fatalf("unable to create test channels: %v", err) + } + defer cleanUp() + + htlcAmt := lnwire.NewMSatFromSatoshis(20000) + index := byte(0) + + // advanceState is a helper method to fully advance the channel state + // by one. + advanceState := func() { + // We'll kick off the test by having Bob send Alice an HTLC, + // then lock it in with a state transition. + var bobPreimage [32]byte + copy(bobPreimage[:], bytes.Repeat([]byte{0xaa - index}, 32)) + rHash := sha256.Sum256(bobPreimage[:]) + bobHtlc := &lnwire.UpdateAddHTLC{ + PaymentHash: rHash, + Amount: htlcAmt, + Expiry: uint32(10), + ID: uint64(index), + } + index++ + + _, err := bobChannel.AddHTLC(bobHtlc, nil) + if err != nil { + t.Fatalf("unable to add bob's htlc: %v", err) + } + _, err = aliceChannel.ReceiveHTLC(bobHtlc) + if err != nil { + t.Fatalf("unable to recv bob's htlc: %v", err) + } + err = forceStateTransition(bobChannel, aliceChannel) + if err != nil { + t.Fatalf("unable to complete bob's state "+ + "transition: %v", err) + } + } + + // halfAdvance is a helper method that sends a new commitment signature + // from Alice to Bob, but doesn't make Bob revoke his current state. + halfAdvance := func() { + // We'll kick off the test by having Bob send Alice an HTLC, + // then lock it in with a state transition. 
+ var bobPreimage [32]byte + copy(bobPreimage[:], bytes.Repeat([]byte{0xaa - index}, 32)) + rHash := sha256.Sum256(bobPreimage[:]) + bobHtlc := &lnwire.UpdateAddHTLC{ + PaymentHash: rHash, + Amount: htlcAmt, + Expiry: uint32(10), + ID: uint64(index), + } + index++ + + _, err := bobChannel.AddHTLC(bobHtlc, nil) + if err != nil { + t.Fatalf("unable to add bob's htlc: %v", err) + } + _, err = aliceChannel.ReceiveHTLC(bobHtlc) + if err != nil { + t.Fatalf("unable to recv bob's htlc: %v", err) + } + + aliceSig, aliceHtlcSigs, err := aliceChannel.SignNextCommitment() + if err != nil { + t.Fatalf("unable to sign next commit: %v", err) + } + err = bobChannel.ReceiveNewCommitment(aliceSig, aliceHtlcSigs) + if err != nil { + t.Fatalf("unable to receive commit sig: %v", err) + } + } + + // assertLocalDataLoss checks that aliceOld and bobChannel detects that + // Alice has lost state during sync. + assertLocalDataLoss := func(aliceOld *LightningChannel) { + aliceSyncMsg, err := aliceOld.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + bobSyncMsg, err := bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + + // Alice should detect from Bob's message that she lost state. + _, _, _, err = aliceOld.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrCommitSyncLocalDataLoss { + t.Fatalf("wrong error, expected "+ + "ErrCommitSyncLocalDataLoss instead got: %v", + err) + } + + // Bob should detect that Alice probably lost state. + _, _, _, err = bobChannel.ProcessChanSyncMsg(aliceSyncMsg) + if err != ErrCommitSyncRemoteDataLoss { + t.Fatalf("wrong error, expected "+ + "ErrCommitSyncRemoteDataLoss instead got: %v", + err) + } + } + + // Start by advancing the state. + advanceState() + + // They should be in sync. + assertNoChanSyncNeeded(t, aliceChannel, bobChannel) + + // Make a copy of Alice's state from the database at this point. 
+ aliceOld, err := restartChannel(aliceChannel) + if err != nil { + t.Fatalf("unable to restart channel: %v", err) + } + + // Advance the states. + advanceState() + + // Trying to sync up the old version of Alice's channel should detect + // that we are out of sync. + assertLocalDataLoss(aliceOld) + + // Make sure the up-to-date channels still are in sync. + assertNoChanSyncNeeded(t, aliceChannel, bobChannel) + + // Advance the state again, and do the same check. + advanceState() + assertNoChanSyncNeeded(t, aliceChannel, bobChannel) + assertLocalDataLoss(aliceOld) + + // If we remove the recovery options from Bob's message, Alice cannot + // tell if she lost state, since Bob might be lying. She still should + // be able to detect that chains cannot be synced. + bobSyncMsg, err := bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + bobSyncMsg.LocalUnrevokedCommitPoint = nil + _, _, _, err = aliceOld.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrCannotSyncCommitChains { + t.Fatalf("wrong error, expected ErrCannotSyncCommitChains "+ + "instead got: %v", err) + } + + // If Bob lies about the NextLocalCommitHeight, making it greater than + // what Alice expect, she cannot tell for sure whether she lost state, + // but should detect the desync. + bobSyncMsg, err = bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + bobSyncMsg.NextLocalCommitHeight++ + _, _, _, err = aliceChannel.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrCannotSyncCommitChains { + t.Fatalf("wrong error, expected ErrCannotSyncCommitChains "+ + "instead got: %v", err) + } + + // If Bob's NextLocalCommitHeight is lower than what Alice expects, Bob + // probably lost state. 
+ bobSyncMsg, err = bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + bobSyncMsg.NextLocalCommitHeight-- + _, _, _, err = aliceChannel.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrCommitSyncRemoteDataLoss { + t.Fatalf("wrong error, expected ErrCommitSyncRemoteDataLoss "+ + "instead got: %v", err) + } + + // If Alice and Bob's states are in sync, but Bob is sending the wrong + // LocalUnrevokedCommitPoint, Alice should detect this. + bobSyncMsg, err = bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + p := bobSyncMsg.LocalUnrevokedCommitPoint.SerializeCompressed() + p[4] ^= 0x01 + modCommitPoint, err := btcec.ParsePubKey(p, btcec.S256()) + if err != nil { + t.Fatalf("unable to parse pubkey: %v", err) + } + + bobSyncMsg.LocalUnrevokedCommitPoint = modCommitPoint + _, _, _, err = aliceChannel.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrInvalidLocalUnrevokedCommitPoint { + t.Fatalf("wrong error, expected "+ + "ErrInvalidLocalUnrevokedCommitPoint instead got: %v", + err) + } + + // Make sure the up-to-date channels still are good. + assertNoChanSyncNeeded(t, aliceChannel, bobChannel) + + // Finally check that Alice is also able to detect a wrong commit point + // when there's a pending remote commit. + halfAdvance() + + bobSyncMsg, err = bobChannel.ChanSyncMsg() + if err != nil { + t.Fatalf("unable to produce chan sync msg: %v", err) + } + bobSyncMsg.LocalUnrevokedCommitPoint = modCommitPoint + _, _, _, err = aliceChannel.ProcessChanSyncMsg(bobSyncMsg) + if err != ErrInvalidLocalUnrevokedCommitPoint { + t.Fatalf("wrong error, expected "+ + "ErrInvalidLocalUnrevokedCommitPoint instead got: %v", + err) + } +} + // TestFeeUpdateRejectInsaneFee tests that if the initiator tries to attach a // fee that would put them below their current reserve, then it's rejected by // the state machine. 
From ebed786b2ac00921120e1dce05c2005c80da1c67 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:54 +0200 Subject: [PATCH 16/18] htlcswitch/link: inspect sync errors, force close channel This commit makes the link inspect the error encountered during channel sync, force closing the channel if we detect a remote data loss. --- htlcswitch/link.go | 75 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 22de510e..d984dfb2 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -613,10 +613,7 @@ func (l *channelLink) syncChanStates() error { msgsToReSend, openedCircuits, closedCircuits, err = l.channel.ProcessChanSyncMsg(remoteChanSyncMsg) if err != nil { - // TODO(roasbeef): check concrete type of error, act - // accordingly - return fmt.Errorf("unable to handle upstream reestablish "+ - "message: %v", err) + return err } // Repopulate any identifiers for circuits that may have been @@ -810,13 +807,71 @@ func (l *channelLink) htlcManager() { if l.cfg.SyncStates { err := l.syncChanStates() if err != nil { - l.errorf("unable to synchronize channel states: %v", err) - if err != ErrLinkShuttingDown { - // TODO(halseth): must be revisted when - // data-loss protection is in. - l.fail(LinkFailureError{code: ErrSyncError}, - err.Error()) + switch { + case err == ErrLinkShuttingDown: + log.Debugf("unable to sync channel states, " + + "link is shutting down") + return + + // We failed syncing the commit chains, probably + // because the remote has lost state. We should force + // close the channel. + // TODO(halseth): store sent chanSync message to + // database, such that it can be resent to peer in case + // it tries to sync the channel again. + case err == lnwallet.ErrCommitSyncRemoteDataLoss: + fallthrough + + // The remote sent us an invalid last commit secret, we + // should force close the channel. 
+ // TODO(halseth): and permanently ban the peer? + case err == lnwallet.ErrInvalidLastCommitSecret: + fallthrough + + // The remote sent us a commit point different from + // what they sent us before. + // TODO(halseth): ban peer? + case err == lnwallet.ErrInvalidLocalUnrevokedCommitPoint: + l.fail( + LinkFailureError{ + code: ErrSyncError, + ForceClose: true, + }, + "unable to synchronize channel "+ + "states: %v", err, + ) + return + + // We have lost state and cannot safely force close the + // channel. Fail the channel and wait for the remote to + // hopefully force close it. The remote has sent us its + // latest unrevoked commitment point, that we stored in + // the database, that we can use to retrieve the funds + // when the remote closes the channel. + // TODO(halseth): mark this, such that we prevent + // channel from being force closed by the user or + // contractcourt etc. + case err == lnwallet.ErrCommitSyncLocalDataLoss: + + // We determined the commit chains were not possible to + // sync. We cautiously fail the channel, but don't + // force close. + // TODO(halseth): can we safely force close in any + // cases where this error is returned? + case err == lnwallet.ErrCannotSyncCommitChains: + + // Other, unspecified error. + default: } + + l.fail( + LinkFailureError{ + code: ErrSyncError, + ForceClose: false, + }, + "unable to synchronize channel "+ + "states: %v", err, + ) return } } From 00154bda24205b37dd7c740866444ad7c0cbfc88 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:54 +0200 Subject: [PATCH 17/18] contractcourt/chain_watcher: attempt dispatchRemoteClose using data loss commitPoint This commit makes the chainwatcher attempt to dispatch a remote close when it detects a remote state with a state number higher than our known remote state. This can mean that we lost some state, and we check the database for (hopefully) a data loss commit point retrieved during channel sync with the remote peer. 
If this commit point is found in the database we use it to try to recover our funds from the commitment. --- contractcourt/chain_watcher.go | 49 +++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/contractcourt/chain_watcher.go b/contractcourt/chain_watcher.go index f0fdb830..8126d37d 100644 --- a/contractcourt/chain_watcher.go +++ b/contractcourt/chain_watcher.go @@ -372,15 +372,50 @@ func (c *chainWatcher) closeObserver(spendNtfn *chainntnfs.SpendEvent) { c.cfg.chanState.FundingOutpoint, err) } - // This is the case that somehow the commitment - // broadcast is actually greater than even one beyond - // our best known state number. This should NEVER - // happen, but we'll log it in any case. + // This is the case that somehow the commitment broadcast is + // actually greater than even one beyond our best known state + // number. This should ONLY happen in case we experienced some + // sort of data loss. case broadcastStateNum > remoteStateNum+1: - log.Errorf("Remote node broadcast state #%v, "+ + log.Warnf("Remote node broadcast state #%v, "+ "which is more than 1 beyond best known "+ - "state #%v!!!", broadcastStateNum, - remoteStateNum) + "state #%v!!! Attempting recovery...", + broadcastStateNum, remoteStateNum) + + // If we are lucky, the remote peer sent us the correct + // commitment point during channel sync, such that we + // can sweep our funds. + // TODO(halseth): must handle the case where we haven't + // yet processed the chan sync message. + commitPoint, err := c.cfg.chanState.DataLossCommitPoint() + if err != nil { + log.Errorf("Unable to retrieve commitment "+ + "point for channel(%v) with lost "+ + "state: %v", + c.cfg.chanState.FundingOutpoint, err) + return + } + + log.Infof("Recovered commit point(%x) for "+ + "channel(%v)! 
Now attempting to use it to "+ + "sweep our funds...", + commitPoint.SerializeCompressed(), + c.cfg.chanState.FundingOutpoint) + + // Since we don't have the commitment stored for this + // state, we'll just pass an empty commitment. Note + // that this means we won't be able to recover any HTLC + // funds. + // TODO(halseth): can we try to recover some HTLCs? + err = c.dispatchRemoteForceClose( + commitSpend, channeldb.ChannelCommitment{}, + commitPoint, + ) + if err != nil { + log.Errorf("unable to handle remote "+ + "close for chan_point=%v: %v", + c.cfg.chanState.FundingOutpoint, err) + } // If the state number broadcast is lower than the // remote node's current un-revoked height, then From afccca59c40eac5f4495a922c54be8c70d0ff612 Mon Sep 17 00:00:00 2001 From: "Johan T. Halseth" Date: Thu, 12 Jul 2018 11:02:54 +0200 Subject: [PATCH 18/18] lnd_test: add testDataLossProtection This commit adds the integration test testDataLossProtection, that ensures that when a node loses state, the channel counterparty will force close the channel, and they both can recover their funds. --- lnd_test.go | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 330 insertions(+), 2 deletions(-) diff --git a/lnd_test.go b/lnd_test.go index d1f76b5b..592cd1cd 100644 --- a/lnd_test.go +++ b/lnd_test.go @@ -6046,19 +6046,343 @@ func testRevokedCloseRetributionRemoteHodl(net *lntest.NetworkHarness, assertNodeNumChannels(t, ctxb, dave, 0) } +// assertNumPendingChannels checks that a PendingChannels response from the +// node reports the expected number of pending channels. 
+func assertNumPendingChannels(t *harnessTest, node *lntest.HarnessNode, + expWaitingClose, expPendingForceClose int) { + ctxb := context.Background() + + var predErr error + err := lntest.WaitPredicate(func() bool { + pendingChansRequest := &lnrpc.PendingChannelsRequest{} + pendingChanResp, err := node.PendingChannels(ctxb, + pendingChansRequest) + if err != nil { + predErr = fmt.Errorf("unable to query for pending "+ + "channels: %v", err) + return false + } + n := len(pendingChanResp.WaitingCloseChannels) + if n != expWaitingClose { + predErr = fmt.Errorf("Expected to find %d channels "+ + "waiting close, found %d", expWaitingClose, n) + return false + } + n = len(pendingChanResp.PendingForceClosingChannels) + if n != expPendingForceClose { + predErr = fmt.Errorf("expected to find %d channel "+ + "pending force close, found %d", expPendingForceClose, n) + return false + } + return true + }, time.Second*15) + if err != nil { + t.Fatalf("%v", predErr) + } +} + +// testDataLossProtection tests that if one of the nodes in a channel +// relationship lost state, they will detect this during channel sync, and the +// up-to-date party will force close the channel, giving the outdated party the +// oppurtunity to sweep its output. +func testDataLossProtection(net *lntest.NetworkHarness, t *harnessTest) { + ctxb := context.Background() + const ( + timeout = time.Duration(time.Second * 10) + chanAmt = maxBtcFundingAmount + paymentAmt = 10000 + numInvoices = 6 + defaultCSV = uint32(4) + ) + + // Carol will be the up-to-date party. We set --nolisten to ensure Dave + // won't be able to connect to her and trigger the channel data + // protection logic automatically. + carol, err := net.NewNode("Carol", []string{"--nolisten"}) + if err != nil { + t.Fatalf("unable to create new carol node: %v", err) + } + defer shutdownAndAssert(net, t, carol) + + // Dave will be the party losing his state. 
+ dave, err := net.NewNode("Dave", nil) + if err != nil { + t.Fatalf("unable to create new node: %v", err) + } + defer shutdownAndAssert(net, t, dave) + + // We must let Dave communicate with Carol before they are able to open + // channel, so we connect them. + if err := net.ConnectNodes(ctxb, carol, dave); err != nil { + t.Fatalf("unable to connect dave to carol: %v", err) + } + + // Before we make a channel, we'll load up Carol with some coins sent + // directly from the miner. + err = net.SendCoins(ctxb, btcutil.SatoshiPerBitcoin, carol) + if err != nil { + t.Fatalf("unable to send coins to carol: %v", err) + } + + // We'll first open up a channel between them with a 0.5 BTC value. + ctxt, _ := context.WithTimeout(ctxb, timeout) + chanPoint := openChannelAndAssert( + ctxt, t, net, carol, dave, chanAmt, 0, false, + ) + + // We a´make a note of the nodes' current on-chain balances, to make + // sure they are able to retrieve the channel funds eventually, + balReq := &lnrpc.WalletBalanceRequest{} + carolBalResp, err := carol.WalletBalance(ctxb, balReq) + if err != nil { + t.Fatalf("unable to get carol's balance: %v", err) + } + carolStartingBalance := carolBalResp.ConfirmedBalance + + daveBalResp, err := dave.WalletBalance(ctxb, balReq) + if err != nil { + t.Fatalf("unable to get dave's balance: %v", err) + } + daveStartingBalance := daveBalResp.ConfirmedBalance + + // With the channel open, we'll create a few invoices for Dave that + // Carol will pay to in order to advance the state of the channel. + // TODO(halseth): have dangling HTLCs on the commitment, able to + // retrive funds? 
+ davePayReqs := make([]string, numInvoices) + for i := 0; i < numInvoices; i++ { + preimage := bytes.Repeat([]byte{byte(17 - i)}, 32) + invoice := &lnrpc.Invoice{ + Memo: "testing", + RPreimage: preimage, + Value: paymentAmt, + } + resp, err := dave.AddInvoice(ctxb, invoice) + if err != nil { + t.Fatalf("unable to add invoice: %v", err) + } + + davePayReqs[i] = resp.PaymentRequest + } + + // As we'll be querying the state of Dave's channels frequently we'll + // create a closure helper function for the purpose. + getDaveChanInfo := func() (*lnrpc.Channel, error) { + req := &lnrpc.ListChannelsRequest{} + daveChannelInfo, err := dave.ListChannels(ctxb, req) + if err != nil { + return nil, err + } + if len(daveChannelInfo.Channels) != 1 { + t.Fatalf("dave should only have a single channel, "+ + "instead he has %v", + len(daveChannelInfo.Channels)) + } + + return daveChannelInfo.Channels[0], nil + } + + // Wait for Carol to receive the channel edge from the funding manager. + ctxt, _ = context.WithTimeout(ctxb, timeout) + err = carol.WaitForNetworkChannelOpen(ctxt, chanPoint) + if err != nil { + t.Fatalf("carol didn't see the carol->dave channel before "+ + "timeout: %v", err) + } + + // Send payments from Carol to Dave using 3 of Dave's payment hashes + // generated above. + ctxt, _ = context.WithTimeout(ctxb, timeout) + err = completePaymentRequests(ctxt, carol, davePayReqs[:numInvoices/2], + true) + if err != nil { + t.Fatalf("unable to send payments: %v", err) + } + + // Next query for Dave's channel state, as we sent 3 payments of 10k + // satoshis each, Dave should now see his balance as being 30k satoshis. 
+ var daveChan *lnrpc.Channel + var predErr error + err = lntest.WaitPredicate(func() bool { + bChan, err := getDaveChanInfo() + if err != nil { + t.Fatalf("unable to get dave's channel info: %v", err) + } + if bChan.LocalBalance != 30000 { + predErr = fmt.Errorf("dave's balance is incorrect, "+ + "got %v, expected %v", bChan.LocalBalance, + 30000) + return false + } + + daveChan = bChan + return true + }, time.Second*15) + if err != nil { + t.Fatalf("%v", predErr) + } + + // Grab Dave's current commitment height (update number), we'll later + // revert him to this state after additional updates to revoke this + // state. + daveStateNumPreCopy := daveChan.NumUpdates + + // Create a temporary file to house Dave's database state at this + // particular point in history. + daveTempDbPath, err := ioutil.TempDir("", "dave-past-state") + if err != nil { + t.Fatalf("unable to create temp db folder: %v", err) + } + daveTempDbFile := filepath.Join(daveTempDbPath, "channel.db") + defer os.Remove(daveTempDbPath) + + // With the temporary file created, copy Dave's current state into the + // temporary file we created above. Later after more updates, we'll + // restore this state. + if err := copyFile(daveTempDbFile, dave.DBPath()); err != nil { + t.Fatalf("unable to copy database files: %v", err) + } + + // Finally, send payments from Carol to Dave, consuming Dave's remaining + // payment hashes. + ctxt, _ = context.WithTimeout(ctxb, timeout) + err = completePaymentRequests(ctxt, carol, davePayReqs[numInvoices/2:], + true) + if err != nil { + t.Fatalf("unable to send payments: %v", err) + } + + daveChan, err = getDaveChanInfo() + if err != nil { + t.Fatalf("unable to get dave chan info: %v", err) + } + + // Now we shutdown Dave, copying over the his temporary database state + // which has the *prior* channel state over his current most up to date + // state. With this, we essentially force Dave to travel back in time + // within the channel's history. 
+ if err = net.RestartNode(dave, func() error { + return os.Rename(daveTempDbFile, dave.DBPath()) + }); err != nil { + t.Fatalf("unable to restart node: %v", err) + } + + // Now query for Dave's channel state, it should show that he's at a + // state number in the past, not the *latest* state. + daveChan, err = getDaveChanInfo() + if err != nil { + t.Fatalf("unable to get dave chan info: %v", err) + } + if daveChan.NumUpdates != daveStateNumPreCopy { + t.Fatalf("db copy failed: %v", daveChan.NumUpdates) + } + assertNodeNumChannels(t, ctxb, dave, 1) + + // Upon reconnection, the nodes should detect that Dave is out of sync. + if err := net.ConnectNodes(ctxb, carol, dave); err != nil { + t.Fatalf("unable to connect dave to carol: %v", err) + } + + // Carol should force close the channel using her latest commitment. + forceClose, err := waitForTxInMempool(net.Miner.Node, 5*time.Second) + if err != nil { + t.Fatalf("unable to find Carol's force close tx in mempool: %v", + err) + } + + // Channel should be in the state "waiting close" for Carol since she + // broadcasted the force close tx. + assertNumPendingChannels(t, carol, 1, 0) + + // Dave should also consider the channel "waiting close", as he noticed + // the channel was out of sync, and is now waiting for a force close to + // hit the chain. + assertNumPendingChannels(t, dave, 1, 0) + + // Restart Dave to make sure he is able to sweep the funds after + // shutdown. + if err := net.RestartNode(dave, nil); err != nil { + t.Fatalf("Node restart failed: %v", err) + } + + // Generate a single block, which should confirm the closing tx. + block := mineBlocks(t, net, 1)[0] + assertTxInBlock(t, block, forceClose) + + // Dave should sweep his funds immediately, as they are not timelocked. 
+ daveSweep, err := waitForTxInMempool(net.Miner.Node, 15*time.Second) + if err != nil { + t.Fatalf("unable to find Dave's sweep tx in mempool: %v", err) + } + + // Dave should consider the channel pending force close (since he is + // waiting for his sweep to confirm). + assertNumPendingChannels(t, dave, 0, 1) + + // Carol is considering it "pending force close", as whe must wait + // before she can sweep her outputs. + assertNumPendingChannels(t, carol, 0, 1) + + block = mineBlocks(t, net, 1)[0] + assertTxInBlock(t, block, daveSweep) + + // Now Dave should consider the channel fully closed. + assertNumPendingChannels(t, dave, 0, 0) + + // We query Dave's balance to make sure it increased after the channel + // closed. This checks that he was able to sweep the funds he had in + // the channel. + daveBalResp, err = dave.WalletBalance(ctxb, balReq) + if err != nil { + t.Fatalf("unable to get dave's balance: %v", err) + } + daveBalance := daveBalResp.ConfirmedBalance + if daveBalance <= daveStartingBalance { + t.Fatalf("expected dave to have balance above %d, intead had %v", + daveStartingBalance, daveBalance) + } + + // After the Carol's output matures, she should also reclaim her funds. + mineBlocks(t, net, defaultCSV-1) + carolSweep, err := waitForTxInMempool(net.Miner.Node, 5*time.Second) + if err != nil { + t.Fatalf("unable to find Carol's sweep tx in mempool: %v", err) + } + block = mineBlocks(t, net, 1)[0] + assertTxInBlock(t, block, carolSweep) + + // Now the channel should be fully closed also from Carol's POV. + assertNumPendingChannels(t, carol, 0, 0) + + // Make sure Carol got her balance back. 
+ carolBalResp, err = carol.WalletBalance(ctxb, balReq) + if err != nil { + t.Fatalf("unable to get carol's balance: %v", err) + } + carolBalance := carolBalResp.ConfirmedBalance + if carolBalance <= carolStartingBalance { + t.Fatalf("expected carol to have balance above %d, "+ + "instead had %v", carolStartingBalance, + carolBalance) + } + + assertNodeNumChannels(t, ctxb, dave, 0) + assertNodeNumChannels(t, ctxb, carol, 0) +} + // assertNodeNumChannels polls the provided node's list channels rpc until it // reaches the desired number of total channels. func assertNodeNumChannels(t *harnessTest, ctxb context.Context, node *lntest.HarnessNode, numChannels int) { - // Poll alice for her list of channels. + // Poll node for its list of channels. req := &lnrpc.ListChannelsRequest{} var predErr error pred := func() bool { chanInfo, err := node.ListChannels(ctxb, req) if err != nil { - predErr = fmt.Errorf("unable to query for alice's "+ + predErr = fmt.Errorf("unable to query for node's "+ "channels: %v", err) return false } @@ -10814,6 +11138,10 @@ var testsCases = []*testCase{ name: "revoked uncooperative close retribution remote hodl", test: testRevokedCloseRetributionRemoteHodl, }, + { + name: "data loss protection", + test: testDataLossProtection, + }, { name: "query routes", test: testQueryRoutes,