Merge pull request #2749 from Roasbeef/bork-force-close

lnwallet: within ForceClose, mark the channel as borked, prevent mutations for borked channels
2019-03-10 15:21:09 -07:00 · 2019-03-10 15:21:09 -07:00 · cfb5e249b9
commit cfb5e249b9
parent 3895a4f276 33ad645f8c
4 changed files with 237 additions and 22 deletions
--- a/channeldb/channel.go
+++ b/channeldb/channel.go
@ -107,6 +107,10 @@ var (
 	// mutate a channel that's been recovered.
 	ErrNoRestoredChannelMutation = fmt.Errorf("cannot mutate restored " +
 		"channel state")
+
+	// ErrChanBorked is returned when a caller attempts to mutate a borked
+	// channel.
+	ErrChanBorked = fmt.Errorf("cannot mutate borked channel")
 )

 // ChannelType is an enum-like type that describes one of several possible
@ -545,6 +549,16 @@ func (c *OpenChannel) ApplyChanStatus(status ChannelStatus) error {
 	return c.putChanStatus(status)
 }

+// ClearChanStatus allows the caller to clear a particular channel status from
+// the primary channel status bit field. After this method returns, a call to
+// HasChanStatus(status) should return false.
+func (c *OpenChannel) ClearChanStatus(status ChannelStatus) error {
+	c.Lock()
+	defer c.Unlock()
+
+	return c.clearChanStatus(status)
+}
+
 // HasChanStatus returns true if the internal bitfield channel status of the
 // target channel has the specified status bit set.
 func (c *OpenChannel) HasChanStatus(status ChannelStatus) bool {
@ -807,6 +821,20 @@ func (c *OpenChannel) MarkBorked() error {
 	return c.putChanStatus(ChanStatusBorked)
 }

+// isBorked returns true if the channel has been marked as borked in the
+// database. This requires an existing database transaction to already be
+// active.
+//
+// NOTE: The primary mutex should already be held before this method is called.
+func (c *OpenChannel) isBorked(chanBucket *bbolt.Bucket) (bool, error) {
+	channel, err := fetchOpenChannel(chanBucket, &c.FundingOutpoint)
+	if err != nil {
+		return false, err
+	}
+
+	return channel.chanStatus != ChanStatusDefault, nil
+}
+
 // MarkCommitmentBroadcasted marks the channel as a commitment transaction has
 // been broadcast, either our own or the remote, and we should watch the chain
 // for it to confirm before taking any further action.
@ -846,6 +874,35 @@ func (c *OpenChannel) putChanStatus(status ChannelStatus) error {
 	return nil
 }

+func (c *OpenChannel) clearChanStatus(status ChannelStatus) error {
+	if err := c.Db.Update(func(tx *bbolt.Tx) error {
+		chanBucket, err := fetchChanBucket(
+			tx, c.IdentityPub, &c.FundingOutpoint, c.ChainHash,
+		)
+		if err != nil {
+			return err
+		}
+
+		channel, err := fetchOpenChannel(chanBucket, &c.FundingOutpoint)
+		if err != nil {
+			return err
+		}
+
+		// Unset this bit in the bitvector on disk.
+		status = channel.chanStatus & ^status
+		channel.chanStatus = status
+
+		return putOpenChannel(chanBucket, channel)
+	}); err != nil {
+		return err
+	}
+
+	// Update the in-memory representation to keep it in sync with the DB.
+	c.chanStatus = status
+
+	return nil
+}
+
 // putChannel serializes, and stores the current state of the channel in its
 // entirety.
 func putOpenChannel(chanBucket *bbolt.Bucket, channel *OpenChannel) error {
@ -978,6 +1035,16 @@ func (c *OpenChannel) UpdateCommitment(newCommitment *ChannelCommitment) error {
 			return err
 		}

+		// If the channel is marked as borked, then for safety reasons,
+		// we shouldn't attempt any further updates.
+		isBorked, err := c.isBorked(chanBucket)
+		if err != nil {
+			return err
+		}
+		if isBorked {
+			return ErrChanBorked
+		}
+
 		if err = putChanInfo(chanBucket, c); err != nil {
 			return fmt.Errorf("unable to store chan info: %v", err)
 		}
@ -1408,6 +1475,16 @@ func (c *OpenChannel) AppendRemoteCommitChain(diff *CommitDiff) error {
 			return err
 		}

+		// If the channel is marked as borked, then for safety reasons,
+		// we shouldn't attempt any further updates.
+		isBorked, err := c.isBorked(chanBucket)
+		if err != nil {
+			return err
+		}
+		if isBorked {
+			return ErrChanBorked
+		}
+
 		// Any outgoing settles and fails necessarily have a
 		// corresponding adds in this channel's forwarding packages.
 		// Mark all of these as being fully processed in our forwarding
@ -1539,6 +1616,16 @@ func (c *OpenChannel) AdvanceCommitChainTail(fwdPkg *FwdPkg) error {
 			return err
 		}

+		// If the channel is marked as borked, then for safety reasons,
+		// we shouldn't attempt any further updates.
+		isBorked, err := c.isBorked(chanBucket)
+		if err != nil {
+			return err
+		}
+		if isBorked {
+			return ErrChanBorked
+		}
+
 		// Persist the latest preimage state to disk as the remote peer
 		// has just added to our local preimage store, and given us a
 		// new pending revocation key.
--- a/contractcourt/chain_arbitrator.go
+++ b/contractcourt/chain_arbitrator.go
@ -227,25 +227,37 @@ func newActiveChannelArbitrator(channel *channeldb.OpenChannel,
 		ShortChanID: channel.ShortChanID(),
 		BlockEpochs: blockEpoch,
 		ForceCloseChan: func() (*lnwallet.LocalForceCloseSummary, error) {
-			// With the channels fetched, attempt to locate
-			// the target channel according to its channel
-			// point.
+			// First, we mark the channel as borked, this ensure
+			// that no new state transitions can happen, and also
+			// that the link won't be loaded into the switch.
+			if err := channel.MarkBorked(); err != nil {
+				return nil, err
+			}
+
+			// With the channel marked as borked, we'll now remove
+			// the link from the switch if its there. If the link
+			// is active, then this method will block until it
+			// exits.
+			if err := c.cfg.MarkLinkInactive(chanPoint); err != nil {
+				log.Errorf("unable to mark link inactive: %v", err)
+			}
+
+			// Now that we know the link can't mutate the channel
+			// state, we'll read the channel from disk the target
+			// channel according to its channel point.
 			channel, err := c.chanSource.FetchChannel(chanPoint)
 			if err != nil {
 				return nil, err
 			}

+			// Finally, we'll force close the channel completing
+			// the force close workflow.
 			chanMachine, err := lnwallet.NewLightningChannel(
 				c.cfg.Signer, c.cfg.PreimageDB, channel, nil,
 			)
 			if err != nil {
 				return nil, err
 			}
-
-			if err := c.cfg.MarkLinkInactive(chanPoint); err != nil {
-				log.Errorf("unable to mark link inactive: %v", err)
-			}
-
 			return chanMachine.ForceClose()
 		},
 		MarkCommitmentBroadcasted: channel.MarkCommitmentBroadcasted,
--- a/lnwallet/channel.go
+++ b/lnwallet/channel.go
@ -3067,8 +3067,9 @@ func (lc *LightningChannel) SignNextCommitment() (lnwire.Sig, []lnwire.Sig, erro
 	// ensure that we aren't violating any of the constraints the remote
 	// party set up when we initially set up the channel. If we are, then
 	// we'll abort this state transition.
-	err := lc.validateCommitmentSanity(remoteACKedIndex,
-		lc.localUpdateLog.logIndex, true, nil)
+	err := lc.validateCommitmentSanity(
+		remoteACKedIndex, lc.localUpdateLog.logIndex, true, nil,
+	)
 	if err != nil {
 		return sig, htlcSigs, err
 	}
@ -3076,8 +3077,9 @@ func (lc *LightningChannel) SignNextCommitment() (lnwire.Sig, []lnwire.Sig, erro
 	// Grab the next commitment point for the remote party. This will be
 	// used within fetchCommitmentView to derive all the keys necessary to
 	// construct the commitment state.
-	keyRing := deriveCommitmentKeys(commitPoint, false, lc.localChanCfg,
-		lc.remoteChanCfg)
+	keyRing := deriveCommitmentKeys(
+		commitPoint, false, lc.localChanCfg, lc.remoteChanCfg,
+	)

 	// Create a new commitment view which will calculate the evaluated
 	// state of the remote node's new commitment including our latest added
@ -3165,7 +3167,8 @@ func (lc *LightningChannel) SignNextCommitment() (lnwire.Sig, []lnwire.Sig, erro
 	if err != nil {
 		return sig, htlcSigs, err
 	}
-	if lc.channelState.AppendRemoteCommitChain(commitDiff); err != nil {
+	err = lc.channelState.AppendRemoteCommitChain(commitDiff)
+	if err != nil {
 		return sig, htlcSigs, err
 	}

@ -4017,8 +4020,9 @@ func (lc *LightningChannel) ReceiveNewCommitment(commitSig lnwire.Sig,
 	// Ensure that this new local update from the remote node respects all
 	// the constraints we specified during initial channel setup. If not,
 	// then we'll abort the channel as they've violated our constraints.
-	err := lc.validateCommitmentSanity(lc.remoteUpdateLog.logIndex,
-		localACKedIndex, false, nil)
+	err := lc.validateCommitmentSanity(
+		lc.remoteUpdateLog.logIndex, localACKedIndex, false, nil,
+	)
 	if err != nil {
 		return err
 	}
@ -4033,8 +4037,9 @@ func (lc *LightningChannel) ReceiveNewCommitment(commitSig lnwire.Sig,
 		return err
 	}
 	commitPoint := input.ComputeCommitmentPoint(commitSecret[:])
-	keyRing := deriveCommitmentKeys(commitPoint, true, lc.localChanCfg,
-		lc.remoteChanCfg)
+	keyRing := deriveCommitmentKeys(
+		commitPoint, true, lc.localChanCfg, lc.remoteChanCfg,
+	)

 	// With the current commitment point re-calculated, construct the new
 	// commitment view which includes all the entries (pending or committed)
@ -4067,9 +4072,10 @@ func (lc *LightningChannel) ReceiveNewCommitment(commitSig lnwire.Sig,
 	localCommitTx := localCommitmentView.txn
 	multiSigScript := lc.signDesc.WitnessScript
 	hashCache := txscript.NewTxSigHashes(localCommitTx)
-	sigHash, err := txscript.CalcWitnessSigHash(multiSigScript, hashCache,
-		txscript.SigHashAll, localCommitTx, 0,
-		int64(lc.channelState.Capacity))
+	sigHash, err := txscript.CalcWitnessSigHash(
+		multiSigScript, hashCache, txscript.SigHashAll,
+		localCommitTx, 0, int64(lc.channelState.Capacity),
+	)
 	if err != nil {
 		// TODO(roasbeef): fetchview has already mutated the HTLCs...
 		//  * need to either roll-back, or make pure
@ -4442,8 +4448,10 @@ func (lc *LightningChannel) ReceiveRevocation(revMsg *lnwire.RevokeAndAck) (
 	// As we've just completed a new state transition, attempt to see if we
 	// can remove any entries from the update log which have been removed
 	// from the PoV of both commitment chains.
-	compactLogs(lc.localUpdateLog, lc.remoteUpdateLog,
-		localChainTail, remoteChainTail)
+	compactLogs(
+		lc.localUpdateLog, lc.remoteUpdateLog, localChainTail,
+		remoteChainTail,
+	)

 	return fwdPkg, addsToForward, settleFailsToForward, nil
 }
--- a/lnwallet/channel_test.go
+++ b/lnwallet/channel_test.go
@ -3641,6 +3641,8 @@ func TestChanSyncFailure(t *testing.T) {
 	// advanceState is a helper method to fully advance the channel state
 	// by one.
 	advanceState := func() {
+		t.Helper()
+
 		// We'll kick off the test by having Bob send Alice an HTLC,
 		// then lock it in with a state transition.
 		var bobPreimage [32]byte
@ -3672,6 +3674,8 @@ func TestChanSyncFailure(t *testing.T) {
 	// halfAdvance is a helper method that sends a new commitment signature
 	// from Alice to Bob, but doesn't make Bob revoke his current state.
 	halfAdvance := func() {
+		t.Helper()
+
 		// We'll kick off the test by having Bob send Alice an HTLC,
 		// then lock it in with a state transition.
 		var bobPreimage [32]byte
@ -3707,6 +3711,8 @@ func TestChanSyncFailure(t *testing.T) {
 	// assertLocalDataLoss checks that aliceOld and bobChannel detects that
 	// Alice has lost state during sync.
 	assertLocalDataLoss := func(aliceOld *LightningChannel) {
+		t.Helper()
+
 		aliceSyncMsg, err := ChanSyncMsg(aliceOld.channelState)
 		if err != nil {
 			t.Fatalf("unable to produce chan sync msg: %v", err)
@ -3733,6 +3739,25 @@ func TestChanSyncFailure(t *testing.T) {
 		}
 	}

+	// clearBorkedState is a method that allows us to clear the borked
+	// state that will arise after the first chan message sync. We need to
+	// do this in order to be able to continue to update the commitment
+	// state for our test scenarios.
+	clearBorkedState := func() {
+		err = aliceChannel.channelState.ClearChanStatus(
+			channeldb.ChanStatusLocalDataLoss | channeldb.ChanStatusBorked,
+		)
+		if err != nil {
+			t.Fatalf("unable to update channel state: %v", err)
+		}
+		err = bobChannel.channelState.ClearChanStatus(
+			channeldb.ChanStatusLocalDataLoss | channeldb.ChanStatusBorked,
+		)
+		if err != nil {
+			t.Fatalf("unable to update channel state: %v", err)
+		}
+	}
+
 	// Start by advancing the state.
 	advanceState()

@ -3755,6 +3780,9 @@ func TestChanSyncFailure(t *testing.T) {
 	// Make sure the up-to-date channels still are in sync.
 	assertNoChanSyncNeeded(t, aliceChannel, bobChannel)

+	// Clear the borked state before we attempt to advance.
+	clearBorkedState()
+
 	// Advance the state again, and do the same check.
 	advanceState()
 	assertNoChanSyncNeeded(t, aliceChannel, bobChannel)
@ -3825,6 +3853,9 @@ func TestChanSyncFailure(t *testing.T) {
 	// Make sure the up-to-date channels still are good.
 	assertNoChanSyncNeeded(t, aliceChannel, bobChannel)

+	// Clear the borked state before we attempt to advance.
+	clearBorkedState()
+
 	// Finally check that Alice is also able to detect a wrong commit point
 	// when there's a pending remote commit.
 	halfAdvance()
@ -6525,3 +6556,80 @@ func TestForceCloseFailLocalDataLoss(t *testing.T) {
 			"chan state")
 	}
 }
+
+// TestForceCloseBorkedState tests that once we force close a channel, it's
+// marked as borked in the database. Additionally, all calls to mutate channel
+// state should also fail.
+func TestForceCloseBorkedState(t *testing.T) {
+	t.Parallel()
+
+	aliceChannel, bobChannel, cleanUp, err := CreateTestChannels()
+	if err != nil {
+		t.Fatalf("unable to create test channels: %v", err)
+	}
+	defer cleanUp()
+
+	// Do the commitment dance until Bob sends a revocation so Alice is
+	// able to receive the revocation, and then also make a new state
+	// herself.
+	aliceSigs, aliceHtlcSigs, err := aliceChannel.SignNextCommitment()
+	if err != nil {
+		t.Fatalf("unable to sign commit: %v", err)
+	}
+	err = bobChannel.ReceiveNewCommitment(aliceSigs, aliceHtlcSigs)
+	if err != nil {
+		t.Fatalf("unable to receive commitment: %v", err)
+	}
+	revokeMsg, _, err := bobChannel.RevokeCurrentCommitment()
+	if err != nil {
+		t.Fatalf("unable to revoke bob commitment: %v", err)
+	}
+	bobSigs, bobHtlcSigs, err := bobChannel.SignNextCommitment()
+	if err != nil {
+		t.Fatalf("unable to sign commit: %v", err)
+	}
+	err = aliceChannel.ReceiveNewCommitment(bobSigs, bobHtlcSigs)
+	if err != nil {
+		t.Fatalf("unable to receive commitment: %v", err)
+	}
+
+	// Now that we have a new Alice channel, we'll force close once to
+	// trigger the update on disk to mark the channel as borked.
+	if _, err := aliceChannel.ForceClose(); err != nil {
+		t.Fatalf("unable to force close channel: %v", err)
+	}
+
+	// Next we'll mark the channel as borked before we proceed.
+	err = aliceChannel.channelState.ApplyChanStatus(
+		channeldb.ChanStatusBorked,
+	)
+	if err != nil {
+		t.Fatalf("unable to apply chan status: %v", err)
+	}
+
+	// The on-disk state should indicate that the channel is now borked.
+	if !aliceChannel.channelState.HasChanStatus(
+		channeldb.ChanStatusBorked,
+	) {
+		t.Fatalf("chan status not updated as borked")
+	}
+
+	// At this point, all channel mutating methods should now fail as they
+	// shouldn't be able to proceed if the channel is borked.
+	_, _, _, err = aliceChannel.ReceiveRevocation(revokeMsg)
+	if err != channeldb.ErrChanBorked {
+		t.Fatalf("advance commitment tail should have failed")
+	}
+
+	// We manually advance the commitment tail here since the above
+	// ReceiveRevocation call will fail before it's actually advanced.
+	aliceChannel.remoteCommitChain.advanceTail()
+	_, _, err = aliceChannel.SignNextCommitment()
+	if err != channeldb.ErrChanBorked {
+		t.Fatalf("sign commitment should have failed: %v", err)
+	}
+	_, _, err = aliceChannel.RevokeCurrentCommitment()
+	if err != channeldb.ErrChanBorked {
+		t.Fatalf("append remove chain tail should have failed")
+	}
+}