From 9e81b1fe536d750786a1b723f7b14e9b56a83c7f Mon Sep 17 00:00:00 2001
From: "Johan T. Halseth"
Date: Tue, 20 Nov 2018 15:09:46 +0100
Subject: [PATCH] chain_watcher: poll for commit point in case of failure

We poll the database for the channel commit point with an exponential
backoff. This is meant to handle the case where we are in the process
of handling a channel sync, and the case where we detect a channel
close and must wait for the peer to come online to start channel sync
before we can proceed.
---
 contractcourt/chain_watcher.go | 49 ++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/contractcourt/chain_watcher.go b/contractcourt/chain_watcher.go
index 2d60b782..1bc1ee18 100644
--- a/contractcourt/chain_watcher.go
+++ b/contractcourt/chain_watcher.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"sync"
 	"sync/atomic"
+	"time"
 
 	"github.com/btcsuite/btcd/btcec"
 	"github.com/btcsuite/btcd/chaincfg"
@@ -16,6 +17,16 @@ import (
 	"github.com/lightningnetwork/lnd/lnwallet"
 )
 
+const (
+	// minCommitPointPollTimeout is the minimum time we'll wait before
+	// polling the database for a channel's commitpoint.
+	minCommitPointPollTimeout = 1 * time.Second
+
+	// maxCommitPointPollTimeout is the maximum time we'll wait before
+	// polling the database for a channel's commitpoint.
+	maxCommitPointPollTimeout = 10 * time.Minute
+)
+
 // LocalUnilateralCloseInfo encapsulates all the informnation we need to act
 // on a local force close that gets confirmed.
 type LocalUnilateralCloseInfo struct {
@@ -402,16 +413,38 @@ func (c *chainWatcher) closeObserver(spendNtfn *chainntnfs.SpendEvent) {
 
 		// If we are lucky, the remote peer sent us the correct
 		// commitment point during channel sync, such that we
-		// can sweep our funds.
-		// TODO(halseth): must handle the case where we haven't
-		// yet processed the chan sync message.
-		commitPoint, err := c.cfg.chanState.DataLossCommitPoint()
-		if err != nil {
+		// can sweep our funds. If we cannot find the commit
+		// point, there's not much we can do other than wait
+		// for us to retrieve it. We will attempt to retrieve
+		// it from the peer each time we connect to it.
+		// TODO(halseth): actively initiate re-connection to
+		// the peer?
+		var commitPoint *btcec.PublicKey
+		backoff := minCommitPointPollTimeout
+		for {
+			commitPoint, err = c.cfg.chanState.DataLossCommitPoint()
+			if err == nil {
+				break
+			}
+
 			log.Errorf("Unable to retrieve commitment "+
 				"point for channel(%v) with lost "+
-				"state: %v",
-				c.cfg.chanState.FundingOutpoint, err)
-			return
+				"state: %v. Retrying in %v.",
+				c.cfg.chanState.FundingOutpoint,
+				err, backoff)
+
+			select {
+			// Wait before retrying, with an exponential
+			// backoff.
+			case <-time.After(backoff):
+				backoff = 2 * backoff
+				if backoff > maxCommitPointPollTimeout {
+					backoff = maxCommitPointPollTimeout
+				}
+
+			case <-c.quit:
+				return
+			}
+		}
 
 		log.Infof("Recovered commit point(%x) for "+
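
Note (illustration only, not part of the patch): the retry strategy introduced
above can be exercised in isolation with the standalone Go sketch below. The
backoff bounds mirror minCommitPointPollTimeout/maxCommitPointPollTimeout;
pollWithBackoff and fetchCommitPoint are hypothetical names invented for this
example and are not lnd APIs.

package main

import (
	"errors"
	"fmt"
	"time"
)

const (
	// Bounds mirroring the constants introduced by the patch.
	minBackoff = 1 * time.Second
	maxBackoff = 10 * time.Minute
)

// pollWithBackoff keeps calling fetch until it succeeds, waiting between
// attempts with a capped exponential backoff, and aborts if quit is closed.
func pollWithBackoff(fetch func() (string, error),
	quit <-chan struct{}) (string, bool) {

	backoff := minBackoff
	for {
		point, err := fetch()
		if err == nil {
			return point, true
		}

		fmt.Printf("unable to retrieve commit point: %v, "+
			"retrying in %v\n", err, backoff)

		select {
		// Wait before retrying, doubling the delay up to the cap.
		case <-time.After(backoff):
			backoff *= 2
			if backoff > maxBackoff {
				backoff = maxBackoff
			}

		// Give up if the caller shuts down.
		case <-quit:
			return "", false
		}
	}
}

func main() {
	quit := make(chan struct{})

	// fetchCommitPoint is a hypothetical stand-in for the database lookup:
	// it fails twice before succeeding, mimicking a commit point that only
	// becomes available once the peer has completed channel sync.
	attempts := 0
	fetchCommitPoint := func() (string, error) {
		attempts++
		if attempts < 3 {
			return "", errors.New("no data loss commit point")
		}
		return "commit point (placeholder)", nil
	}

	if point, ok := pollWithBackoff(fetchCommitPoint, quit); ok {
		fmt.Println("recovered:", point)
	}
}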