routing: delay initial zombie prune by 30 sec

Zombie pruning can be very slow on some devices (e.g. mobile), and since
it previously ran synchronously during startup, it would stall lnd. Because
lnd does not need pruning to have finished in order to be functional, we
instead delay the initial prune by 30 seconds and run it from a timer.

Note that we could also wait for the GraphPruneInterval to tick, but
since this is set to 1 hour by default, it is unlikely that a mobile app
will ever be open that long.
This commit is contained in:
Johan T. Halseth 2021-03-11 13:36:54 +01:00
parent d85d82824c
commit a0f3624303
No known key found for this signature in database
GPG Key ID: 15BAADA29DA20D26
3 changed files with 47 additions and 18 deletions

@ -43,6 +43,12 @@ const (
// if a channel should be pruned or not.
DefaultChannelPruneExpiry = time.Duration(time.Hour * 24 * 14)
// DefaultFirstTimePruneDelay is the time we'll wait after startup
// before attempting to prune the graph for zombie channels. We don't
// do it immediately after startup to allow lnd to start up without
// getting blocked by this job.
DefaultFirstTimePruneDelay = 30 * time.Second
// defaultStatInterval governs how often the router will log non-empty
// stats related to processing new channels, updates, or node
// announcements.
@ -306,6 +312,12 @@ type Config struct {
// should examine the channel graph to garbage collect zombie channels.
GraphPruneInterval time.Duration
// FirstTimePruneDelay is the time we'll wait after startup before
// attempting to prune the graph for zombie channels. We don't do it
// immediately after startup to allow lnd to start up without getting
// blocked by this job.
FirstTimePruneDelay time.Duration
// QueryBandwidth is a method that allows the router to query the lower
// link layer to determine the up to date available bandwidth at a
// prospective link to be traversed. If the link isn't available, then
@ -485,11 +497,21 @@ func (r *ChannelRouter) Start() error {
// If AssumeChannelValid is present, then we won't rely on pruning
// channels from the graph based on their spentness, but whether they
// are considered zombies or not.
// are considered zombies or not. We will start zombie pruning after a
// small delay, to avoid slowing down startup of lnd.
if r.cfg.AssumeChannelValid {
if err := r.pruneZombieChans(); err != nil {
return err
time.AfterFunc(r.cfg.FirstTimePruneDelay, func() {
select {
case <-r.quit:
return
default:
}
log.Info("Initial zombie prune starting")
if err := r.pruneZombieChans(); err != nil {
log.Errorf("Unable to prune zombies: %v", err)
}
})
} else {
// Otherwise, we'll use our filtered chain view to prune
// channels as soon as they are detected as spent on-chain.

@ -1541,6 +1541,9 @@ func TestWakeUpOnStaleBranch(t *testing.T) {
Control: makeMockControlTower(),
ChannelPruneExpiry: time.Hour * 24,
GraphPruneInterval: time.Hour * 2,
// We'll set the delay to zero to prune immediately.
FirstTimePruneDelay: 0,
})
if err != nil {
t.Fatalf("unable to create router %v", err)
@ -2157,6 +2160,9 @@ func testPruneChannelGraphDoubleDisabled(t *testing.T, assumeValid bool) {
if !assumeValid {
assertChannelsPruned(t, ctx.graph, testChannels)
} else {
// Sleep to allow the pruning to finish.
time.Sleep(200 * time.Millisecond)
prunedChannel := testChannels[len(testChannels)-1].ChannelID
assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel)
}

@ -777,7 +777,8 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
MissionControl: s.missionControl,
SessionSource: paymentSessionSource,
ChannelPruneExpiry: routing.DefaultChannelPruneExpiry,
GraphPruneInterval: time.Duration(time.Hour),
GraphPruneInterval: time.Hour,
FirstTimePruneDelay: routing.DefaultFirstTimePruneDelay,
QueryBandwidth: queryBandwidth,
AssumeChannelValid: cfg.Routing.AssumeChannelValid,
NextPaymentID: sequencer.NextID,