Merge pull request #4933 from halseth/router-delay-zombie-pruning

routing: delay initial zombie prune by 30 sec
This commit is contained in:
Olaoluwa Osuntokun 2021-03-31 13:22:36 -07:00 committed by GitHub
commit e7400dfb2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 18 deletions

@@ -43,6 +43,12 @@ const (
// if a channel should be pruned or not.
DefaultChannelPruneExpiry = time.Duration(time.Hour * 24 * 14)
// DefaultFirstTimePruneDelay is the time we'll wait after startup
// before attempting to prune the graph for zombie channels. We don't
// do it immediately after startup to allow lnd to start up without
// getting blocked by this job.
DefaultFirstTimePruneDelay = 30 * time.Second
// defaultStatInterval governs how often the router will log non-empty
// stats related to processing new channels, updates, or node
// announcements.
@@ -306,6 +312,12 @@ type Config struct {
// should examine the channel graph to garbage collect zombie channels.
GraphPruneInterval time.Duration
// FirstTimePruneDelay is the time we'll wait after startup before
// attempting to prune the graph for zombie channels. We don't do it
// immediately after startup to allow lnd to start up without getting
// blocked by this job.
FirstTimePruneDelay time.Duration
// QueryBandwidth is a method that allows the router to query the lower
// link layer to determine the up to date available bandwidth at a
// prospective link to be traversed. If the link isn't available, then
@@ -485,11 +497,21 @@ func (r *ChannelRouter) Start() error {
// If AssumeChannelValid is present, then we won't rely on pruning
// channels from the graph based on their spentness, but whether they
// are considered zombies or not.
// are considered zombies or not. We will start zombie pruning after a
// small delay, to avoid slowing down startup of lnd.
if r.cfg.AssumeChannelValid {
if err := r.pruneZombieChans(); err != nil {
return err
time.AfterFunc(r.cfg.FirstTimePruneDelay, func() {
select {
case <-r.quit:
return
default:
}
log.Info("Initial zombie prune starting")
if err := r.pruneZombieChans(); err != nil {
log.Errorf("Unable to prune zombies: %v", err)
}
})
} else {
// Otherwise, we'll use our filtered chain view to prune
// channels as soon as they are detected as spent on-chain.
@@ -875,6 +897,9 @@ func (r *ChannelRouter) pruneZombieChans() error {
}
log.Infof("Pruning %v zombie channels", len(chansToPrune))
if len(chansToPrune) == 0 {
return nil
}
// With the set of zombie-like channels obtained, we'll do another pass
// to delete them from the channel graph.

@@ -1541,6 +1541,9 @@ func TestWakeUpOnStaleBranch(t *testing.T) {
Control: makeMockControlTower(),
ChannelPruneExpiry: time.Hour * 24,
GraphPruneInterval: time.Hour * 2,
// We'll set the delay to zero to prune immediately.
FirstTimePruneDelay: 0,
})
if err != nil {
t.Fatalf("unable to create router %v", err)
@@ -2157,6 +2160,9 @@ func testPruneChannelGraphDoubleDisabled(t *testing.T, assumeValid bool) {
if !assumeValid {
assertChannelsPruned(t, ctx.graph, testChannels)
} else {
// Sleep to allow the pruning to finish.
time.Sleep(200 * time.Millisecond)
prunedChannel := testChannels[len(testChannels)-1].ChannelID
assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel)
}

@@ -777,7 +777,8 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
MissionControl: s.missionControl,
SessionSource: paymentSessionSource,
ChannelPruneExpiry: routing.DefaultChannelPruneExpiry,
GraphPruneInterval: time.Duration(time.Hour),
GraphPruneInterval: time.Hour,
FirstTimePruneDelay: routing.DefaultFirstTimePruneDelay,
QueryBandwidth: queryBandwidth,
AssumeChannelValid: cfg.Routing.AssumeChannelValid,
NextPaymentID: sequencer.NextID,