Merge pull request #4933 from halseth/router-delay-zombie-pruning

routing: delay initial zombie prune by 30 sec
This commit is contained in:
Olaoluwa Osuntokun 2021-03-31 13:22:36 -07:00 committed by GitHub
commit e7400dfb2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 18 deletions

@ -43,6 +43,12 @@ const (
// if a channel should be pruned or not. // if a channel should be pruned or not.
DefaultChannelPruneExpiry = time.Duration(time.Hour * 24 * 14) DefaultChannelPruneExpiry = time.Duration(time.Hour * 24 * 14)
// DefaultFirstTimePruneDelay is the time we'll wait after startup
// before attempting to prune the graph for zombie channels. We don't
// do it immediately after startup to allow lnd to start up without
// getting blocked by this job.
DefaultFirstTimePruneDelay = 30 * time.Second
// defaultStatInterval governs how often the router will log non-empty // defaultStatInterval governs how often the router will log non-empty
// stats related to processing new channels, updates, or node // stats related to processing new channels, updates, or node
// announcements. // announcements.
@ -306,6 +312,12 @@ type Config struct {
// should examine the channel graph to garbage collect zombie channels. // should examine the channel graph to garbage collect zombie channels.
GraphPruneInterval time.Duration GraphPruneInterval time.Duration
// FirstTimePruneDelay is the time we'll wait after startup before
// attempting to prune the graph for zombie channels. We don't do it
// immediately after startup to allow lnd to start up without getting
// blocked by this job.
FirstTimePruneDelay time.Duration
// QueryBandwidth is a method that allows the router to query the lower // QueryBandwidth is a method that allows the router to query the lower
// link layer to determine the up to date available bandwidth at a // link layer to determine the up to date available bandwidth at a
// prospective link to be traversed. If the link isn't available, then // prospective link to be traversed. If the link isn't available, then
@ -485,11 +497,21 @@ func (r *ChannelRouter) Start() error {
// If AssumeChannelValid is present, then we won't rely on pruning // If AssumeChannelValid is present, then we won't rely on pruning
// channels from the graph based on their spentness, but whether they // channels from the graph based on their spentness, but whether they
// are considered zombies or not. // are considered zombies or not. We will start zombie pruning after a
// small delay, to avoid slowing down startup of lnd.
if r.cfg.AssumeChannelValid { if r.cfg.AssumeChannelValid {
if err := r.pruneZombieChans(); err != nil { time.AfterFunc(r.cfg.FirstTimePruneDelay, func() {
return err select {
} case <-r.quit:
return
default:
}
log.Info("Initial zombie prune starting")
if err := r.pruneZombieChans(); err != nil {
log.Errorf("Unable to prune zombies: %v", err)
}
})
} else { } else {
// Otherwise, we'll use our filtered chain view to prune // Otherwise, we'll use our filtered chain view to prune
// channels as soon as they are detected as spent on-chain. // channels as soon as they are detected as spent on-chain.
@ -875,6 +897,9 @@ func (r *ChannelRouter) pruneZombieChans() error {
} }
log.Infof("Pruning %v zombie channels", len(chansToPrune)) log.Infof("Pruning %v zombie channels", len(chansToPrune))
if len(chansToPrune) == 0 {
return nil
}
// With the set of zombie-like channels obtained, we'll do another pass // With the set of zombie-like channels obtained, we'll do another pass
// to delete them from the channel graph. // to delete them from the channel graph.

@ -1541,6 +1541,9 @@ func TestWakeUpOnStaleBranch(t *testing.T) {
Control: makeMockControlTower(), Control: makeMockControlTower(),
ChannelPruneExpiry: time.Hour * 24, ChannelPruneExpiry: time.Hour * 24,
GraphPruneInterval: time.Hour * 2, GraphPruneInterval: time.Hour * 2,
// We'll set the delay to zero to prune immediately.
FirstTimePruneDelay: 0,
}) })
if err != nil { if err != nil {
t.Fatalf("unable to create router %v", err) t.Fatalf("unable to create router %v", err)
@ -2157,6 +2160,9 @@ func testPruneChannelGraphDoubleDisabled(t *testing.T, assumeValid bool) {
if !assumeValid { if !assumeValid {
assertChannelsPruned(t, ctx.graph, testChannels) assertChannelsPruned(t, ctx.graph, testChannels)
} else { } else {
// Sleep to allow the pruning to finish.
time.Sleep(200 * time.Millisecond)
prunedChannel := testChannels[len(testChannels)-1].ChannelID prunedChannel := testChannels[len(testChannels)-1].ChannelID
assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel) assertChannelsPruned(t, ctx.graph, testChannels, prunedChannel)
} }

@ -769,20 +769,21 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
s.controlTower = routing.NewControlTower(paymentControl) s.controlTower = routing.NewControlTower(paymentControl)
s.chanRouter, err = routing.New(routing.Config{ s.chanRouter, err = routing.New(routing.Config{
Graph: chanGraph, Graph: chanGraph,
Chain: cc.ChainIO, Chain: cc.ChainIO,
ChainView: cc.ChainView, ChainView: cc.ChainView,
Payer: s.htlcSwitch, Payer: s.htlcSwitch,
Control: s.controlTower, Control: s.controlTower,
MissionControl: s.missionControl, MissionControl: s.missionControl,
SessionSource: paymentSessionSource, SessionSource: paymentSessionSource,
ChannelPruneExpiry: routing.DefaultChannelPruneExpiry, ChannelPruneExpiry: routing.DefaultChannelPruneExpiry,
GraphPruneInterval: time.Duration(time.Hour), GraphPruneInterval: time.Hour,
QueryBandwidth: queryBandwidth, FirstTimePruneDelay: routing.DefaultFirstTimePruneDelay,
AssumeChannelValid: cfg.Routing.AssumeChannelValid, QueryBandwidth: queryBandwidth,
NextPaymentID: sequencer.NextID, AssumeChannelValid: cfg.Routing.AssumeChannelValid,
PathFindingConfig: pathFindingConfig, NextPaymentID: sequencer.NextID,
Clock: clock.NewDefaultClock(), PathFindingConfig: pathFindingConfig,
Clock: clock.NewDefaultClock(),
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("can't create router: %v", err) return nil, fmt.Errorf("can't create router: %v", err)