Merge pull request #2885 from cfromknecht/stagger-initial-reconnect

server: stagger initial reconnects
This commit is contained in:
Conner Fromknecht 2019-04-05 15:46:12 -07:00 committed by GitHub
commit 25d2b1b537
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 1 deletions

@ -256,6 +256,8 @@ type config struct {
RejectPush bool `long:"rejectpush" description:"If true, lnd will not accept channel opening requests with non-zero push amounts. This should prevent accidental pushes to merchant nodes."` RejectPush bool `long:"rejectpush" description:"If true, lnd will not accept channel opening requests with non-zero push amounts. This should prevent accidental pushes to merchant nodes."`
StaggerInitialReconnect bool `long:"stagger-initial-reconnect" description:"If true, will apply a randomized staggering between 0s and 30s when reconnecting to persistent peers on startup. The first 10 reconnections will be attempted instantly, regardless of the flag's value"`
net tor.Net net tor.Net
Routing *routing.Conf `group:"routing" namespace:"routing"` Routing *routing.Conf `group:"routing" namespace:"routing"`

@ -8,6 +8,7 @@ import (
"fmt" "fmt"
"image/color" "image/color"
"math/big" "math/big"
prand "math/rand"
"net" "net"
"path/filepath" "path/filepath"
"regexp" "regexp"
@ -60,6 +61,18 @@ const (
// durations exceeding this value will be eligible to have their // durations exceeding this value will be eligible to have their
// backoffs reduced. // backoffs reduced.
defaultStableConnDuration = 10 * time.Minute defaultStableConnDuration = 10 * time.Minute
// numInstantInitReconnect specifies how many persistent peers we should
// always attempt outbound connections to immediately. After this value
// is surpassed, the remaining peers will be randomly delayed using
// maxInitReconnectDelay.
numInstantInitReconnect = 10
// maxInitReconnectDelay specifies the maximum delay in seconds we will
// apply in attempting to reconnect to persistent peers on startup. The
// value used for a particular peer will be chosen between 0s and this
// value.
maxInitReconnectDelay = 30
) )
var ( var (
@ -1932,6 +1945,7 @@ func (s *server) establishPersistentConnections() error {
// Iterate through the combined list of addresses from prior links and // Iterate through the combined list of addresses from prior links and
// node announcements and attempt to reconnect to each node. // node announcements and attempt to reconnect to each node.
var numOutboundConns int
for pubStr, nodeAddr := range nodeAddrsMap { for pubStr, nodeAddr := range nodeAddrsMap {
// Add this peer to the set of peers we should maintain a // Add this peer to the set of peers we should maintain a
// persistent connection with. // persistent connection with.
@ -1962,13 +1976,42 @@ func (s *server) establishPersistentConnections() error {
s.persistentConnReqs[pubStr] = append( s.persistentConnReqs[pubStr] = append(
s.persistentConnReqs[pubStr], connReq) s.persistentConnReqs[pubStr], connReq)
go s.connMgr.Connect(connReq) // We'll connect to the first 10 peers immediately, then
// randomly stagger any remaining connections if the
// stagger initial reconnect flag is set. This ensures
// that mobile nodes or nodes with a small number of
// channels obtain connectivity quickly, but larger
// nodes are able to disperse the costs of connecting to
// all peers at once.
if numOutboundConns < numInstantInitReconnect ||
!cfg.StaggerInitialReconnect {
go s.connMgr.Connect(connReq)
} else {
go s.delayInitialReconnect(connReq)
}
} }
numOutboundConns++
} }
return nil return nil
} }
// delayInitialReconnect waits for a randomly sampled delay and then attempts
// a reconnection using the passed connReq. The delay is a whole number of
// seconds drawn from the half-open range [0, maxInitReconnectDelay), so the
// upper bound itself is never chosen. If the server's quit channel fires
// before the delay elapses, the method returns without attempting the
// connection.
//
// NOTE: This method MUST be run as a goroutine.
func (s *server) delayInitialReconnect(connReq *connmgr.ConnReq) {
	delay := time.Duration(prand.Intn(maxInitReconnectDelay)) * time.Second

	// Use an explicit timer rather than time.After so that the timer is
	// stopped as soon as we return, even when we exit early on shutdown
	// instead of lingering until the sampled delay expires.
	timer := time.NewTimer(delay)
	defer timer.Stop()

	select {
	case <-timer.C:
		s.connMgr.Connect(connReq)
	case <-s.quit:
	}
}
// prunePersistentPeerConnection removes all internal state related to // prunePersistentPeerConnection removes all internal state related to
// persistent connections to a peer within the server. This is used to avoid // persistent connections to a peer within the server. This is used to avoid
// persistent connection retries to peers we do not have any open channels with. // persistent connection retries to peers we do not have any open channels with.