server: always backoff for unstable peers
This commit modifies the connection peer backoff logic such that it will always back off for "unstable" peers. In this context, a peer is considered unstable if its connection lasted less than 10 minutes. If a disconnect happens with a peer whose connection lasted longer than 10 minutes, we will scale back our stored backoff for that peer. This resolves an issue that would result in a tight connection loop with remote peers. The loop stemmed from the connection duration being very short, which always drove the backoff down to the default backoff of 1 second. Short connections like this are now caught by the stable connection threshold. This commit also modifies the backoff relaxation computation to subtract the connection duration after applying randomized exponential backoff, which offers better stability when the connection duration and backoff are roughly equal.
This commit is contained in:
parent
4f43c1c943
commit
8d7eb41d48
37
server.go
37
server.go
@ -53,6 +53,12 @@ const (
|
||||
// maximumBackoff is the largest backoff we will permit when
|
||||
// reattempting connections to persistent peers.
|
||||
maximumBackoff = time.Hour
|
||||
|
||||
// defaultStableConnDuration is a floor under which all reconnection
|
||||
// attempts will apply exponential randomized backoff. Connection
|
||||
// durations exceeding this value will be eligible to have their
|
||||
// backoffs reduced.
|
||||
defaultStableConnDuration = 10 * time.Minute
|
||||
)
|
||||
|
||||
var (
|
||||
@ -1951,19 +1957,28 @@ func (s *server) nextPeerBackoff(pubStr string,
|
||||
return computeNextBackoff(backoff)
|
||||
}
|
||||
|
||||
// The peer succeeded in starting. We'll reduce the timeout duration
|
||||
// by the length of the connection before applying randomized
|
||||
// exponential backoff. We'll only apply this if:
|
||||
// backoff - connDuration > defaultBackoff
|
||||
// The peer succeeded in starting. If the connection didn't last long
|
||||
// enough to be considered stable, we'll continue to back off retries
|
||||
// with this peer.
|
||||
connDuration := time.Now().Sub(startTime)
|
||||
relaxedBackoff := backoff - connDuration
|
||||
if relaxedBackoff > defaultBackoff {
|
||||
return computeNextBackoff(relaxedBackoff)
|
||||
if connDuration < defaultStableConnDuration {
|
||||
return computeNextBackoff(backoff)
|
||||
}
|
||||
|
||||
// Otherwise, backoff - connDuration <= defaultBackoff, meaning the
|
||||
// connection lasted much longer than our previous backoff. To reward
|
||||
// such good behavior, we'll reconnect after the default timeout.
|
||||
// The peer succeeded in starting and this was a stable peer, so we'll
|
||||
// reduce the timeout duration by the length of the connection after
|
||||
// applying randomized exponential backoff. We'll only apply this in the
|
||||
// case that:
|
||||
// reb(curBackoff) - connDuration > defaultBackoff
|
||||
relaxedBackoff := computeNextBackoff(backoff) - connDuration
|
||||
if relaxedBackoff > defaultBackoff {
|
||||
return relaxedBackoff
|
||||
}
|
||||
|
||||
// Lastly, reb(curBackoff) - connDuration <= defaultBackoff, meaning
|
||||
// the stable connection lasted much longer than our previous backoff.
|
||||
// To reward such good behavior, we'll reconnect after the default
|
||||
// timeout.
|
||||
return defaultBackoff
|
||||
}
|
||||
|
||||
@ -2451,7 +2466,7 @@ func (s *server) peerTerminationWatcher(p *peer, ready chan struct{}) {
|
||||
links, err := p.server.htlcSwitch.GetLinksByInterface(p.pubKeyBytes)
|
||||
if err != nil && err != htlcswitch.ErrNoLinksFound {
|
||||
srvrLog.Errorf("Unable to get channel links for %x: %v",
|
||||
p.pubKeyBytes, err)
|
||||
p.PubKey(), err)
|
||||
}
|
||||
|
||||
for _, link := range links {
|
||||
|
Loading…
Reference in New Issue
Block a user