Merge pull request #1840 from cfromknecht/backoff-short-conns

server: always backoff for unstable peers
This commit is contained in:
Olaoluwa Osuntokun 2018-09-04 19:12:54 -07:00 committed by GitHub
commit 45c607c537
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -53,6 +53,12 @@ const (
// maximumBackoff is the largest backoff we will permit when // maximumBackoff is the largest backoff we will permit when
// reattempting connections to persistent peers. // reattempting connections to persistent peers.
maximumBackoff = time.Hour maximumBackoff = time.Hour
// defaultStableConnDuration is a floor under which all reconnection
// attempts will apply exponential randomized backoff. Connection
// durations exceeding this value will be eligible to have their
// backoffs reduced.
defaultStableConnDuration = 10 * time.Minute
) )
var ( var (
@ -1951,19 +1957,28 @@ func (s *server) nextPeerBackoff(pubStr string,
return computeNextBackoff(backoff) return computeNextBackoff(backoff)
} }
// The peer succeeded in starting. We'll reduce the timeout duration // The peer succeeded in starting. If the connection didn't last long
// by the length of the connection before applying randomized // enough to be considered stable, we'll continue to back off retries
// exponential backoff. We'll only apply this if: // with this peer.
// backoff - connDuration > defaultBackoff
connDuration := time.Now().Sub(startTime) connDuration := time.Now().Sub(startTime)
relaxedBackoff := backoff - connDuration if connDuration < defaultStableConnDuration {
if relaxedBackoff > defaultBackoff { return computeNextBackoff(backoff)
return computeNextBackoff(relaxedBackoff)
} }
// Otherwise, backoff - connDuration <= defaultBackoff, meaning the // The peer succeeded in starting and this was a stable peer, so we'll
// connection lasted much longer than our previous backoff. To reward // reduce the timeout duration by the length of the connection after
// such good behavior, we'll reconnect after the default timeout. // applying randomized exponential backoff (reb). We'll only apply this in the
// case that:
// reb(curBackoff) - connDuration > defaultBackoff
relaxedBackoff := computeNextBackoff(backoff) - connDuration
if relaxedBackoff > defaultBackoff {
return relaxedBackoff
}
// Lastly, reb(curBackoff) - connDuration <= defaultBackoff, meaning
// the stable connection lasted much longer than our previous backoff.
// To reward such good behavior, we'll reconnect after the default
// timeout.
return defaultBackoff return defaultBackoff
} }
@ -2451,7 +2466,7 @@ func (s *server) peerTerminationWatcher(p *peer, ready chan struct{}) {
links, err := p.server.htlcSwitch.GetLinksByInterface(p.pubKeyBytes) links, err := p.server.htlcSwitch.GetLinksByInterface(p.pubKeyBytes)
if err != nil && err != htlcswitch.ErrNoLinksFound { if err != nil && err != htlcswitch.ErrNoLinksFound {
srvrLog.Errorf("Unable to get channel links for %x: %v", srvrLog.Errorf("Unable to get channel links for %x: %v",
p.pubKeyBytes, err) p.PubKey(), err)
} }
for _, link := range links { for _, link := range links {