peer: add symmetric write idle timeout

In this commit, we add a 5 minute idle timer to
the write handler. After catching the write
timeouts, it's been observed that some connections
have trouble reading a message for several hours.
This typically points to a deeper issue w/ the peer
or, e.g. the remote peer switched networks. This now
mirrors the idle timeout used in the read handler,
such that we will disconnect a peer if we are unable
to send or receive a message from the peer after 5
minutes.

We also modify the readHandler to drain its
idleTimer's channel in the event that the timer had
already fired, but we successfully sent the message.
This commit is contained in:
Conner Fromknecht 2019-03-26 16:40:41 -07:00
parent b78e5f6742
commit 93e56f9ee8
No known key found for this signature in database
GPG Key ID: E7D737B67FA592C7

25
peer.go

@ -1005,7 +1005,12 @@ func (p *peer) readHandler() {
out:
for atomic.LoadInt32(&p.disconnect) == 0 {
nextMsg, err := p.readNextMessage()
idleTimer.Stop()
if !idleTimer.Stop() {
select {
case <-idleTimer.C:
default:
}
}
if err != nil {
peerLog.Infof("unable to read message from %v: %v",
p, err)
@ -1427,6 +1432,14 @@ func (p *peer) writeMessage(msg lnwire.Message) error {
//
// NOTE: This method MUST be run as a goroutine.
func (p *peer) writeHandler() {
// We'll stop the timer after a new message is sent, and also reset it
// after we process the next message.
idleTimer := time.AfterFunc(idleTimeout, func() {
err := fmt.Errorf("Peer %s no write for %s -- disconnecting",
p, idleTimeout)
p.Disconnect(err)
})
var exitErr error
const (
@ -1501,6 +1514,16 @@ out:
goto retryWithDelay
}
// The write succeeded, reset the idle timer to prevent
// us from disconnecting the peer.
if !idleTimer.Stop() {
select {
case <-idleTimer.C:
default:
}
}
idleTimer.Reset(idleTimeout)
// If the peer requested a synchronous write, respond
// with the error.
if outMsg.errChan != nil {