routing: stricter mission control state failure updates

This commit puts a mechanism in place to prevent a failure for a low
amount from being overwritten very soon after by a higher amount
failure.
This commit is contained in:
Joost Jager 2020-03-23 19:59:16 +01:00
parent 6a36ed44f8
commit 1a6b28553a
No known key found for this signature in database
GPG Key ID: A61B9D4C393C59C7
4 changed files with 91 additions and 9 deletions

@ -50,6 +50,11 @@ const (
// DefaultAprioriWeight is the default a priori weight. See
// MissionControlConfig for further explanation.
DefaultAprioriWeight = 0.5
// DefaultMinFailureRelaxInterval is the default minimum time that must
// have passed since the previously recorded failure before the failure
// amount may be raised.
DefaultMinFailureRelaxInterval = time.Minute
)
// NodeResults contains previous results from a node to its peers.
@ -113,6 +118,11 @@ type MissionControlConfig struct {
// results, unless there are none available.
AprioriWeight float64
// MinFailureRelaxInterval is the minimum time that must have passed
// since the previously recorded failure before the failure amount may
// be raised.
MinFailureRelaxInterval time.Duration
// SelfNode is our own pubkey.
SelfNode route.Vertex
}
@ -188,7 +198,7 @@ func NewMissionControl(db kvdb.Backend, cfg *MissionControlConfig) (
}
mc := &MissionControl{
state: newMissionControlState(),
state: newMissionControlState(cfg.MinFailureRelaxInterval),
now: time.Now,
cfg: cfg,
store: store,

@ -19,13 +19,21 @@ type missionControlState struct {
// lastSecondChance tracks the last time a second chance was granted for
// a directed node pair.
lastSecondChance map[DirectedNodePair]time.Time
// minFailureRelaxInterval is the minimum time that must have passed
// since the previously recorded failure before the failure amount may
// be raised.
minFailureRelaxInterval time.Duration
}
// newMissionControlState instantiates a new mission control state object.
func newMissionControlState() *missionControlState {
func newMissionControlState(
minFailureRelaxInterval time.Duration) *missionControlState {
return &missionControlState{
lastPairResult: make(map[route.Vertex]NodeResults),
lastSecondChance: make(map[DirectedNodePair]time.Time),
lastPairResult: make(map[route.Vertex]NodeResults),
lastSecondChance: make(map[DirectedNodePair]time.Time),
minFailureRelaxInterval: minFailureRelaxInterval,
}
}
@ -87,6 +95,22 @@ func (m *missionControlState) setLastPairResult(fromNode, toNode route.Vertex,
// condition) to be revived as if it just happened.
failAmt := result.amt
// Drop result if it would increase the failure amount too soon
// after a previous failure. This can happen if htlc results
// come in out of order. This check makes it easier for payment
// processes to converge to a final state.
failInterval := timestamp.Sub(current.FailTime)
if failAmt > current.FailAmt &&
failInterval < m.minFailureRelaxInterval {
log.Debugf("Ignoring higher amount failure within min "+
"failure relaxation interval: prev_fail_amt=%v, "+
"fail_amt=%v, interval=%v",
current.FailAmt, failAmt, failInterval)
return
}
current.FailTime = timestamp
current.FailAmt = failAmt

@ -0,0 +1,47 @@
package routing
import (
"testing"
"time"
"github.com/lightningnetwork/lnd/routing/route"
)
// TestMissionControlStateFailureResult tests setting failure results on the
// mission control state.
func TestMissionControlStateFailureResult(t *testing.T) {
const minFailureRelaxInterval = time.Minute
state := newMissionControlState(minFailureRelaxInterval)
var (
from = route.Vertex{1}
to = route.Vertex{2}
timestamp = testTime
)
// Report a 1000 sat failure.
state.setLastPairResult(from, to, timestamp, &pairResult{amt: 1000})
result, _ := state.getLastPairResult(from)
if result[to].FailAmt != 1000 {
t.Fatalf("unexpected fail amount %v", result[to].FailAmt)
}
// Report an 1100 sat failure one hour later. It is expected to
// overwrite the previous failure.
timestamp = timestamp.Add(time.Hour)
state.setLastPairResult(from, to, timestamp, &pairResult{amt: 1100})
result, _ = state.getLastPairResult(from)
if result[to].FailAmt != 1100 {
t.Fatalf("unexpected fail amount %v", result[to].FailAmt)
}
// Report a 1200 sat failure one second later. Because this increase of
// the failure amount is too soon after the previous failure, the result
// is not applied.
timestamp = timestamp.Add(time.Second)
state.setLastPairResult(from, to, timestamp, &pairResult{amt: 1200})
result, _ = state.getLastPairResult(from)
if result[to].FailAmt != 1100 {
t.Fatalf("unexpected fail amount %v", result[to].FailAmt)
}
}

@ -710,11 +710,12 @@ func newServer(listenAddrs []net.Addr, chanDB *channeldb.DB,
s.missionControl, err = routing.NewMissionControl(
chanDB,
&routing.MissionControlConfig{
AprioriHopProbability: routingConfig.AprioriHopProbability,
PenaltyHalfLife: routingConfig.PenaltyHalfLife,
MaxMcHistory: routingConfig.MaxMcHistory,
AprioriWeight: routingConfig.AprioriWeight,
SelfNode: selfNode.PubKeyBytes,
AprioriHopProbability: routingConfig.AprioriHopProbability,
PenaltyHalfLife: routingConfig.PenaltyHalfLife,
MaxMcHistory: routingConfig.MaxMcHistory,
AprioriWeight: routingConfig.AprioriWeight,
SelfNode: selfNode.PubKeyBytes,
MinFailureRelaxInterval: routing.DefaultMinFailureRelaxInterval,
},
)
if err != nil {