routing: extract mission control state

Preparation for unit testing just the state logic.
This commit is contained in:
Joost Jager 2020-03-24 16:26:48 +01:00
parent 4726c674db
commit 6a36ed44f8
No known key found for this signature in database
GPG Key ID: A61B9D4C393C59C7
2 changed files with 203 additions and 157 deletions

View File

@ -65,15 +65,9 @@ type NodeResults map[route.Vertex]TimedPairResult
// since the last failure is used to estimate a success probability that is fed
// into the path finding process for subsequent payment attempts.
type MissionControl struct {
// lastPairResult tracks the last payment result (on a pair basis) for
// each transited node. This is a multi-layer map that allows us to look
// up the failure history of all connected channels (node pairs) for a
// particular node.
lastPairResult map[route.Vertex]NodeResults
// lastSecondChance tracks the last time a second chance was granted for
// a directed node pair.
lastSecondChance map[DirectedNodePair]time.Time
// state is the internal mission control state that is input for
// probability estimation.
state *missionControlState
// now is expected to return the current time. It is supplied as an
// external function to enable deterministic unit tests.
@ -194,12 +188,11 @@ func NewMissionControl(db kvdb.Backend, cfg *MissionControlConfig) (
}
mc := &MissionControl{
lastPairResult: make(map[route.Vertex]NodeResults),
lastSecondChance: make(map[DirectedNodePair]time.Time),
now: time.Now,
cfg: cfg,
store: store,
estimator: estimator,
state: newMissionControlState(),
now: time.Now,
cfg: cfg,
store: store,
estimator: estimator,
}
if err := mc.init(); err != nil {
@ -240,8 +233,7 @@ func (m *MissionControl) ResetHistory() error {
return err
}
m.lastPairResult = make(map[route.Vertex]NodeResults)
m.lastSecondChance = make(map[DirectedNodePair]time.Time)
m.state.resetHistory()
log.Debugf("Mission control history cleared")
@ -257,7 +249,7 @@ func (m *MissionControl) GetProbability(fromNode, toNode route.Vertex,
defer m.Unlock()
now := m.now()
results := m.lastPairResult[fromNode]
results, _ := m.state.getLastPairResult(fromNode)
// Use a distinct probability estimation function for local channels.
if fromNode == m.cfg.SelfNode {
@ -267,148 +259,15 @@ func (m *MissionControl) GetProbability(fromNode, toNode route.Vertex,
return m.estimator.getPairProbability(now, results, toNode, amt)
}
// setLastPairResult stores a result for a node pair.
func (m *MissionControl) setLastPairResult(fromNode, toNode route.Vertex,
timestamp time.Time, result *pairResult) {
nodePairs, ok := m.lastPairResult[fromNode]
if !ok {
nodePairs = make(NodeResults)
m.lastPairResult[fromNode] = nodePairs
}
current := nodePairs[toNode]
// Apply the new result to the existing data for this pair. If there is
// no existing data, apply it to the default values for TimedPairResult.
if result.success {
successAmt := result.amt
current.SuccessTime = timestamp
// Only update the success amount if this amount is higher. This
// prevents the success range from shrinking when there is no
// reason to do so. For example: small amount probes shouldn't
// affect a previous success for a much larger amount.
if successAmt > current.SuccessAmt {
current.SuccessAmt = successAmt
}
// If the success amount goes into the failure range, move the
// failure range up. Future attempts up to the success amount
// are likely to succeed. We don't want to clear the failure
// completely, because we haven't learnt much for amounts above
// the current success amount.
if !current.FailTime.IsZero() && successAmt >= current.FailAmt {
current.FailAmt = successAmt + 1
}
} else {
// For failures we always want to update both the amount and the
// time. Those need to relate to the same result, because the
// time is used to gradually diminish the penality for that
// specific result. Updating the timestamp but not the amount
// could cause a failure for a lower amount (a more severe
// condition) to be revived as if it just happened.
failAmt := result.amt
current.FailTime = timestamp
current.FailAmt = failAmt
switch {
// The failure amount is set to zero when the failure is
// amount-independent, meaning that the attempt would have
// failed regardless of the amount. This should also reset the
// success amount to zero.
case failAmt == 0:
current.SuccessAmt = 0
// If the failure range goes into the success range, move the
// success range down.
case failAmt <= current.SuccessAmt:
current.SuccessAmt = failAmt - 1
}
}
log.Debugf("Setting %v->%v range to [%v-%v]",
fromNode, toNode, current.SuccessAmt, current.FailAmt)
nodePairs[toNode] = current
}
// setAllFail stores a fail result for all known connections to and from the
// given node.
func (m *MissionControl) setAllFail(node route.Vertex,
timestamp time.Time) {
for fromNode, nodePairs := range m.lastPairResult {
for toNode := range nodePairs {
if fromNode == node || toNode == node {
nodePairs[toNode] = TimedPairResult{
FailTime: timestamp,
}
}
}
}
}
// requestSecondChance checks whether the node fromNode can have a second chance
// at providing a channel update for its channel with toNode.
func (m *MissionControl) requestSecondChance(timestamp time.Time,
fromNode, toNode route.Vertex) bool {
// Look up previous second chance time.
pair := DirectedNodePair{
From: fromNode,
To: toNode,
}
lastSecondChance, ok := m.lastSecondChance[pair]
// If the channel hasn't already be given a second chance or its last
// second chance was long ago, we give it another chance.
if !ok || timestamp.Sub(lastSecondChance) > minSecondChanceInterval {
m.lastSecondChance[pair] = timestamp
log.Debugf("Second chance granted for %v->%v", fromNode, toNode)
return true
}
// Otherwise penalize the channel, because we don't allow channel
// updates that are that frequent. This is to prevent nodes from keeping
// us busy by continuously sending new channel updates.
log.Debugf("Second chance denied for %v->%v, remaining interval: %v",
fromNode, toNode, timestamp.Sub(lastSecondChance))
return false
}
// GetHistorySnapshot takes a snapshot from the current mission control state
// and actual probability estimates.
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
m.Lock()
defer m.Unlock()
log.Debugf("Requesting history snapshot from mission control: "+
"pair_result_count=%v", len(m.lastPairResult))
log.Debugf("Requesting history snapshot from mission control")
pairs := make([]MissionControlPairSnapshot, 0, len(m.lastPairResult))
for fromNode, fromPairs := range m.lastPairResult {
for toNode, result := range fromPairs {
pair := NewDirectedNodePair(fromNode, toNode)
pairSnapshot := MissionControlPairSnapshot{
Pair: pair,
TimedPairResult: result,
}
pairs = append(pairs, pairSnapshot)
}
}
snapshot := MissionControlSnapshot{
Pairs: pairs,
}
return &snapshot
return m.state.getSnapshot()
}
// GetPairHistorySnapshot returns the stored history for a given node pair.
@ -418,7 +277,7 @@ func (m *MissionControl) GetPairHistorySnapshot(
m.Lock()
defer m.Unlock()
results, ok := m.lastPairResult[fromNode]
results, ok := m.state.getLastPairResult(fromNode)
if !ok {
return TimedPairResult{}
}
@ -507,7 +366,7 @@ func (m *MissionControl) applyPaymentResult(
defer m.Unlock()
if i.policyFailure != nil {
if m.requestSecondChance(
if m.state.requestSecondChance(
result.timeReply,
i.policyFailure.From, i.policyFailure.To,
) {
@ -536,7 +395,7 @@ func (m *MissionControl) applyPaymentResult(
log.Debugf("Reporting node failure to Mission Control: "+
"node=%v", *i.nodeFailure)
m.setAllFail(*i.nodeFailure, result.timeReply)
m.state.setAllFail(*i.nodeFailure, result.timeReply)
}
for pair, pairResult := range i.pairResults {
@ -551,7 +410,7 @@ func (m *MissionControl) applyPaymentResult(
pair, pairResult.amt)
}
m.setLastPairResult(
m.state.setLastPairResult(
pair.From, pair.To, result.timeReply, &pairResult,
)
}

View File

@ -0,0 +1,187 @@
package routing
import (
"time"
"github.com/lightningnetwork/lnd/routing/route"
)
// missionControlState is an object that manages the internal mission control
// state. Note that it isn't thread safe and synchronization needs to be
// enforced externally.
type missionControlState struct {
// lastPairResult tracks the last payment result (on a pair basis) for
// each transited node. This is a multi-layer map that allows us to look
// up the failure history of all connected channels (node pairs) for a
// particular node.
lastPairResult map[route.Vertex]NodeResults
// lastSecondChance tracks the last time a second chance was granted for
// a directed node pair.
lastSecondChance map[DirectedNodePair]time.Time
}
// newMissionControlState instantiates a new mission control state object.
func newMissionControlState() *missionControlState {
return &missionControlState{
lastPairResult: make(map[route.Vertex]NodeResults),
lastSecondChance: make(map[DirectedNodePair]time.Time),
}
}
// getLastPairResult returns the current state for connections to the given node.
func (m *missionControlState) getLastPairResult(node route.Vertex) (NodeResults,
bool) {
result, ok := m.lastPairResult[node]
return result, ok
}
// ResetHistory resets the history of MissionControl returning it to a state as
// if no payment attempts have been made.
func (m *missionControlState) resetHistory() {
m.lastPairResult = make(map[route.Vertex]NodeResults)
m.lastSecondChance = make(map[DirectedNodePair]time.Time)
}
// setLastPairResult stores a result for a node pair.
func (m *missionControlState) setLastPairResult(fromNode, toNode route.Vertex,
timestamp time.Time, result *pairResult) {
nodePairs, ok := m.lastPairResult[fromNode]
if !ok {
nodePairs = make(NodeResults)
m.lastPairResult[fromNode] = nodePairs
}
current := nodePairs[toNode]
// Apply the new result to the existing data for this pair. If there is
// no existing data, apply it to the default values for TimedPairResult.
if result.success {
successAmt := result.amt
current.SuccessTime = timestamp
// Only update the success amount if this amount is higher. This
// prevents the success range from shrinking when there is no
// reason to do so. For example: small amount probes shouldn't
// affect a previous success for a much larger amount.
if successAmt > current.SuccessAmt {
current.SuccessAmt = successAmt
}
// If the success amount goes into the failure range, move the
// failure range up. Future attempts up to the success amount
// are likely to succeed. We don't want to clear the failure
// completely, because we haven't learnt much for amounts above
// the current success amount.
if !current.FailTime.IsZero() && successAmt >= current.FailAmt {
current.FailAmt = successAmt + 1
}
} else {
// For failures we always want to update both the amount and the
// time. Those need to relate to the same result, because the
// time is used to gradually diminish the penality for that
// specific result. Updating the timestamp but not the amount
// could cause a failure for a lower amount (a more severe
// condition) to be revived as if it just happened.
failAmt := result.amt
current.FailTime = timestamp
current.FailAmt = failAmt
switch {
// The failure amount is set to zero when the failure is
// amount-independent, meaning that the attempt would have
// failed regardless of the amount. This should also reset the
// success amount to zero.
case failAmt == 0:
current.SuccessAmt = 0
// If the failure range goes into the success range, move the
// success range down.
case failAmt <= current.SuccessAmt:
current.SuccessAmt = failAmt - 1
}
}
log.Debugf("Setting %v->%v range to [%v-%v]",
fromNode, toNode, current.SuccessAmt, current.FailAmt)
nodePairs[toNode] = current
}
// setAllFail stores a fail result for all known connections to and from the
// given node.
func (m *missionControlState) setAllFail(node route.Vertex,
timestamp time.Time) {
for fromNode, nodePairs := range m.lastPairResult {
for toNode := range nodePairs {
if fromNode == node || toNode == node {
nodePairs[toNode] = TimedPairResult{
FailTime: timestamp,
}
}
}
}
}
// requestSecondChance checks whether the node fromNode can have a second chance
// at providing a channel update for its channel with toNode.
func (m *missionControlState) requestSecondChance(timestamp time.Time,
fromNode, toNode route.Vertex) bool {
// Look up previous second chance time.
pair := DirectedNodePair{
From: fromNode,
To: toNode,
}
lastSecondChance, ok := m.lastSecondChance[pair]
// If the channel hasn't already be given a second chance or its last
// second chance was long ago, we give it another chance.
if !ok || timestamp.Sub(lastSecondChance) > minSecondChanceInterval {
m.lastSecondChance[pair] = timestamp
log.Debugf("Second chance granted for %v->%v", fromNode, toNode)
return true
}
// Otherwise penalize the channel, because we don't allow channel
// updates that are that frequent. This is to prevent nodes from keeping
// us busy by continuously sending new channel updates.
log.Debugf("Second chance denied for %v->%v, remaining interval: %v",
fromNode, toNode, timestamp.Sub(lastSecondChance))
return false
}
// GetHistorySnapshot takes a snapshot from the current mission control state
// and actual probability estimates.
func (m *missionControlState) getSnapshot() *MissionControlSnapshot {
log.Debugf("Requesting history snapshot from mission control: "+
"pair_result_count=%v", len(m.lastPairResult))
pairs := make([]MissionControlPairSnapshot, 0, len(m.lastPairResult))
for fromNode, fromPairs := range m.lastPairResult {
for toNode, result := range fromPairs {
pair := NewDirectedNodePair(fromNode, toNode)
pairSnapshot := MissionControlPairSnapshot{
Pair: pair,
TimedPairResult: result,
}
pairs = append(pairs, pairSnapshot)
}
}
snapshot := MissionControlSnapshot{
Pairs: pairs,
}
return &snapshot
}