2017-10-17 04:57:30 +03:00
|
|
|
package routing
|
|
|
|
|
|
|
|
import (
|
2019-03-19 19:09:27 +03:00
|
|
|
"math"
|
2017-10-17 04:57:30 +03:00
|
|
|
"sync"
|
|
|
|
"time"
|
2017-10-18 05:41:46 +03:00
|
|
|
|
2018-05-08 07:04:31 +03:00
|
|
|
"github.com/coreos/bbolt"
|
2017-10-18 05:41:46 +03:00
|
|
|
"github.com/lightningnetwork/lnd/channeldb"
|
2018-03-27 06:53:46 +03:00
|
|
|
"github.com/lightningnetwork/lnd/lnwire"
|
2019-04-05 18:36:11 +03:00
|
|
|
"github.com/lightningnetwork/lnd/routing/route"
|
2017-10-17 04:57:30 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// DefaultPenaltyHalfLife is the default half-life duration. The
	// half-life duration defines after how much time the estimated success
	// probability of a penalized node or channel has recovered to half of
	// the a priori hop probability. Right after a failure the estimate is
	// zero; from there it climbs back towards the a priori value.
	DefaultPenaltyHalfLife = time.Hour
)
|
|
|
|
|
2019-05-23 21:05:30 +03:00
|
|
|
// MissionControl contains state which summarizes the past attempts of HTLC
|
2019-03-19 19:09:27 +03:00
|
|
|
// routing by external callers when sending payments throughout the network. It
|
|
|
|
// acts as a shared memory during routing attempts with the goal to optimize the
|
|
|
|
// payment attempt success rate.
|
|
|
|
//
|
|
|
|
// Failed payment attempts are reported to mission control. These reports are
|
|
|
|
// used to track the time of the last node or channel level failure. The time
|
|
|
|
// since the last failure is used to estimate a success probability that is fed
|
|
|
|
// into the path finding process for subsequent payment attempts.
|
2019-05-23 21:05:30 +03:00
|
|
|
type MissionControl struct {
|
2019-03-19 19:09:27 +03:00
|
|
|
history map[route.Vertex]*nodeHistory
|
2017-10-17 04:57:30 +03:00
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// now is expected to return the current time. It is supplied as an
|
|
|
|
// external function to enable deterministic unit tests.
|
|
|
|
now func() time.Time
|
|
|
|
|
2019-05-22 12:56:04 +03:00
|
|
|
cfg *MissionControlConfig
|
2019-03-19 19:09:27 +03:00
|
|
|
|
2017-10-17 04:57:30 +03:00
|
|
|
sync.Mutex
|
|
|
|
|
|
|
|
// TODO(roasbeef): further counters, if vertex continually unavailable,
|
|
|
|
// add to another generation
|
|
|
|
|
|
|
|
// TODO(roasbeef): also add favorable metrics for nodes
|
|
|
|
}
|
|
|
|
|
2019-05-22 12:56:04 +03:00
|
|
|
// MissionControlConfig defines parameters that control mission control
// behaviour.
type MissionControlConfig struct {
	// PenaltyHalfLife defines after how much time the estimated success
	// probability of a penalized node or channel has recovered to half of
	// AprioriHopProbability. The estimate divides by this duration, so it
	// is expected to be positive.
	PenaltyHalfLife time.Duration

	// AprioriHopProbability is the assumed success probability of a hop in
	// a route when no other information is available.
	AprioriHopProbability float64
}
|
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// nodeHistory contains a summary of payment attempt outcomes involving a
|
|
|
|
// particular node.
|
|
|
|
type nodeHistory struct {
|
|
|
|
// lastFail is the last time a node level failure occurred, if any.
|
|
|
|
lastFail *time.Time
|
2017-10-18 05:41:46 +03:00
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// channelLastFail tracks history per channel, if available for that
|
|
|
|
// channel.
|
|
|
|
channelLastFail map[uint64]*channelHistory
|
2017-10-17 04:57:30 +03:00
|
|
|
}
|
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// channelHistory contains a summary of payment attempt outcomes involving a
|
|
|
|
// particular channel.
|
|
|
|
type channelHistory struct {
|
|
|
|
// lastFail is the last time a channel level failure occurred.
|
|
|
|
lastFail time.Time
|
2017-10-18 05:41:46 +03:00
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// minPenalizeAmt is the minimum amount for which to take this failure
|
|
|
|
// into account.
|
|
|
|
minPenalizeAmt lnwire.MilliSatoshi
|
2017-10-18 05:41:46 +03:00
|
|
|
}
|
|
|
|
|
2019-05-10 11:38:31 +03:00
|
|
|
// MissionControlSnapshot contains a snapshot of the current state of mission
|
|
|
|
// control.
|
|
|
|
type MissionControlSnapshot struct {
|
|
|
|
// Nodes contains the per node information of this snapshot.
|
|
|
|
Nodes []MissionControlNodeSnapshot
|
|
|
|
}
|
|
|
|
|
|
|
|
// MissionControlNodeSnapshot contains a snapshot of the current node state in
|
|
|
|
// mission control.
|
|
|
|
type MissionControlNodeSnapshot struct {
|
|
|
|
// Node pubkey.
|
|
|
|
Node route.Vertex
|
|
|
|
|
|
|
|
// Lastfail is the time of last failure, if any.
|
|
|
|
LastFail *time.Time
|
|
|
|
|
|
|
|
// Channels is a list of channels for which specific information is
|
|
|
|
// logged.
|
|
|
|
Channels []MissionControlChannelSnapshot
|
|
|
|
|
|
|
|
// OtherChanSuccessProb is the success probability for channels not in
|
|
|
|
// the Channels slice.
|
|
|
|
OtherChanSuccessProb float64
|
|
|
|
}
|
|
|
|
|
|
|
|
// MissionControlChannelSnapshot contains a snapshot of the current channel
|
|
|
|
// state in mission control.
|
|
|
|
type MissionControlChannelSnapshot struct {
|
|
|
|
// ChannelID is the short channel id of the snapshot.
|
|
|
|
ChannelID uint64
|
|
|
|
|
|
|
|
// LastFail is the time of last failure.
|
|
|
|
LastFail time.Time
|
|
|
|
|
|
|
|
// MinPenalizeAmt is the minimum amount for which the channel will be
|
|
|
|
// penalized.
|
|
|
|
MinPenalizeAmt lnwire.MilliSatoshi
|
|
|
|
|
|
|
|
// SuccessProb is the success probability estimation for this channel.
|
|
|
|
SuccessProb float64
|
|
|
|
}
|
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
// NewMissionControl returns a new instance of missionControl.
|
2019-06-18 19:30:56 +03:00
|
|
|
func NewMissionControl(cfg *MissionControlConfig) *MissionControl {
|
2019-05-22 12:56:04 +03:00
|
|
|
log.Debugf("Instantiating mission control with config: "+
|
2019-06-18 19:30:56 +03:00
|
|
|
"PenaltyHalfLife=%v, AprioriHopProbability=%v",
|
|
|
|
cfg.PenaltyHalfLife, cfg.AprioriHopProbability)
|
2017-10-17 04:57:30 +03:00
|
|
|
|
2019-03-19 19:09:27 +03:00
|
|
|
return &MissionControl{
|
2019-06-18 19:30:56 +03:00
|
|
|
history: make(map[route.Vertex]*nodeHistory),
|
|
|
|
now: time.Now,
|
|
|
|
cfg: cfg,
|
2019-05-23 21:05:29 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
routing: add new paymentSession companion struct to missionControl
In this commit, we modify the pruning semantics of the missionControl
struct. Before this commit, on each payment attempt, we would fetch a
new pruned graph view each time. This served to instantly propagate any
detected failures to all outstanding payment attempts. However, this
meant that we could at times get stuck in a retry loop if sends take a
few seconds: we may prune an edge, try another, and by then the original
edge is unpruned again.
To remedy this, we now introduce the concept of a paymentSession. The
session will start out as a snapshot of the latest pruned graph view.
Any payment failures are now reported directly to the paymentSession
rather than missionControl. The rationale for this is that
edges/vertexes pruned as a result of failures will never decay for a
local payment session, only for the global pruned view. With this in
place, we ensure that the pruned view only grows for a session.
Fixes #536.
2018-01-09 06:38:49 +03:00
|
|
|
|
2019-05-23 21:05:30 +03:00
|
|
|
// ResetHistory resets the history of MissionControl returning it to a state as
|
2017-10-17 04:57:30 +03:00
|
|
|
// if no payment attempts have been made.
|
2019-05-23 21:05:30 +03:00
|
|
|
func (m *MissionControl) ResetHistory() {
|
2017-10-17 04:57:30 +03:00
|
|
|
m.Lock()
|
2019-03-19 19:09:27 +03:00
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
m.history = make(map[route.Vertex]*nodeHistory)
|
|
|
|
|
|
|
|
log.Debugf("Mission control history cleared")
|
|
|
|
}
|
|
|
|
|
2019-06-18 19:30:56 +03:00
|
|
|
// GetEdgeProbability is expected to return the success probability of a payment
|
2019-03-19 19:09:27 +03:00
|
|
|
// from fromNode along edge.
|
2019-06-18 19:30:56 +03:00
|
|
|
func (m *MissionControl) GetEdgeProbability(fromNode route.Vertex,
|
2019-03-19 19:09:27 +03:00
|
|
|
edge EdgeLocator, amt lnwire.MilliSatoshi) float64 {
|
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
// Get the history for this node. If there is no history available,
|
|
|
|
// assume that it's success probability is a constant a priori
|
|
|
|
// probability. After the attempt new information becomes available to
|
|
|
|
// adjust this probability.
|
|
|
|
nodeHistory, ok := m.history[fromNode]
|
|
|
|
if !ok {
|
2019-05-22 12:56:04 +03:00
|
|
|
return m.cfg.AprioriHopProbability
|
2019-03-19 19:09:27 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return m.getEdgeProbabilityForNode(nodeHistory, edge.ChannelID, amt)
|
|
|
|
}
|
|
|
|
|
|
|
|
// getEdgeProbabilityForNode estimates the probability of successfully
|
|
|
|
// traversing a channel based on the node history.
|
|
|
|
func (m *MissionControl) getEdgeProbabilityForNode(nodeHistory *nodeHistory,
|
|
|
|
channelID uint64, amt lnwire.MilliSatoshi) float64 {
|
|
|
|
|
|
|
|
// Calculate the last failure of the given edge. A node failure is
|
|
|
|
// considered a failure that would have affected every edge. Therefore
|
|
|
|
// we insert a node level failure into the history of every channel.
|
|
|
|
lastFailure := nodeHistory.lastFail
|
|
|
|
|
|
|
|
// Take into account a minimum penalize amount. For balance errors, a
|
|
|
|
// failure may be reported with such a minimum to prevent too aggresive
|
|
|
|
// penalization. We only take into account a previous failure if the
|
|
|
|
// amount that we currently get the probability for is greater or equal
|
|
|
|
// than the minPenalizeAmt of the previous failure.
|
|
|
|
channelHistory, ok := nodeHistory.channelLastFail[channelID]
|
|
|
|
if ok && channelHistory.minPenalizeAmt <= amt {
|
|
|
|
|
|
|
|
// If there is both a node level failure recorded and a channel
|
|
|
|
// level failure is applicable too, we take the most recent of
|
|
|
|
// the two.
|
|
|
|
if lastFailure == nil ||
|
|
|
|
channelHistory.lastFail.After(*lastFailure) {
|
|
|
|
|
|
|
|
lastFailure = &channelHistory.lastFail
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if lastFailure == nil {
|
2019-05-22 12:56:04 +03:00
|
|
|
return m.cfg.AprioriHopProbability
|
2019-03-19 19:09:27 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
timeSinceLastFailure := m.now().Sub(*lastFailure)
|
|
|
|
|
|
|
|
// Calculate success probability. It is an exponential curve that brings
|
|
|
|
// the probability down to zero when a failure occurs. From there it
|
|
|
|
// recovers asymptotically back to the a priori probability. The rate at
|
|
|
|
// which this happens is controlled by the penaltyHalfLife parameter.
|
2019-05-22 12:56:04 +03:00
|
|
|
exp := -timeSinceLastFailure.Hours() / m.cfg.PenaltyHalfLife.Hours()
|
|
|
|
probability := m.cfg.AprioriHopProbability * (1 - math.Pow(2, exp))
|
2019-03-19 19:09:27 +03:00
|
|
|
|
|
|
|
return probability
|
|
|
|
}
|
|
|
|
|
|
|
|
// createHistoryIfNotExists returns the history for the given node. If the node
|
|
|
|
// is yet unknown, it will create an empty history structure.
|
|
|
|
func (m *MissionControl) createHistoryIfNotExists(vertex route.Vertex) *nodeHistory {
|
|
|
|
if node, ok := m.history[vertex]; ok {
|
|
|
|
return node
|
|
|
|
}
|
|
|
|
|
|
|
|
node := &nodeHistory{
|
|
|
|
channelLastFail: make(map[uint64]*channelHistory),
|
|
|
|
}
|
|
|
|
m.history[vertex] = node
|
|
|
|
|
|
|
|
return node
|
|
|
|
}
|
|
|
|
|
2019-06-18 19:30:56 +03:00
|
|
|
// ReportVertexFailure reports a node level failure.
|
|
|
|
func (m *MissionControl) ReportVertexFailure(v route.Vertex) {
|
2019-03-19 19:09:27 +03:00
|
|
|
log.Debugf("Reporting vertex %v failure to Mission Control", v)
|
|
|
|
|
|
|
|
now := m.now()
|
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
history := m.createHistoryIfNotExists(v)
|
|
|
|
history.lastFail = &now
|
|
|
|
}
|
|
|
|
|
2019-06-18 19:30:56 +03:00
|
|
|
// ReportEdgeFailure reports a channel level failure.
|
2019-03-19 19:09:27 +03:00
|
|
|
//
|
|
|
|
// TODO(roasbeef): also add value attempted to send and capacity of channel
|
2019-06-18 19:30:56 +03:00
|
|
|
func (m *MissionControl) ReportEdgeFailure(failedEdge edge,
|
2019-03-19 19:09:27 +03:00
|
|
|
minPenalizeAmt lnwire.MilliSatoshi) {
|
|
|
|
|
|
|
|
log.Debugf("Reporting channel %v failure to Mission Control",
|
|
|
|
failedEdge.channel)
|
|
|
|
|
|
|
|
now := m.now()
|
|
|
|
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
history := m.createHistoryIfNotExists(failedEdge.from)
|
|
|
|
history.channelLastFail[failedEdge.channel] = &channelHistory{
|
|
|
|
lastFail: now,
|
|
|
|
minPenalizeAmt: minPenalizeAmt,
|
|
|
|
}
|
2017-10-17 04:57:30 +03:00
|
|
|
}
|
2019-05-10 11:38:31 +03:00
|
|
|
|
|
|
|
// GetHistorySnapshot takes a snapshot from the current mission control state
|
|
|
|
// and actual probability estimates.
|
|
|
|
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
|
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
log.Debugf("Requesting history snapshot from mission control: "+
|
|
|
|
"node_count=%v", len(m.history))
|
|
|
|
|
|
|
|
nodes := make([]MissionControlNodeSnapshot, 0, len(m.history))
|
|
|
|
|
|
|
|
for v, h := range m.history {
|
|
|
|
channelSnapshot := make([]MissionControlChannelSnapshot, 0,
|
|
|
|
len(h.channelLastFail),
|
|
|
|
)
|
|
|
|
|
|
|
|
for id, lastFail := range h.channelLastFail {
|
|
|
|
// Show probability assuming amount meets min
|
|
|
|
// penalization amount.
|
|
|
|
prob := m.getEdgeProbabilityForNode(
|
|
|
|
h, id, lastFail.minPenalizeAmt,
|
|
|
|
)
|
|
|
|
|
|
|
|
channelSnapshot = append(channelSnapshot,
|
|
|
|
MissionControlChannelSnapshot{
|
|
|
|
ChannelID: id,
|
|
|
|
LastFail: lastFail.lastFail,
|
|
|
|
MinPenalizeAmt: lastFail.minPenalizeAmt,
|
|
|
|
SuccessProb: prob,
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
otherProb := m.getEdgeProbabilityForNode(h, 0, 0)
|
|
|
|
|
|
|
|
nodes = append(nodes,
|
|
|
|
MissionControlNodeSnapshot{
|
|
|
|
Node: v,
|
|
|
|
LastFail: h.lastFail,
|
|
|
|
OtherChanSuccessProb: otherProb,
|
|
|
|
Channels: channelSnapshot,
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
snapshot := MissionControlSnapshot{
|
|
|
|
Nodes: nodes,
|
|
|
|
}
|
|
|
|
|
|
|
|
return &snapshot
|
|
|
|
}
|