routing: add new paymentSession companion struct to missionControl

In this commit, we modify the pruning semantics of the missionControl
struct. Before this commit, we would fetch a new pruned view of the
graph on each payment attempt. This served to instantly propagate any
detected failures to all outstanding payment attempts. However, this
meant that we could at times get stuck in a retry loop: if sends take a
few seconds, we may prune an edge, try another, and by then the
original edge has been unpruned.

To remedy this, we now introduce the concept of a paymentSession. The
session will start out as a snapshot of the latest graph prune view.
Any payment failures are now reported directly to the paymentSession
rather than missionControl. The rationale for this is that
edges/vertexes pruned as a result of failures will never decay for a
local payment session, only for the global prune view. With this in
place, we ensure that the prune view only ever grows for a session.

Fixes #536.
This commit is contained in:
Olaoluwa Osuntokun 2018-01-08 19:38:49 -08:00
parent 912366ada5
commit 98f63cdce1
No known key found for this signature in database
GPG Key ID: 964EA263DD637C21
2 changed files with 125 additions and 79 deletions

@ -77,69 +77,15 @@ func newMissionControl(g *channeldb.ChannelGraph,
}
}
// ReportVertexFailure adds a vertex to the graph prune view after a client
// reports a routing failure localized to the vertex. The time the vertex was
// added is noted, as it'll be pruned from the view after a period of
// vertexDecay.
func (m *missionControl) ReportVertexFailure(v Vertex) {
log.Debugf("Reporting vertex %v failure to Mission Control", v)
// graphPruneView is a filter of sorts that path finding routines should
// consult during the execution. Any edges or vertexes within the view should
// be ignored during path finding. The contents of the view reflect the current
// state of the wider network from the PoV of mission control compiled via HTLC
// routing attempts in the past.
type graphPruneView struct {
edges map[uint64]struct{}
m.Lock()
m.failedVertexes[v] = time.Now()
m.Unlock()
}
// ReportChannelFailure records a routing failure for the channel identified
// by e. The entry is stamped with the current time, as it is meant to be
// dropped from the prune view again once a period of edgeDecay has passed.
//
// TODO(roasbeef): also add value attempted to send and capacity of channel
func (m *missionControl) ReportChannelFailure(e uint64) {
	log.Debugf("Reporting edge %v failure to Mission Control", e)

	// The failed edge set is shared state, so it is only touched while the
	// mission control lock is held.
	m.Lock()
	defer m.Unlock()

	m.failedEdges[e] = time.Now()
}
// RequestRoute returns a route which is likely to be capable for successfully
// routing the specified HTLC payment to the target node. Initially the first
// set of paths returned from this method may encounter routing failure along
// the way, however as more payments are sent, mission control will start to
// build an up to date view of the network itself. With each payment a new area
// will be explored, which feeds into the recommendations made for routing.
//
// NOTE: This function is safe for concurrent access.
func (m *missionControl) RequestRoute(payment *LightningPayment,
height uint32, finalCltvDelta uint16) (*Route, error) {
// First, we'll query mission control for it's current recommendation
// on the edges/vertexes to ignore during path finding.
pruneView := m.GraphPruneView()
// TODO(roasbeef): sync logic amongst dist sys
// Taking into account this prune view, we'll attempt to locate a path
// to our destination, respecting the recommendations from
// missionControl.
path, err := findPath(nil, m.graph, m.selfNode, payment.Target,
pruneView.vertexes, pruneView.edges, payment.Amount)
if err != nil {
return nil, err
}
// With the next candidate path found, we'll attempt to turn this into
// a route by applying the time-lock and fee requirements.
sourceVertex := NewVertex(m.selfNode.PubKey)
route, err := newRoute(payment.Amount, sourceVertex, path, height,
finalCltvDelta)
if err != nil {
// TODO(roasbeef): return which edge/vertex didn't work
// out
return nil, err
}
return route, err
vertexes map[Vertex]struct{}
}
// GraphPruneView returns a new graphPruneView instance which is to be
@ -147,7 +93,7 @@ func (m *missionControl) RequestRoute(payment *LightningPayment,
// prune view, it is to be ignored as a goroutine has had issues routing
// through it successfully. Within this method the main view of the
// missionControl is garbage collected as entries are detected to be "stale".
func (m *missionControl) GraphPruneView() *graphPruneView {
func (m *missionControl) GraphPruneView() graphPruneView {
// First, we'll grab the current time, this value will be used to
// determine if an entry is stale or not.
now := time.Now()
@ -190,21 +136,115 @@ func (m *missionControl) GraphPruneView() *graphPruneView {
log.Debugf("Mission Control returning prune view of %v edges, %v "+
"vertexes", len(edges), len(vertexes))
return &graphPruneView{
return graphPruneView{
edges: edges,
vertexes: vertexes,
}
}
// graphPruneView is a filter of sorts that path finding routines should
// consult during the execution. Any edges or vertexes within the view should
// be ignored during path finding. The contents of the view reflect the current
// state of the wider network from the PoV of mission control compiled via HTLC
// routing attempts in the past.
type graphPruneView struct {
edges map[uint64]struct{}
// paymentSession is used during an HTLC routing session to prune the local
// chain view in response to failures, and also report those failures back to
// missionControl. The snapshot copied for this session will only ever grow,
// and will not be pruned after a decay like the main view within mission
// control. We do this as we want to avoid the case where we continually try a
// bad edge or route multiple times in a session. This can lead to an infinite
// loop if payment attempts take long enough.
type paymentSession struct {
pruneViewSnapshot graphPruneView
vertexes map[Vertex]struct{}
mc *missionControl
}
// NewPaymentSession returns a fresh paymentSession that is seeded with the
// prune view currently reported by GraphPruneView and that keeps a reference
// back to this missionControl instance.
func (m *missionControl) NewPaymentSession() *paymentSession {
	session := &paymentSession{mc: m}

	// Capture the prune view as it stands right now; the session works
	// against this snapshot from here on.
	session.pruneViewSnapshot = m.GraphPruneView()

	return session
}
// ReportVertexFailure marks the vertex v as failed, both in this session's
// own prune view snapshot and in the failed vertex set of the backing
// missionControl. The session never removes the vertex from its snapshot,
// while the shared entry is stamped with the current time because it is
// meant to expire after a period of vertexDecay.
func (p *paymentSession) ReportVertexFailure(v Vertex) {
	log.Debugf("Reporting vertex %v failure to Mission Control", v)

	// Record the failure locally first so this session stops considering
	// the vertex.
	p.pruneViewSnapshot.vertexes[v] = struct{}{}

	// Then publish it to mission control, under its lock, so that payment
	// sessions created later also start out with this vertex pruned.
	mc := p.mc
	mc.Lock()
	defer mc.Unlock()

	mc.failedVertexes[v] = time.Now()
}
// ReportChannelFailure marks the channel identified by e as failed, both in
// this session's own prune view snapshot and in the failed edge set of the
// backing missionControl. The session never removes the edge from its
// snapshot, which keeps it from flapping back to an edge once the shared
// entry (stamped with the current time, and subject to edgeDecay) expires.
//
// TODO(roasbeef): also add value attempted to send and capacity of channel
func (p *paymentSession) ReportChannelFailure(e uint64) {
	log.Debugf("Reporting edge %v failure to Mission Control", e)

	// Record the failure locally first so this session stops considering
	// the edge.
	p.pruneViewSnapshot.edges[e] = struct{}{}

	// Then publish it to mission control, under its lock, so that payment
	// sessions created later also start out with this edge pruned.
	mc := p.mc
	mc.Lock()
	defer mc.Unlock()

	mc.failedEdges[e] = time.Now()
}
// RequestRoute attempts to produce a route for the given payment. It runs
// findPath from mission control's self node to payment.Target for
// payment.Amount, ignoring every vertex and edge in this session's prune view
// snapshot, and then converts the resulting path into a route via newRoute
// using the supplied height and finalCltvDelta. An error from either step is
// returned to the caller unchanged, together with a nil route.
//
// NOTE(review): the snapshot maps are read here without holding any lock,
// while the Report* methods write to them; confirm that a session is only
// ever driven from a single goroutine.
func (p *paymentSession) RequestRoute(payment *LightningPayment,
	height uint32, finalCltvDelta uint16) (*Route, error) {

	// The session's snapshot only ever gains entries, so anything that was
	// reported as failed earlier in this session stays excluded here.
	ignoredVertexes := p.pruneViewSnapshot.vertexes
	ignoredEdges := p.pruneViewSnapshot.edges
	mc := p.mc

	// TODO(roasbeef): sync logic amongst dist sys

	// Search for a candidate path that avoids everything in the prune
	// view.
	path, err := findPath(
		nil, mc.graph, mc.selfNode, payment.Target,
		ignoredVertexes, ignoredEdges, payment.Amount,
	)
	if err != nil {
		return nil, err
	}

	// Turn the candidate path into a full route by applying the time-lock
	// and fee requirements, starting from our own node.
	route, err := newRoute(
		payment.Amount, NewVertex(mc.selfNode.PubKey), path, height,
		finalCltvDelta,
	)
	if err != nil {
		// TODO(roasbeef): return which edge/vertex didn't work
		// out
		return nil, err
	}

	return route, nil
}
// ResetHistory resets the history of missionControl returning it to a state as

@ -1372,15 +1372,21 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
finalCLTVDelta = *payment.FinalCLTVDelta
}
// Before starting the HTLC routing attempt, we'll create a fresh
// payment session which will report our errors back to mission
// control.
paySession := r.missionControl.NewPaymentSession()
// We'll continue until either our payment succeeds, or we encounter a
// critical error during path finding.
for {
// We'll kick things off by requesting a new route from mission
// control, which will incoroporate the current best known
// state of the channel graph and our past HTLC routing
// control, which will incorporate the current best known state
// of the channel graph and our past HTLC routing
// successes/failures.
route, err := r.missionControl.RequestRoute(payment,
uint32(currentHeight), finalCLTVDelta)
route, err := paySession.RequestRoute(
payment, uint32(currentHeight), finalCLTVDelta,
)
if err != nil {
// If we're unable to successfully make a payment using
// any of the routes we've found, then return an error.
@ -1546,7 +1552,7 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
// If the channel was found, then we'll inform
// mission control of this failure so future
// attempts avoid this link temporarily.
r.missionControl.ReportChannelFailure(badChan)
paySession.ReportChannelFailure(badChan)
continue
// If the send fail due to a node not having the
@ -1581,7 +1587,7 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
// Once we've located the vertex, we'll report
// this failure to missionControl and restart
// path finding.
r.missionControl.ReportVertexFailure(missingNode)
paySession.ReportVertexFailure(missingNode)
continue
// If the node wasn't able to forward for which ever
@ -1593,7 +1599,7 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
continue
}
r.missionControl.ReportVertexFailure(missingNode)
paySession.ReportVertexFailure(missingNode)
continue
// If we get a permanent channel or node failure, then