Merge pull request #1706 from joostjager/errorprocessing
routing: make routing retry behaviour consistent
This commit is contained in:
commit
5075394617
@ -164,6 +164,12 @@ type paymentSession struct {
|
||||
|
||||
bandwidthHints map[uint64]lnwire.MilliSatoshi
|
||||
|
||||
// errFailedFeeChans is a map of the short channel ID's that were the
|
||||
// source of policy related routing failures during this payment attempt.
|
||||
// We'll use this map to prune out channels when the first error may not
|
||||
// require pruning, but any subsequent ones do.
|
||||
errFailedPolicyChans map[uint64]struct{}
|
||||
|
||||
mc *missionControl
|
||||
|
||||
haveRoutes bool
|
||||
@ -239,6 +245,7 @@ func (m *missionControl) NewPaymentSession(routeHints [][]HopHint,
|
||||
pruneViewSnapshot: viewSnapshot,
|
||||
additionalEdges: edges,
|
||||
bandwidthHints: bandwidthHints,
|
||||
errFailedPolicyChans: make(map[uint64]struct{}),
|
||||
mc: m,
|
||||
}, nil
|
||||
}
|
||||
@ -252,6 +259,7 @@ func (m *missionControl) NewPaymentSessionFromRoutes(routes []*Route) *paymentSe
|
||||
pruneViewSnapshot: m.GraphPruneView(),
|
||||
haveRoutes: true,
|
||||
preBuiltRoutes: routes,
|
||||
errFailedPolicyChans: make(map[uint64]struct{}),
|
||||
mc: m,
|
||||
}
|
||||
}
|
||||
@ -331,6 +339,31 @@ func (p *paymentSession) ReportChannelFailure(e uint64) {
|
||||
p.mc.Unlock()
|
||||
}
|
||||
|
||||
// ReportChannelPolicyFailure handles a failure message that relates to a
|
||||
// channel policy. For these types of failures, the policy is updated and we
|
||||
// want to keep it included during path finding. This function does mark the
|
||||
// edge as 'policy failed once'. The next time it fails, the whole node will be
|
||||
// pruned. This is to prevent nodes from keeping us busy by continuously sending
|
||||
// new channel updates.
|
||||
func (p *paymentSession) ReportChannelPolicyFailure(
|
||||
errSource Vertex, failedChanID uint64) {
|
||||
|
||||
// Check to see if we've already reported a policy related failure for
|
||||
// this channel. If so, then we'll prune out the vertex.
|
||||
_, ok := p.errFailedPolicyChans[failedChanID]
|
||||
if ok {
|
||||
// TODO(joostjager): is this aggresive pruning still necessary?
|
||||
// Just pruning edges may also work unless there is a huge
|
||||
// number of failing channels from that node?
|
||||
p.ReportVertexFailure(errSource)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Finally, we'll record a policy failure from this node and move on.
|
||||
p.errFailedPolicyChans[failedChanID] = struct{}{}
|
||||
}
|
||||
|
||||
// RequestRoute returns a route which is likely to be capable for successfully
|
||||
// routing the specified HTLC payment to the target node. Initially the first
|
||||
// set of paths returned from this method may encounter routing failure along
|
||||
|
@ -1641,12 +1641,6 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
sendError error
|
||||
)
|
||||
|
||||
// errFailedFeeChans is a map of the short channel ID's that were the
|
||||
// source of fee related routing failures during this payment attempt.
|
||||
// We'll use this map to prune out channels when the first error may
|
||||
// not require pruning, but any subsequent ones do.
|
||||
errFailedFeeChans := make(map[lnwire.ShortChannelID]struct{})
|
||||
|
||||
// We'll also fetch the current block height so we can properly
|
||||
// calculate the required HTLC time locks within the route.
|
||||
_, currentHeight, err := r.cfg.Chain.GetBestBlock()
|
||||
@ -1758,11 +1752,55 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
}
|
||||
|
||||
errSource := fErr.ErrorSource
|
||||
errVertex := NewVertex(errSource)
|
||||
|
||||
log.Tracef("node=%x reported failure when sending "+
|
||||
"htlc=%x", errSource.SerializeCompressed(),
|
||||
payment.PaymentHash[:])
|
||||
|
||||
// Always determine chan id ourselves, because a channel
|
||||
// update with id may not be available.
|
||||
failedChanID, err := getFailedChannelID(route, errSource)
|
||||
if err != nil {
|
||||
return preImage, nil, err
|
||||
}
|
||||
|
||||
// processChannelUpdateAndRetry is a closure that
|
||||
// handles a failure message containing a channel
|
||||
// update. This function always tries to apply the
|
||||
// channel update and passes on the result to the
|
||||
// payment session to adjust its view on the reliability
|
||||
// of the network.
|
||||
//
|
||||
// As channel id, the locally determined channel id is
|
||||
// used. It does not rely on the channel id that is part
|
||||
// of the channel update message, because the remote
|
||||
// node may lie to us or the update may be corrupt.
|
||||
processChannelUpdateAndRetry := func(
|
||||
update *lnwire.ChannelUpdate,
|
||||
pubKey *btcec.PublicKey) {
|
||||
|
||||
// Try to apply the channel update.
|
||||
updateOk := r.applyChannelUpdate(update, pubKey)
|
||||
|
||||
// If the update could not be applied, prune the
|
||||
// edge. There is no reason to continue trying
|
||||
// this channel.
|
||||
//
|
||||
// TODO: Could even prune the node completely?
|
||||
// Or is there a valid reason for the channel
|
||||
// update to fail?
|
||||
if !updateOk {
|
||||
paySession.ReportChannelFailure(
|
||||
failedChanID,
|
||||
)
|
||||
}
|
||||
|
||||
paySession.ReportChannelPolicyFailure(
|
||||
NewVertex(errSource), failedChanID,
|
||||
)
|
||||
}
|
||||
|
||||
switch onionErr := fErr.FailureMessage.(type) {
|
||||
// If the end destination didn't know they payment
|
||||
// hash, then we'll terminate immediately.
|
||||
@ -1803,16 +1841,8 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
// that sent us this error, as it doesn't now what the
|
||||
// correct block height is.
|
||||
case *lnwire.FailExpiryTooSoon:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(&update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
r.applyChannelUpdate(&onionErr.Update, errSource)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
// If we hit an instance of onion payload corruption or
|
||||
@ -1825,66 +1855,30 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
case *lnwire.FailInvalidOnionKey:
|
||||
return preImage, nil, sendError
|
||||
|
||||
// If the onion error includes a channel update, and
|
||||
// isn't necessarily fatal, then we'll apply the update
|
||||
// and continue with the rest of the routes.
|
||||
// If we get a failure due to violating the minimum
|
||||
// amount, we'll apply the new minimum amount and retry
|
||||
// routing.
|
||||
case *lnwire.FailAmountBelowMinimum:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(&update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
processChannelUpdateAndRetry(
|
||||
&onionErr.Update, errSource,
|
||||
)
|
||||
continue
|
||||
|
||||
return preImage, nil, sendError
|
||||
|
||||
// If we get a failure due to a fee, so we'll apply the
|
||||
// If we get a failure due to a fee, we'll apply the
|
||||
// new fee update, and retry our attempt using the
|
||||
// newly updated fees.
|
||||
case *lnwire.FailFeeInsufficient:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(&update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
|
||||
pruneEdgeFailure(
|
||||
paySession, route, errSource,
|
||||
processChannelUpdateAndRetry(
|
||||
&onionErr.Update, errSource,
|
||||
)
|
||||
}
|
||||
|
||||
// We'll now check to see if we've already
|
||||
// reported a fee related failure for this
|
||||
// node. If so, then we'll actually prune out
|
||||
// the vertex for now.
|
||||
chanID := update.ShortChannelID
|
||||
_, ok := errFailedFeeChans[chanID]
|
||||
if ok {
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
// Finally, we'll record a fee failure from
|
||||
// this node and move on.
|
||||
errFailedFeeChans[chanID] = struct{}{}
|
||||
continue
|
||||
|
||||
// If we get the failure for an intermediate node that
|
||||
// disagrees with our time lock values, then we'll
|
||||
// prune it out for now, and continue with path
|
||||
// finding.
|
||||
// apply the new delta value and try it once more.
|
||||
case *lnwire.FailIncorrectCltvExpiry:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(&update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
processChannelUpdateAndRetry(
|
||||
&onionErr.Update, errSource,
|
||||
)
|
||||
continue
|
||||
|
||||
@ -1892,46 +1886,30 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
// forward one is currently disabled, so we'll apply
|
||||
// the update and continue.
|
||||
case *lnwire.FailChannelDisabled:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(&update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
r.applyChannelUpdate(&onionErr.Update, errSource)
|
||||
paySession.ReportChannelFailure(failedChanID)
|
||||
continue
|
||||
|
||||
// It's likely that the outgoing channel didn't have
|
||||
// sufficient capacity, so we'll prune this edge for
|
||||
// now, and continue onwards with our path finding.
|
||||
case *lnwire.FailTemporaryChannelFailure:
|
||||
update := onionErr.Update
|
||||
err := r.applyChannelUpdate(update, errSource)
|
||||
if err != nil {
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
r.applyChannelUpdate(onionErr.Update, errSource)
|
||||
paySession.ReportChannelFailure(failedChanID)
|
||||
continue
|
||||
|
||||
// If the send fail due to a node not having the
|
||||
// required features, then we'll note this error and
|
||||
// continue.
|
||||
case *lnwire.FailRequiredNodeFeatureMissing:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
// If the send fail due to a node not having the
|
||||
// required features, then we'll note this error and
|
||||
// continue.
|
||||
case *lnwire.FailRequiredChannelFeatureMissing:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
// If the next hop in the route wasn't known or
|
||||
@ -1942,22 +1920,18 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
// returning errors in order to attempt to black list
|
||||
// another node.
|
||||
case *lnwire.FailUnknownNextPeer:
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
paySession.ReportChannelFailure(failedChanID)
|
||||
continue
|
||||
|
||||
// If the node wasn't able to forward for which ever
|
||||
// reason, then we'll note this and continue with the
|
||||
// routes.
|
||||
case *lnwire.FailTemporaryNodeFailure:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
case *lnwire.FailPermanentNodeFailure:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
// If we crafted a route that contains a too long time
|
||||
@ -1970,16 +1944,14 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
// that as a hint during future path finding through
|
||||
// that node.
|
||||
case *lnwire.FailExpiryTooFar:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
paySession.ReportVertexFailure(errVertex)
|
||||
continue
|
||||
|
||||
// If we get a permanent channel or node failure, then
|
||||
// we'll note this (exclude the vertex/edge), and
|
||||
// continue with the rest of the routes.
|
||||
case *lnwire.FailPermanentChannelFailure:
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
paySession.ReportChannelFailure(failedChanID)
|
||||
continue
|
||||
|
||||
default:
|
||||
@ -1991,74 +1963,46 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment,
|
||||
}
|
||||
}
|
||||
|
||||
// pruneVertexFailure will attempt to prune a vertex from the current available
|
||||
// vertexes of the target payment session in response to an encountered routing
|
||||
// error.
|
||||
func pruneVertexFailure(paySession *paymentSession, route *Route,
|
||||
errSource *btcec.PublicKey, nextNode bool) {
|
||||
|
||||
// By default, we'll try to prune the node that actually sent us the
|
||||
// error.
|
||||
errNode := NewVertex(errSource)
|
||||
|
||||
// If this failure indicates that the node _after_ the source of the
|
||||
// error was not found. As a result, we'll locate the vertex for that
|
||||
// node itself.
|
||||
if nextNode {
|
||||
nodeToPrune, ok := route.nextHopVertex(errSource)
|
||||
|
||||
if ok {
|
||||
errNode = nodeToPrune
|
||||
}
|
||||
}
|
||||
|
||||
// Once we've located the vertex, we'll report this failure to
|
||||
// missionControl and restart path finding.
|
||||
paySession.ReportVertexFailure(errNode)
|
||||
}
|
||||
|
||||
// pruneEdgeFailure will attempts to prune an edge from the current available
|
||||
// edges of the target payment session in response to an encountered routing
|
||||
// error.
|
||||
func pruneEdgeFailure(paySession *paymentSession, route *Route,
|
||||
errSource *btcec.PublicKey) {
|
||||
// getFailedChannelID tries to locate the failing channel given a route and the
|
||||
// pubkey of the node that sent the error. It will assume that the error is
|
||||
// associated with the outgoing channel of the error node.
|
||||
func getFailedChannelID(route *Route, errSource *btcec.PublicKey) (
|
||||
uint64, error) {
|
||||
|
||||
// As this error indicates that the target channel was unable to carry
|
||||
// this HTLC (for w/e reason), we'll query the index to find the
|
||||
// _outgoing_ channel the source of the error was meant to pass the
|
||||
// HTLC along to.
|
||||
badChan, ok := route.nextHopChannel(errSource)
|
||||
if !ok {
|
||||
// If we weren't able to find the hop *after* this node, then
|
||||
// we'll attempt to disable the previous channel.
|
||||
prevChan, ok := route.prevHopChannel(
|
||||
errSource,
|
||||
)
|
||||
|
||||
if !ok {
|
||||
return
|
||||
if badChan, ok := route.nextHopChannel(errSource); ok {
|
||||
return badChan.ChannelID, nil
|
||||
}
|
||||
|
||||
badChan = prevChan
|
||||
// If we weren't able to find the hop *after* this node, then we'll
|
||||
// attempt to disable the previous channel.
|
||||
//
|
||||
// TODO(joostjager): errSource must be the final hop then? In that case,
|
||||
// certain types of errors are not expected. For example
|
||||
// FailUnknownNextPeer. This could be a reason to prune the node?
|
||||
if prevChan, ok := route.prevHopChannel(errSource); ok {
|
||||
return prevChan.ChannelID, nil
|
||||
}
|
||||
|
||||
// If the channel was found, then we'll inform mission control of this
|
||||
// failure so future attempts avoid this link temporarily.
|
||||
paySession.ReportChannelFailure(badChan.ChannelID)
|
||||
return 0, fmt.Errorf("cannot find channel in route")
|
||||
}
|
||||
|
||||
// applyChannelUpdate validates a channel update and if valid, applies it to the
|
||||
// database.
|
||||
// database. It returns a bool indicating whether the updates was successful.
|
||||
func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate,
|
||||
pubKey *btcec.PublicKey) error {
|
||||
pubKey *btcec.PublicKey) bool {
|
||||
// If we get passed a nil channel update (as it's optional with some
|
||||
// onion errors), then we'll exit early with a nil error.
|
||||
// onion errors), then we'll exit early with a success result.
|
||||
if msg == nil {
|
||||
return nil
|
||||
return true
|
||||
}
|
||||
|
||||
if err := ValidateChannelUpdateAnn(pubKey, msg); err != nil {
|
||||
return err
|
||||
log.Errorf("Unable to validate channel update: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
err := r.UpdateEdge(&channeldb.ChannelEdgePolicy{
|
||||
@ -2072,10 +2016,11 @@ func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate,
|
||||
FeeProportionalMillionths: lnwire.MilliSatoshi(msg.FeeRate),
|
||||
})
|
||||
if err != nil && !IsError(err, ErrIgnored, ErrOutdated) {
|
||||
return fmt.Errorf("Unable to apply channel update: %v", err)
|
||||
log.Errorf("Unable to apply channel update: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
return nil
|
||||
return true
|
||||
}
|
||||
|
||||
// AddNode is used to add information about a node to the router database. If
|
||||
|
@ -421,9 +421,10 @@ func TestChannelUpdateValidation(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
route := &Route{
|
||||
Hops: hops,
|
||||
}
|
||||
route := NewRouteFromHops(
|
||||
lnwire.MilliSatoshi(10000), 100,
|
||||
NewVertex(ctx.aliases["a"]), hops,
|
||||
)
|
||||
|
||||
// Set up a channel update message with an invalid signature to be
|
||||
// returned to the sender.
|
||||
|
Loading…
Reference in New Issue
Block a user