diff --git a/routing/missioncontrol.go b/routing/missioncontrol.go index f7103016..a0125b63 100644 --- a/routing/missioncontrol.go +++ b/routing/missioncontrol.go @@ -164,6 +164,12 @@ type paymentSession struct { bandwidthHints map[uint64]lnwire.MilliSatoshi + // errFailedPolicyChans is a map of the short channel IDs that were the + // source of policy related routing failures during this payment attempt. + // We'll use this map to prune out channels when the first error may not + // require pruning, but any subsequent ones do. + errFailedPolicyChans map[uint64]struct{} + mc *missionControl haveRoutes bool @@ -236,10 +242,11 @@ func (m *missionControl) NewPaymentSession(routeHints [][]HopHint, } return &paymentSession{ - pruneViewSnapshot: viewSnapshot, - additionalEdges: edges, - bandwidthHints: bandwidthHints, - mc: m, + pruneViewSnapshot: viewSnapshot, + additionalEdges: edges, + bandwidthHints: bandwidthHints, + errFailedPolicyChans: make(map[uint64]struct{}), + mc: m, }, nil } @@ -249,10 +256,11 @@ func (m *missionControl) NewPaymentSession(routeHints [][]HopHint, // used for things like channel rebalancing, and swaps. func (m *missionControl) NewPaymentSessionFromRoutes(routes []*Route) *paymentSession { return &paymentSession{ - pruneViewSnapshot: m.GraphPruneView(), - haveRoutes: true, - preBuiltRoutes: routes, - mc: m, + pruneViewSnapshot: m.GraphPruneView(), + haveRoutes: true, + preBuiltRoutes: routes, + errFailedPolicyChans: make(map[uint64]struct{}), + mc: m, } } @@ -331,6 +339,31 @@ func (p *paymentSession) ReportChannelFailure(e uint64) { p.mc.Unlock() } +// ReportChannelPolicyFailure handles a failure message that relates to a +// channel policy. For these types of failures, the policy is updated and we +// want to keep it included during path finding. This function does mark the +// edge as 'policy failed once'. The next time it fails, the whole node will be +// pruned. 
This is to prevent nodes from keeping us busy by continuously sending +// new channel updates. +func (p *paymentSession) ReportChannelPolicyFailure( + errSource Vertex, failedChanID uint64) { + + // Check to see if we've already reported a policy related failure for + // this channel. If so, then we'll prune out the vertex. + _, ok := p.errFailedPolicyChans[failedChanID] + if ok { + // TODO(joostjager): is this aggressive pruning still necessary? + // Just pruning edges may also work unless there is a huge + // number of failing channels from that node? + p.ReportVertexFailure(errSource) + + return + } + + // Finally, we'll record a policy failure from this node and move on. + p.errFailedPolicyChans[failedChanID] = struct{}{} +} + // RequestRoute returns a route which is likely to be capable for successfully // routing the specified HTLC payment to the target node. Initially the first // set of paths returned from this method may encounter routing failure along diff --git a/routing/router.go b/routing/router.go index c68a960f..f5590234 100644 --- a/routing/router.go +++ b/routing/router.go @@ -1641,12 +1641,6 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, sendError error ) - // errFailedFeeChans is a map of the short channel ID's that were the - // source of fee related routing failures during this payment attempt. - // We'll use this map to prune out channels when the first error may - // not require pruning, but any subsequent ones do. - errFailedFeeChans := make(map[lnwire.ShortChannelID]struct{}) - // We'll also fetch the current block height so we can properly // calculate the required HTLC time locks within the route. 
_, currentHeight, err := r.cfg.Chain.GetBestBlock() @@ -1758,11 +1752,55 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, } errSource := fErr.ErrorSource + errVertex := NewVertex(errSource) log.Tracef("node=%x reported failure when sending "+ "htlc=%x", errSource.SerializeCompressed(), payment.PaymentHash[:]) + // Always determine chan id ourselves, because a channel + // update with id may not be available. + failedChanID, err := getFailedChannelID(route, errSource) + if err != nil { + return preImage, nil, err + } + + // processChannelUpdateAndRetry is a closure that + // handles a failure message containing a channel + // update. This function always tries to apply the + // channel update and passes on the result to the + // payment session to adjust its view on the reliability + // of the network. + // + // As channel id, the locally determined channel id is + // used. It does not rely on the channel id that is part + // of the channel update message, because the remote + // node may lie to us or the update may be corrupt. + processChannelUpdateAndRetry := func( + update *lnwire.ChannelUpdate, + pubKey *btcec.PublicKey) { + + // Try to apply the channel update. + updateOk := r.applyChannelUpdate(update, pubKey) + + // If the update could not be applied, prune the + // edge. There is no reason to continue trying + // this channel. + // + // TODO: Could even prune the node completely? + // Or is there a valid reason for the channel + // update to fail? + if !updateOk { + paySession.ReportChannelFailure( + failedChanID, + ) + } + + paySession.ReportChannelPolicyFailure( + NewVertex(errSource), failedChanID, + ) + } + switch onionErr := fErr.FailureMessage.(type) { // If the end destination didn't know they payment // hash, then we'll terminate immediately. @@ -1803,16 +1841,8 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, // that sent us this error, as it doesn't now what the // correct block height is. 
case *lnwire.FailExpiryTooSoon: - update := onionErr.Update - err := r.applyChannelUpdate(&update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - } - - pruneVertexFailure( - paySession, route, errSource, false, - ) + r.applyChannelUpdate(&onionErr.Update, errSource) + paySession.ReportVertexFailure(errVertex) continue // If we hit an instance of onion payload corruption or @@ -1825,66 +1855,30 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, case *lnwire.FailInvalidOnionKey: return preImage, nil, sendError - // If the onion error includes a channel update, and - // isn't necessarily fatal, then we'll apply the update - // and continue with the rest of the routes. + // If we get a failure due to violating the minimum + // amount, we'll apply the new minimum amount and retry + // routing. case *lnwire.FailAmountBelowMinimum: - update := onionErr.Update - err := r.applyChannelUpdate(&update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - } + processChannelUpdateAndRetry( + &onionErr.Update, errSource, + ) + continue - return preImage, nil, sendError - - // If we get a failure due to a fee, so we'll apply the + // If we get a failure due to a fee, we'll apply the // new fee update, and retry our attempt using the // newly updated fees. case *lnwire.FailFeeInsufficient: - update := onionErr.Update - err := r.applyChannelUpdate(&update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - - pruneEdgeFailure( - paySession, route, errSource, - ) - } - - // We'll now check to see if we've already - // reported a fee related failure for this - // node. If so, then we'll actually prune out - // the vertex for now. 
- chanID := update.ShortChannelID - _, ok := errFailedFeeChans[chanID] - if ok { - pruneVertexFailure( - paySession, route, errSource, false, - ) - continue - } - - // Finally, we'll record a fee failure from - // this node and move on. - errFailedFeeChans[chanID] = struct{}{} + processChannelUpdateAndRetry( + &onionErr.Update, errSource, + ) continue // If we get the failure for an intermediate node that // disagrees with our time lock values, then we'll - // prune it out for now, and continue with path - // finding. + // apply the new delta value and try it once more. case *lnwire.FailIncorrectCltvExpiry: - update := onionErr.Update - err := r.applyChannelUpdate(&update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - } - - pruneVertexFailure( - paySession, route, errSource, false, + processChannelUpdateAndRetry( + &onionErr.Update, errSource, ) continue @@ -1892,46 +1886,30 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, // forward one is currently disabled, so we'll apply // the update and continue. case *lnwire.FailChannelDisabled: - update := onionErr.Update - err := r.applyChannelUpdate(&update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - } - - pruneEdgeFailure(paySession, route, errSource) + r.applyChannelUpdate(&onionErr.Update, errSource) + paySession.ReportChannelFailure(failedChanID) continue // It's likely that the outgoing channel didn't have // sufficient capacity, so we'll prune this edge for // now, and continue onwards with our path finding. 
case *lnwire.FailTemporaryChannelFailure: - update := onionErr.Update - err := r.applyChannelUpdate(update, errSource) - if err != nil { - log.Errorf("unable to apply channel "+ - "update for onion error: %v", err) - } - - pruneEdgeFailure(paySession, route, errSource) + r.applyChannelUpdate(onionErr.Update, errSource) + paySession.ReportChannelFailure(failedChanID) continue // If the send fail due to a node not having the // required features, then we'll note this error and // continue. case *lnwire.FailRequiredNodeFeatureMissing: - pruneVertexFailure( - paySession, route, errSource, false, - ) + paySession.ReportVertexFailure(errVertex) continue // If the send fail due to a node not having the // required features, then we'll note this error and // continue. case *lnwire.FailRequiredChannelFeatureMissing: - pruneVertexFailure( - paySession, route, errSource, false, - ) + paySession.ReportVertexFailure(errVertex) continue // If the next hop in the route wasn't known or @@ -1942,22 +1920,18 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, // returning errors in order to attempt to black list // another node. case *lnwire.FailUnknownNextPeer: - pruneEdgeFailure(paySession, route, errSource) + paySession.ReportChannelFailure(failedChanID) continue // If the node wasn't able to forward for which ever // reason, then we'll note this and continue with the // routes. case *lnwire.FailTemporaryNodeFailure: - pruneVertexFailure( - paySession, route, errSource, false, - ) + paySession.ReportVertexFailure(errVertex) continue case *lnwire.FailPermanentNodeFailure: - pruneVertexFailure( - paySession, route, errSource, false, - ) + paySession.ReportVertexFailure(errVertex) continue // If we crafted a route that contains a too long time @@ -1970,16 +1944,14 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, // that as a hint during future path finding through // that node. 
case *lnwire.FailExpiryTooFar: - pruneVertexFailure( - paySession, route, errSource, false, - ) + paySession.ReportVertexFailure(errVertex) continue // If we get a permanent channel or node failure, then // we'll note this (exclude the vertex/edge), and // continue with the rest of the routes. case *lnwire.FailPermanentChannelFailure: - pruneEdgeFailure(paySession, route, errSource) + paySession.ReportChannelFailure(failedChanID) continue default: @@ -1991,74 +1963,46 @@ func (r *ChannelRouter) sendPayment(payment *LightningPayment, } } -// pruneVertexFailure will attempt to prune a vertex from the current available -// vertexes of the target payment session in response to an encountered routing -// error. -func pruneVertexFailure(paySession *paymentSession, route *Route, - errSource *btcec.PublicKey, nextNode bool) { - - // By default, we'll try to prune the node that actually sent us the - // error. - errNode := NewVertex(errSource) - - // If this failure indicates that the node _after_ the source of the - // error was not found. As a result, we'll locate the vertex for that - // node itself. - if nextNode { - nodeToPrune, ok := route.nextHopVertex(errSource) - - if ok { - errNode = nodeToPrune - } - } - - // Once we've located the vertex, we'll report this failure to - // missionControl and restart path finding. - paySession.ReportVertexFailure(errNode) -} - -// pruneEdgeFailure will attempts to prune an edge from the current available -// edges of the target payment session in response to an encountered routing -// error. -func pruneEdgeFailure(paySession *paymentSession, route *Route, - errSource *btcec.PublicKey) { +// getFailedChannelID tries to locate the failing channel given a route and the +// pubkey of the node that sent the error. It will assume that the error is +// associated with the outgoing channel of the error node. 
+func getFailedChannelID(route *Route, errSource *btcec.PublicKey) ( + uint64, error) { // As this error indicates that the target channel was unable to carry // this HTLC (for w/e reason), we'll query the index to find the // _outgoing_ channel the source of the error was meant to pass the // HTLC along to. - badChan, ok := route.nextHopChannel(errSource) - if !ok { - // If we weren't able to find the hop *after* this node, then - // we'll attempt to disable the previous channel. - prevChan, ok := route.prevHopChannel( - errSource, - ) - - if !ok { - return - } - - badChan = prevChan + if badChan, ok := route.nextHopChannel(errSource); ok { + return badChan.ChannelID, nil } - // If the channel was found, then we'll inform mission control of this - // failure so future attempts avoid this link temporarily. - paySession.ReportChannelFailure(badChan.ChannelID) + // If we weren't able to find the hop *after* this node, then we'll + // fall back to the previous channel. + // + // TODO(joostjager): errSource must be the final hop then? In that case, + // certain types of errors are not expected. For example + // FailUnknownNextPeer. This could be a reason to prune the node? + if prevChan, ok := route.prevHopChannel(errSource); ok { + return prevChan.ChannelID, nil + } + + return 0, fmt.Errorf("cannot find channel in route") } // applyChannelUpdate validates a channel update and if valid, applies it to the -// database. +// database. It returns a bool indicating whether the update was successful. func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate, - pubKey *btcec.PublicKey) error { + pubKey *btcec.PublicKey) bool { // If we get passed a nil channel update (as it's optional with some - // onion errors), then we'll exit early with a nil error. + // onion errors), then we'll exit early with a success result. 
if msg == nil { - return nil + return true } if err := ValidateChannelUpdateAnn(pubKey, msg); err != nil { - return err + log.Errorf("Unable to validate channel update: %v", err) + return false } err := r.UpdateEdge(&channeldb.ChannelEdgePolicy{ @@ -2072,10 +2016,11 @@ func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate, FeeProportionalMillionths: lnwire.MilliSatoshi(msg.FeeRate), }) if err != nil && !IsError(err, ErrIgnored, ErrOutdated) { - return fmt.Errorf("Unable to apply channel update: %v", err) + log.Errorf("Unable to apply channel update: %v", err) + return false } - return nil + return true } // AddNode is used to add information about a node to the router database. If diff --git a/routing/router_test.go b/routing/router_test.go index 8bfebe82..7a62754e 100644 --- a/routing/router_test.go +++ b/routing/router_test.go @@ -421,9 +421,10 @@ func TestChannelUpdateValidation(t *testing.T) { }, } - route := &Route{ - Hops: hops, - } + route := NewRouteFromHops( + lnwire.MilliSatoshi(10000), 100, + NewVertex(ctx.aliases["a"]), hops, + ) // Set up a channel update message with an invalid signature to be // returned to the sender.