routing: account for remaining routing onion errors in SendPayment

In this commit, we address a number of edge cases that were unaccounted
for when responding to errors that can be sent back due to an HTLC
routing failure. Namely:

   * We’ll no longer stop payment attempts if we’re unable to apply a
channel update, instead, we’ll log the error, prune the channel and
continue.
    * We’ll no remember which channels were pruned due to insufficient
fee errors. If we ever get a repeat fee error from a channel, then we
prune it. This ensure that we don’t get stuck in a loop due to a node
continually advertising the same fees.
   * We also correct an error in which node we’d prune due to a
temporary or permanent node failure. Before this commit, we would prune
the next node, when we should actually be pruning the node that sent us
the error.

Finally, we also add a new test to exercise the fee insufficient error
handling and channel pruning.

Fixes #865.
This commit is contained in:
Olaoluwa Osuntokun 2018-03-19 19:08:55 -07:00
parent 363cec7012
commit 93b04b39fe
No known key found for this signature in database
GPG Key ID: 964EA263DD637C21
2 changed files with 234 additions and 86 deletions

@ -1487,6 +1487,12 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
sendError error
)
// errFailedFeeChans is a map of the short channel ID's that were the
// source of fee related routing failures during this payment attempt.
// We'll use this map to prune out channels when the first error may
// not require pruning, but any subsequent ones do.
errFailedFeeChans := make(map[lnwire.ShortChannelID]struct{})
// We'll also fetch the current block height so we can properly
// calculate the required HTLC time locks within the route.
_, currentHeight, err := r.cfg.Chain.GetBestBlock()
@ -1567,8 +1573,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
// Generate the raw encoded sphinx packet to be included along
// with the htlcAdd message that we send directly to the
// switch.
onionBlob, circuit, err := generateSphinxPacket(route,
payment.PaymentHash[:])
onionBlob, circuit, err := generateSphinxPacket(
route, payment.PaymentHash[:],
)
if err != nil {
return preImage, nil, err
}
@ -1587,8 +1594,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
// the payment. If this attempt fails, then we'll continue on
// to the next available route.
firstHop := route.Hops[0].Channel.Node.PubKeyBytes
preImage, sendError = r.cfg.SendToSwitch(firstHop, htlcAdd,
circuit)
preImage, sendError = r.cfg.SendToSwitch(
firstHop, htlcAdd, circuit,
)
if sendError != nil {
// An error occurred when attempting to send the
// payment, depending on the error type, we'll either
@ -1649,7 +1657,8 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
case *lnwire.FailExpiryTooSoon:
update := onionErr.Update
if err := r.applyChannelUpdate(&update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
}
return preImage, nil, sendError
@ -1666,145 +1675,129 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
// If the onion error includes a channel update, and
// isn't necessarily fatal, then we'll apply the update
// an continue with the rest of the routes.
//
// TODO(roasbeef): should re-query for routes with new updates
case *lnwire.FailAmountBelowMinimum:
update := onionErr.Update
if err := r.applyChannelUpdate(&update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
}
return preImage, nil, sendError
// If we get a failure due to a fee, so we'll apply the
// new fee update, and retry our attempt using the
// newly updated fees.
case *lnwire.FailFeeInsufficient:
update := onionErr.Update
if err := r.applyChannelUpdate(&update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
pruneEdgeFailure(
paySession, route, errSource,
)
}
return preImage, nil, sendError
// We'll now check to see if we've already
// reported a fee related failure for this
// node. If so, then we'll actually prune out
// the edge for now.
chanID := update.ShortChannelID
_, ok := errFailedFeeChans[chanID]
if ok {
pruneEdgeFailure(
paySession, route, errSource,
)
continue
}
// Finally, we'll record a fee failure from
// this node and move on.
errFailedFeeChans[chanID] = struct{}{}
continue
case *lnwire.FailIncorrectCltvExpiry:
update := onionErr.Update
if err := r.applyChannelUpdate(&update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
}
return preImage, nil, sendError
// The outgoing channel that this node was meant to
// forward one is currently disabled, so we'll apply
// the update and continue.
case *lnwire.FailChannelDisabled:
update := onionErr.Update
if err := r.applyChannelUpdate(&update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
}
return preImage, nil, sendError
pruneEdgeFailure(paySession, route, errSource)
continue
// It's likely that the outgoing channel didn't have
// sufficient capacity, so we'll prune this edge for
// now, and continue onwards with our path finding.
case *lnwire.FailTemporaryChannelFailure:
update := onionErr.Update
if err := r.applyChannelUpdate(update); err != nil {
return preImage, nil, err
log.Errorf("unable to apply channel "+
"update for onion error: %v", err)
}
// As this error indicates that the target
// channel was unable to carry this HTLC (for
// w/e reason), we'll query the index to find
// the _outgoing_ channel the source of the
// error was meant to pass the HTLC along to.
badChan, ok := route.nextHopChannel(errSource)
if !ok {
// If we weren't able to find the hop
// *after* this node, then we'll
// attempt to disable the previous
// channel.
badChan, ok = route.prevHopChannel(
errSource,
)
if !ok {
continue
}
}
// If the channel was found, then we'll inform
// mission control of this failure so future
// attempts avoid this link temporarily.
paySession.ReportChannelFailure(badChan.ChannelID)
pruneEdgeFailure(paySession, route, errSource)
continue
// If the send fail due to a node not having the
// required features, then we'll note this error and
// continue.
//
// TODO(roasbeef): remove node from path
case *lnwire.FailRequiredNodeFeatureMissing:
pruneVertexFailure(
paySession, route, errSource, false,
)
continue
// If the send fail due to a node not having the
// required features, then we'll note this error and
// continue.
//
// TODO(roasbeef): remove channel from path
case *lnwire.FailRequiredChannelFeatureMissing:
pruneVertexFailure(
paySession, route, errSource, false,
)
continue
// If the next hop in the route wasn't known or
// offline, we'll prune the _next_ hop from the set of
// routes and retry.
case *lnwire.FailUnknownNextPeer:
// This failure indicates that the node _after_
// the source of the error was not found. As a
// result, we'll locate the vertex for that
// node itself.
missingNode, ok := route.nextHopVertex(errSource)
if !ok {
continue
}
// Once we've located the vertex, we'll report
// this failure to missionControl and restart
// path finding.
paySession.ReportVertexFailure(missingNode)
pruneVertexFailure(
paySession, route, errSource, true,
)
continue
// If the node wasn't able to forward for which ever
// reason, then we'll note this and continue with the
// routes.
case *lnwire.FailTemporaryNodeFailure:
missingNode, ok := route.nextHopVertex(errSource)
if !ok {
continue
}
pruneVertexFailure(
paySession, route, errSource, false,
)
continue
paySession.ReportVertexFailure(missingNode)
case *lnwire.FailPermanentNodeFailure:
pruneVertexFailure(
paySession, route, errSource, false,
)
continue
// If we get a permanent channel or node failure, then
// we'll note this (exclude the vertex/edge), and
// continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
// As this error indicates that the target
// channel was unable to carry this HTLC (for
// w/e reason), we'll query the index to find
// the _outgoing_ channel the source of the
// error was meant to pass the HTLC along to.
badChan, ok := route.nextHopChannel(errSource)
if !ok {
// If we weren't able to find the hop
// *after* this node, then we'll
// attempt to disable the previous
// channel.
badChan, ok = route.prevHopChannel(
errSource,
)
if !ok {
continue
}
}
// If the channel was found, then we'll inform
// mission control of this failure so future
// attempts avoid this link temporarily.
paySession.ReportChannelFailure(badChan.ChannelID)
continue
case *lnwire.FailPermanentNodeFailure:
// TODO(roasbeef): remove node from path
pruneEdgeFailure(paySession, route, errSource)
continue
default:
@ -1816,6 +1809,62 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
}
}
// pruneVertexFailure will attempt to prune a vertex from the current available
// vertexes of the target payment session in response to an encountered routing
// error.
func pruneVertexFailure(paySession *paymentSession, route *Route,
errSource *btcec.PublicKey, nextNode bool) {
// By default, we'll try to prune the node that actually sent us the
// error.
errNode := NewVertex(errSource)
// If this failure indicates that the node _after_ the source of the
// error was not found. As a result, we'll locate the vertex for that
// node itself.
if nextNode {
nodeToPrune, ok := route.nextHopVertex(errSource)
if ok {
errNode = nodeToPrune
}
}
// Once we've located the vertex, we'll report this failure to
// missionControl and restart path finding.
paySession.ReportVertexFailure(errNode)
}
// pruneEdgeFailure will attempts to prune an edge from the current available
// edges of the target payment session in response to an encountered routing
// error.
func pruneEdgeFailure(paySession *paymentSession, route *Route,
errSource *btcec.PublicKey) {
// As this error indicates that the target channel was unable to carry
// this HTLC (for w/e reason), we'll query the index to find the
// _outgoing_ channel the source of the error was meant to pass the
// HTLC along to.
badChan, ok := route.nextHopChannel(errSource)
if !ok {
// If we weren't able to find the hop *after* this node, then
// we'll attempt to disable the previous channel.
prevChan, ok := route.prevHopChannel(
errSource,
)
if !ok {
return
}
badChan = prevChan
}
// If the channel was found, then we'll inform mission control of this
// failure so future attempts avoid this link temporarily.
paySession.ReportChannelFailure(badChan.ChannelID)
}
// applyChannelUpdate applies a channel update directly to the database,
// skipping preliminary validation.
func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate) error {

@ -280,6 +280,105 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
}
}
// TestSendPaymentErrorRepeatedFeeInsufficient tests that if we receive
// multiple fee related errors from a channel that we're attempting to route
// through, then we'll prune the channel after the second attempt.
func TestSendPaymentErrorRepeatedFeeInsufficient(t *testing.T) {
t.Parallel()
const startingBlockHeight = 101
ctx, cleanUp, err := createTestCtx(startingBlockHeight, basicGraphFilePath)
defer cleanUp()
if err != nil {
t.Fatalf("unable to create router: %v", err)
}
// Craft a LightningPayment struct that'll send a payment from roasbeef
// to luo ji for 100 satoshis.
var payHash [32]byte
payment := LightningPayment{
Target: ctx.aliases["luoji"],
Amount: lnwire.NewMSatFromSatoshis(1000),
PaymentHash: payHash,
}
var preImage [32]byte
copy(preImage[:], bytes.Repeat([]byte{9}, 32))
// We'll also fetch the first outgoing channel edge from roasbeef to
// luo ji. We'll obtain this as we'll need to to generate the
// FeeInsufficient error that we'll send back.
chanID := uint64(689530843)
_, _, edgeUpateToFail, err := ctx.graph.FetchChannelEdgesByID(chanID)
if err != nil {
t.Fatalf("unable to fetch chan id: %v", err)
}
errChanUpdate := lnwire.ChannelUpdate{
ShortChannelID: lnwire.NewShortChanIDFromInt(chanID),
Timestamp: uint32(edgeUpateToFail.LastUpdate.Unix()),
Flags: edgeUpateToFail.Flags,
TimeLockDelta: edgeUpateToFail.TimeLockDelta,
HtlcMinimumMsat: edgeUpateToFail.MinHTLC,
BaseFee: uint32(edgeUpateToFail.FeeBaseMSat),
FeeRate: uint32(edgeUpateToFail.FeeProportionalMillionths),
}
sourceNode := ctx.router.selfNode
// We'll now modify the SendToSwitch method to return an error for the
// outgoing channel to luo ji. This will be a fee related error, so it
// should only cause the edge to be pruned after the second attempt.
ctx.router.cfg.SendToSwitch = func(n [33]byte,
_ *lnwire.UpdateAddHTLC, _ *sphinx.Circuit) ([32]byte, error) {
if bytes.Equal(ctx.aliases["luoji"].SerializeCompressed(), n[:]) {
pub, err := sourceNode.PubKey()
if err != nil {
return preImage, err
}
return [32]byte{}, &htlcswitch.ForwardingError{
ErrorSource: pub,
// Within our error, we'll add a channel update
// which is meant to refelct he new fee
// schedule for the node/channel.
FailureMessage: &lnwire.FailFeeInsufficient{
Update: errChanUpdate,
},
}
}
return preImage, nil
}
// Send off the payment request to the router, route through satoshi
// should've been selected as a fall back and succeeded correctly.
paymentPreImage, route, err := ctx.router.SendPayment(&payment)
if err != nil {
t.Fatalf("unable to send payment: %v", err)
}
// The route selected should have two hops
if len(route.Hops) != 2 {
t.Fatalf("incorrect route length: expected %v got %v", 2,
len(route.Hops))
}
// The preimage should match up with the once created above.
if !bytes.Equal(paymentPreImage[:], preImage[:]) {
t.Fatalf("incorrect preimage used: expected %x got %x",
preImage[:], paymentPreImage[:])
}
// The route should have satoshi as the first hop.
if route.Hops[0].Channel.Node.Alias != "satoshi" {
t.Fatalf("route should go through satoshi as first hop, "+
"instead passes through: %v",
route.Hops[0].Channel.Node.Alias)
}
}
// TestSendPaymentErrorPathPruning tests that the send of candidate routes
// properly gets pruned in response to ForwardingError response from the
// underlying SendToSwitch function.