From 363cec7012ad4578e3d39d2509900535e0a08dc5 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Mon, 19 Mar 2018 18:44:41 -0700 Subject: [PATCH 1/2] routing: add new PayAttemptTimeout field to LightningPayment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit, we add a new field to the LightningPayment struct: PayAttemptTimeout. This new field allows the caller to control exactly how much time should be spent attempting to route a payment to the destination. The default value we’ll use is 60 seconds, but callers are able to specify a diff value. Once the timeout has passed, we’ll abandon th e payment attempt, and return an error back to the original caller. --- routing/errors.go | 4 ++++ routing/router.go | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/routing/errors.go b/routing/errors.go index cdd94c4e..1000dba2 100644 --- a/routing/errors.go +++ b/routing/errors.go @@ -38,6 +38,10 @@ const ( // this update can't bring us something new, or because a node // announcement was given for node not found in any channel. ErrIgnored + + // ErrPaymentAttemptTimeout is an error that indicates that a payment + // attempt timed out before we were able to successfully route an HTLC. + ErrPaymentAttemptTimeout ) // routerError is a structure that represent the error inside the routing package, diff --git a/routing/router.go b/routing/router.go index b8d3a05e..333254de 100644 --- a/routing/router.go +++ b/routing/router.go @@ -31,6 +31,11 @@ const ( // DefaultFinalCLTVDelta is the default value to be used as the final // CLTV delta for a route if one is unspecified. DefaultFinalCLTVDelta = 9 + + // defaultPayAttemptTimeout is a duration that we'll use to determine + // if we should give up on a payment attempt. This will be used if a + // value isn't specified in the LightningNode struct. + defaultPayAttemptTimeout = time.Duration(time.Second * 60) ) // ChannelGraphSource represents the source of information about the topology @@ -1453,6 +1458,12 @@ type LightningPayment struct { // used. FinalCLTVDelta *uint16 + // PayAttemptTimeout is a timeout value that we'll use to determine + // when we should should abandon the payment attempt after consecutive + // payment failure. This prevents us from attempting to send a payment + // indefinitely. + PayAttemptTimeout time.Duration + // TODO(roasbeef): add e2e message? } @@ -1490,6 +1501,15 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route finalCLTVDelta = *payment.FinalCLTVDelta } + var payAttemptTimeout time.Duration + if payment.PayAttemptTimeout == time.Duration(0) { + payAttemptTimeout = defaultPayAttemptTimeout + } else { + payAttemptTimeout = payment.PayAttemptTimeout + } + + timeoutChan := time.After(payAttemptTimeout) + // Before starting the HTLC routing attempt, we'll create a fresh // payment session which will report our errors back to mission // control. @@ -1498,6 +1518,27 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route // We'll continue until either our payment succeeds, or we encounter a // critical error during path finding. for { + // Before we attempt this next payment, we'll check to see if + // either we've gone past the payment attempt timeout, or the + // router is exiting. In either case, we'll stop this payment + // attempt short. + select { + case <-timeoutChan: + errStr := fmt.Sprintf("payment attempt not completed "+ + "before timeout of %v", payAttemptTimeout) + + return preImage, nil, newErr( + ErrPaymentAttemptTimeout, errStr, + ) + + case <-r.quit: + return preImage, nil, fmt.Errorf("router shutting down") + + default: + // Fall through if we haven't hit our time limit, or + // are expiring. + } + // We'll kick things off by requesting a new route from mission // control, which will incorporate the current best known state // of the channel graph and our past HTLC routing From 93b04b39fe27aa0c2f76302f7dc19b2bb44e53e8 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Mon, 19 Mar 2018 19:08:55 -0700 Subject: [PATCH 2/2] routing: account for remaining routing onion errors in SendPayment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit, we address a number of edge cases that were unaccounted for when responding to errors that can be sent back due to an HTLC routing failure. Namely: * We’ll no longer stop payment attempts if we’re unable to apply a channel update, instead, we’ll log the error, prune the channel and continue. * We’ll no remember which channels were pruned due to insufficient fee errors. If we ever get a repeat fee error from a channel, then we prune it. This ensure that we don’t get stuck in a loop due to a node continually advertising the same fees. * We also correct an error in which node we’d prune due to a temporary or permanent node failure. Before this commit, we would prune the next node, when we should actually be pruning the node that sent us the error. Finally, we also add a new test to exercise the fee insufficient error handling and channel pruning. Fixes #865. --- routing/router.go | 221 +++++++++++++++++++++++++---------------- routing/router_test.go | 99 ++++++++++++++++++ 2 files changed, 234 insertions(+), 86 deletions(-) diff --git a/routing/router.go b/routing/router.go index 333254de..cb786754 100644 --- a/routing/router.go +++ b/routing/router.go @@ -1487,6 +1487,12 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route sendError error ) + // errFailedFeeChans is a map of the short channel ID's that were the + // source of fee related routing failures during this payment attempt. + // We'll use this map to prune out channels when the first error may + // not require pruning, but any subsequent ones do. + errFailedFeeChans := make(map[lnwire.ShortChannelID]struct{}) + // We'll also fetch the current block height so we can properly // calculate the required HTLC time locks within the route. _, currentHeight, err := r.cfg.Chain.GetBestBlock() @@ -1567,8 +1573,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route // Generate the raw encoded sphinx packet to be included along // with the htlcAdd message that we send directly to the // switch. - onionBlob, circuit, err := generateSphinxPacket(route, - payment.PaymentHash[:]) + onionBlob, circuit, err := generateSphinxPacket( + route, payment.PaymentHash[:], + ) if err != nil { return preImage, nil, err } @@ -1587,8 +1594,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route // the payment. If this attempt fails, then we'll continue on // to the next available route. firstHop := route.Hops[0].Channel.Node.PubKeyBytes - preImage, sendError = r.cfg.SendToSwitch(firstHop, htlcAdd, - circuit) + preImage, sendError = r.cfg.SendToSwitch( + firstHop, htlcAdd, circuit, + ) if sendError != nil { // An error occurred when attempting to send the // payment, depending on the error type, we'll either @@ -1649,7 +1657,8 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route case *lnwire.FailExpiryTooSoon: update := onionErr.Update if err := r.applyChannelUpdate(&update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) } return preImage, nil, sendError @@ -1666,145 +1675,129 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route // If the onion error includes a channel update, and // isn't necessarily fatal, then we'll apply the update // an continue with the rest of the routes. - // - // TODO(roasbeef): should re-query for routes with new updates case *lnwire.FailAmountBelowMinimum: update := onionErr.Update if err := r.applyChannelUpdate(&update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) } return preImage, nil, sendError + + // If we get a failure due to a fee, so we'll apply the + // new fee update, and retry our attempt using the + // newly updated fees. case *lnwire.FailFeeInsufficient: update := onionErr.Update if err := r.applyChannelUpdate(&update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) + + pruneEdgeFailure( + paySession, route, errSource, + ) } - return preImage, nil, sendError + // We'll now check to see if we've already + // reported a fee related failure for this + // node. If so, then we'll actually prune out + // the edge for now. + chanID := update.ShortChannelID + _, ok := errFailedFeeChans[chanID] + if ok { + pruneEdgeFailure( + paySession, route, errSource, + ) + continue + } + + // Finally, we'll record a fee failure from + // this node and move on. + errFailedFeeChans[chanID] = struct{}{} + continue + case *lnwire.FailIncorrectCltvExpiry: update := onionErr.Update if err := r.applyChannelUpdate(&update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) } return preImage, nil, sendError + + // The outgoing channel that this node was meant to + // forward one is currently disabled, so we'll apply + // the update and continue. case *lnwire.FailChannelDisabled: update := onionErr.Update if err := r.applyChannelUpdate(&update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) } - return preImage, nil, sendError + pruneEdgeFailure(paySession, route, errSource) + continue + + // It's likely that the outgoing channel didn't have + // sufficient capacity, so we'll prune this edge for + // now, and continue onwards with our path finding. case *lnwire.FailTemporaryChannelFailure: update := onionErr.Update if err := r.applyChannelUpdate(update); err != nil { - return preImage, nil, err + log.Errorf("unable to apply channel "+ + "update for onion error: %v", err) } - // As this error indicates that the target - // channel was unable to carry this HTLC (for - // w/e reason), we'll query the index to find - // the _outgoing_ channel the source of the - // error was meant to pass the HTLC along to. - badChan, ok := route.nextHopChannel(errSource) - if !ok { - // If we weren't able to find the hop - // *after* this node, then we'll - // attempt to disable the previous - // channel. - badChan, ok = route.prevHopChannel( - errSource, - ) - if !ok { - continue - } - } - - // If the channel was found, then we'll inform - // mission control of this failure so future - // attempts avoid this link temporarily. - paySession.ReportChannelFailure(badChan.ChannelID) + pruneEdgeFailure(paySession, route, errSource) continue // If the send fail due to a node not having the // required features, then we'll note this error and // continue. - // - // TODO(roasbeef): remove node from path case *lnwire.FailRequiredNodeFeatureMissing: + pruneVertexFailure( + paySession, route, errSource, false, + ) continue // If the send fail due to a node not having the // required features, then we'll note this error and // continue. - // - // TODO(roasbeef): remove channel from path case *lnwire.FailRequiredChannelFeatureMissing: + pruneVertexFailure( + paySession, route, errSource, false, + ) continue // If the next hop in the route wasn't known or // offline, we'll prune the _next_ hop from the set of // routes and retry. case *lnwire.FailUnknownNextPeer: - // This failure indicates that the node _after_ - // the source of the error was not found. As a - // result, we'll locate the vertex for that - // node itself. - missingNode, ok := route.nextHopVertex(errSource) - if !ok { - continue - } - - // Once we've located the vertex, we'll report - // this failure to missionControl and restart - // path finding. - paySession.ReportVertexFailure(missingNode) + pruneVertexFailure( + paySession, route, errSource, true, + ) continue // If the node wasn't able to forward for which ever // reason, then we'll note this and continue with the // routes. case *lnwire.FailTemporaryNodeFailure: - missingNode, ok := route.nextHopVertex(errSource) - if !ok { - continue - } + pruneVertexFailure( + paySession, route, errSource, false, + ) + continue - paySession.ReportVertexFailure(missingNode) + case *lnwire.FailPermanentNodeFailure: + pruneVertexFailure( + paySession, route, errSource, false, + ) continue // If we get a permanent channel or node failure, then // we'll note this (exclude the vertex/edge), and // continue with the rest of the routes. case *lnwire.FailPermanentChannelFailure: - // As this error indicates that the target - // channel was unable to carry this HTLC (for - // w/e reason), we'll query the index to find - // the _outgoing_ channel the source of the - // error was meant to pass the HTLC along to. - badChan, ok := route.nextHopChannel(errSource) - if !ok { - // If we weren't able to find the hop - // *after* this node, then we'll - // attempt to disable the previous - // channel. - badChan, ok = route.prevHopChannel( - errSource, - ) - if !ok { - continue - } - } - - // If the channel was found, then we'll inform - // mission control of this failure so future - // attempts avoid this link temporarily. - paySession.ReportChannelFailure(badChan.ChannelID) - continue - - case *lnwire.FailPermanentNodeFailure: - // TODO(roasbeef): remove node from path + pruneEdgeFailure(paySession, route, errSource) continue default: @@ -1816,6 +1809,62 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route } } +// pruneVertexFailure will attempt to prune a vertex from the current available +// vertexes of the target payment session in response to an encountered routing +// error. +func pruneVertexFailure(paySession *paymentSession, route *Route, + errSource *btcec.PublicKey, nextNode bool) { + + // By default, we'll try to prune the node that actually sent us the + // error. + errNode := NewVertex(errSource) + + // If this failure indicates that the node _after_ the source of the + // error was not found. As a result, we'll locate the vertex for that + // node itself. + if nextNode { + nodeToPrune, ok := route.nextHopVertex(errSource) + + if ok { + errNode = nodeToPrune + } + } + + // Once we've located the vertex, we'll report this failure to + // missionControl and restart path finding. + paySession.ReportVertexFailure(errNode) +} + +// pruneEdgeFailure will attempts to prune an edge from the current available +// edges of the target payment session in response to an encountered routing +// error. +func pruneEdgeFailure(paySession *paymentSession, route *Route, + errSource *btcec.PublicKey) { + + // As this error indicates that the target channel was unable to carry + // this HTLC (for w/e reason), we'll query the index to find the + // _outgoing_ channel the source of the error was meant to pass the + // HTLC along to. + badChan, ok := route.nextHopChannel(errSource) + if !ok { + // If we weren't able to find the hop *after* this node, then + // we'll attempt to disable the previous channel. + prevChan, ok := route.prevHopChannel( + errSource, + ) + + if !ok { + return + } + + badChan = prevChan + } + + // If the channel was found, then we'll inform mission control of this + // failure so future attempts avoid this link temporarily. + paySession.ReportChannelFailure(badChan.ChannelID) +} + // applyChannelUpdate applies a channel update directly to the database, // skipping preliminary validation. func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate) error { diff --git a/routing/router_test.go b/routing/router_test.go index 67bc48f0..4d2f6928 100644 --- a/routing/router_test.go +++ b/routing/router_test.go @@ -280,6 +280,105 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) { } } +// TestSendPaymentErrorRepeatedFeeInsufficient tests that if we receive +// multiple fee related errors from a channel that we're attempting to route +// through, then we'll prune the channel after the second attempt. +func TestSendPaymentErrorRepeatedFeeInsufficient(t *testing.T) { + t.Parallel() + + const startingBlockHeight = 101 + ctx, cleanUp, err := createTestCtx(startingBlockHeight, basicGraphFilePath) + defer cleanUp() + if err != nil { + t.Fatalf("unable to create router: %v", err) + } + + // Craft a LightningPayment struct that'll send a payment from roasbeef + // to luo ji for 100 satoshis. + var payHash [32]byte + payment := LightningPayment{ + Target: ctx.aliases["luoji"], + Amount: lnwire.NewMSatFromSatoshis(1000), + PaymentHash: payHash, + } + + var preImage [32]byte + copy(preImage[:], bytes.Repeat([]byte{9}, 32)) + + // We'll also fetch the first outgoing channel edge from roasbeef to + // luo ji. We'll obtain this as we'll need to to generate the + // FeeInsufficient error that we'll send back. + chanID := uint64(689530843) + _, _, edgeUpateToFail, err := ctx.graph.FetchChannelEdgesByID(chanID) + if err != nil { + t.Fatalf("unable to fetch chan id: %v", err) + } + + errChanUpdate := lnwire.ChannelUpdate{ + ShortChannelID: lnwire.NewShortChanIDFromInt(chanID), + Timestamp: uint32(edgeUpateToFail.LastUpdate.Unix()), + Flags: edgeUpateToFail.Flags, + TimeLockDelta: edgeUpateToFail.TimeLockDelta, + HtlcMinimumMsat: edgeUpateToFail.MinHTLC, + BaseFee: uint32(edgeUpateToFail.FeeBaseMSat), + FeeRate: uint32(edgeUpateToFail.FeeProportionalMillionths), + } + + sourceNode := ctx.router.selfNode + + // We'll now modify the SendToSwitch method to return an error for the + // outgoing channel to luo ji. This will be a fee related error, so it + // should only cause the edge to be pruned after the second attempt. + ctx.router.cfg.SendToSwitch = func(n [33]byte, + _ *lnwire.UpdateAddHTLC, _ *sphinx.Circuit) ([32]byte, error) { + + if bytes.Equal(ctx.aliases["luoji"].SerializeCompressed(), n[:]) { + pub, err := sourceNode.PubKey() + if err != nil { + return preImage, err + } + return [32]byte{}, &htlcswitch.ForwardingError{ + ErrorSource: pub, + + // Within our error, we'll add a channel update + // which is meant to refelct he new fee + // schedule for the node/channel. + FailureMessage: &lnwire.FailFeeInsufficient{ + Update: errChanUpdate, + }, + } + } + + return preImage, nil + } + + // Send off the payment request to the router, route through satoshi + // should've been selected as a fall back and succeeded correctly. + paymentPreImage, route, err := ctx.router.SendPayment(&payment) + if err != nil { + t.Fatalf("unable to send payment: %v", err) + } + + // The route selected should have two hops + if len(route.Hops) != 2 { + t.Fatalf("incorrect route length: expected %v got %v", 2, + len(route.Hops)) + } + + // The preimage should match up with the once created above. + if !bytes.Equal(paymentPreImage[:], preImage[:]) { + t.Fatalf("incorrect preimage used: expected %x got %x", + preImage[:], paymentPreImage[:]) + } + + // The route should have satoshi as the first hop. + if route.Hops[0].Channel.Node.Alias != "satoshi" { + t.Fatalf("route should go through satoshi as first hop, "+ + "instead passes through: %v", + route.Hops[0].Channel.Node.Alias) + } +} + // TestSendPaymentErrorPathPruning tests that the send of candidate routes // properly gets pruned in response to ForwardingError response from the // underlying SendToSwitch function.