Merge pull request #893 from Roasbeef/routing-error-handling-bug-fix
routing: add timeout during pay attempts, don't exit if unable apply update, account for all onion errors
This commit is contained in:
commit
90cb2944eb
@ -38,6 +38,10 @@ const (
|
||||
// this update can't bring us something new, or because a node
|
||||
// announcement was given for node not found in any channel.
|
||||
ErrIgnored
|
||||
|
||||
// ErrPaymentAttemptTimeout is an error that indicates that a payment
|
||||
// attempt timed out before we were able to successfully route an HTLC.
|
||||
ErrPaymentAttemptTimeout
|
||||
)
|
||||
|
||||
// routerError is a structure that represent the error inside the routing package,
|
||||
|
@ -31,6 +31,11 @@ const (
|
||||
// DefaultFinalCLTVDelta is the default value to be used as the final
|
||||
// CLTV delta for a route if one is unspecified.
|
||||
DefaultFinalCLTVDelta = 9
|
||||
|
||||
// defaultPayAttemptTimeout is a duration that we'll use to determine
|
||||
// if we should give up on a payment attempt. This will be used if a
|
||||
// value isn't specified in the LightningNode struct.
|
||||
defaultPayAttemptTimeout = time.Duration(time.Second * 60)
|
||||
)
|
||||
|
||||
// ChannelGraphSource represents the source of information about the topology
|
||||
@ -1453,6 +1458,12 @@ type LightningPayment struct {
|
||||
// used.
|
||||
FinalCLTVDelta *uint16
|
||||
|
||||
// PayAttemptTimeout is a timeout value that we'll use to determine
|
||||
// when we should should abandon the payment attempt after consecutive
|
||||
// payment failure. This prevents us from attempting to send a payment
|
||||
// indefinitely.
|
||||
PayAttemptTimeout time.Duration
|
||||
|
||||
// TODO(roasbeef): add e2e message?
|
||||
}
|
||||
|
||||
@ -1476,6 +1487,12 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
sendError error
|
||||
)
|
||||
|
||||
// errFailedFeeChans is a map of the short channel ID's that were the
|
||||
// source of fee related routing failures during this payment attempt.
|
||||
// We'll use this map to prune out channels when the first error may
|
||||
// not require pruning, but any subsequent ones do.
|
||||
errFailedFeeChans := make(map[lnwire.ShortChannelID]struct{})
|
||||
|
||||
// We'll also fetch the current block height so we can properly
|
||||
// calculate the required HTLC time locks within the route.
|
||||
_, currentHeight, err := r.cfg.Chain.GetBestBlock()
|
||||
@ -1490,6 +1507,15 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
finalCLTVDelta = *payment.FinalCLTVDelta
|
||||
}
|
||||
|
||||
var payAttemptTimeout time.Duration
|
||||
if payment.PayAttemptTimeout == time.Duration(0) {
|
||||
payAttemptTimeout = defaultPayAttemptTimeout
|
||||
} else {
|
||||
payAttemptTimeout = payment.PayAttemptTimeout
|
||||
}
|
||||
|
||||
timeoutChan := time.After(payAttemptTimeout)
|
||||
|
||||
// Before starting the HTLC routing attempt, we'll create a fresh
|
||||
// payment session which will report our errors back to mission
|
||||
// control.
|
||||
@ -1498,6 +1524,27 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
// We'll continue until either our payment succeeds, or we encounter a
|
||||
// critical error during path finding.
|
||||
for {
|
||||
// Before we attempt this next payment, we'll check to see if
|
||||
// either we've gone past the payment attempt timeout, or the
|
||||
// router is exiting. In either case, we'll stop this payment
|
||||
// attempt short.
|
||||
select {
|
||||
case <-timeoutChan:
|
||||
errStr := fmt.Sprintf("payment attempt not completed "+
|
||||
"before timeout of %v", payAttemptTimeout)
|
||||
|
||||
return preImage, nil, newErr(
|
||||
ErrPaymentAttemptTimeout, errStr,
|
||||
)
|
||||
|
||||
case <-r.quit:
|
||||
return preImage, nil, fmt.Errorf("router shutting down")
|
||||
|
||||
default:
|
||||
// Fall through if we haven't hit our time limit, or
|
||||
// are expiring.
|
||||
}
|
||||
|
||||
// We'll kick things off by requesting a new route from mission
|
||||
// control, which will incorporate the current best known state
|
||||
// of the channel graph and our past HTLC routing
|
||||
@ -1526,8 +1573,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
// Generate the raw encoded sphinx packet to be included along
|
||||
// with the htlcAdd message that we send directly to the
|
||||
// switch.
|
||||
onionBlob, circuit, err := generateSphinxPacket(route,
|
||||
payment.PaymentHash[:])
|
||||
onionBlob, circuit, err := generateSphinxPacket(
|
||||
route, payment.PaymentHash[:],
|
||||
)
|
||||
if err != nil {
|
||||
return preImage, nil, err
|
||||
}
|
||||
@ -1546,8 +1594,9 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
// the payment. If this attempt fails, then we'll continue on
|
||||
// to the next available route.
|
||||
firstHop := route.Hops[0].Channel.Node.PubKeyBytes
|
||||
preImage, sendError = r.cfg.SendToSwitch(firstHop, htlcAdd,
|
||||
circuit)
|
||||
preImage, sendError = r.cfg.SendToSwitch(
|
||||
firstHop, htlcAdd, circuit,
|
||||
)
|
||||
if sendError != nil {
|
||||
// An error occurred when attempting to send the
|
||||
// payment, depending on the error type, we'll either
|
||||
@ -1608,7 +1657,8 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
case *lnwire.FailExpiryTooSoon:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(&update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
return preImage, nil, sendError
|
||||
|
||||
@ -1625,145 +1675,129 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
// If the onion error includes a channel update, and
|
||||
// isn't necessarily fatal, then we'll apply the update
|
||||
// an continue with the rest of the routes.
|
||||
//
|
||||
// TODO(roasbeef): should re-query for routes with new updates
|
||||
case *lnwire.FailAmountBelowMinimum:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(&update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
return preImage, nil, sendError
|
||||
|
||||
// If we get a failure due to a fee, so we'll apply the
|
||||
// new fee update, and retry our attempt using the
|
||||
// newly updated fees.
|
||||
case *lnwire.FailFeeInsufficient:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(&update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
|
||||
pruneEdgeFailure(
|
||||
paySession, route, errSource,
|
||||
)
|
||||
}
|
||||
|
||||
return preImage, nil, sendError
|
||||
// We'll now check to see if we've already
|
||||
// reported a fee related failure for this
|
||||
// node. If so, then we'll actually prune out
|
||||
// the edge for now.
|
||||
chanID := update.ShortChannelID
|
||||
_, ok := errFailedFeeChans[chanID]
|
||||
if ok {
|
||||
pruneEdgeFailure(
|
||||
paySession, route, errSource,
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
// Finally, we'll record a fee failure from
|
||||
// this node and move on.
|
||||
errFailedFeeChans[chanID] = struct{}{}
|
||||
continue
|
||||
|
||||
case *lnwire.FailIncorrectCltvExpiry:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(&update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
return preImage, nil, sendError
|
||||
|
||||
// The outgoing channel that this node was meant to
|
||||
// forward one is currently disabled, so we'll apply
|
||||
// the update and continue.
|
||||
case *lnwire.FailChannelDisabled:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(&update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
return preImage, nil, sendError
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
continue
|
||||
|
||||
// It's likely that the outgoing channel didn't have
|
||||
// sufficient capacity, so we'll prune this edge for
|
||||
// now, and continue onwards with our path finding.
|
||||
case *lnwire.FailTemporaryChannelFailure:
|
||||
update := onionErr.Update
|
||||
if err := r.applyChannelUpdate(update); err != nil {
|
||||
return preImage, nil, err
|
||||
log.Errorf("unable to apply channel "+
|
||||
"update for onion error: %v", err)
|
||||
}
|
||||
|
||||
// As this error indicates that the target
|
||||
// channel was unable to carry this HTLC (for
|
||||
// w/e reason), we'll query the index to find
|
||||
// the _outgoing_ channel the source of the
|
||||
// error was meant to pass the HTLC along to.
|
||||
badChan, ok := route.nextHopChannel(errSource)
|
||||
if !ok {
|
||||
// If we weren't able to find the hop
|
||||
// *after* this node, then we'll
|
||||
// attempt to disable the previous
|
||||
// channel.
|
||||
badChan, ok = route.prevHopChannel(
|
||||
errSource,
|
||||
)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// If the channel was found, then we'll inform
|
||||
// mission control of this failure so future
|
||||
// attempts avoid this link temporarily.
|
||||
paySession.ReportChannelFailure(badChan.ChannelID)
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
continue
|
||||
|
||||
// If the send fail due to a node not having the
|
||||
// required features, then we'll note this error and
|
||||
// continue.
|
||||
//
|
||||
// TODO(roasbeef): remove node from path
|
||||
case *lnwire.FailRequiredNodeFeatureMissing:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
continue
|
||||
|
||||
// If the send fail due to a node not having the
|
||||
// required features, then we'll note this error and
|
||||
// continue.
|
||||
//
|
||||
// TODO(roasbeef): remove channel from path
|
||||
case *lnwire.FailRequiredChannelFeatureMissing:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
continue
|
||||
|
||||
// If the next hop in the route wasn't known or
|
||||
// offline, we'll prune the _next_ hop from the set of
|
||||
// routes and retry.
|
||||
case *lnwire.FailUnknownNextPeer:
|
||||
// This failure indicates that the node _after_
|
||||
// the source of the error was not found. As a
|
||||
// result, we'll locate the vertex for that
|
||||
// node itself.
|
||||
missingNode, ok := route.nextHopVertex(errSource)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// Once we've located the vertex, we'll report
|
||||
// this failure to missionControl and restart
|
||||
// path finding.
|
||||
paySession.ReportVertexFailure(missingNode)
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, true,
|
||||
)
|
||||
continue
|
||||
|
||||
// If the node wasn't able to forward for which ever
|
||||
// reason, then we'll note this and continue with the
|
||||
// routes.
|
||||
case *lnwire.FailTemporaryNodeFailure:
|
||||
missingNode, ok := route.nextHopVertex(errSource)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
continue
|
||||
|
||||
paySession.ReportVertexFailure(missingNode)
|
||||
case *lnwire.FailPermanentNodeFailure:
|
||||
pruneVertexFailure(
|
||||
paySession, route, errSource, false,
|
||||
)
|
||||
continue
|
||||
|
||||
// If we get a permanent channel or node failure, then
|
||||
// we'll note this (exclude the vertex/edge), and
|
||||
// continue with the rest of the routes.
|
||||
case *lnwire.FailPermanentChannelFailure:
|
||||
// As this error indicates that the target
|
||||
// channel was unable to carry this HTLC (for
|
||||
// w/e reason), we'll query the index to find
|
||||
// the _outgoing_ channel the source of the
|
||||
// error was meant to pass the HTLC along to.
|
||||
badChan, ok := route.nextHopChannel(errSource)
|
||||
if !ok {
|
||||
// If we weren't able to find the hop
|
||||
// *after* this node, then we'll
|
||||
// attempt to disable the previous
|
||||
// channel.
|
||||
badChan, ok = route.prevHopChannel(
|
||||
errSource,
|
||||
)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// If the channel was found, then we'll inform
|
||||
// mission control of this failure so future
|
||||
// attempts avoid this link temporarily.
|
||||
paySession.ReportChannelFailure(badChan.ChannelID)
|
||||
continue
|
||||
|
||||
case *lnwire.FailPermanentNodeFailure:
|
||||
// TODO(roasbeef): remove node from path
|
||||
pruneEdgeFailure(paySession, route, errSource)
|
||||
continue
|
||||
|
||||
default:
|
||||
@ -1775,6 +1809,62 @@ func (r *ChannelRouter) SendPayment(payment *LightningPayment) ([32]byte, *Route
|
||||
}
|
||||
}
|
||||
|
||||
// pruneVertexFailure will attempt to prune a vertex from the current available
|
||||
// vertexes of the target payment session in response to an encountered routing
|
||||
// error.
|
||||
func pruneVertexFailure(paySession *paymentSession, route *Route,
|
||||
errSource *btcec.PublicKey, nextNode bool) {
|
||||
|
||||
// By default, we'll try to prune the node that actually sent us the
|
||||
// error.
|
||||
errNode := NewVertex(errSource)
|
||||
|
||||
// If this failure indicates that the node _after_ the source of the
|
||||
// error was not found. As a result, we'll locate the vertex for that
|
||||
// node itself.
|
||||
if nextNode {
|
||||
nodeToPrune, ok := route.nextHopVertex(errSource)
|
||||
|
||||
if ok {
|
||||
errNode = nodeToPrune
|
||||
}
|
||||
}
|
||||
|
||||
// Once we've located the vertex, we'll report this failure to
|
||||
// missionControl and restart path finding.
|
||||
paySession.ReportVertexFailure(errNode)
|
||||
}
|
||||
|
||||
// pruneEdgeFailure will attempts to prune an edge from the current available
|
||||
// edges of the target payment session in response to an encountered routing
|
||||
// error.
|
||||
func pruneEdgeFailure(paySession *paymentSession, route *Route,
|
||||
errSource *btcec.PublicKey) {
|
||||
|
||||
// As this error indicates that the target channel was unable to carry
|
||||
// this HTLC (for w/e reason), we'll query the index to find the
|
||||
// _outgoing_ channel the source of the error was meant to pass the
|
||||
// HTLC along to.
|
||||
badChan, ok := route.nextHopChannel(errSource)
|
||||
if !ok {
|
||||
// If we weren't able to find the hop *after* this node, then
|
||||
// we'll attempt to disable the previous channel.
|
||||
prevChan, ok := route.prevHopChannel(
|
||||
errSource,
|
||||
)
|
||||
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
badChan = prevChan
|
||||
}
|
||||
|
||||
// If the channel was found, then we'll inform mission control of this
|
||||
// failure so future attempts avoid this link temporarily.
|
||||
paySession.ReportChannelFailure(badChan.ChannelID)
|
||||
}
|
||||
|
||||
// applyChannelUpdate applies a channel update directly to the database,
|
||||
// skipping preliminary validation.
|
||||
func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate) error {
|
||||
|
@ -280,6 +280,105 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSendPaymentErrorRepeatedFeeInsufficient tests that if we receive
|
||||
// multiple fee related errors from a channel that we're attempting to route
|
||||
// through, then we'll prune the channel after the second attempt.
|
||||
func TestSendPaymentErrorRepeatedFeeInsufficient(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
const startingBlockHeight = 101
|
||||
ctx, cleanUp, err := createTestCtx(startingBlockHeight, basicGraphFilePath)
|
||||
defer cleanUp()
|
||||
if err != nil {
|
||||
t.Fatalf("unable to create router: %v", err)
|
||||
}
|
||||
|
||||
// Craft a LightningPayment struct that'll send a payment from roasbeef
|
||||
// to luo ji for 100 satoshis.
|
||||
var payHash [32]byte
|
||||
payment := LightningPayment{
|
||||
Target: ctx.aliases["luoji"],
|
||||
Amount: lnwire.NewMSatFromSatoshis(1000),
|
||||
PaymentHash: payHash,
|
||||
}
|
||||
|
||||
var preImage [32]byte
|
||||
copy(preImage[:], bytes.Repeat([]byte{9}, 32))
|
||||
|
||||
// We'll also fetch the first outgoing channel edge from roasbeef to
|
||||
// luo ji. We'll obtain this as we'll need to to generate the
|
||||
// FeeInsufficient error that we'll send back.
|
||||
chanID := uint64(689530843)
|
||||
_, _, edgeUpateToFail, err := ctx.graph.FetchChannelEdgesByID(chanID)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to fetch chan id: %v", err)
|
||||
}
|
||||
|
||||
errChanUpdate := lnwire.ChannelUpdate{
|
||||
ShortChannelID: lnwire.NewShortChanIDFromInt(chanID),
|
||||
Timestamp: uint32(edgeUpateToFail.LastUpdate.Unix()),
|
||||
Flags: edgeUpateToFail.Flags,
|
||||
TimeLockDelta: edgeUpateToFail.TimeLockDelta,
|
||||
HtlcMinimumMsat: edgeUpateToFail.MinHTLC,
|
||||
BaseFee: uint32(edgeUpateToFail.FeeBaseMSat),
|
||||
FeeRate: uint32(edgeUpateToFail.FeeProportionalMillionths),
|
||||
}
|
||||
|
||||
sourceNode := ctx.router.selfNode
|
||||
|
||||
// We'll now modify the SendToSwitch method to return an error for the
|
||||
// outgoing channel to luo ji. This will be a fee related error, so it
|
||||
// should only cause the edge to be pruned after the second attempt.
|
||||
ctx.router.cfg.SendToSwitch = func(n [33]byte,
|
||||
_ *lnwire.UpdateAddHTLC, _ *sphinx.Circuit) ([32]byte, error) {
|
||||
|
||||
if bytes.Equal(ctx.aliases["luoji"].SerializeCompressed(), n[:]) {
|
||||
pub, err := sourceNode.PubKey()
|
||||
if err != nil {
|
||||
return preImage, err
|
||||
}
|
||||
return [32]byte{}, &htlcswitch.ForwardingError{
|
||||
ErrorSource: pub,
|
||||
|
||||
// Within our error, we'll add a channel update
|
||||
// which is meant to refelct he new fee
|
||||
// schedule for the node/channel.
|
||||
FailureMessage: &lnwire.FailFeeInsufficient{
|
||||
Update: errChanUpdate,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return preImage, nil
|
||||
}
|
||||
|
||||
// Send off the payment request to the router, route through satoshi
|
||||
// should've been selected as a fall back and succeeded correctly.
|
||||
paymentPreImage, route, err := ctx.router.SendPayment(&payment)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to send payment: %v", err)
|
||||
}
|
||||
|
||||
// The route selected should have two hops
|
||||
if len(route.Hops) != 2 {
|
||||
t.Fatalf("incorrect route length: expected %v got %v", 2,
|
||||
len(route.Hops))
|
||||
}
|
||||
|
||||
// The preimage should match up with the once created above.
|
||||
if !bytes.Equal(paymentPreImage[:], preImage[:]) {
|
||||
t.Fatalf("incorrect preimage used: expected %x got %x",
|
||||
preImage[:], paymentPreImage[:])
|
||||
}
|
||||
|
||||
// The route should have satoshi as the first hop.
|
||||
if route.Hops[0].Channel.Node.Alias != "satoshi" {
|
||||
t.Fatalf("route should go through satoshi as first hop, "+
|
||||
"instead passes through: %v",
|
||||
route.Hops[0].Channel.Node.Alias)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSendPaymentErrorPathPruning tests that the send of candidate routes
|
||||
// properly gets pruned in response to ForwardingError response from the
|
||||
// underlying SendToSwitch function.
|
||||
|
Loading…
Reference in New Issue
Block a user