diff --git a/htlcswitch/link.go b/htlcswitch/link.go
index 66a20c9f..bd79b621 100644
--- a/htlcswitch/link.go
+++ b/htlcswitch/link.go
@@ -1631,8 +1631,21 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) {
 				OnionSHA256: msg.ShaOnionBlob,
 			}
 		default:
-			log.Errorf("Unknown failure code: %v", msg.FailureCode)
-			failure = &lnwire.FailTemporaryChannelFailure{}
+			log.Warnf("Unexpected failure code received in "+
+				"UpdateFailMalformedHTLC: %v", msg.FailureCode)
+
+			// We don't just pass back the error we received from
+			// our successor. Otherwise we might report a failure
+			// that penalizes us more than needed. If the onion that
+			// we forwarded was correct, the node should have been
+			// able to send back its own failure. The node did not
+			// send back its own failure, so we assume there was a
+			// problem with the onion and report that back. We reuse
+			// the invalid onion key failure because there is no
+			// specific error for this case.
+			failure = &lnwire.FailInvalidOnionKey{
+				OnionSHA256: msg.ShaOnionBlob,
+			}
 		}

 		// With the error parsed, we'll convert it into its opaque
diff --git a/htlcswitch/switch_test.go b/htlcswitch/switch_test.go
index 6e8a1698..59f6fae9 100644
--- a/htlcswitch/switch_test.go
+++ b/htlcswitch/switch_test.go
@@ -2097,8 +2097,8 @@ func TestUpdateFailMalformedHTLCErrorConversion(t *testing.T) {
 	fwdingErr := err.(*ForwardingError)
 	failureMsg := fwdingErr.FailureMessage

-	if _, ok := failureMsg.(*lnwire.FailTemporaryChannelFailure); !ok {
-		t.Fatalf("expected temp chan failure instead got: %v",
+	if _, ok := failureMsg.(*lnwire.FailInvalidOnionKey); !ok {
+		t.Fatalf("expected onion failure instead got: %v",
 			fwdingErr.FailureMessage)
 	}
 }
diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go
index ae5ce147..9905c56b 100644
--- a/lntest/itest/lnd_test.go
+++ b/lntest/itest/lnd_test.go
@@ -3567,7 +3567,7 @@ func testSphinxReplayPersistence(net *lntest.NetworkHarness, t *harnessTest) {
 	// Construct the response we expect after sending a duplicate packet
 	// that fails due to sphinx replay detection.
-	replayErr := "TemporaryChannelFailure"
+	replayErr := "InvalidOnionKey"
 	if !strings.Contains(resp.PaymentError, replayErr) {
 		t.Fatalf("received payment error: %v, expected %v",
 			resp.PaymentError, replayErr)
diff --git a/routing/missioncontrol.go b/routing/missioncontrol.go
index b2ea102d..65e342e1 100644
--- a/routing/missioncontrol.go
+++ b/routing/missioncontrol.go
@@ -322,55 +322,6 @@ func (m *MissionControl) requestSecondChance(timestamp time.Time,
 	return false
 }

-// reportVertexFailure reports a node level failure.
-func (m *MissionControl) reportVertexFailure(timestamp time.Time,
-	v route.Vertex) {
-
-	log.Debugf("Reporting vertex %v failure to Mission Control", v)
-
-	m.Lock()
-	defer m.Unlock()
-
-	m.lastNodeFailure[v] = timestamp
-}
-
-// reportPairPolicyFailure reports a policy related failure.
-func (m *MissionControl) reportPairPolicyFailure(timestamp time.Time,
-	failedPair DirectedNodePair) {
-
-	m.Lock()
-	defer m.Unlock()
-
-	// We may have an out of date graph. Therefore we don't always penalize
-	// immediately. If some time has passed since the last policy failure,
-	// we grant the node a second chance at forwarding the payment.
-	if m.requestSecondChance(
-		timestamp, failedPair.From, failedPair.To,
-	) {
-		return
-	}
-
-	m.lastNodeFailure[failedPair.From] = timestamp
-}
-
-// reportPairFailure reports a pair level failure.
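Aside, not part of the patch: the link.go hunk above maps every unexpected failure code carried by an update_fail_malformed_htlc onto FailInvalidOnionKey. A minimal sketch of that mapping as a standalone helper, assuming only the lnwire types used in the hunk (mapMalformedFailure is a hypothetical name, not an lnd API):

```go
package sketch

import (
	"crypto/sha256"

	"github.com/lightningnetwork/lnd/lnwire"
)

// mapMalformedFailure translates the failure code from a malformed-HTLC
// message into the failure we hand back upstream. Known BADONION codes map
// one-to-one; any unexpected code is reported as an invalid onion key
// failure, since a correct onion would have let the successor return a
// normally encrypted failure instead.
func mapMalformedFailure(code lnwire.FailCode,
	shaOnionBlob [sha256.Size]byte) lnwire.FailureMessage {

	switch code {
	case lnwire.CodeInvalidOnionVersion:
		return &lnwire.FailInvalidOnionVersion{
			OnionSHA256: shaOnionBlob,
		}

	case lnwire.CodeInvalidOnionHmac:
		return &lnwire.FailInvalidOnionHmac{
			OnionSHA256: shaOnionBlob,
		}

	case lnwire.CodeInvalidOnionKey:
		return &lnwire.FailInvalidOnionKey{
			OnionSHA256: shaOnionBlob,
		}

	default:
		// There is no specific error for an unexpected code, so reuse
		// the invalid onion key failure, as the diff above does.
		return &lnwire.FailInvalidOnionKey{
			OnionSHA256: shaOnionBlob,
		}
	}
}
```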
-//
-// TODO(roasbeef): also add value attempted to send and capacity of channel
-func (m *MissionControl) reportPairFailure(timestamp time.Time,
-	failedPair DirectedNodePair, minPenalizeAmt lnwire.MilliSatoshi) {
-
-	log.Debugf("Reporting pair %v failure to Mission Control", failedPair)
-
-	m.Lock()
-	defer m.Unlock()
-
-	pair := NewDirectedNodePair(failedPair.From, failedPair.To)
-	m.lastPairFailure[pair] = pairFailure{
-		minPenalizeAmt: minPenalizeAmt,
-		timestamp:      timestamp,
-	}
-}
-
 // GetHistorySnapshot takes a snapshot from the current mission control state
 // and actual probability estimates.
 func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
@@ -420,12 +371,11 @@ func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
 // ReportPaymentFail reports a failed payment to mission control as input for
 // future probability estimates. The failureSourceIdx argument indicates the
 // failure source. If it is nil, the failure source is unknown. This function
-// returns a bool indicating whether this error is a final error. If it is
-// final, a failure reason is returned and no further payment attempts need to
-// be made.
+// returns a reason if this failure is a final failure. In that case no further
+// payment attempts need to be made.
 func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
-	failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
-	channeldb.FailureReason, error) {
+	failureSourceIdx *int, failure lnwire.FailureMessage) (
+	*channeldb.FailureReason, error) {

 	timestamp := m.now()

@@ -442,256 +392,55 @@ func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
 	// Store complete result in database.
 	if err := m.store.AddResult(result); err != nil {
-		return false, 0, err
+		return nil, err
 	}

 	// Apply result to update mission control state.
-	final, reason := m.applyPaymentResult(result)
+	reason := m.applyPaymentResult(result)

-	return final, reason, nil
+	return reason, nil
 }

 // applyPaymentResult applies a payment result as input for future probability
-// estimates. It returns a bool indicating whether this error is a final error
-// and no further payment attempts need to be made.
-func (m *MissionControl) applyPaymentResult(result *paymentResult) (
-	bool, channeldb.FailureReason) {
+// estimates. It returns a non-nil failure reason if this error is final and no
+// further payment attempts need to be made.
+func (m *MissionControl) applyPaymentResult(
+	result *paymentResult) *channeldb.FailureReason {

-	var (
-		failureSourceIdxInt int
-		failure             lnwire.FailureMessage
+	// Interpret result.
+	i := interpretResult(
+		result.route, result.failureSourceIdx, result.failure,
 	)

-	if result.failureSourceIdx == nil {
-		// If the failure message could not be decrypted, attribute the
-		// failure to our own outgoing channel.
-		//
-		// TODO(joostager): Penalize all channels in the route.
-		failureSourceIdxInt = 0
-		failure = lnwire.NewTemporaryChannelFailure(nil)
-	} else {
-		failureSourceIdxInt = *result.failureSourceIdx
-		failure = result.failure
+	// Update mission control state using the interpretation.
+ m.Lock() + defer m.Unlock() + + if i.policyFailure != nil { + if m.requestSecondChance( + result.timeReply, + i.policyFailure.From, i.policyFailure.To, + ) { + return nil + } } - var failureVertex route.Vertex + if i.nodeFailure != nil { + log.Debugf("Reporting node failure to Mission Control: "+ + "node=%v", *i.nodeFailure) - if failureSourceIdxInt > 0 { - failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes - } else { - failureVertex = result.route.SourcePubKey + m.lastNodeFailure[*i.nodeFailure] = result.timeReply } - log.Tracef("Node %x (index %v) reported failure when sending htlc", - failureVertex, result.failureSourceIdx) - // Always determine chan id ourselves, because a channel update with id - // may not be available. - failedPair, failedAmt := getFailedPair( - result.route, failureSourceIdxInt, - ) + for pair, minPenalizeAmt := range i.pairResults { + log.Debugf("Reporting pair failure to Mission Control: "+ + "pair=%v, minPenalizeAmt=%v", pair, minPenalizeAmt) - switch failure.(type) { - - // If the end destination didn't know the payment - // hash or we sent the wrong payment amount to the - // destination, then we'll terminate immediately. - case *lnwire.FailIncorrectDetails: - // TODO(joostjager): Check onionErr.Amount() whether it matches - // what we expect. (Will it ever not match, because if not - // final_incorrect_htlc_amount would be returned?) - - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we sent the wrong amount to the destination, then - // we'll exit early. - case *lnwire.FailIncorrectPaymentAmount: - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If the time-lock that was extended to the final node - // was incorrect, then we can't proceed. - case *lnwire.FailFinalIncorrectCltvExpiry: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc that expires too soon. - // In that case we should continue routing. - return true, channeldb.FailureReasonError - - // If we crafted an invalid onion payload for the final - // node, then we'll exit early. - case *lnwire.FailFinalIncorrectHtlcAmount: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc with a too low value. In - // that case we should continue routing. - - return true, channeldb.FailureReasonError - - // Similarly, if the HTLC expiry that we extended to - // the final hop expires too soon, then will fail the - // payment. - // - // TODO(roasbeef): can happen to to race condition, try - // again with recent block height - case *lnwire.FailFinalExpiryTooSoon: - // TODO(joostjager): Take into account that any hop may have - // delayed. Ideally we should continue routing. Knowing the - // delaying node at this point would help. - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we erroneously attempted to cross a chain border, - // then we'll cancel the payment. - case *lnwire.FailInvalidRealm: - return true, channeldb.FailureReasonError - - // If we get a notice that the expiry was too soon for - // an intermediate node, then we'll prune out the node - // that sent us this error, as it doesn't now what the - // correct block height is. - case *lnwire.FailExpiryTooSoon: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we hit an instance of onion payload corruption or an invalid - // version, then we'll exit early as this shouldn't happen in the - // typical case. 
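Aside, not part of the patch: the new applyPaymentResult above first interprets the failure without holding the lock and only then applies node and pair results under it. A simplified sketch of that interpret-then-apply pattern, with stand-in types instead of lnd's route.Vertex and DirectedNodePair:

```go
package sketch

import (
	"sync"
	"time"
)

// vertex and nodePair are simplified stand-ins for route.Vertex and
// DirectedNodePair.
type vertex [33]byte

type nodePair struct{ from, to vertex }

type pairFailure struct {
	minPenalizeAmt uint64 // msat
	timestamp      time.Time
}

// interpreted mirrors the shape of interpretedResult above.
type interpreted struct {
	nodeFailure *vertex
	pairResults map[nodePair]uint64
}

// missionState holds only the state touched by applyPaymentResult.
type missionState struct {
	sync.Mutex
	lastNodeFailure map[vertex]time.Time
	lastPairFailure map[nodePair]pairFailure
}

// apply shows the pattern from the diff: interpretation happens outside the
// critical section, and only the map writes are guarded by the lock.
func (m *missionState) apply(i *interpreted, now time.Time) {
	m.Lock()
	defer m.Unlock()

	if i.nodeFailure != nil {
		m.lastNodeFailure[*i.nodeFailure] = now
	}

	for pair, minAmt := range i.pairResults {
		m.lastPairFailure[pair] = pairFailure{
			minPenalizeAmt: minAmt,
			timestamp:      now,
		}
	}
}
```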
- // - // TODO(joostjager): Take into account that the previous hop may have - // tampered with the onion. Routing should continue using other paths. - case *lnwire.FailInvalidOnionVersion: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionHmac: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonError - - // If we get a failure due to violating the minimum - // amount, we'll apply the new minimum amount and retry - // routing. - case *lnwire.FailAmountBelowMinimum: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // If we get a failure due to a fee, we'll apply the - // new fee update, and retry our attempt using the - // newly updated fees. - case *lnwire.FailFeeInsufficient: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // If we get the failure for an intermediate node that - // disagrees with our time lock values, then we'll - // apply the new delta value and try it once more. - case *lnwire.FailIncorrectCltvExpiry: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // The outgoing channel that this node was meant to - // forward one is currently disabled, so we'll apply - // the update and continue. - case *lnwire.FailChannelDisabled: - m.reportPairFailure(result.timeReply, failedPair, 0) - return false, 0 - - // It's likely that the outgoing channel didn't have - // sufficient capacity, so we'll prune this pair for - // now, and continue onwards with our path finding. - case *lnwire.FailTemporaryChannelFailure: - m.reportPairFailure(result.timeReply, failedPair, failedAmt) - return false, 0 - - // If the send fail due to a node not having the - // required features, then we'll note this error and - // continue. - case *lnwire.FailRequiredNodeFeatureMissing: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If the send fail due to a node not having the - // required features, then we'll note this error and - // continue. - case *lnwire.FailRequiredChannelFeatureMissing: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If the next hop in the route wasn't known or - // offline, we'll only the channel which we attempted - // to route over. This is conservative, and it can - // handle faulty channels between nodes properly. - // Additionally, this guards against routing nodes - // returning errors in order to attempt to black list - // another node. - case *lnwire.FailUnknownNextPeer: - m.reportPairFailure(result.timeReply, failedPair, 0) - return false, 0 - - // If the node wasn't able to forward for which ever - // reason, then we'll note this and continue with the - // routes. - case *lnwire.FailTemporaryNodeFailure: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - case *lnwire.FailPermanentNodeFailure: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we crafted a route that contains a too long time - // lock for an intermediate node, we'll prune the node. - // As there currently is no way of knowing that node's - // maximum acceptable cltv, we cannot take this - // constraint into account during routing. - // - // TODO(joostjager): Record the rejected cltv and use - // that as a hint during future path finding through - // that node. 
- case *lnwire.FailExpiryTooFar: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we get a permanent channel or node failure, then - // we'll prune the channel in both directions and - // continue with the rest of the routes. - case *lnwire.FailPermanentChannelFailure: - m.reportPairFailure(result.timeReply, failedPair, 0) - m.reportPairFailure( - result.timeReply, failedPair.Reverse(), 0, - ) - return false, 0 - - // Any other failure or an empty failure will get the node pruned. - default: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 + m.lastPairFailure[pair] = pairFailure{ + minPenalizeAmt: minPenalizeAmt, + timestamp: result.timeReply, + } } -} - -// getFailedPair tries to locate the failing pair given a route and the pubkey -// of the node that sent the failure. It will assume that the failure is -// associated with the outgoing channel set of the failing node. As a second -// result, it returns the amount sent between the pair. -func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair, - lnwire.MilliSatoshi) { - - // Determine if we have a failure from the final hop. If it is, we - // assume that the failing channel is the incoming channel. - // - // TODO(joostjager): In this case, certain types of failures are not - // expected. For example FailUnknownNextPeer. This could be a reason to - // prune the node? - if failureSource == len(route.Hops) { - failureSource-- - } - - // As this failure indicates that the target channel was unable to carry - // this HTLC (for w/e reason), we'll return the _outgoing_ channel that - // the source of the failure was meant to pass the HTLC along to. - if failureSource == 0 { - return NewDirectedNodePair( - route.SourcePubKey, - route.Hops[0].PubKeyBytes, - ), route.TotalAmount - } - - return NewDirectedNodePair( - route.Hops[failureSource-1].PubKeyBytes, - route.Hops[failureSource].PubKeyBytes, - ), route.Hops[failureSource-1].AmtToForward + + return i.finalFailureReason } diff --git a/routing/mock_test.go b/routing/mock_test.go index b58ba3d5..96da1448 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -98,11 +98,11 @@ type mockMissionControl struct { var _ MissionController = (*mockMissionControl)(nil) -func (m *mockMissionControl) ReportPaymentFail(paymentID uint64, - rt *route.Route, failureSourceIdx *int, failure lnwire.FailureMessage) ( - bool, channeldb.FailureReason, error) { +func (m *mockMissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route, + failureSourceIdx *int, failure lnwire.FailureMessage) ( + *channeldb.FailureReason, error) { - return false, 0, nil + return nil, nil } func (m *mockMissionControl) GetProbability(fromNode, toNode route.Vertex, diff --git a/routing/payment_lifecycle.go b/routing/payment_lifecycle.go index db9b9803..18edfc0a 100644 --- a/routing/payment_lifecycle.go +++ b/routing/payment_lifecycle.go @@ -342,10 +342,10 @@ func (p *paymentLifecycle) sendPaymentAttempt(firstHop lnwire.ShortChannelID, // whether we should make another payment attempt. func (p *paymentLifecycle) handleSendError(sendErr error) error { - final, reason := p.router.processSendError( + reason := p.router.processSendError( p.attempt.PaymentID, &p.attempt.Route, sendErr, ) - if !final { + if reason == nil { // Save the forwarding error so it can be returned if // this turns out to be the last attempt. 
p.lastError = sendErr @@ -354,14 +354,14 @@ func (p *paymentLifecycle) handleSendError(sendErr error) error { } log.Debugf("Payment %x failed: final_outcome=%v, raw_err=%v", - p.payment.PaymentHash, reason, sendErr) + p.payment.PaymentHash, *reason, sendErr) // Mark the payment failed with no route. // // TODO(halseth): make payment codes for the actual reason we don't // continue path finding. err := p.router.cfg.Control.Fail( - p.payment.PaymentHash, reason, + p.payment.PaymentHash, *reason, ) if err != nil { return err diff --git a/routing/payment_session.go b/routing/payment_session.go index fe10a501..3894bc60 100644 --- a/routing/payment_session.go +++ b/routing/payment_session.go @@ -33,7 +33,7 @@ type PaymentSession interface { type paymentSession struct { additionalEdges map[route.Vertex][]*channeldb.ChannelEdgePolicy - bandwidthHints map[uint64]lnwire.MilliSatoshi + getBandwidthHints func() (map[uint64]lnwire.MilliSatoshi, error) sessionSource *SessionSource @@ -97,11 +97,22 @@ func (p *paymentSession) RequestRoute(payment *LightningPayment, CltvLimit: cltvLimit, } + // We'll also obtain a set of bandwidthHints from the lower layer for + // each of our outbound channels. This will allow the path finding to + // skip any links that aren't active or just don't have enough bandwidth + // to carry the payment. New bandwidth hints are queried for every new + // path finding attempt, because concurrent payments may change + // balances. + bandwidthHints, err := p.getBandwidthHints() + if err != nil { + return nil, err + } + path, err := p.pathFinder( &graphParams{ graph: ss.Graph, additionalEdges: p.additionalEdges, - bandwidthHints: p.bandwidthHints, + bandwidthHints: bandwidthHints, }, restrictions, &ss.PathFindingConfig, ss.SelfNode.PubKeyBytes, payment.Target, diff --git a/routing/payment_session_source.go b/routing/payment_session_source.go index 1b8fea5f..b5175cf7 100644 --- a/routing/payment_session_source.go +++ b/routing/payment_session_source.go @@ -97,26 +97,22 @@ func (m *SessionSource) NewPaymentSession(routeHints [][]zpay32.HopHint, } } - // We'll also obtain a set of bandwidthHints from the lower layer for - // each of our outbound channels. This will allow the path finding to - // skip any links that aren't active or just don't have enough - // bandwidth to carry the payment. 
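Aside, not part of the patch: the getBandwidthHints field introduced above replaces a one-time snapshot with a closure that re-queries balances on every path finding attempt. A hedged sketch of that pattern, with illustrative stand-ins for the source node's channel set and the QueryBandwidth callback:

```go
package sketch

// milliSatoshi stands in for lnwire.MilliSatoshi.
type milliSatoshi uint64

// newBandwidthHintSource returns a closure in the spirit of the
// getBandwidthHints field added above: each call re-queries the current
// outbound balances, so concurrent payments are reflected in the next
// path finding attempt instead of being frozen at session creation.
func newBandwidthHintSource(chanIDs []uint64,
	query func(chanID uint64) milliSatoshi) func() (map[uint64]milliSatoshi, error) {

	return func() (map[uint64]milliSatoshi, error) {
		hints := make(map[uint64]milliSatoshi, len(chanIDs))
		for _, id := range chanIDs {
			// A fresh query per attempt, not a cached snapshot.
			hints[id] = query(id)
		}

		return hints, nil
	}
}
```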
sourceNode, err := m.Graph.SourceNode() if err != nil { return nil, err } - bandwidthHints, err := generateBandwidthHints( - sourceNode, m.QueryBandwidth, - ) - if err != nil { - return nil, err + + getBandwidthHints := func() (map[uint64]lnwire.MilliSatoshi, + error) { + + return generateBandwidthHints(sourceNode, m.QueryBandwidth) } return &paymentSession{ - additionalEdges: edges, - bandwidthHints: bandwidthHints, - sessionSource: m, - pathFinder: findPath, + additionalEdges: edges, + getBandwidthHints: getBandwidthHints, + sessionSource: m, + pathFinder: findPath, }, nil } diff --git a/routing/payment_session_test.go b/routing/payment_session_test.go index 627f4f85..5d31e902 100644 --- a/routing/payment_session_test.go +++ b/routing/payment_session_test.go @@ -41,6 +41,11 @@ func TestRequestRoute(t *testing.T) { } session := &paymentSession{ + getBandwidthHints: func() (map[uint64]lnwire.MilliSatoshi, + error) { + + return nil, nil + }, sessionSource: sessionSource, pathFinder: findPath, } diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go new file mode 100644 index 00000000..00703c28 --- /dev/null +++ b/routing/result_interpretation.go @@ -0,0 +1,368 @@ +package routing + +import ( + "github.com/lightningnetwork/lnd/channeldb" + "github.com/lightningnetwork/lnd/lnwire" + "github.com/lightningnetwork/lnd/routing/route" +) + +// Instantiate variables to allow taking a reference from the failure reason. +var ( + reasonError = channeldb.FailureReasonError + reasonIncorrectDetails = channeldb.FailureReasonIncorrectPaymentDetails +) + +// interpretedResult contains the result of the interpretation of a payment +// outcome. +type interpretedResult struct { + // nodeFailure points to a node pubkey if all channels of that node are + // responsible for the result. + nodeFailure *route.Vertex + + // pairResults contains a map of node pairs that could be responsible + // for the failure. The map values are the minimum amounts for which a + // future penalty should be applied. + pairResults map[DirectedNodePair]lnwire.MilliSatoshi + + // finalFailureReason is set to a non-nil value if it makes no more + // sense to start another payment attempt. It will contain the reason + // why. + finalFailureReason *channeldb.FailureReason + + // policyFailure is set to a node pair if there is a policy failure on + // that connection. This is used to control the second chance logic for + // policy failures. + policyFailure *DirectedNodePair +} + +// interpretResult interprets a payment outcome and returns an object that +// contains information required to update mission control. +func interpretResult(rt *route.Route, failureSrcIdx *int, + failure lnwire.FailureMessage) *interpretedResult { + + i := &interpretedResult{ + pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi), + } + + i.processFail(rt, failureSrcIdx, failure) + + return i +} + +// processFail processes a failed payment attempt. +func (i *interpretedResult) processFail( + rt *route.Route, errSourceIdx *int, + failure lnwire.FailureMessage) { + + if errSourceIdx == nil { + i.processPaymentOutcomeUnknown(rt) + return + } + + switch *errSourceIdx { + + // We are the source of the failure. + case 0: + i.processPaymentOutcomeSelf(rt, failure) + + // A failure from the final hop was received. + case len(rt.Hops): + i.processPaymentOutcomeFinal( + rt, failure, + ) + + // An intermediate hop failed. Interpret the outcome, update reputation + // and try again. 
+	default:
+		i.processPaymentOutcomeIntermediate(
+			rt, *errSourceIdx, failure,
+		)
+	}
+}
+
+// processPaymentOutcomeSelf handles failures sent by ourselves.
+func (i *interpretedResult) processPaymentOutcomeSelf(
+	rt *route.Route, failure lnwire.FailureMessage) {
+
+	switch failure.(type) {
+
+	// We receive a malformed htlc failure from our peer. We trust ourselves
+	// to send the correct htlc, so our peer must be at fault.
+	case *lnwire.FailInvalidOnionVersion,
+		*lnwire.FailInvalidOnionHmac,
+		*lnwire.FailInvalidOnionKey:
+
+		i.failNode(rt, 1)
+
+		// If this was a payment to a direct peer, we can stop trying.
+		if len(rt.Hops) == 1 {
+			i.finalFailureReason = &reasonError
+		}
+
+	// Any other failure originating from ourselves should be temporary and
+	// caused by changing conditions between path finding and execution of
+	// the payment. We just retry and trust that the information locally
+	// available in the link has been updated.
+	default:
+		log.Warnf("Routing failure for local channel %v occurred",
+			rt.Hops[0].ChannelID)
+	}
+}
+
+// processPaymentOutcomeFinal handles failures sent by the final hop.
+func (i *interpretedResult) processPaymentOutcomeFinal(
+	route *route.Route, failure lnwire.FailureMessage) {
+
+	n := len(route.Hops)
+
+	// If a failure from the final node is received, we will fail the
+	// payment in almost all cases. Only if the penultimate node sends an
+	// incorrect htlc do we want to retry via another route. Invalid onion
+	// failures are not expected, because the final node wouldn't be able to
+	// encrypt that failure.
+	switch failure.(type) {
+
+	// Expiry or amount of the HTLC doesn't match the onion, try another
+	// route.
+	case *lnwire.FailFinalIncorrectCltvExpiry,
+		*lnwire.FailFinalIncorrectHtlcAmount:
+
+		// We trust ourselves. If this is a direct payment, we penalize
+		// the final node and fail the payment.
+		if n == 1 {
+			i.failNode(route, n)
+			i.finalFailureReason = &reasonError
+
+			return
+		}
+
+		// Otherwise penalize the last pair of the route and retry.
+		// Either the final node is at fault, or it gets sent a bad htlc
+		// from its predecessor.
+		i.failPair(route, n-1)
+
+	// We are using wrong payment hash or amount, fail the payment.
+	case *lnwire.FailIncorrectPaymentAmount,
+		*lnwire.FailIncorrectDetails:
+
+		i.finalFailureReason = &reasonIncorrectDetails
+
+	// The HTLC that was extended to the final hop expires too soon. Fail
+	// the payment, because we may be using the wrong final cltv delta.
+	case *lnwire.FailFinalExpiryTooSoon:
+		// TODO(roasbeef): can happen due to a race condition, try
+		// again with recent block height
+
+		// TODO(joostjager): can also happen because a node delayed
+		// deliberately. What to penalize?
+		i.finalFailureReason = &reasonIncorrectDetails
+
+	default:
+		// All other errors are considered terminal if coming from the
+		// final hop. They indicate that something is wrong at the
+		// recipient, so we do apply a penalty.
+		i.failNode(route, n)
+		i.finalFailureReason = &reasonError
+	}
+}
+
+// processPaymentOutcomeIntermediate handles failures sent by an intermediate
+// hop.
+func (i *interpretedResult) processPaymentOutcomeIntermediate(
+	route *route.Route, errorSourceIdx int,
+	failure lnwire.FailureMessage) {
+
+	reportOutgoing := func() {
+		i.failPair(
+			route, errorSourceIdx,
+		)
+	}
+
+	reportOutgoingBalance := func() {
+		i.failPairBalance(
+			route, errorSourceIdx,
+		)
+	}
+
+	reportIncoming := func() {
+		// We trust ourselves. If the error comes from the first hop, we
+		// can penalize the whole node. In that case there is no
+		// uncertainty as to which node to blame.
+		if errorSourceIdx == 1 {
+			i.failNode(route, errorSourceIdx)
+			return
+		}
+
+		// Otherwise report the incoming pair.
+		i.failPair(
+			route, errorSourceIdx-1,
+		)
+	}
+
+	reportAll := func() {
+		// We trust ourselves. If the error comes from the first hop, we
+		// can penalize the whole node. In that case there is no
+		// uncertainty as to which node to blame.
+		if errorSourceIdx == 1 {
+			i.failNode(route, errorSourceIdx)
+			return
+		}
+
+		// Otherwise penalize all pairs up to the error source. This
+		// includes our own outgoing connection.
+		i.failPairRange(
+			route, 0, errorSourceIdx-1,
+		)
+	}
+
+	switch failure.(type) {
+
+	// If a node reports onion payload corruption or an invalid version,
+	// that node may be responsible, but it could also be that it is just
+	// relaying a malformed htlc failure from its successor. By reporting the
+	// outgoing channel set, we will surely hit the responsible node. At
+	// this point, it is not possible that the node's predecessor corrupted
+	// the onion blob. If the predecessor had corrupted the payload, the
+	// error source wouldn't have been able to encrypt this failure message
+	// for us.
+	case *lnwire.FailInvalidOnionVersion,
+		*lnwire.FailInvalidOnionHmac,
+		*lnwire.FailInvalidOnionKey:
+
+		reportOutgoing()
+
+	// If the next hop in the route wasn't known or offline, we'll only
+	// penalize the channel set which we attempted to route over. This is
+	// conservative, and it can handle faulty channels between nodes
+	// properly. Additionally, this guards against routing nodes returning
+	// errors in order to attempt to black list another node.
+	case *lnwire.FailUnknownNextPeer:
+		reportOutgoing()
+
+	// If we get a permanent channel failure, we'll prune the channel set
+	// in both directions and continue with the rest of the routes.
+	case *lnwire.FailPermanentChannelFailure:
+		reportOutgoing()
+
+	// When an HTLC parameter is incorrect, the node sending the error may
+	// be doing something wrong. But it could also be that its predecessor
+	// is intentionally modifying the htlc parameters that we instructed
+	// via the hop payload. Therefore we penalize the incoming node pair. A
+	// third cause of this error may be that we have an out of date channel
+	// update. This is handled by the second chance logic up in mission
+	// control.
+	case *lnwire.FailAmountBelowMinimum,
+		*lnwire.FailFeeInsufficient,
+		*lnwire.FailIncorrectCltvExpiry,
+		*lnwire.FailChannelDisabled:
+
+		// Set the node pair for which a channel update may be out of
+		// date. The second chance logic uses the policyFailure field.
+		i.policyFailure = &DirectedNodePair{
+			From: route.Hops[errorSourceIdx-1].PubKeyBytes,
+			To:   route.Hops[errorSourceIdx].PubKeyBytes,
+		}
+
+		// We report the incoming channel. If a second chance is
+		// granted in mission control, this report is ignored.
+		reportIncoming()
+
+	// If the outgoing channel doesn't have enough capacity, we penalize.
+	// But we penalize only in a single direction and only for amounts
+	// greater than the attempted amount.
+	case *lnwire.FailTemporaryChannelFailure:
+		reportOutgoingBalance()
+
+	// If FailExpiryTooSoon is received, there must have been some delay
+	// along the path. We can't know which node is causing the delay, so we
+	// penalize all of them up to the error source.
+	//
+	// Alternatively it could also be that we ourselves have fallen behind
+	// somehow. We ignore that case for now.
+	case *lnwire.FailExpiryTooSoon:
+		reportAll()
+
+	// In all other cases, we penalize the reporting node. These are all
+	// failures that should not happen.
+	default:
+		i.failNode(route, errorSourceIdx)
+	}
+}
+
+// processPaymentOutcomeUnknown processes a payment outcome for which no failure
+// message or source is available.
+func (i *interpretedResult) processPaymentOutcomeUnknown(route *route.Route) {
+	n := len(route.Hops)
+
+	// If this is a direct payment, the destination must be at fault.
+	if n == 1 {
+		i.failNode(route, n)
+		i.finalFailureReason = &reasonError
+		return
+	}
+
+	// Otherwise penalize all channels in the route to make sure the
+	// responsible node is hit as well. We even penalize the connection
+	// to our own peer, because that peer could also be responsible.
+	i.failPairRange(route, 0, n-1)
+}
+
+// failNode marks the node indicated by idx in the route as failed. This
+// function intentionally panics when the self node is failed.
+func (i *interpretedResult) failNode(rt *route.Route, idx int) {
+	i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes
+}
+
+// failPairRange marks the node pairs from node fromIdx to node toIdx as failed
+// in both directions.
+func (i *interpretedResult) failPairRange(
+	rt *route.Route, fromIdx, toIdx int) {
+
+	for idx := fromIdx; idx <= toIdx; idx++ {
+		i.failPair(rt, idx)
+	}
+}
+
+// failPair marks a pair as failed in both directions.
+func (i *interpretedResult) failPair(
+	rt *route.Route, idx int) {
+
+	pair, _ := getPair(rt, idx)
+
+	// Report pair in both directions without a minimum penalization amount.
+	i.pairResults[pair] = 0
+	i.pairResults[pair.Reverse()] = 0
+}
+
+// failPairBalance marks a pair as failed with a minimum penalization amount.
+func (i *interpretedResult) failPairBalance(
+	rt *route.Route, channelIdx int) {
+
+	pair, amt := getPair(rt, channelIdx)
+
+	i.pairResults[pair] = amt
+}
+
+// getPair returns a node pair from the route and the amount passed between that
+// pair.
+func getPair(rt *route.Route, channelIdx int) (DirectedNodePair,
+	lnwire.MilliSatoshi) {
+
+	nodeTo := rt.Hops[channelIdx].PubKeyBytes
+	var (
+		nodeFrom route.Vertex
+		amt      lnwire.MilliSatoshi
+	)
+
+	if channelIdx == 0 {
+		nodeFrom = rt.SourcePubKey
+		amt = rt.TotalAmount
+	} else {
+		nodeFrom = rt.Hops[channelIdx-1].PubKeyBytes
+		amt = rt.Hops[channelIdx-1].AmtToForward
+	}
+
+	pair := NewDirectedNodePair(nodeFrom, nodeTo)
+
+	return pair, amt
+}
diff --git a/routing/result_interpretation_test.go b/routing/result_interpretation_test.go
new file mode 100644
index 00000000..86b65aa4
--- /dev/null
+++ b/routing/result_interpretation_test.go
@@ -0,0 +1,146 @@
+package routing
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/lightningnetwork/lnd/lnwire"
+
+	"github.com/lightningnetwork/lnd/routing/route"
+)
+
+var (
+	hops = []route.Vertex{
+		{1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4},
+	}
+
+	routeOneHop = route.Route{
+		SourcePubKey: hops[0],
+		TotalAmount:  100,
+		Hops: []*route.Hop{
+			{PubKeyBytes: hops[1], AmtToForward: 99},
+		},
+	}
+
+	routeTwoHop = route.Route{
+		SourcePubKey: hops[0],
+		TotalAmount:  100,
+		Hops: []*route.Hop{
+			{PubKeyBytes: hops[1], AmtToForward: 99},
+			{PubKeyBytes: hops[2], AmtToForward: 97},
+		},
+	}
+
+	routeFourHop = route.Route{
+		SourcePubKey: hops[0],
+		TotalAmount:  100,
+		Hops: []*route.Hop{
+			{PubKeyBytes: hops[1], AmtToForward: 99},
+			{PubKeyBytes: hops[2], AmtToForward: 97},
+			{PubKeyBytes: hops[3], AmtToForward: 94},
+			{PubKeyBytes: hops[4], AmtToForward: 90},
+		},
+	}
+)
+
+func getTestPair(from, to int) DirectedNodePair {
+	return NewDirectedNodePair(hops[from], hops[to])
+}
+
+type resultTestCase struct {
+	name          string
+	route         *route.Route
+	failureSrcIdx int
+	failure       lnwire.FailureMessage
+
+	expectedResult *interpretedResult
+}
+
+var resultTestCases = []resultTestCase{
+	// Tests that a temporary channel failure result is properly
+	// interpreted.
+	{
+		name:          "fail",
+		route:         &routeTwoHop,
+		failureSrcIdx: 1,
+		failure:       lnwire.NewTemporaryChannelFailure(nil),
+
+		expectedResult: &interpretedResult{
+			pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
+				getTestPair(1, 2): 99,
+			},
+		},
+	},
+
+	// Tests that an expiry too soon failure result is properly interpreted.
+	{
+		name:          "fail expiry too soon",
+		route:         &routeFourHop,
+		failureSrcIdx: 3,
+		failure:       lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}),
+
+		expectedResult: &interpretedResult{
+			pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
+				getTestPair(0, 1): 0,
+				getTestPair(1, 0): 0,
+				getTestPair(1, 2): 0,
+				getTestPair(2, 1): 0,
+				getTestPair(2, 3): 0,
+				getTestPair(3, 2): 0,
+			},
+		},
+	},
+
+	// Tests a malformed htlc from a direct peer.
+	{
+		name:          "fail malformed htlc from direct peer",
+		route:         &routeTwoHop,
+		failureSrcIdx: 0,
+		failure:       lnwire.NewInvalidOnionKey(nil),
+
+		expectedResult: &interpretedResult{
+			nodeFailure: &hops[1],
+		},
+	},
+
+	// Tests a malformed htlc from a direct peer that is also the final
+	// destination.
+	{
+		name:          "fail malformed htlc from direct final peer",
+		route:         &routeOneHop,
+		failureSrcIdx: 0,
+		failure:       lnwire.NewInvalidOnionKey(nil),
+
+		expectedResult: &interpretedResult{
+			finalFailureReason: &reasonError,
+			nodeFailure:        &hops[1],
+		},
+	},
+}
+
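Aside, not part of the patch: a hypothetical extra entry for the resultTestCases table above, showing how an unknown-next-peer failure from an intermediate hop would be expressed. Per processPaymentOutcomeIntermediate it should penalize only the reported outgoing pair, in both directions, with no minimum amount:

```go
	// Hypothetical case, not in the diff: an unknown next peer reported
	// by the second hop of the four hop route penalizes the outgoing
	// pair of that hop in both directions.
	{
		name:          "fail unknown next peer",
		route:         &routeFourHop,
		failureSrcIdx: 2,
		failure:       &lnwire.FailUnknownNextPeer{},

		expectedResult: &interpretedResult{
			pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
				getTestPair(2, 3): 0,
				getTestPair(3, 2): 0,
			},
		},
	},
```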
+// TestResultInterpretation executes a list of test cases that test the result
+// interpretation logic.
+func TestResultInterpretation(t *testing.T) {
+	emptyResults := make(map[DirectedNodePair]lnwire.MilliSatoshi)
+
+	for _, testCase := range resultTestCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			i := interpretResult(
+				testCase.route, &testCase.failureSrcIdx,
+				testCase.failure,
+			)
+
+			expected := testCase.expectedResult
+
+			// Replace nil pairResults with empty map to satisfy
+			// DeepEqual.
+			if expected.pairResults == nil {
+				expected.pairResults = emptyResults
+			}
+
+			if !reflect.DeepEqual(i, expected) {
+				t.Fatal("unexpected result")
+			}
+		})
+	}
+}
diff --git a/routing/router.go b/routing/router.go
index 6a8c0308..65b28f51 100644
--- a/routing/router.go
+++ b/routing/router.go
@@ -179,8 +179,8 @@ type MissionController interface {
 	// whether this error is a final error and no further payment attempts
 	// need to be made.
 	ReportPaymentFail(paymentID uint64, rt *route.Route,
-		failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
-		channeldb.FailureReason, error)
+		failureSourceIdx *int, failure lnwire.FailureMessage) (
+		*channeldb.FailureReason, error)

 	// GetProbability is expected to return the success probability of a
 	// payment from fromNode along edge.
@@ -1887,23 +1887,25 @@ func (r *ChannelRouter) tryApplyChannelUpdate(rt *route.Route,
-// to continue with an alternative route. This is indicated by the boolean
-// return value.
+// to continue with an alternative route. A final outcome is indicated by a
+// non-nil failure reason.
 func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
-	sendErr error) (bool, channeldb.FailureReason) {
+	sendErr error) *channeldb.FailureReason {

-	reportFail := func(srcIdx *int, msg lnwire.FailureMessage) (bool,
-		channeldb.FailureReason) {
+	internalErrorReason := channeldb.FailureReasonError
+
+	reportFail := func(srcIdx *int,
+		msg lnwire.FailureMessage) *channeldb.FailureReason {

 		// Report outcome to mission control.
-		final, reason, err := r.cfg.MissionControl.ReportPaymentFail(
+		reason, err := r.cfg.MissionControl.ReportPaymentFail(
 			paymentID, rt, srcIdx, msg,
 		)
 		if err != nil {
 			log.Errorf("Error reporting payment result to mc: %v",
 				err)

-			return true, channeldb.FailureReasonError
+			return &internalErrorReason
 		}

-		return final, reason
+		return reason
 	}

 	if sendErr == htlcswitch.ErrUnreadableFailureMessage {
@@ -1915,7 +1917,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
 	// trying.
 	fErr, ok := sendErr.(*htlcswitch.ForwardingError)
 	if !ok {
-		return true, channeldb.FailureReasonError
+		return &internalErrorReason
 	}

 	failureMessage := fErr.FailureMessage
@@ -1928,7 +1930,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
 		rt, failureSourceIdx, failureMessage,
 	)
 	if err != nil {
-		return true, channeldb.FailureReasonError
+		return &internalErrorReason
 	}
 }
diff --git a/routing/router_test.go b/routing/router_test.go
index e27914e9..a1fa141f 100644
--- a/routing/router_test.go
+++ b/routing/router_test.go
@@ -274,7 +274,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
 	var payHash [32]byte
 	paymentAmt := lnwire.NewMSatFromSatoshis(1000)
 	payment := LightningPayment{
-		Target:      ctx.aliases["luoji"],
+		Target:      ctx.aliases["sophon"],
 		Amount:      paymentAmt,
 		FeeLimit:    noFeeLimit,
 		PaymentHash: payHash,
 	}
 	var preImage [32]byte
 	copy(preImage[:], bytes.Repeat([]byte{9}, 32))

 	// We'll modify the SendToSwitch method that's been set within the
-	// router's configuration to ignore the path that has luo ji as the
+	// router's configuration to ignore the path that has son goku as the
 	// first hop. This should force the router to instead take the
-	// available two hop path (through satoshi).
+	// more costly path (through pham nuwen).
 	ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
 		func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
-			roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843)
-			if firstHop == roasbeefLuoji {
+			roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345)
+			if firstHop == roasbeefSongoku {
 				return [32]byte{}, &htlcswitch.ForwardingError{
-					FailureSourceIdx: 0,
+					FailureSourceIdx: 1,
 					// TODO(roasbeef): temp node failure should be?
 					FailureMessage: &lnwire.FailTemporaryChannelFailure{},
 				}
@@ -302,7 +302,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
 			return preImage, nil
 		})

-	// Send off the payment request to the router, route through satoshi
+	// Send off the payment request to the router, route through pham nuwen
 	// should've been selected as a fall back and succeeded correctly.
 	paymentPreImage, route, err := ctx.router.SendPayment(&payment)
 	if err != nil {
@@ -321,10 +321,10 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
 			preImage[:], paymentPreImage[:])
 	}

-	// The route should have satoshi as the first hop.
-	if route.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
+	// The route should have pham nuwen as the first hop.
+	if route.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {

-		t.Fatalf("route should go through satoshi as first hop, "+
+		t.Fatalf("route should go through phamnuwen as first hop, "+
 			"instead passes through: %v",
 			getAliasFromPubKey(route.Hops[0].PubKeyBytes,
 				ctx.aliases))
@@ -743,7 +743,7 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
 	var payHash [32]byte
 	paymentAmt := lnwire.NewMSatFromSatoshis(1000)
 	payment := LightningPayment{
-		Target:      ctx.aliases["luoji"],
+		Target:      ctx.aliases["sophon"],
 		Amount:      paymentAmt,
 		FeeLimit:    noFeeLimit,
 		PaymentHash: payHash,
 	}

 	var preImage [32]byte
 	copy(preImage[:], bytes.Repeat([]byte{9}, 32))

-	roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843)
+	roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345)
+	roasbeefPhanNuwen := lnwire.NewShortChanIDFromInt(999991)

 	// First, we'll modify the SendToSwitch method to return an error
-	// indicating that the channel from roasbeef to luoji is not operable
+	// indicating that the channel from roasbeef to son goku is not operable
 	// with an UnknownNextPeer.
-	//
-	// TODO(roasbeef): filtering should be intelligent enough so just not
-	// go through satoshi at all at this point.
 	ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
 		func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
-			if firstHop == roasbeefLuoji {
+			if firstHop == roasbeefSongoku {
 				// We'll first simulate an error from the first
-				// outgoing link to simulate the channel from luo ji to
-				// roasbeef not having enough capacity.
+				// hop to simulate the channel from songoku to
+				// sophon not having enough capacity.
 				return [32]byte{}, &htlcswitch.ForwardingError{
-					FailureSourceIdx: 0,
+					FailureSourceIdx: 1,
 					FailureMessage: &lnwire.FailTemporaryChannelFailure{},
 				}
 			}

-			// Next, we'll create an error from satoshi to indicate
-			// that the luoji node is not longer online, which should
-			// prune out the rest of the routes.
-			roasbeefSatoshi := lnwire.NewShortChanIDFromInt(2340213491)
-			if firstHop == roasbeefSatoshi {
+			// Next, we'll create an error from pham nuwen to
+			// indicate that the sophon node is no longer online,
+			// which should prune out the rest of the routes.
+			if firstHop == roasbeefPhanNuwen {
 				return [32]byte{}, &htlcswitch.ForwardingError{
 					FailureSourceIdx: 1,
 					FailureMessage:   &lnwire.FailUnknownNextPeer{},
@@ -804,15 +801,14 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {

 	ctx.router.cfg.MissionControl.(*MissionControl).ResetHistory()

-	// Next, we'll modify the SendToSwitch method to indicate that luo ji
-	// wasn't originally online. This should also halt the send all
-	// together as all paths contain luoji and he can't be reached.
+	// Next, we'll modify the SendToSwitch method to indicate that the
+	// connection between songoku and sophon isn't up.
 	ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
 		func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
-			if firstHop == roasbeefLuoji {
+			if firstHop == roasbeefSongoku {
 				return [32]byte{}, &htlcswitch.ForwardingError{
-					FailureSourceIdx: 0,
+					FailureSourceIdx: 1,
 					FailureMessage:   &lnwire.FailUnknownNextPeer{},
 				}
 			}

 			return preImage, nil
 		})

 	// This shouldn't return an error, as we'll make a payment attempt via
-	// the satoshi channel based on the assumption that there might be an
-	// intermittent issue with the roasbeef <-> lioji channel.
+	// the pham nuwen channel based on the assumption that there might be an
+	// intermittent issue with the songoku <-> sophon channel.
 	paymentPreImage, rt, err := ctx.router.SendPayment(&payment)
 	if err != nil {
 		t.Fatalf("unable send payment: %v", err)
 	}

-	// This path should go: roasbeef -> satoshi -> luoji
+	// This path should go: roasbeef -> pham nuwen -> sophon
 	if len(rt.Hops) != 2 {
 		t.Fatalf("incorrect route length: expected %v got %v", 2,
 			len(rt.Hops))
@@ -837,9 +833,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
 		t.Fatalf("incorrect preimage used: expected %x got %x",
 			preImage[:], paymentPreImage[:])
 	}
-	if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
+	if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {

-		t.Fatalf("route should go through satoshi as first hop, "+
+		t.Fatalf("route should go through phamnuwen as first hop, "+
 			"instead passes through: %v",
 			getAliasFromPubKey(rt.Hops[0].PubKeyBytes,
 				ctx.aliases))
@@ -853,12 +849,12 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {

 	ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
 		func(firstHop lnwire.ShortChannelID) ([32]byte, error) {

-			if firstHop == roasbeefLuoji {
+			if firstHop == roasbeefSongoku {
 				// We'll first simulate an error from the first
-				// outgoing link to simulate the channel from luo ji to
-				// roasbeef not having enough capacity.
+				// hop to simulate the channel from songoku to
+				// sophon not having enough capacity.
 				return [32]byte{}, &htlcswitch.ForwardingError{
-					FailureSourceIdx: 0,
+					FailureSourceIdx: 1,
 					FailureMessage: &lnwire.FailTemporaryChannelFailure{},
 				}
 			}
@@ -886,9 +882,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
 	}

-	// The route should have satoshi as the first hop.
-	if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
+	// The route should have pham nuwen as the first hop.
+	if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {

-		t.Fatalf("route should go through satoshi as first hop, "+
+		t.Fatalf("route should go through phamnuwen as first hop, "+
 			"instead passes through: %v",
 			getAliasFromPubKey(rt.Hops[0].PubKeyBytes,
 				ctx.aliases))
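Aside, not part of the patch: the refactor changes the retry contract from a boolean plus reason to a single nullable failure reason, as consumed in payment_lifecycle.go above. A minimal sketch of how a payment loop uses that contract, with all names illustrative rather than lnd APIs:

```go
package sketch

import "fmt"

// failureReason stands in for channeldb.FailureReason.
type failureReason int

// payLoop retries payment attempts until one succeeds, a final failure
// reason is returned, or the attempt budget runs out. sendAttempt stands in
// for dispatching one attempt over one route, and classify stands in for
// processSendError: nil means "retryable", non-nil means "final".
func payLoop(maxAttempts int, sendAttempt func(routeIdx int) error,
	classify func(err error) *failureReason) error {

	for routeIdx := 0; routeIdx < maxAttempts; routeIdx++ {
		err := sendAttempt(routeIdx)
		if err == nil {
			// Payment succeeded.
			return nil
		}

		// A nil reason attributes the failure to part of the route and
		// allows another attempt; a non-nil reason ends the payment.
		if reason := classify(err); reason != nil {
			return fmt.Errorf("payment failed with final "+
				"reason %d", *reason)
		}
	}

	return fmt.Errorf("no route succeeded after %d attempts", maxAttempts)
}
```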