From e7af6a077a76072c0e718c9146885f7000becd74 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Mon, 5 Aug 2019 12:13:58 +0200 Subject: [PATCH 1/7] routing: convert to nillable failure reason This commit converts several functions from returning a bool and a failure reason to a nillable failure reason as return parameter. This will take away confusion about the interpretation of the two separate values. --- routing/missioncontrol.go | 18 +++++++++++------- routing/mock_test.go | 8 ++++---- routing/payment_lifecycle.go | 8 ++++---- routing/router.go | 22 ++++++++++++---------- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/routing/missioncontrol.go b/routing/missioncontrol.go index b2ea102d..f1f28dc4 100644 --- a/routing/missioncontrol.go +++ b/routing/missioncontrol.go @@ -420,12 +420,11 @@ func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot { // ReportPaymentFail reports a failed payment to mission control as input for // future probability estimates. The failureSourceIdx argument indicates the // failure source. If it is nil, the failure source is unknown. This function -// returns a bool indicating whether this error is a final error. If it is -// final, a failure reason is returned and no further payment attempts need to -// be made. +// returns a reason if this failure is a final failure. In that case no further +// payment attempts need to be made. func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route, - failureSourceIdx *int, failure lnwire.FailureMessage) (bool, - channeldb.FailureReason, error) { + failureSourceIdx *int, failure lnwire.FailureMessage) ( + *channeldb.FailureReason, error) { timestamp := m.now() @@ -442,13 +441,18 @@ func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route, // Store complete result in database. if err := m.store.AddResult(result); err != nil { - return false, 0, err + return nil, err } // Apply result to update mission control state. final, reason := m.applyPaymentResult(result) - return final, reason, nil + // Convert final bool and reason to nillable reason. + if final { + return &reason, nil + } + + return nil, nil } // applyPaymentResult applies a payment result as input for future probability diff --git a/routing/mock_test.go b/routing/mock_test.go index b58ba3d5..96da1448 100644 --- a/routing/mock_test.go +++ b/routing/mock_test.go @@ -98,11 +98,11 @@ type mockMissionControl struct { var _ MissionController = (*mockMissionControl)(nil) -func (m *mockMissionControl) ReportPaymentFail(paymentID uint64, - rt *route.Route, failureSourceIdx *int, failure lnwire.FailureMessage) ( - bool, channeldb.FailureReason, error) { +func (m *mockMissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route, + failureSourceIdx *int, failure lnwire.FailureMessage) ( + *channeldb.FailureReason, error) { - return false, 0, nil + return nil, nil } func (m *mockMissionControl) GetProbability(fromNode, toNode route.Vertex, diff --git a/routing/payment_lifecycle.go b/routing/payment_lifecycle.go index db9b9803..18edfc0a 100644 --- a/routing/payment_lifecycle.go +++ b/routing/payment_lifecycle.go @@ -342,10 +342,10 @@ func (p *paymentLifecycle) sendPaymentAttempt(firstHop lnwire.ShortChannelID, // whether we should make another payment attempt. 
func (p *paymentLifecycle) handleSendError(sendErr error) error { - final, reason := p.router.processSendError( + reason := p.router.processSendError( p.attempt.PaymentID, &p.attempt.Route, sendErr, ) - if !final { + if reason == nil { // Save the forwarding error so it can be returned if // this turns out to be the last attempt. p.lastError = sendErr @@ -354,14 +354,14 @@ func (p *paymentLifecycle) handleSendError(sendErr error) error { } log.Debugf("Payment %x failed: final_outcome=%v, raw_err=%v", - p.payment.PaymentHash, reason, sendErr) + p.payment.PaymentHash, *reason, sendErr) // Mark the payment failed with no route. // // TODO(halseth): make payment codes for the actual reason we don't // continue path finding. err := p.router.cfg.Control.Fail( - p.payment.PaymentHash, reason, + p.payment.PaymentHash, *reason, ) if err != nil { return err diff --git a/routing/router.go b/routing/router.go index 6a8c0308..65b28f51 100644 --- a/routing/router.go +++ b/routing/router.go @@ -179,8 +179,8 @@ type MissionController interface { // whether this error is a final error and no further payment attempts // need to be made. ReportPaymentFail(paymentID uint64, rt *route.Route, - failureSourceIdx *int, failure lnwire.FailureMessage) (bool, - channeldb.FailureReason, error) + failureSourceIdx *int, failure lnwire.FailureMessage) ( + *channeldb.FailureReason, error) // GetProbability is expected to return the success probability of a // payment from fromNode along edge. @@ -1887,23 +1887,25 @@ func (r *ChannelRouter) tryApplyChannelUpdate(rt *route.Route, // to continue with an alternative route. This is indicated by the boolean // return value. func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route, - sendErr error) (bool, channeldb.FailureReason) { + sendErr error) *channeldb.FailureReason { - reportFail := func(srcIdx *int, msg lnwire.FailureMessage) (bool, - channeldb.FailureReason) { + internalErrorReason := channeldb.FailureReasonError + + reportFail := func(srcIdx *int, + msg lnwire.FailureMessage) *channeldb.FailureReason { // Report outcome to mission control. - final, reason, err := r.cfg.MissionControl.ReportPaymentFail( + reason, err := r.cfg.MissionControl.ReportPaymentFail( paymentID, rt, srcIdx, msg, ) if err != nil { log.Errorf("Error reporting payment result to mc: %v", err) - return true, channeldb.FailureReasonError + return &internalErrorReason } - return final, reason + return reason } if sendErr == htlcswitch.ErrUnreadableFailureMessage { @@ -1915,7 +1917,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route, // trying. fErr, ok := sendErr.(*htlcswitch.ForwardingError) if !ok { - return true, channeldb.FailureReasonError + return &internalErrorReason } failureMessage := fErr.FailureMessage @@ -1928,7 +1930,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route, rt, failureSourceIdx, failureMessage, ) if err != nil { - return true, channeldb.FailureReasonError + return &internalErrorReason } } From 45dacd0df1e6189b5d3f011c68a9a3bce413a3ee Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Mon, 5 Aug 2019 12:29:16 +0200 Subject: [PATCH 2/7] routing: isolate failure interpretation from mission control This commit moves the payment outcome interpretation logic into a separate file. Also, mission control isn't updated directly anymore, but results are stored in an interpretedResult struct. 
This allows the mission control state to be locked for a minimum amount of time and makes it easier to unit test the result interpretation. --- routing/missioncontrol.go | 317 +++----------------------- routing/result_interpretation.go | 275 ++++++++++++++++++++++ routing/result_interpretation_test.go | 101 ++++++++ 3 files changed, 407 insertions(+), 286 deletions(-) create mode 100644 routing/result_interpretation.go create mode 100644 routing/result_interpretation_test.go diff --git a/routing/missioncontrol.go b/routing/missioncontrol.go index f1f28dc4..65e342e1 100644 --- a/routing/missioncontrol.go +++ b/routing/missioncontrol.go @@ -322,55 +322,6 @@ func (m *MissionControl) requestSecondChance(timestamp time.Time, return false } -// reportVertexFailure reports a node level failure. -func (m *MissionControl) reportVertexFailure(timestamp time.Time, - v route.Vertex) { - - log.Debugf("Reporting vertex %v failure to Mission Control", v) - - m.Lock() - defer m.Unlock() - - m.lastNodeFailure[v] = timestamp -} - -// reportPairPolicyFailure reports a policy related failure. -func (m *MissionControl) reportPairPolicyFailure(timestamp time.Time, - failedPair DirectedNodePair) { - - m.Lock() - defer m.Unlock() - - // We may have an out of date graph. Therefore we don't always penalize - // immediately. If some time has passed since the last policy failure, - // we grant the node a second chance at forwarding the payment. - if m.requestSecondChance( - timestamp, failedPair.From, failedPair.To, - ) { - return - } - - m.lastNodeFailure[failedPair.From] = timestamp -} - -// reportPairFailure reports a pair level failure. -// -// TODO(roasbeef): also add value attempted to send and capacity of channel -func (m *MissionControl) reportPairFailure(timestamp time.Time, - failedPair DirectedNodePair, minPenalizeAmt lnwire.MilliSatoshi) { - - log.Debugf("Reporting pair %v failure to Mission Control", failedPair) - - m.Lock() - defer m.Unlock() - - pair := NewDirectedNodePair(failedPair.From, failedPair.To) - m.lastPairFailure[pair] = pairFailure{ - minPenalizeAmt: minPenalizeAmt, - timestamp: timestamp, - } -} - // GetHistorySnapshot takes a snapshot from the current mission control state // and actual probability estimates. func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot { @@ -445,257 +396,51 @@ func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route, } // Apply result to update mission control state. - final, reason := m.applyPaymentResult(result) + reason := m.applyPaymentResult(result) - // Convert final bool and reason to nillable reason. - if final { - return &reason, nil - } - - return nil, nil + return reason, nil } // applyPaymentResult applies a payment result as input for future probability // estimates. It returns a bool indicating whether this error is a final error // and no further payment attempts need to be made. -func (m *MissionControl) applyPaymentResult(result *paymentResult) ( - bool, channeldb.FailureReason) { +func (m *MissionControl) applyPaymentResult( + result *paymentResult) *channeldb.FailureReason { - var ( - failureSourceIdxInt int - failure lnwire.FailureMessage + // Interpret result. + i := interpretResult( + result.route, result.failureSourceIdx, result.failure, ) - if result.failureSourceIdx == nil { - // If the failure message could not be decrypted, attribute the - // failure to our own outgoing channel. - // - // TODO(joostager): Penalize all channels in the route. 
- failureSourceIdxInt = 0 - failure = lnwire.NewTemporaryChannelFailure(nil) - } else { - failureSourceIdxInt = *result.failureSourceIdx - failure = result.failure + // Update mission control state using the interpretation. + m.Lock() + defer m.Unlock() + + if i.policyFailure != nil { + if m.requestSecondChance( + result.timeReply, + i.policyFailure.From, i.policyFailure.To, + ) { + return nil + } } - var failureVertex route.Vertex + if i.nodeFailure != nil { + log.Debugf("Reporting node failure to Mission Control: "+ + "node=%v", *i.nodeFailure) - if failureSourceIdxInt > 0 { - failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes - } else { - failureVertex = result.route.SourcePubKey + m.lastNodeFailure[*i.nodeFailure] = result.timeReply } - log.Tracef("Node %x (index %v) reported failure when sending htlc", - failureVertex, result.failureSourceIdx) - // Always determine chan id ourselves, because a channel update with id - // may not be available. - failedPair, failedAmt := getFailedPair( - result.route, failureSourceIdxInt, - ) + for pair, minPenalizeAmt := range i.pairResults { + log.Debugf("Reporting pair failure to Mission Control: "+ + "pair=%v, minPenalizeAmt=%v", pair, minPenalizeAmt) - switch failure.(type) { - - // If the end destination didn't know the payment - // hash or we sent the wrong payment amount to the - // destination, then we'll terminate immediately. - case *lnwire.FailIncorrectDetails: - // TODO(joostjager): Check onionErr.Amount() whether it matches - // what we expect. (Will it ever not match, because if not - // final_incorrect_htlc_amount would be returned?) - - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we sent the wrong amount to the destination, then - // we'll exit early. - case *lnwire.FailIncorrectPaymentAmount: - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If the time-lock that was extended to the final node - // was incorrect, then we can't proceed. - case *lnwire.FailFinalIncorrectCltvExpiry: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc that expires too soon. - // In that case we should continue routing. - return true, channeldb.FailureReasonError - - // If we crafted an invalid onion payload for the final - // node, then we'll exit early. - case *lnwire.FailFinalIncorrectHtlcAmount: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc with a too low value. In - // that case we should continue routing. - - return true, channeldb.FailureReasonError - - // Similarly, if the HTLC expiry that we extended to - // the final hop expires too soon, then will fail the - // payment. - // - // TODO(roasbeef): can happen to to race condition, try - // again with recent block height - case *lnwire.FailFinalExpiryTooSoon: - // TODO(joostjager): Take into account that any hop may have - // delayed. Ideally we should continue routing. Knowing the - // delaying node at this point would help. - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we erroneously attempted to cross a chain border, - // then we'll cancel the payment. - case *lnwire.FailInvalidRealm: - return true, channeldb.FailureReasonError - - // If we get a notice that the expiry was too soon for - // an intermediate node, then we'll prune out the node - // that sent us this error, as it doesn't now what the - // correct block height is. 
- case *lnwire.FailExpiryTooSoon: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we hit an instance of onion payload corruption or an invalid - // version, then we'll exit early as this shouldn't happen in the - // typical case. - // - // TODO(joostjager): Take into account that the previous hop may have - // tampered with the onion. Routing should continue using other paths. - case *lnwire.FailInvalidOnionVersion: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionHmac: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonError - - // If we get a failure due to violating the minimum - // amount, we'll apply the new minimum amount and retry - // routing. - case *lnwire.FailAmountBelowMinimum: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // If we get a failure due to a fee, we'll apply the - // new fee update, and retry our attempt using the - // newly updated fees. - case *lnwire.FailFeeInsufficient: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // If we get the failure for an intermediate node that - // disagrees with our time lock values, then we'll - // apply the new delta value and try it once more. - case *lnwire.FailIncorrectCltvExpiry: - m.reportPairPolicyFailure(result.timeReply, failedPair) - return false, 0 - - // The outgoing channel that this node was meant to - // forward one is currently disabled, so we'll apply - // the update and continue. - case *lnwire.FailChannelDisabled: - m.reportPairFailure(result.timeReply, failedPair, 0) - return false, 0 - - // It's likely that the outgoing channel didn't have - // sufficient capacity, so we'll prune this pair for - // now, and continue onwards with our path finding. - case *lnwire.FailTemporaryChannelFailure: - m.reportPairFailure(result.timeReply, failedPair, failedAmt) - return false, 0 - - // If the send fail due to a node not having the - // required features, then we'll note this error and - // continue. - case *lnwire.FailRequiredNodeFeatureMissing: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If the send fail due to a node not having the - // required features, then we'll note this error and - // continue. - case *lnwire.FailRequiredChannelFeatureMissing: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If the next hop in the route wasn't known or - // offline, we'll only the channel which we attempted - // to route over. This is conservative, and it can - // handle faulty channels between nodes properly. - // Additionally, this guards against routing nodes - // returning errors in order to attempt to black list - // another node. - case *lnwire.FailUnknownNextPeer: - m.reportPairFailure(result.timeReply, failedPair, 0) - return false, 0 - - // If the node wasn't able to forward for which ever - // reason, then we'll note this and continue with the - // routes. - case *lnwire.FailTemporaryNodeFailure: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - case *lnwire.FailPermanentNodeFailure: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we crafted a route that contains a too long time - // lock for an intermediate node, we'll prune the node. - // As there currently is no way of knowing that node's - // maximum acceptable cltv, we cannot take this - // constraint into account during routing. 
- // - // TODO(joostjager): Record the rejected cltv and use - // that as a hint during future path finding through - // that node. - case *lnwire.FailExpiryTooFar: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 - - // If we get a permanent channel or node failure, then - // we'll prune the channel in both directions and - // continue with the rest of the routes. - case *lnwire.FailPermanentChannelFailure: - m.reportPairFailure(result.timeReply, failedPair, 0) - m.reportPairFailure( - result.timeReply, failedPair.Reverse(), 0, - ) - return false, 0 - - // Any other failure or an empty failure will get the node pruned. - default: - m.reportVertexFailure(result.timeReply, failureVertex) - return false, 0 + m.lastPairFailure[pair] = pairFailure{ + minPenalizeAmt: minPenalizeAmt, + timestamp: result.timeReply, + } } -} - -// getFailedPair tries to locate the failing pair given a route and the pubkey -// of the node that sent the failure. It will assume that the failure is -// associated with the outgoing channel set of the failing node. As a second -// result, it returns the amount sent between the pair. -func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair, - lnwire.MilliSatoshi) { - - // Determine if we have a failure from the final hop. If it is, we - // assume that the failing channel is the incoming channel. - // - // TODO(joostjager): In this case, certain types of failures are not - // expected. For example FailUnknownNextPeer. This could be a reason to - // prune the node? - if failureSource == len(route.Hops) { - failureSource-- - } - - // As this failure indicates that the target channel was unable to carry - // this HTLC (for w/e reason), we'll return the _outgoing_ channel that - // the source of the failure was meant to pass the HTLC along to. - if failureSource == 0 { - return NewDirectedNodePair( - route.SourcePubKey, - route.Hops[0].PubKeyBytes, - ), route.TotalAmount - } - - return NewDirectedNodePair( - route.Hops[failureSource-1].PubKeyBytes, - route.Hops[failureSource].PubKeyBytes, - ), route.Hops[failureSource-1].AmtToForward + + return i.finalFailureReason } diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go new file mode 100644 index 00000000..5eb35563 --- /dev/null +++ b/routing/result_interpretation.go @@ -0,0 +1,275 @@ +package routing + +import ( + "github.com/lightningnetwork/lnd/channeldb" + + "github.com/lightningnetwork/lnd/lnwire" + "github.com/lightningnetwork/lnd/routing/route" +) + +// interpretedResult contains the result of the interpretation of a payment +// outcome. +type interpretedResult struct { + // nodeFailure points to a node pubkey if all channels of that node are + // responsible for the result. + nodeFailure *route.Vertex + + // pairResults contains a map of node pairs that could be responsible + // for the failure. The map values are the minimum amounts for which a + // future penalty should be applied. + pairResults map[DirectedNodePair]lnwire.MilliSatoshi + + // finalFailureReason is set to a non-nil value if it makes no more + // sense to start another payment attempt. It will contain the reason + // why. + finalFailureReason *channeldb.FailureReason + + // policyFailure is set to a node pair if there is a policy failure on + // that connection. This is used to control the second chance logic for + // policy failures. 
+ policyFailure *DirectedNodePair +} + +// interpretResult interprets a payment outcome and returns an object that +// contains information required to update mission control. +func interpretResult(rt *route.Route, failureSrcIdx *int, + failure lnwire.FailureMessage) *interpretedResult { + + i := &interpretedResult{ + pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi), + } + + final, reason := i.processFail(rt, failureSrcIdx, failure) + if final { + i.finalFailureReason = &reason + } + + return i +} + +// processFail processes a failed payment attempt. +func (i *interpretedResult) processFail( + rt *route.Route, errSourceIdx *int, + failure lnwire.FailureMessage) (bool, channeldb.FailureReason) { + + var failureSourceIdxInt int + + if errSourceIdx == nil { + // If the failure message could not be decrypted, attribute the + // failure to our own outgoing channel. + // + // TODO(joostager): Penalize all channels in the route. + failureSourceIdxInt = 0 + failure = lnwire.NewTemporaryChannelFailure(nil) + } else { + failureSourceIdxInt = *errSourceIdx + } + + var failureVertex route.Vertex + + if failureSourceIdxInt > 0 { + failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes + } else { + failureVertex = rt.SourcePubKey + } + log.Tracef("Node %x (index %v) reported failure when sending htlc", + failureVertex, errSourceIdx) + + // Always determine chan id ourselves, because a channel update with id + // may not be available. + failedPair, failedAmt := getFailedPair( + rt, failureSourceIdxInt, + ) + + switch failure.(type) { + + // If the end destination didn't know the payment hash or we sent the + // wrong payment amount to the destination, then we'll terminate + // immediately. + case *lnwire.FailIncorrectDetails: + // TODO(joostjager): Check onionErr.Amount() whether it matches + // what we expect. (Will it ever not match, because if not + // final_incorrect_htlc_amount would be returned?) + + return true, channeldb.FailureReasonIncorrectPaymentDetails + + // If we sent the wrong amount to the destination, then we'll exit + // early. + case *lnwire.FailIncorrectPaymentAmount: + return true, channeldb.FailureReasonIncorrectPaymentDetails + + // If the time-lock that was extended to the final node was incorrect, + // then we can't proceed. + case *lnwire.FailFinalIncorrectCltvExpiry: + // TODO(joostjager): Take into account that second last hop may + // have deliberately handed out an htlc that expires too soon. + // In that case we should continue routing. + return true, channeldb.FailureReasonError + + // If we crafted an invalid onion payload for the final node, then we'll + // exit early. + case *lnwire.FailFinalIncorrectHtlcAmount: + // TODO(joostjager): Take into account that second last hop may + // have deliberately handed out an htlc with a too low value. In + // that case we should continue routing. + return true, channeldb.FailureReasonError + + // Similarly, if the HTLC expiry that we extended to the final hop + // expires too soon, then will fail the payment. + // + // TODO(roasbeef): can happen to to race condition, try again with + // recent block height + case *lnwire.FailFinalExpiryTooSoon: + // TODO(joostjager): Take into account that any hop may have + // delayed. Ideally we should continue routing. Knowing the + // delaying node at this point would help. + return true, channeldb.FailureReasonIncorrectPaymentDetails + + // If we erroneously attempted to cross a chain border, then we'll + // cancel the payment. 
+ case *lnwire.FailInvalidRealm: + return true, channeldb.FailureReasonError + + // If we get a notice that the expiry was too soon for an intermediate + // node, then we'll prune out the node that sent us this error, as it + // doesn't now what the correct block height is. + case *lnwire.FailExpiryTooSoon: + i.nodeFailure = &failureVertex + return false, 0 + + // If we hit an instance of onion payload corruption or an invalid + // version, then we'll exit early as this shouldn't happen in the + // typical case. + // + // TODO(joostjager): Take into account that the previous hop may have + // tampered with the onion. Routing should continue using other paths. + case *lnwire.FailInvalidOnionVersion: + return true, channeldb.FailureReasonError + case *lnwire.FailInvalidOnionHmac: + return true, channeldb.FailureReasonError + case *lnwire.FailInvalidOnionKey: + return true, channeldb.FailureReasonError + + // If we get a failure due to violating the minimum amount, we'll apply + // the new minimum amount and retry routing. + case *lnwire.FailAmountBelowMinimum: + i.policyFailure = &failedPair + i.pairResults[failedPair] = 0 + return false, 0 + + // If we get a failure due to a fee, we'll apply the new fee update, and + // retry our attempt using the newly updated fees. + case *lnwire.FailFeeInsufficient: + i.policyFailure = &failedPair + i.pairResults[failedPair] = 0 + return false, 0 + + // If we get the failure for an intermediate node that disagrees with + // our time lock values, then we'll apply the new delta value and try it + // once more. + case *lnwire.FailIncorrectCltvExpiry: + i.policyFailure = &failedPair + i.pairResults[failedPair] = 0 + return false, 0 + + // The outgoing channel that this node was meant to forward one is + // currently disabled, so we'll apply the update and continue. + case *lnwire.FailChannelDisabled: + i.pairResults[failedPair] = 0 + return false, 0 + + // It's likely that the outgoing channel didn't have sufficient + // capacity, so we'll prune this edge for now, and continue onwards with + // our path finding. + case *lnwire.FailTemporaryChannelFailure: + i.pairResults[failedPair] = failedAmt + return false, 0 + + // If the send fail due to a node not having the required features, then + // we'll note this error and continue. + case *lnwire.FailRequiredNodeFeatureMissing: + i.nodeFailure = &failureVertex + return false, 0 + + // If the send fail due to a node not having the required features, then + // we'll note this error and continue. + case *lnwire.FailRequiredChannelFeatureMissing: + i.nodeFailure = &failureVertex + return false, 0 + + // If the next hop in the route wasn't known or offline, we'll only the + // channel which we attempted to route over. This is conservative, and + // it can handle faulty channels between nodes properly. Additionally, + // this guards against routing nodes returning errors in order to + // attempt to black list another node. + case *lnwire.FailUnknownNextPeer: + i.pairResults[failedPair] = 0 + return false, 0 + + // If the node wasn't able to forward for which ever reason, then we'll + // note this and continue with the routes. + case *lnwire.FailTemporaryNodeFailure: + i.nodeFailure = &failureVertex + return false, 0 + + case *lnwire.FailPermanentNodeFailure: + i.nodeFailure = &failureVertex + return false, 0 + + // If we crafted a route that contains a too long time lock for an + // intermediate node, we'll prune the node. 
As there currently is no way + // of knowing that node's maximum acceptable cltv, we cannot take this + // constraint into account during routing. + // + // TODO(joostjager): Record the rejected cltv and use that as a hint + // during future path finding through that node. + case *lnwire.FailExpiryTooFar: + i.nodeFailure = &failureVertex + return false, 0 + + // If we get a permanent channel or node failure, then we'll prune the + // channel in both directions and continue with the rest of the routes. + case *lnwire.FailPermanentChannelFailure: + i.pairResults[failedPair] = 0 + i.pairResults[failedPair.Reverse()] = 0 + return false, 0 + + // Any other failure or an empty failure will get the node pruned. + default: + i.nodeFailure = &failureVertex + return false, 0 + } +} + +// getFailedPair tries to locate the failing pair given a route and the pubkey +// of the node that sent the failure. It will assume that the failure is +// associated with the outgoing channel set of the failing node. As a second +// result, it returns the amount sent between the pair. +func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair, + lnwire.MilliSatoshi) { + + // Determine if we have a failure from the final hop. If it is, we + // assume that the failing channel is the incoming channel. + // + // TODO(joostjager): In this case, certain types of failures are not + // expected. For example FailUnknownNextPeer. This could be a reason to + // prune the node? + if failureSource == len(route.Hops) { + failureSource-- + } + + // As this failure indicates that the target channel was unable to carry + // this HTLC (for w/e reason), we'll return the _outgoing_ channel that + // the source of the failure was meant to pass the HTLC along to. + if failureSource == 0 { + return NewDirectedNodePair( + route.SourcePubKey, + route.Hops[0].PubKeyBytes, + ), route.TotalAmount + } + + return NewDirectedNodePair( + route.Hops[failureSource-1].PubKeyBytes, + route.Hops[failureSource].PubKeyBytes, + ), route.Hops[failureSource-1].AmtToForward +} diff --git a/routing/result_interpretation_test.go b/routing/result_interpretation_test.go new file mode 100644 index 00000000..b7b29577 --- /dev/null +++ b/routing/result_interpretation_test.go @@ -0,0 +1,101 @@ +package routing + +import ( + "reflect" + "testing" + + "github.com/lightningnetwork/lnd/lnwire" + + "github.com/lightningnetwork/lnd/routing/route" +) + +var ( + hops = []route.Vertex{ + {1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4}, + } + + routeTwoHop = route.Route{ + SourcePubKey: hops[0], + TotalAmount: 100, + Hops: []*route.Hop{ + {PubKeyBytes: hops[1], AmtToForward: 99}, + {PubKeyBytes: hops[2], AmtToForward: 97}, + }, + } + + routeFourHop = route.Route{ + SourcePubKey: hops[0], + TotalAmount: 100, + Hops: []*route.Hop{ + {PubKeyBytes: hops[1], AmtToForward: 99}, + {PubKeyBytes: hops[2], AmtToForward: 97}, + {PubKeyBytes: hops[3], AmtToForward: 94}, + {PubKeyBytes: hops[4], AmtToForward: 90}, + }, + } +) + +type resultTestCase struct { + name string + route *route.Route + failureSrcIdx int + failure lnwire.FailureMessage + + expectedResult *interpretedResult +} + +var resultTestCases = []resultTestCase{ + // Tests that a temporary channel failure result is properly + // interpreted. 
+ { + name: "fail", + route: &routeTwoHop, + failureSrcIdx: 1, + failure: lnwire.NewTemporaryChannelFailure(nil), + + expectedResult: &interpretedResult{ + pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{ + NewDirectedNodePair(hops[1], hops[2]): 99, + }, + }, + }, + + // Tests that a expiry too soon failure result is properly interpreted. + { + name: "fail expiry too soon", + route: &routeFourHop, + failureSrcIdx: 3, + failure: lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}), + + expectedResult: &interpretedResult{ + nodeFailure: &hops[3], + }, + }, +} + +// TestResultInterpretation executes a list of test cases that test the result +// interpretation logic. +func TestResultInterpretation(t *testing.T) { + emptyResults := make(map[DirectedNodePair]lnwire.MilliSatoshi) + + for _, testCase := range resultTestCases { + t.Run(testCase.name, func(t *testing.T) { + i := interpretResult( + testCase.route, &testCase.failureSrcIdx, + testCase.failure, + ) + + expected := testCase.expectedResult + + // Replace nil pairResults with empty map to satisfy + // DeepEqual. + if expected.pairResults == nil { + expected.pairResults = emptyResults + } + + if !reflect.DeepEqual(i, expected) { + t.Fatal("unexpected result") + } + }) + } +} From c39d7a29cde031b44a82672e01a03ba287f34bba Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Tue, 18 Jun 2019 19:42:51 +0200 Subject: [PATCH 3/7] routing/test: do not test local channel mission control This commit updates existing tests to not rely on mission control for pruning of local channels. Information about local channels should already be up to date before path finding starts. If not, the problem should be fixed where bandwidth hints are set up. --- routing/router_test.go | 74 ++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/routing/router_test.go b/routing/router_test.go index e27914e9..a1fa141f 100644 --- a/routing/router_test.go +++ b/routing/router_test.go @@ -274,7 +274,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) { var payHash [32]byte paymentAmt := lnwire.NewMSatFromSatoshis(1000) payment := LightningPayment{ - Target: ctx.aliases["luoji"], + Target: ctx.aliases["sophon"], Amount: paymentAmt, FeeLimit: noFeeLimit, PaymentHash: payHash, @@ -284,16 +284,16 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) { copy(preImage[:], bytes.Repeat([]byte{9}, 32)) // We'll modify the SendToSwitch method that's been set within the - // router's configuration to ignore the path that has luo ji as the + // router's configuration to ignore the path that has son goku as the // first hop. This should force the router to instead take the - // available two hop path (through satoshi). + // the more costly path (through pham nuwen). ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult( func(firstHop lnwire.ShortChannelID) ([32]byte, error) { - roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843) - if firstHop == roasbeefLuoji { + roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345) + if firstHop == roasbeefSongoku { return [32]byte{}, &htlcswitch.ForwardingError{ - FailureSourceIdx: 0, + FailureSourceIdx: 1, // TODO(roasbeef): temp node failure should be? 
FailureMessage: &lnwire.FailTemporaryChannelFailure{}, } @@ -302,7 +302,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) { return preImage, nil }) - // Send off the payment request to the router, route through satoshi + // Send off the payment request to the router, route through pham nuwen // should've been selected as a fall back and succeeded correctly. paymentPreImage, route, err := ctx.router.SendPayment(&payment) if err != nil { @@ -321,10 +321,10 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) { preImage[:], paymentPreImage[:]) } - // The route should have satoshi as the first hop. - if route.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] { + // The route should have pham nuwen as the first hop. + if route.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] { - t.Fatalf("route should go through satoshi as first hop, "+ + t.Fatalf("route should go through phamnuwen as first hop, "+ "instead passes through: %v", getAliasFromPubKey(route.Hops[0].PubKeyBytes, ctx.aliases)) @@ -743,7 +743,7 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { var payHash [32]byte paymentAmt := lnwire.NewMSatFromSatoshis(1000) payment := LightningPayment{ - Target: ctx.aliases["luoji"], + Target: ctx.aliases["sophon"], Amount: paymentAmt, FeeLimit: noFeeLimit, PaymentHash: payHash, @@ -752,32 +752,29 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { var preImage [32]byte copy(preImage[:], bytes.Repeat([]byte{9}, 32)) - roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843) + roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345) + roasbeefPhanNuwen := lnwire.NewShortChanIDFromInt(999991) // First, we'll modify the SendToSwitch method to return an error - // indicating that the channel from roasbeef to luoji is not operable + // indicating that the channel from roasbeef to son goku is not operable // with an UnknownNextPeer. - // - // TODO(roasbeef): filtering should be intelligent enough so just not - // go through satoshi at all at this point. ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult( func(firstHop lnwire.ShortChannelID) ([32]byte, error) { - if firstHop == roasbeefLuoji { + if firstHop == roasbeefSongoku { // We'll first simulate an error from the first - // outgoing link to simulate the channel from luo ji to - // roasbeef not having enough capacity. + // hop to simulate the channel from songoku to + // sophon not having enough capacity. return [32]byte{}, &htlcswitch.ForwardingError{ - FailureSourceIdx: 0, + FailureSourceIdx: 1, FailureMessage: &lnwire.FailTemporaryChannelFailure{}, } } - // Next, we'll create an error from satoshi to indicate - // that the luoji node is not longer online, which should - // prune out the rest of the routes. - roasbeefSatoshi := lnwire.NewShortChanIDFromInt(2340213491) - if firstHop == roasbeefSatoshi { + // Next, we'll create an error from phan nuwen to + // indicate that the sophon node is not longer online, + // which should prune out the rest of the routes. + if firstHop == roasbeefPhanNuwen { return [32]byte{}, &htlcswitch.ForwardingError{ FailureSourceIdx: 1, FailureMessage: &lnwire.FailUnknownNextPeer{}, @@ -804,15 +801,14 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { ctx.router.cfg.MissionControl.(*MissionControl).ResetHistory() - // Next, we'll modify the SendToSwitch method to indicate that luo ji - // wasn't originally online. This should also halt the send all - // together as all paths contain luoji and he can't be reached. 
+ // Next, we'll modify the SendToSwitch method to indicate that the + // connection between songoku and isn't up. ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult( func(firstHop lnwire.ShortChannelID) ([32]byte, error) { - if firstHop == roasbeefLuoji { + if firstHop == roasbeefSongoku { return [32]byte{}, &htlcswitch.ForwardingError{ - FailureSourceIdx: 0, + FailureSourceIdx: 1, FailureMessage: &lnwire.FailUnknownNextPeer{}, } } @@ -821,14 +817,14 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { }) // This shouldn't return an error, as we'll make a payment attempt via - // the satoshi channel based on the assumption that there might be an - // intermittent issue with the roasbeef <-> lioji channel. + // the pham nuwen channel based on the assumption that there might be an + // intermittent issue with the songoku <-> sophon channel. paymentPreImage, rt, err := ctx.router.SendPayment(&payment) if err != nil { t.Fatalf("unable send payment: %v", err) } - // This path should go: roasbeef -> satoshi -> luoji + // This path should go: roasbeef -> pham nuwen -> sophon if len(rt.Hops) != 2 { t.Fatalf("incorrect route length: expected %v got %v", 2, len(rt.Hops)) @@ -837,9 +833,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { t.Fatalf("incorrect preimage used: expected %x got %x", preImage[:], paymentPreImage[:]) } - if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] { + if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] { - t.Fatalf("route should go through satoshi as first hop, "+ + t.Fatalf("route should go through phamnuwen as first hop, "+ "instead passes through: %v", getAliasFromPubKey(rt.Hops[0].PubKeyBytes, ctx.aliases)) @@ -853,12 +849,12 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult( func(firstHop lnwire.ShortChannelID) ([32]byte, error) { - if firstHop == roasbeefLuoji { + if firstHop == roasbeefSongoku { // We'll first simulate an error from the first // outgoing link to simulate the channel from luo ji to // roasbeef not having enough capacity. return [32]byte{}, &htlcswitch.ForwardingError{ - FailureSourceIdx: 0, + FailureSourceIdx: 1, FailureMessage: &lnwire.FailTemporaryChannelFailure{}, } } @@ -886,9 +882,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) { } // The route should have satoshi as the first hop. - if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] { + if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] { - t.Fatalf("route should go through satoshi as first hop, "+ + t.Fatalf("route should go through phamnuwen as first hop, "+ "instead passes through: %v", getAliasFromPubKey(rt.Hops[0].PubKeyBytes, ctx.aliases)) From e135cf7326cf572505c83c06744e2bfa9c96fc61 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Mon, 5 Aug 2019 14:07:25 +0200 Subject: [PATCH 4/7] routing: penalize all node pairs for unknown outcomes When an undecryptable failure comes back for a payment attempt, we previously only penalized our own outgoing connection. However, any node could have caused this failure. It is therefore better to penalize all node connections along the route. Then at least we know for sure that we will hit the responsible node. 
--- routing/result_interpretation.go | 86 ++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 10 deletions(-) diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go index 5eb35563..44841096 100644 --- a/routing/result_interpretation.go +++ b/routing/result_interpretation.go @@ -7,6 +7,11 @@ import ( "github.com/lightningnetwork/lnd/routing/route" ) +// Instantiate variables to allow taking a reference from the failure reason. +var ( + reasonError = channeldb.FailureReasonError +) + // interpretedResult contains the result of the interpretation of a payment // outcome. type interpretedResult struct { @@ -52,21 +57,14 @@ func (i *interpretedResult) processFail( rt *route.Route, errSourceIdx *int, failure lnwire.FailureMessage) (bool, channeldb.FailureReason) { - var failureSourceIdxInt int - if errSourceIdx == nil { - // If the failure message could not be decrypted, attribute the - // failure to our own outgoing channel. - // - // TODO(joostager): Penalize all channels in the route. - failureSourceIdxInt = 0 - failure = lnwire.NewTemporaryChannelFailure(nil) - } else { - failureSourceIdxInt = *errSourceIdx + i.processPaymentOutcomeUnknown(rt) + return false, 0 } var failureVertex route.Vertex + failureSourceIdxInt := *errSourceIdx if failureSourceIdxInt > 0 { failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes } else { @@ -241,6 +239,74 @@ func (i *interpretedResult) processFail( } } +// processPaymentOutcomeUnknown processes a payment outcome for which no failure +// message or source is available. +func (i *interpretedResult) processPaymentOutcomeUnknown(route *route.Route) { + n := len(route.Hops) + + // If this is a direct payment, the destination must be at fault. + if n == 1 { + i.failNode(route, n) + i.finalFailureReason = &reasonError + return + } + + // Otherwise penalize all channels in the route to make sure the + // responsible node is at least hit too. We even penalize the connection + // to our own peer, because that peer could also be responsible. + i.failPairRange(route, 0, n-1) +} + +// failNode marks the node indicated by idx in the route as failed. This +// function intentionally panics when the self node is failed. +func (i *interpretedResult) failNode(rt *route.Route, idx int) { + i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes +} + +// failPairRange marks the node pairs from node fromIdx to node toIdx as failed. +func (i *interpretedResult) failPairRange( + rt *route.Route, fromIdx, toIdx int) { + + for idx := fromIdx; idx <= toIdx; idx++ { + i.failPair(rt, idx) + } +} + +// failPair marks a pair as failed in both directions. +func (i *interpretedResult) failPair( + rt *route.Route, idx int) { + + pair, _ := getPair(rt, idx) + + // Report pair in both directions without a minimum penalization amount. + i.pairResults[pair] = 0 + i.pairResults[pair.Reverse()] = 0 +} + +// getPair returns a node pair from the route and the amount passed between that +// pair. +func getPair(rt *route.Route, channelIdx int) (DirectedNodePair, + lnwire.MilliSatoshi) { + + nodeTo := rt.Hops[channelIdx].PubKeyBytes + var ( + nodeFrom route.Vertex + amt lnwire.MilliSatoshi + ) + + if channelIdx == 0 { + nodeFrom = rt.SourcePubKey + amt = rt.TotalAmount + } else { + nodeFrom = rt.Hops[channelIdx-1].PubKeyBytes + amt = rt.Hops[channelIdx-1].AmtToForward + } + + pair := NewDirectedNodePair(nodeFrom, nodeTo) + + return pair, amt +} + // getFailedPair tries to locate the failing pair given a route and the pubkey // of the node that sent the failure. 
It will assume that the failure is // associated with the outgoing channel set of the failing node. As a second From 67e40d44335aa19efa17c98bab825f628d7b3361 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Fri, 16 Aug 2019 19:46:51 +0200 Subject: [PATCH 5/7] htlcswitch: always assume an onion error for malformed htlc failures Previously a temporary channel failure was returning for unexpected malformed htlc failures. This is not what we want to communicate to the sender, because the sender may apply a penalty to us only. Returning the temporary channel failure is especially problematic if we ourselves are the sender and the malformed htlc failure comes from our direct peer. When interpretating the failure, we aren't able to distinguish anymore between our channel not having enough balance and our peer sending an unexpected failure back. --- htlcswitch/link.go | 17 +++++++++++++++-- htlcswitch/switch_test.go | 4 ++-- lntest/itest/lnd_test.go | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 9e940529..f50cc580 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -1637,8 +1637,21 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) { OnionSHA256: msg.ShaOnionBlob, } default: - log.Errorf("Unknown failure code: %v", msg.FailureCode) - failure = &lnwire.FailTemporaryChannelFailure{} + log.Warnf("Unexpected failure code received in "+ + "UpdateFailMailformedHTLC: %v", msg.FailureCode) + + // We don't just pass back the error we received from + // our successor. Otherwise we might report a failure + // that penalizes us more than needed. If the onion that + // we forwarded was correct, the node should have been + // able to send back its own failure. The node did not + // send back its own failure, so we assume there was a + // problem with the onion and report that back. We reuse + // the invalid onion key failure because there is no + // specific error for this case. + failure = &lnwire.FailInvalidOnionKey{ + OnionSHA256: msg.ShaOnionBlob, + } } // With the error parsed, we'll convert the into it's opaque diff --git a/htlcswitch/switch_test.go b/htlcswitch/switch_test.go index 6e8a1698..59f6fae9 100644 --- a/htlcswitch/switch_test.go +++ b/htlcswitch/switch_test.go @@ -2097,8 +2097,8 @@ func TestUpdateFailMalformedHTLCErrorConversion(t *testing.T) { fwdingErr := err.(*ForwardingError) failureMsg := fwdingErr.FailureMessage - if _, ok := failureMsg.(*lnwire.FailTemporaryChannelFailure); !ok { - t.Fatalf("expected temp chan failure instead got: %v", + if _, ok := failureMsg.(*lnwire.FailInvalidOnionKey); !ok { + t.Fatalf("expected onion failure instead got: %v", fwdingErr.FailureMessage) } } diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go index ea0996ab..751edf02 100644 --- a/lntest/itest/lnd_test.go +++ b/lntest/itest/lnd_test.go @@ -3570,7 +3570,7 @@ func testSphinxReplayPersistence(net *lntest.NetworkHarness, t *harnessTest) { // Construct the response we expect after sending a duplicate packet // that fails due to sphinx replay detection. 
- replayErr := "TemporaryChannelFailure" + replayErr := "InvalidOnionKey" if !strings.Contains(resp.PaymentError, replayErr) { t.Fatalf("received payment error: %v, expected %v", resp.PaymentError, replayErr) From e7a457f1ce4480fe0fe3781962d67d72632f14db Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Sat, 17 Aug 2019 09:58:36 +0200 Subject: [PATCH 6/7] routing: query bandwidth hints before each payment attempt Previously the bandwidth hints were only queried once per payment. This did not allow for concurrent payments changing channel balances. --- routing/payment_session.go | 15 +++++++++++++-- routing/payment_session_source.go | 22 +++++++++------------- routing/payment_session_test.go | 5 +++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/routing/payment_session.go b/routing/payment_session.go index fe10a501..3894bc60 100644 --- a/routing/payment_session.go +++ b/routing/payment_session.go @@ -33,7 +33,7 @@ type PaymentSession interface { type paymentSession struct { additionalEdges map[route.Vertex][]*channeldb.ChannelEdgePolicy - bandwidthHints map[uint64]lnwire.MilliSatoshi + getBandwidthHints func() (map[uint64]lnwire.MilliSatoshi, error) sessionSource *SessionSource @@ -97,11 +97,22 @@ func (p *paymentSession) RequestRoute(payment *LightningPayment, CltvLimit: cltvLimit, } + // We'll also obtain a set of bandwidthHints from the lower layer for + // each of our outbound channels. This will allow the path finding to + // skip any links that aren't active or just don't have enough bandwidth + // to carry the payment. New bandwidth hints are queried for every new + // path finding attempt, because concurrent payments may change + // balances. + bandwidthHints, err := p.getBandwidthHints() + if err != nil { + return nil, err + } + path, err := p.pathFinder( &graphParams{ graph: ss.Graph, additionalEdges: p.additionalEdges, - bandwidthHints: p.bandwidthHints, + bandwidthHints: bandwidthHints, }, restrictions, &ss.PathFindingConfig, ss.SelfNode.PubKeyBytes, payment.Target, diff --git a/routing/payment_session_source.go b/routing/payment_session_source.go index 1b8fea5f..b5175cf7 100644 --- a/routing/payment_session_source.go +++ b/routing/payment_session_source.go @@ -97,26 +97,22 @@ func (m *SessionSource) NewPaymentSession(routeHints [][]zpay32.HopHint, } } - // We'll also obtain a set of bandwidthHints from the lower layer for - // each of our outbound channels. This will allow the path finding to - // skip any links that aren't active or just don't have enough - // bandwidth to carry the payment. 
sourceNode, err := m.Graph.SourceNode() if err != nil { return nil, err } - bandwidthHints, err := generateBandwidthHints( - sourceNode, m.QueryBandwidth, - ) - if err != nil { - return nil, err + + getBandwidthHints := func() (map[uint64]lnwire.MilliSatoshi, + error) { + + return generateBandwidthHints(sourceNode, m.QueryBandwidth) } return &paymentSession{ - additionalEdges: edges, - bandwidthHints: bandwidthHints, - sessionSource: m, - pathFinder: findPath, + additionalEdges: edges, + getBandwidthHints: getBandwidthHints, + sessionSource: m, + pathFinder: findPath, }, nil } diff --git a/routing/payment_session_test.go b/routing/payment_session_test.go index 627f4f85..5d31e902 100644 --- a/routing/payment_session_test.go +++ b/routing/payment_session_test.go @@ -41,6 +41,11 @@ func TestRequestRoute(t *testing.T) { } session := &paymentSession{ + getBandwidthHints: func() (map[uint64]lnwire.MilliSatoshi, + error) { + + return nil, nil + }, sessionSource: sessionSource, pathFinder: findPath, } From d9ec158412284e907fb97a52dbafd3d4f88e8ec9 Mon Sep 17 00:00:00 2001 From: Joost Jager Date: Tue, 25 Jun 2019 10:51:55 +0200 Subject: [PATCH 7/7] routing: stricter payment result interpretation This commit overhauls the interpretation of failed payments. It changes the interpretation rules so that we always apply the strongest possible set of penalties, without making assumptions that would hurt good nodes. Main changes are: - Apply different rule sets for intermediate and final nodes. Both types of nodes have different sets of failures that we expect. Penalize nodes that send unexpected failure messages. - Distinguish between direct payments and multi-hop payments. For direct payments, we can infer more about the performance of our peer because we trust ourselves. - In many cases it is impossible for the sender to determine which of the two nodes in a pair is responsible for the failure. In this situation, we now penalize bidirectionally. This does not hurt the good node of the pair, because only its connection to a bad node is penalized. - Previously we always penalized the outgoing connection of the reporting node. This is incorrect for policy related failures. For policy related failures, it could also be that the reporting node received a wrongly crafted htlc from its predecessor. By penalizing the incoming channel, we surely hit the responsible node. - FailExpiryTooSoon is a failure that could have been caused by any node up to the reporting node by delaying forwarding of the htlc. We don't know which node is responsible, therefore we now penalize all node pairs in the route. --- routing/result_interpretation.go | 439 ++++++++++++++------------ routing/result_interpretation_test.go | 49 ++- 2 files changed, 280 insertions(+), 208 deletions(-) diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go index 44841096..00703c28 100644 --- a/routing/result_interpretation.go +++ b/routing/result_interpretation.go @@ -2,14 +2,14 @@ package routing import ( "github.com/lightningnetwork/lnd/channeldb" - "github.com/lightningnetwork/lnd/lnwire" "github.com/lightningnetwork/lnd/routing/route" ) // Instantiate variables to allow taking a reference from the failure reason. 
var ( - reasonError = channeldb.FailureReasonError + reasonError = channeldb.FailureReasonError + reasonIncorrectDetails = channeldb.FailureReasonIncorrectPaymentDetails ) // interpretedResult contains the result of the interpretation of a payment @@ -44,10 +44,7 @@ func interpretResult(rt *route.Route, failureSrcIdx *int, pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi), } - final, reason := i.processFail(rt, failureSrcIdx, failure) - if final { - i.finalFailureReason = &reason - } + i.processFail(rt, failureSrcIdx, failure) return i } @@ -55,187 +52,240 @@ func interpretResult(rt *route.Route, failureSrcIdx *int, // processFail processes a failed payment attempt. func (i *interpretedResult) processFail( rt *route.Route, errSourceIdx *int, - failure lnwire.FailureMessage) (bool, channeldb.FailureReason) { + failure lnwire.FailureMessage) { if errSourceIdx == nil { i.processPaymentOutcomeUnknown(rt) - return false, 0 + return } - var failureVertex route.Vertex + switch *errSourceIdx { - failureSourceIdxInt := *errSourceIdx - if failureSourceIdxInt > 0 { - failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes - } else { - failureVertex = rt.SourcePubKey + // We are the source of the failure. + case 0: + i.processPaymentOutcomeSelf(rt, failure) + + // A failure from the final hop was received. + case len(rt.Hops): + i.processPaymentOutcomeFinal( + rt, failure, + ) + + // An intermediate hop failed. Interpret the outcome, update reputation + // and try again. + default: + i.processPaymentOutcomeIntermediate( + rt, *errSourceIdx, failure, + ) } - log.Tracef("Node %x (index %v) reported failure when sending htlc", - failureVertex, errSourceIdx) +} - // Always determine chan id ourselves, because a channel update with id - // may not be available. - failedPair, failedAmt := getFailedPair( - rt, failureSourceIdxInt, - ) +// processPaymentOutcomeSelf handles failures sent by ourselves. +func (i *interpretedResult) processPaymentOutcomeSelf( + rt *route.Route, failure lnwire.FailureMessage) { switch failure.(type) { - // If the end destination didn't know the payment hash or we sent the - // wrong payment amount to the destination, then we'll terminate - // immediately. - case *lnwire.FailIncorrectDetails: - // TODO(joostjager): Check onionErr.Amount() whether it matches - // what we expect. (Will it ever not match, because if not - // final_incorrect_htlc_amount would be returned?) + // We receive a malformed htlc failure from our peer. We trust ourselves + // to send the correct htlc, so our peer must be at fault. + case *lnwire.FailInvalidOnionVersion, + *lnwire.FailInvalidOnionHmac, + *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonIncorrectPaymentDetails + i.failNode(rt, 1) - // If we sent the wrong amount to the destination, then we'll exit - // early. - case *lnwire.FailIncorrectPaymentAmount: - return true, channeldb.FailureReasonIncorrectPaymentDetails + // If this was a payment to a direct peer, we can stop trying. + if len(rt.Hops) == 1 { + i.finalFailureReason = &reasonError + } - // If the time-lock that was extended to the final node was incorrect, - // then we can't proceed. - case *lnwire.FailFinalIncorrectCltvExpiry: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc that expires too soon. - // In that case we should continue routing. - return true, channeldb.FailureReasonError - - // If we crafted an invalid onion payload for the final node, then we'll - // exit early. 
- case *lnwire.FailFinalIncorrectHtlcAmount: - // TODO(joostjager): Take into account that second last hop may - // have deliberately handed out an htlc with a too low value. In - // that case we should continue routing. - return true, channeldb.FailureReasonError - - // Similarly, if the HTLC expiry that we extended to the final hop - // expires too soon, then will fail the payment. - // - // TODO(roasbeef): can happen to to race condition, try again with - // recent block height - case *lnwire.FailFinalExpiryTooSoon: - // TODO(joostjager): Take into account that any hop may have - // delayed. Ideally we should continue routing. Knowing the - // delaying node at this point would help. - return true, channeldb.FailureReasonIncorrectPaymentDetails - - // If we erroneously attempted to cross a chain border, then we'll - // cancel the payment. - case *lnwire.FailInvalidRealm: - return true, channeldb.FailureReasonError - - // If we get a notice that the expiry was too soon for an intermediate - // node, then we'll prune out the node that sent us this error, as it - // doesn't now what the correct block height is. - case *lnwire.FailExpiryTooSoon: - i.nodeFailure = &failureVertex - return false, 0 - - // If we hit an instance of onion payload corruption or an invalid - // version, then we'll exit early as this shouldn't happen in the - // typical case. - // - // TODO(joostjager): Take into account that the previous hop may have - // tampered with the onion. Routing should continue using other paths. - case *lnwire.FailInvalidOnionVersion: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionHmac: - return true, channeldb.FailureReasonError - case *lnwire.FailInvalidOnionKey: - return true, channeldb.FailureReasonError - - // If we get a failure due to violating the minimum amount, we'll apply - // the new minimum amount and retry routing. - case *lnwire.FailAmountBelowMinimum: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // If we get a failure due to a fee, we'll apply the new fee update, and - // retry our attempt using the newly updated fees. - case *lnwire.FailFeeInsufficient: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // If we get the failure for an intermediate node that disagrees with - // our time lock values, then we'll apply the new delta value and try it - // once more. - case *lnwire.FailIncorrectCltvExpiry: - i.policyFailure = &failedPair - i.pairResults[failedPair] = 0 - return false, 0 - - // The outgoing channel that this node was meant to forward one is - // currently disabled, so we'll apply the update and continue. - case *lnwire.FailChannelDisabled: - i.pairResults[failedPair] = 0 - return false, 0 - - // It's likely that the outgoing channel didn't have sufficient - // capacity, so we'll prune this edge for now, and continue onwards with - // our path finding. - case *lnwire.FailTemporaryChannelFailure: - i.pairResults[failedPair] = failedAmt - return false, 0 - - // If the send fail due to a node not having the required features, then - // we'll note this error and continue. - case *lnwire.FailRequiredNodeFeatureMissing: - i.nodeFailure = &failureVertex - return false, 0 - - // If the send fail due to a node not having the required features, then - // we'll note this error and continue. 
- case *lnwire.FailRequiredChannelFeatureMissing: - i.nodeFailure = &failureVertex - return false, 0 - - // If the next hop in the route wasn't known or offline, we'll only the - // channel which we attempted to route over. This is conservative, and - // it can handle faulty channels between nodes properly. Additionally, - // this guards against routing nodes returning errors in order to - // attempt to black list another node. - case *lnwire.FailUnknownNextPeer: - i.pairResults[failedPair] = 0 - return false, 0 - - // If the node wasn't able to forward for which ever reason, then we'll - // note this and continue with the routes. - case *lnwire.FailTemporaryNodeFailure: - i.nodeFailure = &failureVertex - return false, 0 - - case *lnwire.FailPermanentNodeFailure: - i.nodeFailure = &failureVertex - return false, 0 - - // If we crafted a route that contains a too long time lock for an - // intermediate node, we'll prune the node. As there currently is no way - // of knowing that node's maximum acceptable cltv, we cannot take this - // constraint into account during routing. - // - // TODO(joostjager): Record the rejected cltv and use that as a hint - // during future path finding through that node. - case *lnwire.FailExpiryTooFar: - i.nodeFailure = &failureVertex - return false, 0 - - // If we get a permanent channel or node failure, then we'll prune the - // channel in both directions and continue with the rest of the routes. - case *lnwire.FailPermanentChannelFailure: - i.pairResults[failedPair] = 0 - i.pairResults[failedPair.Reverse()] = 0 - return false, 0 - - // Any other failure or an empty failure will get the node pruned. + // Any other failure originating from ourselves should be temporary and + // caused by changing conditions between path finding and execution of + // the payment. We just retry and trust that the information locally + // available in the link has been updated. default: - i.nodeFailure = &failureVertex - return false, 0 + log.Warnf("Routing failure for local channel %v occurred", + rt.Hops[0].ChannelID) + } +} + +// processPaymentOutcomeFinal handles failures sent by the final hop. +func (i *interpretedResult) processPaymentOutcomeFinal( + route *route.Route, failure lnwire.FailureMessage) { + + n := len(route.Hops) + + // If a failure from the final node is received, we will fail the + // payment in almost all cases. Only when the penultimate node sends an + // incorrect htlc, we want to retry via another route. Invalid onion + // failures are not expected, because the final node wouldn't be able to + // encrypt that failure. + switch failure.(type) { + + // Expiry or amount of the HTLC doesn't match the onion, try another + // route. + case *lnwire.FailFinalIncorrectCltvExpiry, + *lnwire.FailFinalIncorrectHtlcAmount: + + // We trust ourselves. If this is a direct payment, we penalize + // the final node and fail the payment. + if n == 1 { + i.failNode(route, n) + i.finalFailureReason = &reasonError + + return + } + + // Otherwise penalize the last pair of the route and retry. + // Either the final node is at fault, or it gets sent a bad htlc + // from its predecessor. + i.failPair(route, n-1) + + // We are using wrong payment hash or amount, fail the payment. + case *lnwire.FailIncorrectPaymentAmount, + *lnwire.FailIncorrectDetails: + + i.finalFailureReason = &reasonIncorrectDetails + + // The HTLC that was extended to the final hop expires too soon. Fail + // the payment, because we may be using the wrong final cltv delta. 
+	case *lnwire.FailFinalExpiryTooSoon:
+		// TODO(roasbeef): can happen due to a race condition, try
+		// again with recent block height
+
+		// TODO(joostjager): can also happen because a node delayed
+		// deliberately. What to penalize?
+		i.finalFailureReason = &reasonIncorrectDetails
+
+	default:
+		// All other errors are considered terminal if coming from the
+		// final hop. They indicate that something is wrong at the
+		// recipient, so we do apply a penalty.
+		i.failNode(route, n)
+		i.finalFailureReason = &reasonError
+	}
+}
+
+// processPaymentOutcomeIntermediate handles failures sent by an intermediate
+// hop.
+func (i *interpretedResult) processPaymentOutcomeIntermediate(
+	route *route.Route, errorSourceIdx int,
+	failure lnwire.FailureMessage) {
+
+	reportOutgoing := func() {
+		i.failPair(
+			route, errorSourceIdx,
+		)
+	}
+
+	reportOutgoingBalance := func() {
+		i.failPairBalance(
+			route, errorSourceIdx,
+		)
+	}
+
+	reportIncoming := func() {
+		// We trust ourselves. If the error comes from the first hop, we
+		// can penalize the whole node. In that case there is no
+		// uncertainty as to which node to blame.
+		if errorSourceIdx == 1 {
+			i.failNode(route, errorSourceIdx)
+			return
+		}
+
+		// Otherwise report the incoming pair.
+		i.failPair(
+			route, errorSourceIdx-1,
+		)
+	}
+
+	reportAll := func() {
+		// We trust ourselves. If the error comes from the first hop, we
+		// can penalize the whole node. In that case there is no
+		// uncertainty as to which node to blame.
+		if errorSourceIdx == 1 {
+			i.failNode(route, errorSourceIdx)
+			return
+		}
+
+		// Otherwise penalize all pairs up to the error source. This
+		// includes our own outgoing connection.
+		i.failPairRange(
+			route, 0, errorSourceIdx-1,
+		)
+	}
+
+	switch failure.(type) {
+
+	// If a node reports onion payload corruption or an invalid version,
+	// that node may be responsible, but it could also be that it is just
+	// relaying a malformed htlc failure from its successor. By reporting
+	// the outgoing channel set, we will surely hit the responsible node.
+	// At this point, it is not possible that the node's predecessor
+	// corrupted the onion blob. If the predecessor had corrupted the
+	// payload, the error source wouldn't have been able to encrypt this
+	// failure message for us.
+	case *lnwire.FailInvalidOnionVersion,
+		*lnwire.FailInvalidOnionHmac,
+		*lnwire.FailInvalidOnionKey:
+
+		reportOutgoing()
+
+	// If the next hop in the route wasn't known or was offline, we'll only
+	// penalize the channel set which we attempted to route over. This is
+	// conservative, and it can handle faulty channels between nodes
+	// properly. Additionally, this guards against routing nodes returning
+	// errors in order to attempt to black list another node.
+	case *lnwire.FailUnknownNextPeer:
+		reportOutgoing()
+
+	// If we get a permanent channel failure, we'll prune the channel set
+	// in both directions and continue with the rest of the routes.
+	case *lnwire.FailPermanentChannelFailure:
+		reportOutgoing()
+
+	// When an HTLC parameter is incorrect, the node sending the error may
+	// be doing something wrong. But it could also be that its predecessor
+	// is intentionally modifying the htlc parameters that we instructed it
+	// to use via the hop payload. Therefore we penalize the incoming node
+	// pair. A third cause of this error may be that we have an out of date
+	// channel update. This is handled by the second chance logic up in
+	// mission control.
+	case *lnwire.FailAmountBelowMinimum,
+		*lnwire.FailFeeInsufficient,
+		*lnwire.FailIncorrectCltvExpiry,
+		*lnwire.FailChannelDisabled:
+
+		// Set the node pair for which a channel update may be out of
+		// date. The second chance logic uses the policyFailure field.
+		i.policyFailure = &DirectedNodePair{
+			From: route.Hops[errorSourceIdx-1].PubKeyBytes,
+			To:   route.Hops[errorSourceIdx].PubKeyBytes,
+		}
+
+		// We report the incoming channel. If a second chance is
+		// granted in mission control, this report is ignored.
+		reportIncoming()
+
+	// If the outgoing channel doesn't have enough capacity, we penalize.
+	// But we penalize only in a single direction and only for amounts at
+	// or above the attempted amount.
+	case *lnwire.FailTemporaryChannelFailure:
+		reportOutgoingBalance()
+
+	// If FailExpiryTooSoon is received, there must have been some delay
+	// along the path. We can't know which node is causing the delay, so we
+	// penalize all of them up to the error source.
+	//
+	// Alternatively it could also be that we ourselves have fallen behind
+	// somehow. We ignore that case for now.
+	case *lnwire.FailExpiryTooSoon:
+		reportAll()
+
+	// In all other cases, we penalize the reporting node. These are all
+	// failures that should not happen.
+	default:
+		i.failNode(route, errorSourceIdx)
 	}
 }
@@ -263,7 +313,8 @@ func (i *interpretedResult) failNode(rt *route.Route, idx int) {
 	i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes
 }
-// failPairRange marks the node pairs from node fromIdx to node toIdx as failed.
+// failPairRange marks the node pairs from node fromIdx to node toIdx as failed
+// in both directions.
 func (i *interpretedResult) failPairRange(
 	rt *route.Route, fromIdx, toIdx int) {
@@ -283,6 +334,15 @@ func (i *interpretedResult) failPair(
 	i.pairResults[pair.Reverse()] = 0
 }
+// failPairBalance marks a pair as failed with a minimum penalization amount.
+func (i *interpretedResult) failPairBalance(
+	rt *route.Route, channelIdx int) {
+
+	pair, amt := getPair(rt, channelIdx)
+
+	i.pairResults[pair] = amt
+}
+
 // getPair returns a node pair from the route and the amount passed between that
 // pair.
 func getPair(rt *route.Route, channelIdx int) (DirectedNodePair,
@@ -306,36 +366,3 @@ func getPair(rt *route.Route, channelIdx int) (DirectedNodePair,
 	return pair, amt
 }
-
-// getFailedPair tries to locate the failing pair given a route and the pubkey
-// of the node that sent the failure. It will assume that the failure is
-// associated with the outgoing channel set of the failing node. As a second
-// result, it returns the amount sent between the pair.
-func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
-	lnwire.MilliSatoshi) {
-
-	// Determine if we have a failure from the final hop. If it is, we
-	// assume that the failing channel is the incoming channel.
-	//
-	// TODO(joostjager): In this case, certain types of failures are not
-	// expected. For example FailUnknownNextPeer. This could be a reason to
-	// prune the node?
-	if failureSource == len(route.Hops) {
-		failureSource--
-	}
-
-	// As this failure indicates that the target channel was unable to carry
-	// this HTLC (for w/e reason), we'll return the _outgoing_ channel that
-	// the source of the failure was meant to pass the HTLC along to.
- if failureSource == 0 { - return NewDirectedNodePair( - route.SourcePubKey, - route.Hops[0].PubKeyBytes, - ), route.TotalAmount - } - - return NewDirectedNodePair( - route.Hops[failureSource-1].PubKeyBytes, - route.Hops[failureSource].PubKeyBytes, - ), route.Hops[failureSource-1].AmtToForward -} diff --git a/routing/result_interpretation_test.go b/routing/result_interpretation_test.go index b7b29577..86b65aa4 100644 --- a/routing/result_interpretation_test.go +++ b/routing/result_interpretation_test.go @@ -14,6 +14,14 @@ var ( {1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4}, } + routeOneHop = route.Route{ + SourcePubKey: hops[0], + TotalAmount: 100, + Hops: []*route.Hop{ + {PubKeyBytes: hops[1], AmtToForward: 99}, + }, + } + routeTwoHop = route.Route{ SourcePubKey: hops[0], TotalAmount: 100, @@ -35,6 +43,10 @@ var ( } ) +func getTestPair(from, to int) DirectedNodePair { + return NewDirectedNodePair(hops[from], hops[to]) +} + type resultTestCase struct { name string route *route.Route @@ -55,7 +67,7 @@ var resultTestCases = []resultTestCase{ expectedResult: &interpretedResult{ pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{ - NewDirectedNodePair(hops[1], hops[2]): 99, + getTestPair(1, 2): 99, }, }, }, @@ -68,7 +80,40 @@ var resultTestCases = []resultTestCase{ failure: lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}), expectedResult: &interpretedResult{ - nodeFailure: &hops[3], + pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{ + getTestPair(0, 1): 0, + getTestPair(1, 0): 0, + getTestPair(1, 2): 0, + getTestPair(2, 1): 0, + getTestPair(2, 3): 0, + getTestPair(3, 2): 0, + }, + }, + }, + + // Tests a malformed htlc from a direct peer. + { + name: "fail malformed htlc from direct peer", + route: &routeTwoHop, + failureSrcIdx: 0, + failure: lnwire.NewInvalidOnionKey(nil), + + expectedResult: &interpretedResult{ + nodeFailure: &hops[1], + }, + }, + + // Tests a malformed htlc from a direct peer that is also the final + // destination. + { + name: "fail malformed htlc from direct final peer", + route: &routeOneHop, + failureSrcIdx: 0, + failure: lnwire.NewInvalidOnionKey(nil), + + expectedResult: &interpretedResult{ + finalFailureReason: &reasonError, + nodeFailure: &hops[1], }, }, }
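
Illustration (not part of the patch): with the new processPaymentOutcomeFinal, an incorrect final cltv expiry reported by the destination of a multi-hop route only penalizes the last pair and leaves the payment non-final, so another route can be tried. A minimal sketch of how this could be expressed as an extra case in the resultTestCases table above, assuming the helpers that table already defines (routeTwoHop, getTestPair); the zero-valued failure literal is used purely for illustration:

	// Tests an incorrect final cltv expiry from the final hop of a two-hop
	// route. The last pair is penalized in both directions and no final
	// failure reason is set, so the payment can be retried.
	{
		name:          "fail incorrect final cltv from final hop",
		route:         &routeTwoHop,
		failureSrcIdx: 2,
		failure:       &lnwire.FailFinalIncorrectCltvExpiry{},

		expectedResult: &interpretedResult{
			pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
				getTestPair(1, 2): 0,
				getTestPair(2, 1): 0,
			},
		},
	},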
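Similarly, the policy-failure branch of processPaymentOutcomeIntermediate could be sketched as a test case (again illustrative only, under the same assumptions): a fee-insufficient failure reported by a direct peer records the pair whose channel update may be stale in policyFailure and, because there is no uncertainty about the failure source, penalizes that peer itself rather than a pair:

	// Tests a fee insufficient failure from a direct peer. The policy
	// failure is recorded for the second chance logic and the reporting
	// node itself is penalized.
	{
		name:          "fail fee insufficient from direct peer",
		route:         &routeTwoHop,
		failureSrcIdx: 1,
		failure:       &lnwire.FailFeeInsufficient{},

		expectedResult: &interpretedResult{
			policyFailure: &DirectedNodePair{
				From: hops[1],
				To:   hops[2],
			},
			nodeFailure: &hops[1],
		},
	},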