diff --git a/routing/missioncontrol.go b/routing/missioncontrol.go
index f1f28dc4..65e342e1 100644
--- a/routing/missioncontrol.go
+++ b/routing/missioncontrol.go
@@ -322,55 +322,6 @@ func (m *MissionControl) requestSecondChance(timestamp time.Time,
     return false
 }

-// reportVertexFailure reports a node level failure.
-func (m *MissionControl) reportVertexFailure(timestamp time.Time,
-    v route.Vertex) {
-
-    log.Debugf("Reporting vertex %v failure to Mission Control", v)
-
-    m.Lock()
-    defer m.Unlock()
-
-    m.lastNodeFailure[v] = timestamp
-}
-
-// reportPairPolicyFailure reports a policy related failure.
-func (m *MissionControl) reportPairPolicyFailure(timestamp time.Time,
-    failedPair DirectedNodePair) {
-
-    m.Lock()
-    defer m.Unlock()
-
-    // We may have an out of date graph. Therefore we don't always penalize
-    // immediately. If some time has passed since the last policy failure,
-    // we grant the node a second chance at forwarding the payment.
-    if m.requestSecondChance(
-        timestamp, failedPair.From, failedPair.To,
-    ) {
-        return
-    }
-
-    m.lastNodeFailure[failedPair.From] = timestamp
-}
-
-// reportPairFailure reports a pair level failure.
-//
-// TODO(roasbeef): also add value attempted to send and capacity of channel
-func (m *MissionControl) reportPairFailure(timestamp time.Time,
-    failedPair DirectedNodePair, minPenalizeAmt lnwire.MilliSatoshi) {
-
-    log.Debugf("Reporting pair %v failure to Mission Control", failedPair)
-
-    m.Lock()
-    defer m.Unlock()
-
-    pair := NewDirectedNodePair(failedPair.From, failedPair.To)
-    m.lastPairFailure[pair] = pairFailure{
-        minPenalizeAmt: minPenalizeAmt,
-        timestamp:      timestamp,
-    }
-}
-
 // GetHistorySnapshot takes a snapshot from the current mission control state
 // and actual probability estimates.
 func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
@@ -445,257 +396,51 @@ func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
     }

     // Apply result to update mission control state.
-    final, reason := m.applyPaymentResult(result)
+    reason := m.applyPaymentResult(result)

-    // Convert final bool and reason to nillable reason.
-    if final {
-        return &reason, nil
-    }
-
-    return nil, nil
+    return reason, nil
 }

 // applyPaymentResult applies a payment result as input for future probability
 // estimates. It returns a bool indicating whether this error is a final error
 // and no further payment attempts need to be made.
-func (m *MissionControl) applyPaymentResult(result *paymentResult) (
-    bool, channeldb.FailureReason) {
+func (m *MissionControl) applyPaymentResult(
+    result *paymentResult) *channeldb.FailureReason {

-    var (
-        failureSourceIdxInt int
-        failure             lnwire.FailureMessage
+    // Interpret result.
+    i := interpretResult(
+        result.route, result.failureSourceIdx, result.failure,
     )

-    if result.failureSourceIdx == nil {
-        // If the failure message could not be decrypted, attribute the
-        // failure to our own outgoing channel.
-        //
-        // TODO(joostager): Penalize all channels in the route.
-        failureSourceIdxInt = 0
-        failure = lnwire.NewTemporaryChannelFailure(nil)
-    } else {
-        failureSourceIdxInt = *result.failureSourceIdx
-        failure = result.failure
+    // Update mission control state using the interpretation.
+    m.Lock()
+    defer m.Unlock()
+
+    if i.policyFailure != nil {
+        if m.requestSecondChance(
+            result.timeReply,
+            i.policyFailure.From, i.policyFailure.To,
+        ) {
+            return nil
+        }
     }

-    var failureVertex route.Vertex
+    if i.nodeFailure != nil {
+        log.Debugf("Reporting node failure to Mission Control: "+
+            "node=%v", *i.nodeFailure)

-    if failureSourceIdxInt > 0 {
-        failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes
-    } else {
-        failureVertex = result.route.SourcePubKey
+        m.lastNodeFailure[*i.nodeFailure] = result.timeReply
     }

-    log.Tracef("Node %x (index %v) reported failure when sending htlc",
-        failureVertex, result.failureSourceIdx)
-
-    // Always determine chan id ourselves, because a channel update with id
-    // may not be available.
-    failedPair, failedAmt := getFailedPair(
-        result.route, failureSourceIdxInt,
-    )
+    for pair, minPenalizeAmt := range i.pairResults {
+        log.Debugf("Reporting pair failure to Mission Control: "+
+            "pair=%v, minPenalizeAmt=%v", pair, minPenalizeAmt)

-    switch failure.(type) {
-
-    // If the end destination didn't know the payment
-    // hash or we sent the wrong payment amount to the
-    // destination, then we'll terminate immediately.
-    case *lnwire.FailIncorrectDetails:
-        // TODO(joostjager): Check onionErr.Amount() whether it matches
-        // what we expect. (Will it ever not match, because if not
-        // final_incorrect_htlc_amount would be returned?)
-
-        return true, channeldb.FailureReasonIncorrectPaymentDetails
-
-    // If we sent the wrong amount to the destination, then
-    // we'll exit early.
-    case *lnwire.FailIncorrectPaymentAmount:
-        return true, channeldb.FailureReasonIncorrectPaymentDetails
-
-    // If the time-lock that was extended to the final node
-    // was incorrect, then we can't proceed.
-    case *lnwire.FailFinalIncorrectCltvExpiry:
-        // TODO(joostjager): Take into account that second last hop may
-        // have deliberately handed out an htlc that expires too soon.
-        // In that case we should continue routing.
-        return true, channeldb.FailureReasonError
-
-    // If we crafted an invalid onion payload for the final
-    // node, then we'll exit early.
-    case *lnwire.FailFinalIncorrectHtlcAmount:
-        // TODO(joostjager): Take into account that second last hop may
-        // have deliberately handed out an htlc with a too low value. In
-        // that case we should continue routing.
-
-        return true, channeldb.FailureReasonError
-
-    // Similarly, if the HTLC expiry that we extended to
-    // the final hop expires too soon, then will fail the
-    // payment.
-    //
-    // TODO(roasbeef): can happen to to race condition, try
-    // again with recent block height
-    case *lnwire.FailFinalExpiryTooSoon:
-        // TODO(joostjager): Take into account that any hop may have
-        // delayed. Ideally we should continue routing. Knowing the
-        // delaying node at this point would help.
-        return true, channeldb.FailureReasonIncorrectPaymentDetails
-
-    // If we erroneously attempted to cross a chain border,
-    // then we'll cancel the payment.
-    case *lnwire.FailInvalidRealm:
-        return true, channeldb.FailureReasonError
-
-    // If we get a notice that the expiry was too soon for
-    // an intermediate node, then we'll prune out the node
-    // that sent us this error, as it doesn't now what the
-    // correct block height is.
-    case *lnwire.FailExpiryTooSoon:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    // If we hit an instance of onion payload corruption or an invalid
-    // version, then we'll exit early as this shouldn't happen in the
-    // typical case.
-    //
-    // TODO(joostjager): Take into account that the previous hop may have
-    // tampered with the onion. Routing should continue using other paths.
-    case *lnwire.FailInvalidOnionVersion:
-        return true, channeldb.FailureReasonError
-    case *lnwire.FailInvalidOnionHmac:
-        return true, channeldb.FailureReasonError
-    case *lnwire.FailInvalidOnionKey:
-        return true, channeldb.FailureReasonError
-
-    // If we get a failure due to violating the minimum
-    // amount, we'll apply the new minimum amount and retry
-    // routing.
-    case *lnwire.FailAmountBelowMinimum:
-        m.reportPairPolicyFailure(result.timeReply, failedPair)
-        return false, 0
-
-    // If we get a failure due to a fee, we'll apply the
-    // new fee update, and retry our attempt using the
-    // newly updated fees.
-    case *lnwire.FailFeeInsufficient:
-        m.reportPairPolicyFailure(result.timeReply, failedPair)
-        return false, 0
-
-    // If we get the failure for an intermediate node that
-    // disagrees with our time lock values, then we'll
-    // apply the new delta value and try it once more.
-    case *lnwire.FailIncorrectCltvExpiry:
-        m.reportPairPolicyFailure(result.timeReply, failedPair)
-        return false, 0
-
-    // The outgoing channel that this node was meant to
-    // forward one is currently disabled, so we'll apply
-    // the update and continue.
-    case *lnwire.FailChannelDisabled:
-        m.reportPairFailure(result.timeReply, failedPair, 0)
-        return false, 0
-
-    // It's likely that the outgoing channel didn't have
-    // sufficient capacity, so we'll prune this pair for
-    // now, and continue onwards with our path finding.
-    case *lnwire.FailTemporaryChannelFailure:
-        m.reportPairFailure(result.timeReply, failedPair, failedAmt)
-        return false, 0
-
-    // If the send fail due to a node not having the
-    // required features, then we'll note this error and
-    // continue.
-    case *lnwire.FailRequiredNodeFeatureMissing:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    // If the send fail due to a node not having the
-    // required features, then we'll note this error and
-    // continue.
-    case *lnwire.FailRequiredChannelFeatureMissing:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    // If the next hop in the route wasn't known or
-    // offline, we'll only the channel which we attempted
-    // to route over. This is conservative, and it can
-    // handle faulty channels between nodes properly.
-    // Additionally, this guards against routing nodes
-    // returning errors in order to attempt to black list
-    // another node.
-    case *lnwire.FailUnknownNextPeer:
-        m.reportPairFailure(result.timeReply, failedPair, 0)
-        return false, 0
-
-    // If the node wasn't able to forward for which ever
-    // reason, then we'll note this and continue with the
-    // routes.
-    case *lnwire.FailTemporaryNodeFailure:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    case *lnwire.FailPermanentNodeFailure:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    // If we crafted a route that contains a too long time
-    // lock for an intermediate node, we'll prune the node.
-    // As there currently is no way of knowing that node's
-    // maximum acceptable cltv, we cannot take this
-    // constraint into account during routing.
-    //
-    // TODO(joostjager): Record the rejected cltv and use
-    // that as a hint during future path finding through
-    // that node.
-    case *lnwire.FailExpiryTooFar:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
-
-    // If we get a permanent channel or node failure, then
-    // we'll prune the channel in both directions and
-    // continue with the rest of the routes.
-    case *lnwire.FailPermanentChannelFailure:
-        m.reportPairFailure(result.timeReply, failedPair, 0)
-        m.reportPairFailure(
-            result.timeReply, failedPair.Reverse(), 0,
-        )
-        return false, 0
-
-    // Any other failure or an empty failure will get the node pruned.
-    default:
-        m.reportVertexFailure(result.timeReply, failureVertex)
-        return false, 0
+        m.lastPairFailure[pair] = pairFailure{
+            minPenalizeAmt: minPenalizeAmt,
+            timestamp:      result.timeReply,
+        }
     }
-}
-
-// getFailedPair tries to locate the failing pair given a route and the pubkey
-// of the node that sent the failure. It will assume that the failure is
-// associated with the outgoing channel set of the failing node. As a second
-// result, it returns the amount sent between the pair.
-func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
-    lnwire.MilliSatoshi) {
-
-    // Determine if we have a failure from the final hop. If it is, we
-    // assume that the failing channel is the incoming channel.
-    //
-    // TODO(joostjager): In this case, certain types of failures are not
-    // expected. For example FailUnknownNextPeer. This could be a reason to
-    // prune the node?
-    if failureSource == len(route.Hops) {
-        failureSource--
-    }
-
-    // As this failure indicates that the target channel was unable to carry
-    // this HTLC (for w/e reason), we'll return the _outgoing_ channel that
-    // the source of the failure was meant to pass the HTLC along to.
-    if failureSource == 0 {
-        return NewDirectedNodePair(
-            route.SourcePubKey,
-            route.Hops[0].PubKeyBytes,
-        ), route.TotalAmount
-    }
-
-    return NewDirectedNodePair(
-        route.Hops[failureSource-1].PubKeyBytes,
-        route.Hops[failureSource].PubKeyBytes,
-    ), route.Hops[failureSource-1].AmtToForward
+
+    return i.finalFailureReason
 }
diff --git a/routing/result_interpretation.go b/routing/result_interpretation.go
new file mode 100644
index 00000000..5eb35563
--- /dev/null
+++ b/routing/result_interpretation.go
@@ -0,0 +1,275 @@
+package routing
+
+import (
+    "github.com/lightningnetwork/lnd/channeldb"
+
+    "github.com/lightningnetwork/lnd/lnwire"
+    "github.com/lightningnetwork/lnd/routing/route"
+)
+
+// interpretedResult contains the result of the interpretation of a payment
+// outcome.
+type interpretedResult struct {
+    // nodeFailure points to a node pubkey if all channels of that node are
+    // responsible for the result.
+    nodeFailure *route.Vertex
+
+    // pairResults contains a map of node pairs that could be responsible
+    // for the failure. The map values are the minimum amounts for which a
+    // future penalty should be applied.
+    pairResults map[DirectedNodePair]lnwire.MilliSatoshi
+
+    // finalFailureReason is set to a non-nil value if it makes no more
+    // sense to start another payment attempt. It will contain the reason
+    // why.
+    finalFailureReason *channeldb.FailureReason
+
+    // policyFailure is set to a node pair if there is a policy failure on
+    // that connection. This is used to control the second chance logic for
+    // policy failures.
+    policyFailure *DirectedNodePair
+}
+
+// interpretResult interprets a payment outcome and returns an object that
+// contains information required to update mission control.
+func interpretResult(rt *route.Route, failureSrcIdx *int,
+    failure lnwire.FailureMessage) *interpretedResult {
+
+    i := &interpretedResult{
+        pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi),
+    }
+
+    final, reason := i.processFail(rt, failureSrcIdx, failure)
+    if final {
+        i.finalFailureReason = &reason
+    }
+
+    return i
+}
+
+// processFail processes a failed payment attempt.
+func (i *interpretedResult) processFail(
+    rt *route.Route, errSourceIdx *int,
+    failure lnwire.FailureMessage) (bool, channeldb.FailureReason) {
+
+    var failureSourceIdxInt int
+
+    if errSourceIdx == nil {
+        // If the failure message could not be decrypted, attribute the
+        // failure to our own outgoing channel.
+        //
+        // TODO(joostjager): Penalize all channels in the route.
+        failureSourceIdxInt = 0
+        failure = lnwire.NewTemporaryChannelFailure(nil)
+    } else {
+        failureSourceIdxInt = *errSourceIdx
+    }
+
+    var failureVertex route.Vertex
+
+    if failureSourceIdxInt > 0 {
+        failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes
+    } else {
+        failureVertex = rt.SourcePubKey
+    }
+    log.Tracef("Node %x (index %v) reported failure when sending htlc",
+        failureVertex, errSourceIdx)
+
+    // Always determine chan id ourselves, because a channel update with id
+    // may not be available.
+    failedPair, failedAmt := getFailedPair(
+        rt, failureSourceIdxInt,
+    )
+
+    switch failure.(type) {
+
+    // If the end destination didn't know the payment hash or we sent the
+    // wrong payment amount to the destination, then we'll terminate
+    // immediately.
+    case *lnwire.FailIncorrectDetails:
+        // TODO(joostjager): Check onionErr.Amount() whether it matches
+        // what we expect. (Will it ever not match, because if not
+        // final_incorrect_htlc_amount would be returned?)
+
+        return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+    // If we sent the wrong amount to the destination, then we'll exit
+    // early.
+    case *lnwire.FailIncorrectPaymentAmount:
+        return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+    // If the time-lock that was extended to the final node was incorrect,
+    // then we can't proceed.
+    case *lnwire.FailFinalIncorrectCltvExpiry:
+        // TODO(joostjager): Take into account that second last hop may
+        // have deliberately handed out an htlc that expires too soon.
+        // In that case we should continue routing.
+        return true, channeldb.FailureReasonError
+
+    // If we crafted an invalid onion payload for the final node, then we'll
+    // exit early.
+    case *lnwire.FailFinalIncorrectHtlcAmount:
+        // TODO(joostjager): Take into account that second last hop may
+        // have deliberately handed out an htlc with a too low value. In
+        // that case we should continue routing.
+        return true, channeldb.FailureReasonError
+
+    // Similarly, if the HTLC expiry that we extended to the final hop
+    // expires too soon, then we'll fail the payment.
+    //
+    // TODO(roasbeef): can happen due to a race condition, try again with
+    // recent block height
+    case *lnwire.FailFinalExpiryTooSoon:
+        // TODO(joostjager): Take into account that any hop may have
+        // delayed. Ideally we should continue routing. Knowing the
+        // delaying node at this point would help.
+        return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+    // If we erroneously attempted to cross a chain border, then we'll
+    // cancel the payment.
+    case *lnwire.FailInvalidRealm:
+        return true, channeldb.FailureReasonError
+
+    // If we get a notice that the expiry was too soon for an intermediate
+    // node, then we'll prune out the node that sent us this error, as it
+    // doesn't know what the correct block height is.
+    case *lnwire.FailExpiryTooSoon:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    // If we hit an instance of onion payload corruption or an invalid
+    // version, then we'll exit early as this shouldn't happen in the
+    // typical case.
+    //
+    // TODO(joostjager): Take into account that the previous hop may have
+    // tampered with the onion. Routing should continue using other paths.
+    case *lnwire.FailInvalidOnionVersion:
+        return true, channeldb.FailureReasonError
+    case *lnwire.FailInvalidOnionHmac:
+        return true, channeldb.FailureReasonError
+    case *lnwire.FailInvalidOnionKey:
+        return true, channeldb.FailureReasonError
+
+    // If we get a failure due to violating the minimum amount, we'll apply
+    // the new minimum amount and retry routing.
+    case *lnwire.FailAmountBelowMinimum:
+        i.policyFailure = &failedPair
+        i.pairResults[failedPair] = 0
+        return false, 0
+
+    // If we get a failure due to a fee, we'll apply the new fee update, and
+    // retry our attempt using the newly updated fees.
+    case *lnwire.FailFeeInsufficient:
+        i.policyFailure = &failedPair
+        i.pairResults[failedPair] = 0
+        return false, 0
+
+    // If we get the failure for an intermediate node that disagrees with
+    // our time lock values, then we'll apply the new delta value and try it
+    // once more.
+    case *lnwire.FailIncorrectCltvExpiry:
+        i.policyFailure = &failedPair
+        i.pairResults[failedPair] = 0
+        return false, 0
+
+    // The outgoing channel that this node was meant to forward over is
+    // currently disabled, so we'll apply the update and continue.
+    case *lnwire.FailChannelDisabled:
+        i.pairResults[failedPair] = 0
+        return false, 0
+
+    // It's likely that the outgoing channel didn't have sufficient
+    // capacity, so we'll prune this edge for now, and continue onwards with
+    // our path finding.
+    case *lnwire.FailTemporaryChannelFailure:
+        i.pairResults[failedPair] = failedAmt
+        return false, 0
+
+    // If the send fails due to a node not having the required features,
+    // then we'll note this error and continue.
+    case *lnwire.FailRequiredNodeFeatureMissing:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    // If the send fails due to a node not having the required channel
+    // features, then we'll note this error and continue.
+    case *lnwire.FailRequiredChannelFeatureMissing:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    // If the next hop in the route wasn't known or offline, we'll only
+    // prune the channel which we attempted to route over. This is
+    // conservative, and it can handle faulty channels between nodes
+    // properly. Additionally, this guards against routing nodes returning
+    // errors in order to attempt to black list another node.
+    case *lnwire.FailUnknownNextPeer:
+        i.pairResults[failedPair] = 0
+        return false, 0
+
+    // If the node wasn't able to forward for whichever reason, then we'll
+    // note this and continue with the routes.
+    case *lnwire.FailTemporaryNodeFailure:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    case *lnwire.FailPermanentNodeFailure:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    // If we crafted a route that contains a too long time lock for an
+    // intermediate node, we'll prune the node. As there currently is no way
+    // of knowing that node's maximum acceptable cltv, we cannot take this
+    // constraint into account during routing.
+    //
+    // TODO(joostjager): Record the rejected cltv and use that as a hint
+    // during future path finding through that node.
+    case *lnwire.FailExpiryTooFar:
+        i.nodeFailure = &failureVertex
+        return false, 0
+
+    // If we get a permanent channel or node failure, then we'll prune the
+    // channel in both directions and continue with the rest of the routes.
+    case *lnwire.FailPermanentChannelFailure:
+        i.pairResults[failedPair] = 0
+        i.pairResults[failedPair.Reverse()] = 0
+        return false, 0
+
+    // Any other failure or an empty failure will get the node pruned.
+    default:
+        i.nodeFailure = &failureVertex
+        return false, 0
+    }
+}
+
+// getFailedPair tries to locate the failing pair given a route and the pubkey
+// of the node that sent the failure. It will assume that the failure is
+// associated with the outgoing channel set of the failing node. As a second
+// result, it returns the amount sent between the pair.
+func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
+    lnwire.MilliSatoshi) {
+
+    // Determine if we have a failure from the final hop. If it is, we
+    // assume that the failing channel is the incoming channel.
+    //
+    // TODO(joostjager): In this case, certain types of failures are not
+    // expected. For example FailUnknownNextPeer. This could be a reason to
+    // prune the node?
+    if failureSource == len(route.Hops) {
+        failureSource--
+    }
+
+    // As this failure indicates that the target channel was unable to carry
+    // this HTLC (for w/e reason), we'll return the _outgoing_ channel that
+    // the source of the failure was meant to pass the HTLC along to.
+    if failureSource == 0 {
+        return NewDirectedNodePair(
+            route.SourcePubKey,
+            route.Hops[0].PubKeyBytes,
+        ), route.TotalAmount
+    }
+
+    return NewDirectedNodePair(
+        route.Hops[failureSource-1].PubKeyBytes,
+        route.Hops[failureSource].PubKeyBytes,
+    ), route.Hops[failureSource-1].AmtToForward
+}
diff --git a/routing/result_interpretation_test.go b/routing/result_interpretation_test.go
new file mode 100644
index 00000000..b7b29577
--- /dev/null
+++ b/routing/result_interpretation_test.go
@@ -0,0 +1,101 @@
+package routing
+
+import (
+    "reflect"
+    "testing"
+
+    "github.com/lightningnetwork/lnd/lnwire"
+
+    "github.com/lightningnetwork/lnd/routing/route"
+)
+
+var (
+    hops = []route.Vertex{
+        {1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4},
+    }
+
+    routeTwoHop = route.Route{
+        SourcePubKey: hops[0],
+        TotalAmount:  100,
+        Hops: []*route.Hop{
+            {PubKeyBytes: hops[1], AmtToForward: 99},
+            {PubKeyBytes: hops[2], AmtToForward: 97},
+        },
+    }
+
+    routeFourHop = route.Route{
+        SourcePubKey: hops[0],
+        TotalAmount:  100,
+        Hops: []*route.Hop{
+            {PubKeyBytes: hops[1], AmtToForward: 99},
+            {PubKeyBytes: hops[2], AmtToForward: 97},
+            {PubKeyBytes: hops[3], AmtToForward: 94},
+            {PubKeyBytes: hops[4], AmtToForward: 90},
+        },
+    }
+)
+
+type resultTestCase struct {
+    name          string
+    route         *route.Route
+    failureSrcIdx int
+    failure       lnwire.FailureMessage
+
+    expectedResult *interpretedResult
+}
+
+var resultTestCases = []resultTestCase{
+    // Tests that a temporary channel failure result is properly
+    // interpreted.
+    {
+        name:          "fail",
+        route:         &routeTwoHop,
+        failureSrcIdx: 1,
+        failure:       lnwire.NewTemporaryChannelFailure(nil),
+
+        expectedResult: &interpretedResult{
+            pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
+                NewDirectedNodePair(hops[1], hops[2]): 99,
+            },
+        },
+    },
+
+    // Tests that an expiry too soon failure result is properly interpreted.
+    {
+        name:          "fail expiry too soon",
+        route:         &routeFourHop,
+        failureSrcIdx: 3,
+        failure:       lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}),
+
+        expectedResult: &interpretedResult{
+            nodeFailure: &hops[3],
+        },
+    },
+}
+
+// TestResultInterpretation executes a list of test cases that test the result
+// interpretation logic.
+func TestResultInterpretation(t *testing.T) {
+    emptyResults := make(map[DirectedNodePair]lnwire.MilliSatoshi)
+
+    for _, testCase := range resultTestCases {
+        t.Run(testCase.name, func(t *testing.T) {
+            i := interpretResult(
+                testCase.route, &testCase.failureSrcIdx,
+                testCase.failure,
+            )
+
+            expected := testCase.expectedResult
+
+            // Replace nil pairResults with empty map to satisfy
+            // DeepEqual.
+            if expected.pairResults == nil {
+                expected.pairResults = emptyResults
+            }
+
+            if !reflect.DeepEqual(i, expected) {
+                t.Fatal("unexpected result")
+            }
+        })
+    }
+}
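Reviewer note (not part of the patch): the table-driven layout above makes extra coverage cheap to add. A purely illustrative sketch of one more entry for resultTestCases, reusing the hops and routeFourHop fixtures and the resultTestCase fields defined in this diff, would exercise the FailPermanentChannelFailure branch, which penalizes the failed pair in both directions:

// Hypothetical extra test case, not included in this diff. With
// failureSrcIdx set to 2, getFailedPair resolves the failed pair to
// (hops[2], hops[3]), so both that direction and its Reverse() are
// expected with a zero minimum penalty amount.
{
    name:          "fail permanent channel failure",
    route:         &routeFourHop,
    failureSrcIdx: 2,
    failure:       &lnwire.FailPermanentChannelFailure{},

    expectedResult: &interpretedResult{
        pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
            NewDirectedNodePair(hops[2], hops[3]): 0,
            NewDirectedNodePair(hops[3], hops[2]): 0,
        },
    },
},

Because interpretResult only sets finalFailureReason for final failures, the expected result for this case leaves it nil, matching the false return of the FailPermanentChannelFailure branch in processFail.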