Merge pull request #3256 from joostjager/failure-attribution

routing: failure attribution
This commit is contained in:
Olaoluwa Osuntokun 2019-08-20 16:53:56 -07:00 committed by GitHub
commit 2f8d3c4526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 650 additions and 364 deletions

@ -1631,8 +1631,21 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) {
OnionSHA256: msg.ShaOnionBlob,
}
default:
log.Errorf("Unknown failure code: %v", msg.FailureCode)
failure = &lnwire.FailTemporaryChannelFailure{}
log.Warnf("Unexpected failure code received in "+
"UpdateFailMailformedHTLC: %v", msg.FailureCode)
// We don't just pass back the error we received from
// our successor. Otherwise we might report a failure
// that penalizes us more than needed. If the onion that
// we forwarded was correct, the node should have been
// able to send back its own failure. The node did not
// send back its own failure, so we assume there was a
// problem with the onion and report that back. We reuse
// the invalid onion key failure because there is no
// specific error for this case.
failure = &lnwire.FailInvalidOnionKey{
OnionSHA256: msg.ShaOnionBlob,
}
}
// With the error parsed, we'll convert the into it's opaque

@ -2097,8 +2097,8 @@ func TestUpdateFailMalformedHTLCErrorConversion(t *testing.T) {
fwdingErr := err.(*ForwardingError)
failureMsg := fwdingErr.FailureMessage
if _, ok := failureMsg.(*lnwire.FailTemporaryChannelFailure); !ok {
t.Fatalf("expected temp chan failure instead got: %v",
if _, ok := failureMsg.(*lnwire.FailInvalidOnionKey); !ok {
t.Fatalf("expected onion failure instead got: %v",
fwdingErr.FailureMessage)
}
}

@ -3567,7 +3567,7 @@ func testSphinxReplayPersistence(net *lntest.NetworkHarness, t *harnessTest) {
// Construct the response we expect after sending a duplicate packet
// that fails due to sphinx replay detection.
replayErr := "TemporaryChannelFailure"
replayErr := "InvalidOnionKey"
if !strings.Contains(resp.PaymentError, replayErr) {
t.Fatalf("received payment error: %v, expected %v",
resp.PaymentError, replayErr)

@ -322,55 +322,6 @@ func (m *MissionControl) requestSecondChance(timestamp time.Time,
return false
}
// reportVertexFailure reports a node level failure.
func (m *MissionControl) reportVertexFailure(timestamp time.Time,
v route.Vertex) {
log.Debugf("Reporting vertex %v failure to Mission Control", v)
m.Lock()
defer m.Unlock()
m.lastNodeFailure[v] = timestamp
}
// reportPairPolicyFailure reports a policy related failure.
func (m *MissionControl) reportPairPolicyFailure(timestamp time.Time,
failedPair DirectedNodePair) {
m.Lock()
defer m.Unlock()
// We may have an out of date graph. Therefore we don't always penalize
// immediately. If some time has passed since the last policy failure,
// we grant the node a second chance at forwarding the payment.
if m.requestSecondChance(
timestamp, failedPair.From, failedPair.To,
) {
return
}
m.lastNodeFailure[failedPair.From] = timestamp
}
// reportPairFailure reports a pair level failure.
//
// TODO(roasbeef): also add value attempted to send and capacity of channel
func (m *MissionControl) reportPairFailure(timestamp time.Time,
failedPair DirectedNodePair, minPenalizeAmt lnwire.MilliSatoshi) {
log.Debugf("Reporting pair %v failure to Mission Control", failedPair)
m.Lock()
defer m.Unlock()
pair := NewDirectedNodePair(failedPair.From, failedPair.To)
m.lastPairFailure[pair] = pairFailure{
minPenalizeAmt: minPenalizeAmt,
timestamp: timestamp,
}
}
// GetHistorySnapshot takes a snapshot from the current mission control state
// and actual probability estimates.
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
@ -420,12 +371,11 @@ func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
// ReportPaymentFail reports a failed payment to mission control as input for
// future probability estimates. The failureSourceIdx argument indicates the
// failure source. If it is nil, the failure source is unknown. This function
// returns a bool indicating whether this error is a final error. If it is
// final, a failure reason is returned and no further payment attempts need to
// be made.
// returns a reason if this failure is a final failure. In that case no further
// payment attempts need to be made.
func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
channeldb.FailureReason, error) {
failureSourceIdx *int, failure lnwire.FailureMessage) (
*channeldb.FailureReason, error) {
timestamp := m.now()
@ -442,256 +392,55 @@ func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
// Store complete result in database.
if err := m.store.AddResult(result); err != nil {
return false, 0, err
return nil, err
}
// Apply result to update mission control state.
final, reason := m.applyPaymentResult(result)
reason := m.applyPaymentResult(result)
return final, reason, nil
return reason, nil
}
// applyPaymentResult applies a payment result as input for future probability
// estimates. It returns a bool indicating whether this error is a final error
// and no further payment attempts need to be made.
func (m *MissionControl) applyPaymentResult(result *paymentResult) (
bool, channeldb.FailureReason) {
func (m *MissionControl) applyPaymentResult(
result *paymentResult) *channeldb.FailureReason {
var (
failureSourceIdxInt int
failure lnwire.FailureMessage
// Interpret result.
i := interpretResult(
result.route, result.failureSourceIdx, result.failure,
)
if result.failureSourceIdx == nil {
// If the failure message could not be decrypted, attribute the
// failure to our own outgoing channel.
//
// TODO(joostager): Penalize all channels in the route.
failureSourceIdxInt = 0
failure = lnwire.NewTemporaryChannelFailure(nil)
} else {
failureSourceIdxInt = *result.failureSourceIdx
failure = result.failure
// Update mission control state using the interpretation.
m.Lock()
defer m.Unlock()
if i.policyFailure != nil {
if m.requestSecondChance(
result.timeReply,
i.policyFailure.From, i.policyFailure.To,
) {
return nil
}
}
var failureVertex route.Vertex
if i.nodeFailure != nil {
log.Debugf("Reporting node failure to Mission Control: "+
"node=%v", *i.nodeFailure)
if failureSourceIdxInt > 0 {
failureVertex = result.route.Hops[failureSourceIdxInt-1].PubKeyBytes
} else {
failureVertex = result.route.SourcePubKey
m.lastNodeFailure[*i.nodeFailure] = result.timeReply
}
log.Tracef("Node %x (index %v) reported failure when sending htlc",
failureVertex, result.failureSourceIdx)
// Always determine chan id ourselves, because a channel update with id
// may not be available.
failedPair, failedAmt := getFailedPair(
result.route, failureSourceIdxInt,
)
for pair, minPenalizeAmt := range i.pairResults {
log.Debugf("Reporting pair failure to Mission Control: "+
"pair=%v, minPenalizeAmt=%v", pair, minPenalizeAmt)
switch failure.(type) {
// If the end destination didn't know the payment
// hash or we sent the wrong payment amount to the
// destination, then we'll terminate immediately.
case *lnwire.FailIncorrectDetails:
// TODO(joostjager): Check onionErr.Amount() whether it matches
// what we expect. (Will it ever not match, because if not
// final_incorrect_htlc_amount would be returned?)
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we sent the wrong amount to the destination, then
// we'll exit early.
case *lnwire.FailIncorrectPaymentAmount:
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If the time-lock that was extended to the final node
// was incorrect, then we can't proceed.
case *lnwire.FailFinalIncorrectCltvExpiry:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc that expires too soon.
// In that case we should continue routing.
return true, channeldb.FailureReasonError
// If we crafted an invalid onion payload for the final
// node, then we'll exit early.
case *lnwire.FailFinalIncorrectHtlcAmount:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc with a too low value. In
// that case we should continue routing.
return true, channeldb.FailureReasonError
// Similarly, if the HTLC expiry that we extended to
// the final hop expires too soon, then will fail the
// payment.
//
// TODO(roasbeef): can happen to to race condition, try
// again with recent block height
case *lnwire.FailFinalExpiryTooSoon:
// TODO(joostjager): Take into account that any hop may have
// delayed. Ideally we should continue routing. Knowing the
// delaying node at this point would help.
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we erroneously attempted to cross a chain border,
// then we'll cancel the payment.
case *lnwire.FailInvalidRealm:
return true, channeldb.FailureReasonError
// If we get a notice that the expiry was too soon for
// an intermediate node, then we'll prune out the node
// that sent us this error, as it doesn't now what the
// correct block height is.
case *lnwire.FailExpiryTooSoon:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we hit an instance of onion payload corruption or an invalid
// version, then we'll exit early as this shouldn't happen in the
// typical case.
//
// TODO(joostjager): Take into account that the previous hop may have
// tampered with the onion. Routing should continue using other paths.
case *lnwire.FailInvalidOnionVersion:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionHmac:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionKey:
return true, channeldb.FailureReasonError
// If we get a failure due to violating the minimum
// amount, we'll apply the new minimum amount and retry
// routing.
case *lnwire.FailAmountBelowMinimum:
m.reportPairPolicyFailure(result.timeReply, failedPair)
return false, 0
// If we get a failure due to a fee, we'll apply the
// new fee update, and retry our attempt using the
// newly updated fees.
case *lnwire.FailFeeInsufficient:
m.reportPairPolicyFailure(result.timeReply, failedPair)
return false, 0
// If we get the failure for an intermediate node that
// disagrees with our time lock values, then we'll
// apply the new delta value and try it once more.
case *lnwire.FailIncorrectCltvExpiry:
m.reportPairPolicyFailure(result.timeReply, failedPair)
return false, 0
// The outgoing channel that this node was meant to
// forward one is currently disabled, so we'll apply
// the update and continue.
case *lnwire.FailChannelDisabled:
m.reportPairFailure(result.timeReply, failedPair, 0)
return false, 0
// It's likely that the outgoing channel didn't have
// sufficient capacity, so we'll prune this pair for
// now, and continue onwards with our path finding.
case *lnwire.FailTemporaryChannelFailure:
m.reportPairFailure(result.timeReply, failedPair, failedAmt)
return false, 0
// If the send fail due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredNodeFeatureMissing:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If the send fail due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredChannelFeatureMissing:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If the next hop in the route wasn't known or
// offline, we'll only the channel which we attempted
// to route over. This is conservative, and it can
// handle faulty channels between nodes properly.
// Additionally, this guards against routing nodes
// returning errors in order to attempt to black list
// another node.
case *lnwire.FailUnknownNextPeer:
m.reportPairFailure(result.timeReply, failedPair, 0)
return false, 0
// If the node wasn't able to forward for which ever
// reason, then we'll note this and continue with the
// routes.
case *lnwire.FailTemporaryNodeFailure:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
case *lnwire.FailPermanentNodeFailure:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we crafted a route that contains a too long time
// lock for an intermediate node, we'll prune the node.
// As there currently is no way of knowing that node's
// maximum acceptable cltv, we cannot take this
// constraint into account during routing.
//
// TODO(joostjager): Record the rejected cltv and use
// that as a hint during future path finding through
// that node.
case *lnwire.FailExpiryTooFar:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
// If we get a permanent channel or node failure, then
// we'll prune the channel in both directions and
// continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
m.reportPairFailure(result.timeReply, failedPair, 0)
m.reportPairFailure(
result.timeReply, failedPair.Reverse(), 0,
)
return false, 0
// Any other failure or an empty failure will get the node pruned.
default:
m.reportVertexFailure(result.timeReply, failureVertex)
return false, 0
m.lastPairFailure[pair] = pairFailure{
minPenalizeAmt: minPenalizeAmt,
timestamp: result.timeReply,
}
}
}
// getFailedPair tries to locate the failing pair given a route and the pubkey
// of the node that sent the failure. It will assume that the failure is
// associated with the outgoing channel set of the failing node. As a second
// result, it returns the amount sent between the pair.
func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
lnwire.MilliSatoshi) {
// Determine if we have a failure from the final hop. If it is, we
// assume that the failing channel is the incoming channel.
//
// TODO(joostjager): In this case, certain types of failures are not
// expected. For example FailUnknownNextPeer. This could be a reason to
// prune the node?
if failureSource == len(route.Hops) {
failureSource--
}
// As this failure indicates that the target channel was unable to carry
// this HTLC (for w/e reason), we'll return the _outgoing_ channel that
// the source of the failure was meant to pass the HTLC along to.
if failureSource == 0 {
return NewDirectedNodePair(
route.SourcePubKey,
route.Hops[0].PubKeyBytes,
), route.TotalAmount
}
return NewDirectedNodePair(
route.Hops[failureSource-1].PubKeyBytes,
route.Hops[failureSource].PubKeyBytes,
), route.Hops[failureSource-1].AmtToForward
return i.finalFailureReason
}

@ -98,11 +98,11 @@ type mockMissionControl struct {
var _ MissionController = (*mockMissionControl)(nil)
func (m *mockMissionControl) ReportPaymentFail(paymentID uint64,
rt *route.Route, failureSourceIdx *int, failure lnwire.FailureMessage) (
bool, channeldb.FailureReason, error) {
func (m *mockMissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
failureSourceIdx *int, failure lnwire.FailureMessage) (
*channeldb.FailureReason, error) {
return false, 0, nil
return nil, nil
}
func (m *mockMissionControl) GetProbability(fromNode, toNode route.Vertex,

@ -342,10 +342,10 @@ func (p *paymentLifecycle) sendPaymentAttempt(firstHop lnwire.ShortChannelID,
// whether we should make another payment attempt.
func (p *paymentLifecycle) handleSendError(sendErr error) error {
final, reason := p.router.processSendError(
reason := p.router.processSendError(
p.attempt.PaymentID, &p.attempt.Route, sendErr,
)
if !final {
if reason == nil {
// Save the forwarding error so it can be returned if
// this turns out to be the last attempt.
p.lastError = sendErr
@ -354,14 +354,14 @@ func (p *paymentLifecycle) handleSendError(sendErr error) error {
}
log.Debugf("Payment %x failed: final_outcome=%v, raw_err=%v",
p.payment.PaymentHash, reason, sendErr)
p.payment.PaymentHash, *reason, sendErr)
// Mark the payment failed with no route.
//
// TODO(halseth): make payment codes for the actual reason we don't
// continue path finding.
err := p.router.cfg.Control.Fail(
p.payment.PaymentHash, reason,
p.payment.PaymentHash, *reason,
)
if err != nil {
return err

@ -33,7 +33,7 @@ type PaymentSession interface {
type paymentSession struct {
additionalEdges map[route.Vertex][]*channeldb.ChannelEdgePolicy
bandwidthHints map[uint64]lnwire.MilliSatoshi
getBandwidthHints func() (map[uint64]lnwire.MilliSatoshi, error)
sessionSource *SessionSource
@ -97,11 +97,22 @@ func (p *paymentSession) RequestRoute(payment *LightningPayment,
CltvLimit: cltvLimit,
}
// We'll also obtain a set of bandwidthHints from the lower layer for
// each of our outbound channels. This will allow the path finding to
// skip any links that aren't active or just don't have enough bandwidth
// to carry the payment. New bandwidth hints are queried for every new
// path finding attempt, because concurrent payments may change
// balances.
bandwidthHints, err := p.getBandwidthHints()
if err != nil {
return nil, err
}
path, err := p.pathFinder(
&graphParams{
graph: ss.Graph,
additionalEdges: p.additionalEdges,
bandwidthHints: p.bandwidthHints,
bandwidthHints: bandwidthHints,
},
restrictions, &ss.PathFindingConfig,
ss.SelfNode.PubKeyBytes, payment.Target,

@ -97,26 +97,22 @@ func (m *SessionSource) NewPaymentSession(routeHints [][]zpay32.HopHint,
}
}
// We'll also obtain a set of bandwidthHints from the lower layer for
// each of our outbound channels. This will allow the path finding to
// skip any links that aren't active or just don't have enough
// bandwidth to carry the payment.
sourceNode, err := m.Graph.SourceNode()
if err != nil {
return nil, err
}
bandwidthHints, err := generateBandwidthHints(
sourceNode, m.QueryBandwidth,
)
if err != nil {
return nil, err
getBandwidthHints := func() (map[uint64]lnwire.MilliSatoshi,
error) {
return generateBandwidthHints(sourceNode, m.QueryBandwidth)
}
return &paymentSession{
additionalEdges: edges,
bandwidthHints: bandwidthHints,
sessionSource: m,
pathFinder: findPath,
additionalEdges: edges,
getBandwidthHints: getBandwidthHints,
sessionSource: m,
pathFinder: findPath,
}, nil
}

@ -41,6 +41,11 @@ func TestRequestRoute(t *testing.T) {
}
session := &paymentSession{
getBandwidthHints: func() (map[uint64]lnwire.MilliSatoshi,
error) {
return nil, nil
},
sessionSource: sessionSource,
pathFinder: findPath,
}

@ -0,0 +1,368 @@
package routing
import (
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing/route"
)
// Instantiate variables to allow taking a reference from the failure reason.
var (
reasonError = channeldb.FailureReasonError
reasonIncorrectDetails = channeldb.FailureReasonIncorrectPaymentDetails
)
// interpretedResult contains the result of the interpretation of a payment
// outcome.
type interpretedResult struct {
// nodeFailure points to a node pubkey if all channels of that node are
// responsible for the result.
nodeFailure *route.Vertex
// pairResults contains a map of node pairs that could be responsible
// for the failure. The map values are the minimum amounts for which a
// future penalty should be applied.
pairResults map[DirectedNodePair]lnwire.MilliSatoshi
// finalFailureReason is set to a non-nil value if it makes no more
// sense to start another payment attempt. It will contain the reason
// why.
finalFailureReason *channeldb.FailureReason
// policyFailure is set to a node pair if there is a policy failure on
// that connection. This is used to control the second chance logic for
// policy failures.
policyFailure *DirectedNodePair
}
// interpretResult interprets a payment outcome and returns an object that
// contains information required to update mission control.
func interpretResult(rt *route.Route, failureSrcIdx *int,
failure lnwire.FailureMessage) *interpretedResult {
i := &interpretedResult{
pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi),
}
i.processFail(rt, failureSrcIdx, failure)
return i
}
// processFail processes a failed payment attempt.
func (i *interpretedResult) processFail(
rt *route.Route, errSourceIdx *int,
failure lnwire.FailureMessage) {
if errSourceIdx == nil {
i.processPaymentOutcomeUnknown(rt)
return
}
switch *errSourceIdx {
// We are the source of the failure.
case 0:
i.processPaymentOutcomeSelf(rt, failure)
// A failure from the final hop was received.
case len(rt.Hops):
i.processPaymentOutcomeFinal(
rt, failure,
)
// An intermediate hop failed. Interpret the outcome, update reputation
// and try again.
default:
i.processPaymentOutcomeIntermediate(
rt, *errSourceIdx, failure,
)
}
}
// processPaymentOutcomeSelf handles failures sent by ourselves.
func (i *interpretedResult) processPaymentOutcomeSelf(
rt *route.Route, failure lnwire.FailureMessage) {
switch failure.(type) {
// We receive a malformed htlc failure from our peer. We trust ourselves
// to send the correct htlc, so our peer must be at fault.
case *lnwire.FailInvalidOnionVersion,
*lnwire.FailInvalidOnionHmac,
*lnwire.FailInvalidOnionKey:
i.failNode(rt, 1)
// If this was a payment to a direct peer, we can stop trying.
if len(rt.Hops) == 1 {
i.finalFailureReason = &reasonError
}
// Any other failure originating from ourselves should be temporary and
// caused by changing conditions between path finding and execution of
// the payment. We just retry and trust that the information locally
// available in the link has been updated.
default:
log.Warnf("Routing failure for local channel %v occurred",
rt.Hops[0].ChannelID)
}
}
// processPaymentOutcomeFinal handles failures sent by the final hop.
func (i *interpretedResult) processPaymentOutcomeFinal(
route *route.Route, failure lnwire.FailureMessage) {
n := len(route.Hops)
// If a failure from the final node is received, we will fail the
// payment in almost all cases. Only when the penultimate node sends an
// incorrect htlc, we want to retry via another route. Invalid onion
// failures are not expected, because the final node wouldn't be able to
// encrypt that failure.
switch failure.(type) {
// Expiry or amount of the HTLC doesn't match the onion, try another
// route.
case *lnwire.FailFinalIncorrectCltvExpiry,
*lnwire.FailFinalIncorrectHtlcAmount:
// We trust ourselves. If this is a direct payment, we penalize
// the final node and fail the payment.
if n == 1 {
i.failNode(route, n)
i.finalFailureReason = &reasonError
return
}
// Otherwise penalize the last pair of the route and retry.
// Either the final node is at fault, or it gets sent a bad htlc
// from its predecessor.
i.failPair(route, n-1)
// We are using wrong payment hash or amount, fail the payment.
case *lnwire.FailIncorrectPaymentAmount,
*lnwire.FailIncorrectDetails:
i.finalFailureReason = &reasonIncorrectDetails
// The HTLC that was extended to the final hop expires too soon. Fail
// the payment, because we may be using the wrong final cltv delta.
case *lnwire.FailFinalExpiryTooSoon:
// TODO(roasbeef): can happen to to race condition, try again
// with recent block height
// TODO(joostjager): can also happen because a node delayed
// deliberately. What to penalize?
i.finalFailureReason = &reasonIncorrectDetails
default:
// All other errors are considered terminal if coming from the
// final hop. They indicate that something is wrong at the
// recipient, so we do apply a penalty.
i.failNode(route, n)
i.finalFailureReason = &reasonError
}
}
// processPaymentOutcomeIntermediate handles failures sent by an intermediate
// hop.
func (i *interpretedResult) processPaymentOutcomeIntermediate(
route *route.Route, errorSourceIdx int,
failure lnwire.FailureMessage) {
reportOutgoing := func() {
i.failPair(
route, errorSourceIdx,
)
}
reportOutgoingBalance := func() {
i.failPairBalance(
route, errorSourceIdx,
)
}
reportIncoming := func() {
// We trust ourselves. If the error comes from the first hop, we
// can penalize the whole node. In that case there is no
// uncertainty as to which node to blame.
if errorSourceIdx == 1 {
i.failNode(route, errorSourceIdx)
return
}
// Otherwise report the incoming pair.
i.failPair(
route, errorSourceIdx-1,
)
}
reportAll := func() {
// We trust ourselves. If the error comes from the first hop, we
// can penalize the whole node. In that case there is no
// uncertainty as to which node to blame.
if errorSourceIdx == 1 {
i.failNode(route, errorSourceIdx)
return
}
// Otherwise penalize all pairs up to the error source. This
// includes our own outgoing connection.
i.failPairRange(
route, 0, errorSourceIdx-1,
)
}
switch failure.(type) {
// If a node reports onion payload corruption or an invalid version,
// that node may be responsible, but it could also be that it is just
// relaying a malformed htlc failure from it successor. By reporting the
// outgoing channel set, we will surely hit the responsible node. At
// this point, it is not possible that the node's predecessor corrupted
// the onion blob. If the predecessor would have corrupted the payload,
// the error source wouldn't have been able to encrypt this failure
// message for us.
case *lnwire.FailInvalidOnionVersion,
*lnwire.FailInvalidOnionHmac,
*lnwire.FailInvalidOnionKey:
reportOutgoing()
// If the next hop in the route wasn't known or offline, we'll only
// penalize the channel set which we attempted to route over. This is
// conservative, and it can handle faulty channels between nodes
// properly. Additionally, this guards against routing nodes returning
// errors in order to attempt to black list another node.
case *lnwire.FailUnknownNextPeer:
reportOutgoing()
// If we get a permanent channel, we'll prune the channel set in both
// directions and continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
reportOutgoing()
// When an HTLC parameter is incorrect, the node sending the error may
// be doing something wrong. But it could also be that its predecessor
// is intentionally modifying the htlc parameters that we instructed it
// via the hop payload. Therefore we penalize the incoming node pair. A
// third cause of this error may be that we have an out of date channel
// update. This is handled by the second chance logic up in mission
// control.
case *lnwire.FailAmountBelowMinimum,
*lnwire.FailFeeInsufficient,
*lnwire.FailIncorrectCltvExpiry,
*lnwire.FailChannelDisabled:
// Set the node pair for which a channel update may be out of
// date. The second chance logic uses the policyFailure field.
i.policyFailure = &DirectedNodePair{
From: route.Hops[errorSourceIdx-1].PubKeyBytes,
To: route.Hops[errorSourceIdx].PubKeyBytes,
}
// We report incoming channel. If a second pair is granted in
// mission control, this report is ignored.
reportIncoming()
// If the outgoing channel doesn't have enough capacity, we penalize.
// But we penalize only in a single direction and only for amounts
// greater than the attempted amount.
case *lnwire.FailTemporaryChannelFailure:
reportOutgoingBalance()
// If FailExpiryTooSoon is received, there must have been some delay
// along the path. We can't know which node is causing the delay, so we
// penalize all of them up to the error source.
//
// Alternatively it could also be that we ourselves have fallen behind
// somehow. We ignore that case for now.
case *lnwire.FailExpiryTooSoon:
reportAll()
// In all other cases, we penalize the reporting node. These are all
// failures that should not happen.
default:
i.failNode(route, errorSourceIdx)
}
}
// processPaymentOutcomeUnknown processes a payment outcome for which no failure
// message or source is available.
func (i *interpretedResult) processPaymentOutcomeUnknown(route *route.Route) {
n := len(route.Hops)
// If this is a direct payment, the destination must be at fault.
if n == 1 {
i.failNode(route, n)
i.finalFailureReason = &reasonError
return
}
// Otherwise penalize all channels in the route to make sure the
// responsible node is at least hit too. We even penalize the connection
// to our own peer, because that peer could also be responsible.
i.failPairRange(route, 0, n-1)
}
// failNode marks the node indicated by idx in the route as failed. This
// function intentionally panics when the self node is failed.
func (i *interpretedResult) failNode(rt *route.Route, idx int) {
i.nodeFailure = &rt.Hops[idx-1].PubKeyBytes
}
// failPairRange marks the node pairs from node fromIdx to node toIdx as failed
// in both direction.
func (i *interpretedResult) failPairRange(
rt *route.Route, fromIdx, toIdx int) {
for idx := fromIdx; idx <= toIdx; idx++ {
i.failPair(rt, idx)
}
}
// failPair marks a pair as failed in both directions.
func (i *interpretedResult) failPair(
rt *route.Route, idx int) {
pair, _ := getPair(rt, idx)
// Report pair in both directions without a minimum penalization amount.
i.pairResults[pair] = 0
i.pairResults[pair.Reverse()] = 0
}
// failPairBalance marks a pair as failed with a minimum penalization amount.
func (i *interpretedResult) failPairBalance(
rt *route.Route, channelIdx int) {
pair, amt := getPair(rt, channelIdx)
i.pairResults[pair] = amt
}
// getPair returns a node pair from the route and the amount passed between that
// pair.
func getPair(rt *route.Route, channelIdx int) (DirectedNodePair,
lnwire.MilliSatoshi) {
nodeTo := rt.Hops[channelIdx].PubKeyBytes
var (
nodeFrom route.Vertex
amt lnwire.MilliSatoshi
)
if channelIdx == 0 {
nodeFrom = rt.SourcePubKey
amt = rt.TotalAmount
} else {
nodeFrom = rt.Hops[channelIdx-1].PubKeyBytes
amt = rt.Hops[channelIdx-1].AmtToForward
}
pair := NewDirectedNodePair(nodeFrom, nodeTo)
return pair, amt
}

@ -0,0 +1,146 @@
package routing
import (
"reflect"
"testing"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/routing/route"
)
var (
hops = []route.Vertex{
{1, 0}, {1, 1}, {1, 2}, {1, 3}, {1, 4},
}
routeOneHop = route.Route{
SourcePubKey: hops[0],
TotalAmount: 100,
Hops: []*route.Hop{
{PubKeyBytes: hops[1], AmtToForward: 99},
},
}
routeTwoHop = route.Route{
SourcePubKey: hops[0],
TotalAmount: 100,
Hops: []*route.Hop{
{PubKeyBytes: hops[1], AmtToForward: 99},
{PubKeyBytes: hops[2], AmtToForward: 97},
},
}
routeFourHop = route.Route{
SourcePubKey: hops[0],
TotalAmount: 100,
Hops: []*route.Hop{
{PubKeyBytes: hops[1], AmtToForward: 99},
{PubKeyBytes: hops[2], AmtToForward: 97},
{PubKeyBytes: hops[3], AmtToForward: 94},
{PubKeyBytes: hops[4], AmtToForward: 90},
},
}
)
func getTestPair(from, to int) DirectedNodePair {
return NewDirectedNodePair(hops[from], hops[to])
}
type resultTestCase struct {
name string
route *route.Route
failureSrcIdx int
failure lnwire.FailureMessage
expectedResult *interpretedResult
}
var resultTestCases = []resultTestCase{
// Tests that a temporary channel failure result is properly
// interpreted.
{
name: "fail",
route: &routeTwoHop,
failureSrcIdx: 1,
failure: lnwire.NewTemporaryChannelFailure(nil),
expectedResult: &interpretedResult{
pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
getTestPair(1, 2): 99,
},
},
},
// Tests that a expiry too soon failure result is properly interpreted.
{
name: "fail expiry too soon",
route: &routeFourHop,
failureSrcIdx: 3,
failure: lnwire.NewExpiryTooSoon(lnwire.ChannelUpdate{}),
expectedResult: &interpretedResult{
pairResults: map[DirectedNodePair]lnwire.MilliSatoshi{
getTestPair(0, 1): 0,
getTestPair(1, 0): 0,
getTestPair(1, 2): 0,
getTestPair(2, 1): 0,
getTestPair(2, 3): 0,
getTestPair(3, 2): 0,
},
},
},
// Tests a malformed htlc from a direct peer.
{
name: "fail malformed htlc from direct peer",
route: &routeTwoHop,
failureSrcIdx: 0,
failure: lnwire.NewInvalidOnionKey(nil),
expectedResult: &interpretedResult{
nodeFailure: &hops[1],
},
},
// Tests a malformed htlc from a direct peer that is also the final
// destination.
{
name: "fail malformed htlc from direct final peer",
route: &routeOneHop,
failureSrcIdx: 0,
failure: lnwire.NewInvalidOnionKey(nil),
expectedResult: &interpretedResult{
finalFailureReason: &reasonError,
nodeFailure: &hops[1],
},
},
}
// TestResultInterpretation executes a list of test cases that test the result
// interpretation logic.
func TestResultInterpretation(t *testing.T) {
emptyResults := make(map[DirectedNodePair]lnwire.MilliSatoshi)
for _, testCase := range resultTestCases {
t.Run(testCase.name, func(t *testing.T) {
i := interpretResult(
testCase.route, &testCase.failureSrcIdx,
testCase.failure,
)
expected := testCase.expectedResult
// Replace nil pairResults with empty map to satisfy
// DeepEqual.
if expected.pairResults == nil {
expected.pairResults = emptyResults
}
if !reflect.DeepEqual(i, expected) {
t.Fatal("unexpected result")
}
})
}
}

@ -179,8 +179,8 @@ type MissionController interface {
// whether this error is a final error and no further payment attempts
// need to be made.
ReportPaymentFail(paymentID uint64, rt *route.Route,
failureSourceIdx *int, failure lnwire.FailureMessage) (bool,
channeldb.FailureReason, error)
failureSourceIdx *int, failure lnwire.FailureMessage) (
*channeldb.FailureReason, error)
// GetProbability is expected to return the success probability of a
// payment from fromNode along edge.
@ -1887,23 +1887,25 @@ func (r *ChannelRouter) tryApplyChannelUpdate(rt *route.Route,
// to continue with an alternative route. This is indicated by the boolean
// return value.
func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
sendErr error) (bool, channeldb.FailureReason) {
sendErr error) *channeldb.FailureReason {
reportFail := func(srcIdx *int, msg lnwire.FailureMessage) (bool,
channeldb.FailureReason) {
internalErrorReason := channeldb.FailureReasonError
reportFail := func(srcIdx *int,
msg lnwire.FailureMessage) *channeldb.FailureReason {
// Report outcome to mission control.
final, reason, err := r.cfg.MissionControl.ReportPaymentFail(
reason, err := r.cfg.MissionControl.ReportPaymentFail(
paymentID, rt, srcIdx, msg,
)
if err != nil {
log.Errorf("Error reporting payment result to mc: %v",
err)
return true, channeldb.FailureReasonError
return &internalErrorReason
}
return final, reason
return reason
}
if sendErr == htlcswitch.ErrUnreadableFailureMessage {
@ -1915,7 +1917,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
// trying.
fErr, ok := sendErr.(*htlcswitch.ForwardingError)
if !ok {
return true, channeldb.FailureReasonError
return &internalErrorReason
}
failureMessage := fErr.FailureMessage
@ -1928,7 +1930,7 @@ func (r *ChannelRouter) processSendError(paymentID uint64, rt *route.Route,
rt, failureSourceIdx, failureMessage,
)
if err != nil {
return true, channeldb.FailureReasonError
return &internalErrorReason
}
}

@ -274,7 +274,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
var payHash [32]byte
paymentAmt := lnwire.NewMSatFromSatoshis(1000)
payment := LightningPayment{
Target: ctx.aliases["luoji"],
Target: ctx.aliases["sophon"],
Amount: paymentAmt,
FeeLimit: noFeeLimit,
PaymentHash: payHash,
@ -284,16 +284,16 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
copy(preImage[:], bytes.Repeat([]byte{9}, 32))
// We'll modify the SendToSwitch method that's been set within the
// router's configuration to ignore the path that has luo ji as the
// router's configuration to ignore the path that has son goku as the
// first hop. This should force the router to instead take the
// available two hop path (through satoshi).
// the more costly path (through pham nuwen).
ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843)
if firstHop == roasbeefLuoji {
roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345)
if firstHop == roasbeefSongoku {
return [32]byte{}, &htlcswitch.ForwardingError{
FailureSourceIdx: 0,
FailureSourceIdx: 1,
// TODO(roasbeef): temp node failure should be?
FailureMessage: &lnwire.FailTemporaryChannelFailure{},
}
@ -302,7 +302,7 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
return preImage, nil
})
// Send off the payment request to the router, route through satoshi
// Send off the payment request to the router, route through pham nuwen
// should've been selected as a fall back and succeeded correctly.
paymentPreImage, route, err := ctx.router.SendPayment(&payment)
if err != nil {
@ -321,10 +321,10 @@ func TestSendPaymentRouteFailureFallback(t *testing.T) {
preImage[:], paymentPreImage[:])
}
// The route should have satoshi as the first hop.
if route.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
// The route should have pham nuwen as the first hop.
if route.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {
t.Fatalf("route should go through satoshi as first hop, "+
t.Fatalf("route should go through phamnuwen as first hop, "+
"instead passes through: %v",
getAliasFromPubKey(route.Hops[0].PubKeyBytes,
ctx.aliases))
@ -743,7 +743,7 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
var payHash [32]byte
paymentAmt := lnwire.NewMSatFromSatoshis(1000)
payment := LightningPayment{
Target: ctx.aliases["luoji"],
Target: ctx.aliases["sophon"],
Amount: paymentAmt,
FeeLimit: noFeeLimit,
PaymentHash: payHash,
@ -752,32 +752,29 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
var preImage [32]byte
copy(preImage[:], bytes.Repeat([]byte{9}, 32))
roasbeefLuoji := lnwire.NewShortChanIDFromInt(689530843)
roasbeefSongoku := lnwire.NewShortChanIDFromInt(12345)
roasbeefPhanNuwen := lnwire.NewShortChanIDFromInt(999991)
// First, we'll modify the SendToSwitch method to return an error
// indicating that the channel from roasbeef to luoji is not operable
// indicating that the channel from roasbeef to son goku is not operable
// with an UnknownNextPeer.
//
// TODO(roasbeef): filtering should be intelligent enough so just not
// go through satoshi at all at this point.
ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
if firstHop == roasbeefLuoji {
if firstHop == roasbeefSongoku {
// We'll first simulate an error from the first
// outgoing link to simulate the channel from luo ji to
// roasbeef not having enough capacity.
// hop to simulate the channel from songoku to
// sophon not having enough capacity.
return [32]byte{}, &htlcswitch.ForwardingError{
FailureSourceIdx: 0,
FailureSourceIdx: 1,
FailureMessage: &lnwire.FailTemporaryChannelFailure{},
}
}
// Next, we'll create an error from satoshi to indicate
// that the luoji node is not longer online, which should
// prune out the rest of the routes.
roasbeefSatoshi := lnwire.NewShortChanIDFromInt(2340213491)
if firstHop == roasbeefSatoshi {
// Next, we'll create an error from phan nuwen to
// indicate that the sophon node is not longer online,
// which should prune out the rest of the routes.
if firstHop == roasbeefPhanNuwen {
return [32]byte{}, &htlcswitch.ForwardingError{
FailureSourceIdx: 1,
FailureMessage: &lnwire.FailUnknownNextPeer{},
@ -804,15 +801,14 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
ctx.router.cfg.MissionControl.(*MissionControl).ResetHistory()
// Next, we'll modify the SendToSwitch method to indicate that luo ji
// wasn't originally online. This should also halt the send all
// together as all paths contain luoji and he can't be reached.
// Next, we'll modify the SendToSwitch method to indicate that the
// connection between songoku and isn't up.
ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
if firstHop == roasbeefLuoji {
if firstHop == roasbeefSongoku {
return [32]byte{}, &htlcswitch.ForwardingError{
FailureSourceIdx: 0,
FailureSourceIdx: 1,
FailureMessage: &lnwire.FailUnknownNextPeer{},
}
}
@ -821,14 +817,14 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
})
// This shouldn't return an error, as we'll make a payment attempt via
// the satoshi channel based on the assumption that there might be an
// intermittent issue with the roasbeef <-> lioji channel.
// the pham nuwen channel based on the assumption that there might be an
// intermittent issue with the songoku <-> sophon channel.
paymentPreImage, rt, err := ctx.router.SendPayment(&payment)
if err != nil {
t.Fatalf("unable send payment: %v", err)
}
// This path should go: roasbeef -> satoshi -> luoji
// This path should go: roasbeef -> pham nuwen -> sophon
if len(rt.Hops) != 2 {
t.Fatalf("incorrect route length: expected %v got %v", 2,
len(rt.Hops))
@ -837,9 +833,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
t.Fatalf("incorrect preimage used: expected %x got %x",
preImage[:], paymentPreImage[:])
}
if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {
t.Fatalf("route should go through satoshi as first hop, "+
t.Fatalf("route should go through phamnuwen as first hop, "+
"instead passes through: %v",
getAliasFromPubKey(rt.Hops[0].PubKeyBytes,
ctx.aliases))
@ -853,12 +849,12 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
ctx.router.cfg.Payer.(*mockPaymentAttemptDispatcher).setPaymentResult(
func(firstHop lnwire.ShortChannelID) ([32]byte, error) {
if firstHop == roasbeefLuoji {
if firstHop == roasbeefSongoku {
// We'll first simulate an error from the first
// outgoing link to simulate the channel from luo ji to
// roasbeef not having enough capacity.
return [32]byte{}, &htlcswitch.ForwardingError{
FailureSourceIdx: 0,
FailureSourceIdx: 1,
FailureMessage: &lnwire.FailTemporaryChannelFailure{},
}
}
@ -886,9 +882,9 @@ func TestSendPaymentErrorPathPruning(t *testing.T) {
}
// The route should have satoshi as the first hop.
if rt.Hops[0].PubKeyBytes != ctx.aliases["satoshi"] {
if rt.Hops[0].PubKeyBytes != ctx.aliases["phamnuwen"] {
t.Fatalf("route should go through satoshi as first hop, "+
t.Fatalf("route should go through phamnuwen as first hop, "+
"instead passes through: %v",
getAliasFromPubKey(rt.Hops[0].PubKeyBytes,
ctx.aliases))