routing: penalize node-level failures harder

Previously we only penalized the outgoing connections of a failing node. This turned out not to be sufficient, because the next route sometimes went into the same failing node again to try a different outgoing connection that wasn't yet known to mission control and therefore not penalized before.
2020-01-21 09:02:25 +01:00 · 2020-01-21 09:02:25 +01:00 · ef28d2aaed
commit ef28d2aaed
parent ad0a89b844
4 changed files with 26 additions and 15 deletions
--- a/routing/missioncontrol.go
+++ b/routing/missioncontrol.go
@ -333,21 +333,21 @@ func (m *MissionControl) setLastPairResult(fromNode, toNode route.Vertex,
 	nodePairs[toNode] = current
 }

-// setAllFail stores a fail result for all known connection of the given node.
-func (m *MissionControl) setAllFail(fromNode route.Vertex,
+// setAllFail stores a fail result for all known connections to and from the
+// given node.
+func (m *MissionControl) setAllFail(node route.Vertex,
 	timestamp time.Time) {

-	nodePairs, ok := m.lastPairResult[fromNode]
-	if !ok {
-		return
-	}
-
-	for connection := range nodePairs {
-		nodePairs[connection] = TimedPairResult{
+	for fromNode, nodePairs := range m.lastPairResult {
+		for toNode := range nodePairs {
+			if fromNode == node || toNode == node {
+				nodePairs[toNode] = TimedPairResult{
 					FailTime: timestamp,
 				}
 			}
 		}
+	}
+}

 // requestSecondChance checks whether the node fromNode can have a second chance
 // at providing a channel update for its channel with toNode.
--- a/routing/missioncontrol_test.go
+++ b/routing/missioncontrol_test.go
@ -186,8 +186,8 @@ func TestMissionControl(t *testing.T) {
 	// Check whether history snapshot looks sane.
 	history := ctx.mc.GetHistorySnapshot()

-	if len(history.Pairs) != 3 {
-		t.Fatalf("expected 3 pairs, but got %v", len(history.Pairs))
+	if len(history.Pairs) != 4 {
+		t.Fatalf("expected 4 pairs, but got %v", len(history.Pairs))
 	}

 	// Test reporting a success.
--- a/routing/result_interpretation.go
+++ b/routing/result_interpretation.go
@ -424,11 +424,13 @@ func (i *interpretedResult) failNode(rt *route.Route, idx int) {
 	// Mark the incoming connection as failed for the node. We intent to
 	// penalize as much as we can for a node level failure, including future
 	// outgoing traffic for this connection. The pair as it is returned by
-	// getPair is directed towards the failed node. Therefore we first
-	// reverse the pair. We don't want to affect the score of the node
-	// sending towards the failing node.
+	// getPair is penalized in the original and the reversed direction. Note
+	// that this will also affect the score of the failing node's peers.
+	// This is necessary to prevent future routes from keep going into the
+	// same node again.
 	incomingChannelIdx := idx - 1
 	inPair, _ := getPair(rt, incomingChannelIdx)
+	i.pairResults[inPair] = failPairResult(0)
 	i.pairResults[inPair.Reverse()] = failPairResult(0)

 	// If not the ultimate node, mark the outgoing connection as failed for
@ -437,6 +439,7 @@ func (i *interpretedResult) failNode(rt *route.Route, idx int) {
 		outgoingChannelIdx := idx
 		outPair, _ := getPair(rt, outgoingChannelIdx)
 		i.pairResults[outPair] = failPairResult(0)
+		i.pairResults[outPair.Reverse()] = failPairResult(0)
 	}
 }

--- a/routing/result_interpretation_test.go
+++ b/routing/result_interpretation_test.go
@ -165,6 +165,8 @@ var resultTestCases = []resultTestCase{
 			pairResults: map[DirectedNodePair]pairResult{
 				getTestPair(1, 0): failPairResult(0),
 				getTestPair(1, 2): failPairResult(0),
+				getTestPair(0, 1): failPairResult(0),
+				getTestPair(2, 1): failPairResult(0),
 			},
 		},
 	},
@ -182,6 +184,7 @@ var resultTestCases = []resultTestCase{
 			nodeFailure:        &hops[1],
 			pairResults: map[DirectedNodePair]pairResult{
 				getTestPair(1, 0): failPairResult(0),
+				getTestPair(0, 1): failPairResult(0),
 			},
 		},
 	},
@ -233,6 +236,7 @@ var resultTestCases = []resultTestCase{
 					amt:     97,
 				},
 				getTestPair(4, 3): {},
+				getTestPair(3, 4): {},
 			},
 			finalFailureReason: &reasonError,
 			nodeFailure:        &hops[4],
@ -257,6 +261,7 @@ var resultTestCases = []resultTestCase{
 					amt:     99,
 				},
 				getTestPair(3, 2): {},
+				getTestPair(2, 3): {},
 			},
 			finalFailureReason: &reasonError,
 			nodeFailure:        &hops[3],
@ -284,6 +289,8 @@ var resultTestCases = []resultTestCase{
 				},
 				getTestPair(3, 2): {},
 				getTestPair(3, 4): {},
+				getTestPair(2, 3): {},
+				getTestPair(4, 3): {},
 			},
 			nodeFailure: &hops[3],
 		},
@ -301,6 +308,7 @@ var resultTestCases = []resultTestCase{
 		expectedResult: &interpretedResult{
 			pairResults: map[DirectedNodePair]pairResult{
 				getTestPair(1, 0): {},
+				getTestPair(0, 1): {},
 			},
 			finalFailureReason: &reasonError,
 			nodeFailure:        &hops[1],