chanfitness: cool down flap count for rate limiting

Since we store all-time flap count for a peer, we add a cooldown factor which will discount poor flap counts in the past. This is only applied to peers that have not flapped for at least a cooldown period, so that we do not downgrade our rate limiting for badly behaved peers.
2020-09-08 13:47:22 +02:00 · 2020-09-08 13:47:22 +02:00 · 6cf66aea47
commit 6cf66aea47
parent a550ca3d64
4 changed files with 155 additions and 5 deletions
--- a/chanfitness/chanevent.go
+++ b/chanfitness/chanevent.go
@ -117,6 +117,16 @@ func newChannelInfo(openedAt time.Time) *channelInfo {
 func (p *peerLog) onlineEvent(online bool) {
 	eventTime := p.clock.Now()

+	// If we have a non-nil last flap time, potentially apply a cooldown
+	// factor to the peer's flap count before we rate limit it. This allows
+	// us to decrease the penalty for historical flaps over time, provided
+	// the peer has not flapped for a while.
+	if p.lastFlap != nil {
+		p.flapCount = cooldownFlapCount(
+			p.clock.Now(), p.flapCount, *p.lastFlap,
+		)
+	}
+
 	// Record flap count information and online state regardless of whether
 	// we have any channels open with this peer.
 	p.flapCount++
--- a/chanfitness/chanevent_test.go
+++ b/chanfitness/chanevent_test.go
@ -204,6 +204,47 @@ func TestRateLimitAdd(t *testing.T) {
 		require.Equal(t, []*event{peerEvent}, peerLog.onlineEvents)
 		require.Equal(t, nextEvent, peerLog.stagedEvent)
 	}
+
+	// Now, we test the case where a peer's flap count is cooled down
+	// because it has not flapped for a while. Set our peer's flap count so
+	// that we fall within our second rate limiting tier and assert that we
+	// are at this level.
+	peerLog.flapCount = rateLimitScale + 1
+	rateLimit := getRateLimit(peerLog.flapCount)
+	require.Equal(t, rateLimits[1], rateLimit)
+
+	// Progress our clock to the point where we will have our flap count
+	// cooled.
+	newNow = mockedClock.Now().Add(flapCountCooldownPeriod)
+	mockedClock.SetTime(newNow)
+
+	// Add an online event, and expect it to be staged.
+	onlineEvent := &event{
+		timestamp: newNow,
+		eventType: peerOnlineEvent,
+	}
+	peerLog.onlineEvent(true)
+	require.Equal(t, onlineEvent, peerLog.stagedEvent)
+
+	// Progress our clock by the rate limit level that we will be on if
+	// our flap rate is cooled down to a lower level.
+	newNow = mockedClock.Now().Add(rateLimits[0] + 1)
+	mockedClock.SetTime(newNow)
+
+	// Add another event. We expect this event to be staged and our previous
+	// event to be flushed to the event log (because our cooldown has been
+	// applied).
+	offlineEvent := &event{
+		timestamp: newNow,
+		eventType: peerOfflineEvent,
+	}
+	peerLog.onlineEvent(false)
+	require.Equal(t, offlineEvent, peerLog.stagedEvent)
+
+	flushedEventIdx := len(peerLog.onlineEvents) - 1
+	require.Equal(
+		t, onlineEvent, peerLog.onlineEvents[flushedEventIdx],
+	)
 }

 // TestGetOnlinePeriod tests the getOnlinePeriod function. It tests the case
--- a/chanfitness/rate_limit.go
+++ b/chanfitness/rate_limit.go
@ -1,11 +1,25 @@
 package chanfitness

-import "time"
+import (
+	"math"
+	"time"
+)

-// rateLimitScale is the number of events we allow per rate limited tier.
-// Increasing this value makes our rate limiting more lenient, decreasing it
-// makes us less lenient.
-const rateLimitScale = 200
+const (
+	// rateLimitScale is the number of events we allow per rate limited
+	// tier. Increasing this value makes our rate limiting more lenient,
+	// decreasing it makes us less lenient.
+	rateLimitScale = 200
+
+	// flapCountCooldownFactor is the factor by which we decrease a peer's
+	// flap count if they have not flapped for the cooldown period.
+	flapCountCooldownFactor = 0.95
+
+	// flapCountCooldownPeriod is the amount of time that we require a peer
+	// has not flapped for before we reduce their all time flap count using
+	// our cooldown factor.
+	flapCountCooldownPeriod = time.Hour * 8
+)

 // rateLimits is the set of rate limit tiers we apply to our peers based on
 // their flap count. A peer can be placed in their tier by dividing their flap
@ -35,3 +49,34 @@ func getRateLimit(flapCount int) time.Duration {

 	return rateLimits[tier]
 }
+
+// cooldownFlapCount takes a timestamped flap count, and returns its value
+// scaled down by our cooldown factor if at least our cooldown period has
+// elapsed since the peer last flapped. We do this because we store all-time
+// flap count for peers, and want to allow downgrading of peers that have not
+// flapped for a long time.
+func cooldownFlapCount(now time.Time, flapCount int,
+	lastFlap time.Time) int {
+
+	// Calculate time since our last flap, and the number of times we need
+	// to apply our cooldown factor.
+	timeSinceFlap := now.Sub(lastFlap)
+
+	// If our cooldown period has not elapsed yet, we just return our flap
+	// count. We allow fractional cooldown periods once this period has
+	// elapsed, so we do not want to apply a fractional cooldown before the
+	// full cooldown period has elapsed.
+	if timeSinceFlap < flapCountCooldownPeriod {
+		return flapCount
+	}
+
+	// Get the factor by which we need to cooldown our flap count. If
+	// insufficient time has passed to cooldown our flap count. Use use a
+	// float so that we allow fractional cooldown periods.
+	cooldownPeriods := float64(timeSinceFlap) /
+		float64(flapCountCooldownPeriod)
+
+	effectiveFactor := math.Pow(flapCountCooldownFactor, cooldownPeriods)
+
+	return int(float64(flapCount) * effectiveFactor)
+}
--- a/chanfitness/rate_limit_test.go
+++ b/chanfitness/rate_limit_test.go
@ -49,3 +49,57 @@ func TestGetRateLimit(t *testing.T) {
 		})
 	}
 }
+
+// TestCooldownFlapCount tests cooldown of all time flap counts.
+func TestCooldownFlapCount(t *testing.T) {
+	tests := []struct {
+		name      string
+		flapCount int
+		lastFlap  time.Time
+		expected  int
+	}{
+		{
+			name:      "just flapped, do not cooldown",
+			flapCount: 1,
+			lastFlap:  testNow,
+			expected:  1,
+		},
+		{
+			name:      "period not elapsed, do not cooldown",
+			flapCount: 1,
+			lastFlap:  testNow.Add(flapCountCooldownPeriod / 2 * -1),
+			expected:  1,
+		},
+		{
+			name:      "rounded to 0",
+			flapCount: 1,
+			lastFlap:  testNow.Add(flapCountCooldownPeriod * -1),
+			expected:  0,
+		},
+		{
+			name:      "decreased to integer value",
+			flapCount: 10,
+			lastFlap:  testNow.Add(flapCountCooldownPeriod * -1),
+			expected:  9,
+		},
+		{
+			name:      "multiple cooldown periods",
+			flapCount: 10,
+			lastFlap:  testNow.Add(flapCountCooldownPeriod * -3),
+			expected:  8,
+		},
+	}
+
+	for _, test := range tests {
+		test := test
+
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+
+			flapCount := cooldownFlapCount(
+				testNow, test.flapCount, test.lastFlap,
+			)
+			require.Equal(t, test.expected, flapCount)
+		})
+	}
+}