Merge pull request #2293 from halseth/unit-tests-weighted-choice

[autopilot] weighted choice unit tests and optimizations
Olaoluwa Osuntokun 2019-01-07 19:42:29 -08:00 committed by GitHub
commit 21460c9e67
3 changed files with 386 additions and 30 deletions

autopilot/agent.go
@@ -551,7 +551,7 @@ func (a *Agent) openChans(availableFunds btcutil.Amount, numChans uint32,
// Now use the score to make a weighted choice which
// nodes to attempt to open channels to.
- chanCandidates, err := chooseN(int(numChans), scores)
+ chanCandidates, err := chooseN(numChans, scores)
if err != nil {
return fmt.Errorf("Unable to make weighted choice: %v",
err)

autopilot/choice.go
@@ -10,28 +10,23 @@ import (
// weights left to choose from.
var ErrNoPositive = errors.New("no positive weights left")
- // weightedChoice draws a random index from the map of channel candidates, with
- // a probability proportional to their score.
- func weightedChoice(s map[NodeID]*AttachmentDirective) (NodeID, error) {
- // Calculate the sum of scores found in the map.
+ // weightedChoice draws a random index from the slice of weights, with a
+ // probability proportional to the weight at the given index.
+ func weightedChoice(w []float64) (int, error) {
+ // Calculate the sum of weights.
var sum float64
- for _, v := range s {
- sum += v.Score
+ for _, v := range w {
+ sum += v
}
if sum <= 0 {
- return NodeID{}, ErrNoPositive
+ return 0, ErrNoPositive
}
- // Create a map of normalized scores such that they sum to 1.0.
- norm := make(map[NodeID]float64)
- for k, v := range s {
- norm[k] = v.Score / sum
- }
- // Pick a random number in the range [0.0, 1.0), and iterate the map
- // until the number goes below 0. This means that each index is picked
- // with a probability equal to its normalized score.
+ // Pick a random number in the range [0.0, 1.0) and multiply it with
+ // the sum of weights. Then we'll iterate the weights until the number
+ // goes below 0. This means that each index is picked with a probability
+ // equal to its normalized score.
//
// Example:
// Items with scores [1, 5, 2, 2]
@@ -40,40 +35,52 @@ func weightedChoice(s map[NodeID]*AttachmentDirective) (NodeID, error) {
// in [0, 1.0]:
// [|-0.1-||-----0.5-----||--0.2--||--0.2--|]
// The following loop is now equivalent to "hitting" the intervals.
- r := rand.Float64()
- for k, v := range norm {
- r -= v
+ r := rand.Float64() * sum
+ for i := range w {
+ r -= w[i]
if r <= 0 {
- return k, nil
+ return i, nil
}
}
- return NodeID{}, fmt.Errorf("unable to make choice")
+ return 0, fmt.Errorf("unable to make choice")
}
// chooseN picks at random min[n, len(s)] nodes from the
// AttachmentDirectives map, with a probability weighted by their score.
- func chooseN(n int, s map[NodeID]*AttachmentDirective) (
+ func chooseN(n uint32, s map[NodeID]*AttachmentDirective) (
map[NodeID]*AttachmentDirective, error) {
- // Keep a map of nodes not yet chosen.
- rem := make(map[NodeID]*AttachmentDirective)
+ // Keep track of the number of nodes not yet chosen, in addition to
+ // their scores and NodeIDs.
+ rem := len(s)
+ scores := make([]float64, len(s))
+ nodeIDs := make([]NodeID, len(s))
+ i := 0
for k, v := range s {
- rem[k] = v
+ scores[i] = v.Score
+ nodeIDs[i] = k
+ i++
}
// Pick a weighted choice from the remaining nodes as long as there are
// nodes left, and we haven't already picked n.
chosen := make(map[NodeID]*AttachmentDirective)
- for len(chosen) < n && len(rem) > 0 {
- choice, err := weightedChoice(rem)
+ for len(chosen) < int(n) && rem > 0 {
+ choice, err := weightedChoice(scores)
if err == ErrNoPositive {
return chosen, nil
} else if err != nil {
return nil, err
}
- chosen[choice] = rem[choice]
- delete(rem, choice)
+ nID := nodeIDs[choice]
+ chosen[nID] = s[nID]
+ // We set the score of the chosen node to 0, so it won't be
+ // picked in the next iteration.
+ scores[choice] = 0
}
return chosen, nil

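To make the new slice-based behaviour concrete, here is a small standalone program (illustrative only, not part of this commit) that runs the same algorithm as the new weightedChoice on the [1, 5, 2, 2] example from the comment above; index 1 should come out in roughly half of the draws:

package main

import (
	"fmt"
	"math/rand"
)

// weightedChoice mirrors the slice-based version from autopilot/choice.go:
// draw r uniformly in [0, sum) and walk the weights until r drops below zero.
func weightedChoice(w []float64) (int, error) {
	var sum float64
	for _, v := range w {
		sum += v
	}
	if sum <= 0 {
		return 0, fmt.Errorf("no positive weights left")
	}

	r := rand.Float64() * sum
	for i := range w {
		r -= w[i]
		if r <= 0 {
			return i, nil
		}
	}
	return 0, fmt.Errorf("unable to make choice")
}

func main() {
	// The example from the code comment: scores [1, 5, 2, 2] normalize to
	// [0.1, 0.5, 0.2, 0.2], so index 1 should be hit about 50% of the time.
	weights := []float64{1, 5, 2, 2}
	counts := make([]int, len(weights))

	const iterations = 100000
	for i := 0; i < iterations; i++ {
		idx, err := weightedChoice(weights)
		if err != nil {
			panic(err)
		}
		counts[idx]++
	}

	for i, c := range counts {
		fmt.Printf("index %d picked %.3f of the time\n",
			i, float64(c)/iterations)
	}
}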
autopilot/choice_test.go (new file, 349 lines)

@@ -0,0 +1,349 @@
package autopilot
import (
"encoding/binary"
"math/rand"
"reflect"
"testing"
"testing/quick"
)
var (
nID1 = NodeID([33]byte{1})
nID2 = NodeID([33]byte{2})
nID3 = NodeID([33]byte{3})
nID4 = NodeID([33]byte{4})
)
// TestWeightedChoiceEmptyMap tests that passing in an empty slice of weights
// returns an error.
func TestWeightedChoiceEmptyMap(t *testing.T) {
t.Parallel()
var w []float64
_, err := weightedChoice(w)
if err != ErrNoPositive {
t.Fatalf("expected ErrNoPositive when choosing in "+
"empty map, instead got %v", err)
}
}
// singleNonZero is a type used to generate float64 slices with one non-zero
// element.
type singleNonZero []float64
// Generate generates a value of type singleNonZero to be used during
// QuickTests.
func (singleNonZero) Generate(rand *rand.Rand, size int) reflect.Value {
w := make([]float64, size)
// Pick a random index and set it to a random float.
i := rand.Intn(size)
w[i] = rand.Float64()
return reflect.ValueOf(w)
}
// TestWeightedChoiceSingleIndex tests that choosing randomly in a slice with
// one positive element always returns that one index.
func TestWeightedChoiceSingleIndex(t *testing.T) {
t.Parallel()
// Helper that reports whether exactly one element is non-zero, and
// returns the index of that element.
allButOneZero := func(weights []float64) (bool, int) {
var (
numZero uint32
nonZeroEl int
)
for i, w := range weights {
if w != 0 {
numZero++
nonZeroEl = i
}
}
return numZero == 1, nonZeroEl
}
property := func(weights singleNonZero) bool {
// Make sure the generated slice has exactly one non-zero
// element.
conditionMet, nonZeroElem := allButOneZero(weights[:])
if !conditionMet {
return false
}
// Call weightedChoice and assert it picks the non-zero
// element.
choice, err := weightedChoice(weights[:])
if err != nil {
return false
}
return choice == nonZeroElem
}
if err := quick.Check(property, nil); err != nil {
t.Fatal(err)
}
}
// nonNegative is a type used to generate float64 slices with non-negative
// elements.
type nonNegative []float64
// Generate generates a value of type nonNegative to be used during
// QuickTests.
func (nonNegative) Generate(rand *rand.Rand, size int) reflect.Value {
const precision = 100
w := make([]float64, size)
for i := range w {
r := rand.Float64()
// For very small weights, checking the deviation from the
// expected value won't work, so we set them to zero.
if r < 0.01*float64(size) {
r = 0
}
w[i] = float64(r)
}
return reflect.ValueOf(w)
}
func assertChoice(w []float64, iterations int) bool {
var sum float64
for _, v := range w {
sum += v
}
// Calculate the expected frequency of each choice.
expFrequency := make([]float64, len(w))
for i, ww := range w {
expFrequency[i] = ww / sum
}
chosen := make(map[int]int)
for i := 0; i < iterations; i++ {
res, err := weightedChoice(w)
if err != nil {
return false
}
chosen[res]++
}
// Since this is random we check that the number of times chosen is
// within 20% of the expected value.
totalChoices := 0
for i, f := range expFrequency {
exp := float64(iterations) * f
v := float64(chosen[i])
totalChoices += chosen[i]
expHigh := exp + exp/5
expLow := exp - exp/5
if v < expLow || v > expHigh {
return false
}
}
// The total number of choices must of course be exactly iterations.
if totalChoices != iterations {
return false
}
return true
}
// TestWeightedChoiceDistribution asserts that the weighted choice algorithm
// chooses among indexes according to their scores.
func TestWeightedChoiceDistribution(t *testing.T) {
const iterations = 100000
property := func(weights nonNegative) bool {
return assertChoice(weights, iterations)
}
if err := quick.Check(property, nil); err != nil {
t.Fatal(err)
}
}
// TestChooseNEmptyMap checks that chooseN returns an empty result when there
// are no nodes to choose among.
func TestChooseNEmptyMap(t *testing.T) {
t.Parallel()
nodes := map[NodeID]*AttachmentDirective{}
property := func(n uint32) bool {
res, err := chooseN(n, nodes)
if err != nil {
return false
}
// Result should always be empty.
return len(res) == 0
}
if err := quick.Check(property, nil); err != nil {
t.Fatal(err)
}
}
// candidateMapVarLen is a type we'll use to generate maps of various lengths
// up to 255 to be used during QuickTests.
type candidateMapVarLen map[NodeID]*AttachmentDirective
// Generate generates a value of type candidateMapVarLen to be used during
// QuickTests.
func (candidateMapVarLen) Generate(rand *rand.Rand, size int) reflect.Value {
nodes := make(map[NodeID]*AttachmentDirective)
// To avoid creating huge maps, we restrict them to max uint8 len.
n := uint8(rand.Uint32())
for i := uint8(0); i < n; i++ {
s := rand.Float64()
// We set small values to zero, to ensure we handle these
// correctly.
if s < 0.01 {
s = 0
}
var nID [33]byte
binary.BigEndian.PutUint32(nID[:], uint32(i))
nodes[nID] = &AttachmentDirective{
Score: s,
}
}
return reflect.ValueOf(nodes)
}
// TestChooseNMinimum tests that chooseN returns the minimum of the number of
// nodes we request and the number of positively scored nodes in the given map.
func TestChooseNMinimum(t *testing.T) {
t.Parallel()
// Helper to count the number of positive scores in the given map.
numPositive := func(nodes map[NodeID]*AttachmentDirective) int {
cnt := 0
for _, v := range nodes {
if v.Score > 0 {
cnt++
}
}
return cnt
}
// We let the type of n be uint8 to avoid generating huge numbers.
property := func(nodes candidateMapVarLen, n uint8) bool {
res, err := chooseN(uint32(n), nodes)
if err != nil {
return false
}
positive := numPositive(nodes)
// Result should always be the minimum of the number of nodes
// we wanted to select and the number of positively scored
// nodes in the map.
min := positive
if int(n) < min {
min = int(n)
}
if len(res) != min {
return false
}
return true
}
if err := quick.Check(property, nil); err != nil {
t.Fatal(err)
}
}
// TestChooseNSample sanity checks that nodes are picked by chooseN according
// to their scores.
func TestChooseNSample(t *testing.T) {
t.Parallel()
const numNodes = 500
const maxIterations = 100000
fifth := uint32(numNodes / 5)
nodes := make(map[NodeID]*AttachmentDirective)
// We make 5 buckets of nodes with scores 0, 0.1, 0.2, 0.4 and 0.8. We
// want to check that nodes with a zero score never get chosen, while
// doubling the score makes a node get chosen about twice as often
// (this is true only when n <<< numNodes).
j := 2 * fifth
score := 0.1
for i := uint32(0); i < numNodes; i++ {
// Each time i surpasses j we double the score we give to the
// next fifth of nodes.
if i >= j {
score *= 2
j += fifth
}
s := score
// The first 1/5 of nodes we give a score of 0.
if i < fifth {
s = 0
}
var nID [33]byte
binary.BigEndian.PutUint32(nID[:], i)
nodes[nID] = &AttachmentDirective{
Score: s,
}
}
// For each value of N we'll check that the nodes are picked the
// expected number of times over time.
for _, n := range []uint32{1, 5, 10, 20, 50} {
// Since choosing more nodes will result in chooseN getting
// slower we decrease the number of iterations. This is okay
// since the variance in the total picks for a node will be
// lower when choosing more nodes each time.
iterations := maxIterations / n
count := make(map[NodeID]int)
for i := 0; i < int(iterations); i++ {
res, err := chooseN(n, nodes)
if err != nil {
t.Fatalf("failed choosing nodes: %v", err)
}
for nID := range res {
count[nID]++
}
}
// Sum the number of times a node in each score bucket was
// picked.
sums := make(map[float64]int)
for nID, s := range nodes {
sums[s.Score] += count[nID]
}
// The count of each bucket should be about double that of the
// previous bucket. Since this is all random, we check that the
// result is within 20% of the expected value.
for _, score := range []float64{0.2, 0.4, 0.8} {
cnt := sums[score]
half := cnt / 2
expLow := half - half/5
expHigh := half + half/5
if sums[score/2] < expLow || sums[score/2] > expHigh {
t.Fatalf("expected the nodes with score %v "+
"to be chosen about %v times, instead "+
"was %v", score/2, half, sums[score/2])
}
}
}
}
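To see why doubling the score should roughly double the pick count in TestChooseNSample (an illustrative calculation, not part of the test file): the five buckets contribute total weight 100 * (0 + 0.1 + 0.2 + 0.4 + 0.8) = 150, so a single weighted draw lands in the 0.1, 0.2, 0.4 and 0.8 buckets with probability about 6.7%, 13.3%, 26.7% and 53.3% respectively, and never in the zero bucket:

package main

import "fmt"

func main() {
	// Mirror the five score buckets from TestChooseNSample: 100 nodes each
	// with scores 0, 0.1, 0.2, 0.4 and 0.8.
	const nodesPerBucket = 100
	scores := []float64{0, 0.1, 0.2, 0.4, 0.8}

	var total float64
	for _, s := range scores {
		total += nodesPerBucket * s
	}

	// Probability that a single weighted draw lands in each bucket. Each
	// positive bucket is twice as likely as the previous one, which is what
	// the test checks (within 20%) when n is much smaller than numNodes.
	for _, s := range scores {
		fmt.Printf("score %.1f bucket: %.1f%% of single draws\n",
			s, 100*nodesPerBucket*s/total)
	}
}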