autopilot: betweenness centrality using Brandes' algorithm on simplified graph

This commit adds betweenness centrality to the available node metrics. Betweenness centrality is a per-node centrality measure which, for an arbitrary node v, equals the sum over each vertex pair k, s (where k != s != v) of the number of shortest paths between k and s going through v divided by the number of all shortest paths between k and s.
2019-12-20 12:54:59 +01:00 · 2019-12-20 12:54:59 +01:00 · 3fe9c70722
commit 3fe9c70722
parent be83d504f8
3 changed files with 434 additions and 0 deletions
--- a/autopilot/betweenness_centrality.go
+++ b/autopilot/betweenness_centrality.go
@ -0,0 +1,212 @@
+package autopilot
+
+// stack is a minimal LIFO container of ints. It exists only to keep the
+// Brandes' betweenness centrality implementation below easy to read.
+type stack struct {
+	stack []int
+}
+
+// push places v on top of the stack.
+func (s *stack) push(v int) {
+	s.stack = append(s.stack, v)
+}
+
+// top returns the most recently pushed element without removing it.
+// Calling it on an empty stack panics.
+func (s *stack) top() int {
+	last := len(s.stack) - 1
+	return s.stack[last]
+}
+
+// pop discards the most recently pushed element. Calling it on an empty
+// stack panics.
+func (s *stack) pop() {
+	last := len(s.stack) - 1
+	s.stack = s.stack[:last]
+}
+
+// empty reports whether the stack holds no elements.
+func (s *stack) empty() bool {
+	return len(s.stack) < 1
+}
+
+// queue is a minimal FIFO container of ints. It exists only to keep the
+// Brandes' betweenness centrality implementation below easy to read.
+type queue struct {
+	queue []int
+}
+
+// push appends v to the back of the queue.
+func (q *queue) push(v int) {
+	q.queue = append(q.queue, v)
+}
+
+// front returns the oldest element without removing it. Calling it on an
+// empty queue panics.
+func (q *queue) front() int {
+	head := q.queue[0]
+	return head
+}
+
+// pop discards the oldest element by re-slicing past it. Calling it on an
+// empty queue panics.
+func (q *queue) pop() {
+	rest := q.queue[1:]
+	q.queue = rest
+}
+
+// empty reports whether the queue holds no elements.
+func (q *queue) empty() bool {
+	return len(q.queue) < 1
+}
+
+// BetweennessCentrality is a NodeMetric that calculates node betweenness
+// centrality using Brandes' algorithm. Betweenness centrality for each node
+// is the number of shortest paths passing through that node, not counting
+// shortest paths starting or ending at that node. This is a useful metric
+// to measure control of individual nodes over the whole network.
+type BetweennessCentrality struct {
+	// centrality stores original (not normalized) centrality values for
+	// each node in the graph. It is populated by Refresh.
+	centrality map[NodeID]float64
+
+	// min is the minimum centrality in the graph. It is set by Refresh and
+	// used by GetMetric when normalizing.
+	min float64
+
+	// max is the maximum centrality in the graph. It is set by Refresh and
+	// used by GetMetric when normalizing.
+	max float64
+}
+
+// NewBetweennessCentralityMetric returns a pointer to a zero-valued
+// BetweennessCentrality. Refresh must be called before the metric holds any
+// centrality values.
+func NewBetweennessCentralityMetric() *BetweennessCentrality {
+	return new(BetweennessCentrality)
+}
+
+// Name returns the name of the metric.
+func (bc *BetweennessCentrality) Name() string {
+	return "betweeness_centrality"
+}
+
+// betweennessCentrality runs one iteration of Brandes' algorithm with s as
+// the source node and adds the resulting per-node contributions to the
+// passed centrality slice (indexed by node index).
+// A BFS from s first counts the shortest paths to every reachable node, then
+// the nodes are visited in reverse BFS order to accumulate the dependency
+// of s on each of them (Brandes' dependency trick).
+// For detailed explanation please read:
+// https://www.cl.cam.ac.uk/teaching/1617/MLRD/handbook/brandes.html
+func betweennessCentrality(g *SimpleGraph, s int, centrality []float64) {
+	numNodes := len(g.Nodes)
+
+	// preds[w] lists the nodes that immediately precede w on some
+	// shortest path starting at s.
+	preds := make([][]int, numNodes)
+
+	// pathCount[t] is the number of shortest paths between s and t.
+	pathCount := make([]int, numNodes)
+	pathCount[s] = 1
+
+	// depth[t] is the BFS distance between s and t, where -1 stands for
+	// "not reached yet" (infinity).
+	depth := make([]int, numNodes)
+	for i := range depth {
+		depth[i] = -1
+	}
+	depth[s] = 0
+
+	var (
+		visited  stack
+		frontier queue
+	)
+	frontier.push(s)
+
+	// BFS phase: fill pathCount and preds for every node reachable from
+	// s, recording the visiting order on the stack.
+	for !frontier.empty() {
+		cur := frontier.front()
+		frontier.pop()
+		visited.push(cur)
+
+		for _, next := range g.Adj[cur] {
+			// First time we see next: fix its distance and
+			// schedule it.
+			if depth[next] < 0 {
+				depth[next] = depth[cur] + 1
+				frontier.push(next)
+			}
+
+			// Only edges that lie on a shortest path contribute.
+			if depth[next] != depth[cur]+1 {
+				continue
+			}
+
+			pathCount[next] += pathCount[cur]
+			preds[next] = append(preds[next], cur)
+		}
+	}
+
+	// dependency[v] is the ratio of the shortest paths between s and t
+	// that go through v and the total number of shortest paths between s
+	// and t. Summing dependency[w] for each w != s gives the betweenness
+	// centrality contribution of this source.
+	dependency := make([]float64, numNodes)
+
+	// Accumulation phase: unwind the stack so that nodes farther from s
+	// are processed before the nodes closer to s.
+	for ; !visited.empty(); visited.pop() {
+		w := visited.top()
+
+		for _, v := range preds[w] {
+			// Update the dependency using Brandes' equation.
+			dependency[v] += (float64(pathCount[v]) / float64(pathCount[w])) * (1.0 + dependency[w])
+		}
+
+		// The source itself never counts towards its own centrality.
+		if w == s {
+			continue
+		}
+
+		centrality[w] += dependency[w]
+	}
+}
+
+// Refresh recaculates and stores centrality values.
+func (bc *BetweennessCentrality) Refresh(graph ChannelGraph) error {
+	cache, err := NewSimpleGraph(graph)
+	if err != nil {
+		return err
+	}
+
+	// TODO: parallelize updates to centrality.
+	centrality := make([]float64, len(cache.Nodes))
+	for node := range cache.Nodes {
+		betweennessCentrality(cache, node, centrality)
+	}
+
+	// Get min/max to be able to normalize
+	// centrality values between 0 and 1.
+	bc.min = 0
+	bc.max = 0
+	if len(centrality) > 0 {
+		for i := 1; i < len(centrality); i++ {
+			if centrality[i] < bc.min {
+				bc.min = centrality[i]
+			} else if centrality[i] > bc.max {
+				bc.max = centrality[i]
+			}
+		}
+	}
+
+	// Divide by two as this is an undirected graph.
+	bc.min /= 2.0
+	bc.max /= 2.0
+
+	bc.centrality = make(map[NodeID]float64)
+	for u, value := range centrality {
+		// Divide by two as this is an undirected graph.
+		bc.centrality[cache.Nodes[u]] = value / 2.0
+	}
+
+	return nil
+}
+
+// GetMetric returns the current centrality values for each node indexed
+// by node id.
+func (bc *BetweennessCentrality) GetMetric(normalize bool) map[NodeID]float64 {
+	// Normalization factor.
+	var z float64
+	if (bc.max - bc.min) > 0 {
+		z = 1.0 / (bc.max - bc.min)
+	}
+
+	centrality := make(map[NodeID]float64)
+	for k, v := range bc.centrality {
+		if normalize {
+			v = (v - bc.min) * z
+		}
+		centrality[k] = v
+	}
+
+	return centrality
+}
--- a/autopilot/betweenness_centrality_test.go
+++ b/autopilot/betweenness_centrality_test.go
@ -0,0 +1,156 @@
+package autopilot
+
+import (
+	"testing"
+
+	"github.com/btcsuite/btcd/btcec"
+	"github.com/btcsuite/btcutil"
+)
+
+// Tests that empty graph results in empty centrality result, both for raw
+// and for normalized values, for every graph backend listed in chanGraphs.
+func TestBetweennessCentralityEmptyGraph(t *testing.T) {
+	centralityMetric := NewBetweennessCentralityMetric()
+
+	for _, chanGraph := range chanGraphs {
+		success := t.Run(chanGraph.name, func(t1 *testing.T) {
+			// Create the graph inside the subtest so that failures
+			// and cleanup are attributed to this subtest. All
+			// failures below are reported on t1: calling the
+			// parent's Fatalf from within a subtest is not
+			// supported by the testing package.
+			graph, cleanup, err := chanGraph.genFunc()
+			if err != nil {
+				t1.Fatalf("unable to create graph: %v", err)
+			}
+			if cleanup != nil {
+				defer cleanup()
+			}
+
+			if err := centralityMetric.Refresh(graph); err != nil {
+				t1.Fatalf("unexpected failure during metric refresh: %v", err)
+			}
+
+			centrality := centralityMetric.GetMetric(false)
+			if len(centrality) > 0 {
+				t1.Fatalf("expected empty metric, got: %v", len(centrality))
+			}
+
+			centrality = centralityMetric.GetMetric(true)
+			if len(centrality) > 0 {
+				t1.Fatalf("expected empty metric, got: %v", len(centrality))
+			}
+		})
+		if !success {
+			break
+		}
+	}
+}
+
+// testGraphDesc is a helper type to describe a test graph.
+type testGraphDesc struct {
+	// nodes is the number of nodes to create; nodes are referred to by
+	// their index in [0, nodes).
+	nodes int
+
+	// edges maps a node index to the indexes of the nodes it gets a
+	// channel to.
+	edges map[int][]int
+}
+
+// buildTestGraph builds a test graph from a passed graph descriptor. It adds
+// desc.nodes random nodes to graph, then one random channel for every edge
+// listed in desc.edges, and returns the created public keys indexed by the
+// node index used in the descriptor. Any failure aborts the test via t.
+func buildTestGraph(t *testing.T,
+	graph testGraph, desc testGraphDesc) map[int]*btcec.PublicKey {
+
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	nodes := make(map[int]*btcec.PublicKey, desc.nodes)
+
+	for i := 0; i < desc.nodes; i++ {
+		key, err := graph.addRandNode()
+		if err != nil {
+			t.Fatalf("cannot create random node: %v", err)
+		}
+
+		nodes[i] = key
+	}
+
+	const chanCapacity = btcutil.SatoshiPerBitcoin
+	for u, neighbors := range desc.edges {
+		for _, v := range neighbors {
+			_, _, err := graph.addRandChannel(nodes[u], nodes[v], chanCapacity)
+			if err != nil {
+				t.Fatalf("unexpected error while adding random channel: %v", err)
+			}
+		}
+	}
+
+	return nodes
+}
+
+// Test betweenness centrality calculation using an example graph. The same
+// graph is built on every backend listed in chanGraphs and both the raw and
+// the normalized centrality values are compared to precomputed expectations.
+func TestBetweennessCentralityWithNonEmptyGraph(t *testing.T) {
+	graphDesc := testGraphDesc{
+		nodes: 9,
+		edges: map[int][]int{
+			0: {1, 2, 3},
+			1: {2},
+			2: {3},
+			3: {4, 5},
+			4: {5, 6, 7},
+			5: {6, 7},
+			6: {7, 8},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		normalize  bool
+		centrality []float64
+	}{
+		{
+			normalize: true,
+			centrality: []float64{
+				0.2, 0.0, 0.2, 1.0, 0.4, 0.4, 7.0 / 15.0, 0.0, 0.0,
+			},
+		},
+		{
+			normalize: false,
+			centrality: []float64{
+				3.0, 0.0, 3.0, 15.0, 6.0, 6.0, 7.0, 0.0, 0.0,
+			},
+		},
+	}
+
+	for _, chanGraph := range chanGraphs {
+		success := t.Run(chanGraph.name, func(t1 *testing.T) {
+			// Create the graph inside the subtest so that the
+			// deferred cleanup runs when this subtest finishes
+			// instead of piling up until the whole test returns.
+			graph, cleanup, err := chanGraph.genFunc()
+			if err != nil {
+				t1.Fatalf("unable to create graph: %v", err)
+			}
+			if cleanup != nil {
+				defer cleanup()
+			}
+
+			centralityMetric := NewBetweennessCentralityMetric()
+			graphNodes := buildTestGraph(t1, graph, graphDesc)
+
+			if err := centralityMetric.Refresh(graph); err != nil {
+				t1.Fatalf("error while calculating betweeness centrality: %v", err)
+			}
+			for _, test := range tests {
+				test := test
+				centrality := centralityMetric.GetMetric(test.normalize)
+
+				// Failures are reported on t1: calling the
+				// parent's Fatalf from within a subtest is not
+				// supported by the testing package.
+				if len(centrality) != graphDesc.nodes {
+					t1.Fatalf("expected %v values, got: %v",
+						graphDesc.nodes, len(centrality))
+				}
+
+				for node, nodeCentrality := range test.centrality {
+					nodeID := NewNodeID(graphNodes[node])
+					calculatedCentrality, ok := centrality[nodeID]
+					if !ok {
+						t1.Fatalf("no result for node: %x (%v)", nodeID, node)
+					}
+
+					if nodeCentrality != calculatedCentrality {
+						t1.Errorf("centrality for node: %v should be %v, got: %v",
+							node, test.centrality[node], calculatedCentrality)
+					}
+				}
+			}
+		})
+		if !success {
+			break
+		}
+	}
+}
--- a/autopilot/simple_graph.go
+++ b/autopilot/simple_graph.go
@ -0,0 +1,66 @@
+package autopilot
+
+// SimpleGraph stores a simplified adj graph of a channel graph to speed
+// up graph processing by eliminating all unnecessary hashing and map access.
+type SimpleGraph struct {
+	// Nodes is a slice mapping each node index to its NodeID.
+	Nodes []NodeID
+
+	// Adj stores nodes and neighbors in an adjacency list: Adj[u] holds
+	// the node indexes of the neighbors of the node with index u.
+	Adj [][]int
+}
+
+// NewSimpleGraph creates a simplified graph from the current channel graph.
+// Returns an error if the channel graph iteration fails due to underlying
+// failure.
+func NewSimpleGraph(g ChannelGraph) (*SimpleGraph, error) {
+	nodes := make(map[NodeID]int)
+	adj := make(map[int][]int)
+	nextIndex := 0
+
+	// getNodeIndex returns the integer index of the passed node.
+	// The returned index is then used to create a simplifed adjacency list
+	// where each node is identified by its index instead of its pubkey, and
+	// also to create a mapping from node index to node pubkey.
+	getNodeIndex := func(node Node) int {
+		key := NodeID(node.PubKey())
+		nodeIndex, ok := nodes[key]
+
+		if !ok {
+			nodes[key] = nextIndex
+			nodeIndex = nextIndex
+			nextIndex++
+		}
+
+		return nodeIndex
+	}
+
+	// Iterate over each node and each channel and update the adj and the node
+	// index.
+	err := g.ForEachNode(func(node Node) error {
+		u := getNodeIndex(node)
+
+		return node.ForEachChannel(func(edge ChannelEdge) error {
+			v := getNodeIndex(edge.Peer)
+
+			adj[u] = append(adj[u], v)
+			return nil
+		})
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	graph := &SimpleGraph{
+		Nodes: make([]NodeID, len(nodes)),
+		Adj:   make([][]int, len(nodes)),
+	}
+
+	// Fill the adj and the node index to node pubkey mapping.
+	for nodeID, nodeIndex := range nodes {
+		graph.Adj[nodeIndex] = adj[nodeIndex]
+		graph.Nodes[nodeIndex] = nodeID
+	}
+
+	return graph, nil
+}