Merge pull request #1817 from halseth/autopilot-node-update

Reduce goroutine overhead on autopilot updates
Olaoluwa Osuntokun 2018-09-05 18:08:37 -07:00 committed by GitHub
commit b0347879db

@@ -117,6 +117,29 @@ type Agent struct {
 	// affect the heuristics of the agent will be sent over.
 	stateUpdates chan interface{}
 
+	// balanceUpdates is a channel where notifications about updates to the
+	// wallet's balance will be sent. This channel will be buffered to
+	// ensure we have at most one pending update of this type to handle at
+	// a given time.
+	balanceUpdates chan *balanceUpdate
+
+	// nodeUpdates is a channel that changes to the graph node landscape
+	// will be sent over. This channel will be buffered to ensure we have
+	// at most one pending update of this type to handle at a given time.
+	nodeUpdates chan *nodeUpdates
+
+	// pendingOpenUpdates is a channel where updates about channel pending
+	// opening will be sent. This channel will be buffered to ensure we
+	// have at most one pending update of this type to handle at a given
+	// time.
+	pendingOpenUpdates chan *chanPendingOpenUpdate
+
+	// chanOpenFailures is a channel where updates about channel open
+	// failures will be sent. This channel will be buffered to ensure we
+	// have at most one pending update of this type to handle at a given
+	// time.
+	chanOpenFailures chan *chanOpenFailureUpdate
+
 	// totalBalance is the total number of satoshis the backing wallet is
 	// known to control at any given instance. This value will be updated
 	// when the agent receives external balance update signals.
@@ -132,10 +155,14 @@ type Agent struct {
 // the backing Lightning Node.
 func New(cfg Config, initialState []Channel) (*Agent, error) {
 	a := &Agent{
-		cfg:          cfg,
-		chanState:    make(map[lnwire.ShortChannelID]Channel),
-		quit:         make(chan struct{}),
-		stateUpdates: make(chan interface{}),
+		cfg:                cfg,
+		chanState:          make(map[lnwire.ShortChannelID]Channel),
+		quit:               make(chan struct{}),
+		stateUpdates:       make(chan interface{}),
+		balanceUpdates:     make(chan *balanceUpdate, 1),
+		nodeUpdates:        make(chan *nodeUpdates, 1),
+		chanOpenFailures:   make(chan *chanOpenFailureUpdate, 1),
+		pendingOpenUpdates: make(chan *chanPendingOpenUpdate, 1),
 	}
 
 	for _, c := range initialState {
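Taken together, these two hunks implement a coalescing signal: each update type gets its own channel with a buffer of exactly one, so any burst of notifications leaves at most one pending signal for the controller to handle. A minimal, self-contained sketch of the pattern (the `signal` type and `notify` helper are illustrative, not part of lnd):

```go
package main

import "fmt"

// signal coalesces notifications: with a buffer of one, any number of
// concurrent sends collapse into at most one pending update.
type signal chan struct{}

// notify performs a non-blocking send. If a signal is already queued,
// the new one is dropped; the consumer re-reads current state when it
// wakes up, so nothing is lost.
func (s signal) notify() {
	select {
	case s <- struct{}{}:
	default:
	}
}

func main() {
	balanceUpdates := make(signal, 1)

	// A burst of 1000 notifications queues exactly one signal and
	// spawns zero goroutines.
	for i := 0; i < 1000; i++ {
		balanceUpdates.notify()
	}
	fmt.Println("pending signals:", len(balanceUpdates)) // prints 1

	<-balanceUpdates // the consumer drains the single coalesced update
	fmt.Println("pending signals:", len(balanceUpdates)) // prints 0
}
```

Dropping duplicate notifications is safe here because the signals carry no payload: the controller recomputes everything from the agent's current state on each wakeup.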
@@ -206,32 +233,22 @@ type chanCloseUpdate struct {
 	closedChans []lnwire.ShortChannelID
 }
 
-// OnBalanceChange is a callback that should be executed each time the balance of
-// the backing wallet changes.
+// OnBalanceChange is a callback that should be executed each time the balance
+// of the backing wallet changes.
 func (a *Agent) OnBalanceChange() {
-	a.wg.Add(1)
-	go func() {
-		defer a.wg.Done()
-
-		select {
-		case a.stateUpdates <- &balanceUpdate{}:
-		case <-a.quit:
-		}
-	}()
+	select {
+	case a.balanceUpdates <- &balanceUpdate{}:
+	default:
+	}
 }
 
 // OnNodeUpdates is a callback that should be executed each time our channel
 // graph has new nodes or their node announcements are updated.
 func (a *Agent) OnNodeUpdates() {
-	a.wg.Add(1)
-	go func() {
-		defer a.wg.Done()
-
-		select {
-		case a.stateUpdates <- &nodeUpdates{}:
-		case <-a.quit:
-		}
-	}()
+	select {
+	case a.nodeUpdates <- &nodeUpdates{}:
+	default:
+	}
 }
 
 // OnChannelOpen is a callback that should be executed each time a new channel
@@ -252,27 +269,20 @@ func (a *Agent) OnChannelOpen(c Channel) {
 // channel is opened, either by the agent or an external subsystems, but is
 // still pending.
 func (a *Agent) OnChannelPendingOpen() {
-	go func() {
-		select {
-		case a.stateUpdates <- &chanPendingOpenUpdate{}:
-		case <-a.quit:
-		}
-	}()
+	select {
+	case a.pendingOpenUpdates <- &chanPendingOpenUpdate{}:
+	default:
+	}
}
 
 // OnChannelOpenFailure is a callback that should be executed when the
 // autopilot has attempted to open a channel, but failed. In this case we can
 // retry channel creation with a different node.
 func (a *Agent) OnChannelOpenFailure() {
-	a.wg.Add(1)
-	go func() {
-		defer a.wg.Done()
-
-		select {
-		case a.stateUpdates <- &chanOpenFailureUpdate{}:
-		case <-a.quit:
-		}
-	}()
+	select {
+	case a.chanOpenFailures <- &chanOpenFailureUpdate{}:
+	default:
+	}
 }
 
 // OnChannelClose is a callback that should be executed each time a prior
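For contrast with the removed code: each callback used to park a goroutine on the unbuffered stateUpdates channel until the controller got around to reading it, so a flurry of graph or balance events could pile up many blocked goroutines. A rough sketch of the difference (hypothetical notifyOld/notifyNew helpers; the old path's quit handling and WaitGroup accounting are elided):

```go
package main

import (
	"fmt"
	"runtime"
)

// notifyOld mimics the old callbacks: one goroutine per notification,
// parked on an unbuffered channel until the controller receives it.
func notifyOld(updates chan struct{}) {
	go func() {
		updates <- struct{}{}
	}()
}

// notifyNew mimics the patched callbacks: a non-blocking send on a
// 1-buffered channel, no goroutine at all.
func notifyNew(updates chan struct{}) {
	select {
	case updates <- struct{}{}:
	default:
	}
}

func main() {
	before := runtime.NumGoroutine()

	unbuffered := make(chan struct{}) // as stateUpdates is
	for i := 0; i < 1000; i++ {
		notifyOld(unbuffered)
	}
	// Roughly 1000 goroutines now sit blocked on the send.
	fmt.Println("parked goroutines:", runtime.NumGoroutine()-before)

	coalesced := make(chan struct{}, 1)
	for i := 0; i < 1000; i++ {
		notifyNew(coalesced)
	}
	// The same burst leaves one pending signal and zero goroutines.
	fmt.Println("pending signals:", len(coalesced))
}
```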
@@ -377,24 +387,6 @@ func (a *Agent) controller() {
 			log.Infof("Processing new external signal")
 
 			switch update := signal.(type) {
-			// The balance of the backing wallet has changed, if
-			// more funds are now available, we may attempt to open
-			// up an additional channel, or splice in funds to an
-			// existing one.
-			case *balanceUpdate:
-				log.Debug("Applying external balance state " +
-					"update")
-
-				updateBalance()
-
-			// The channel we tried to open previously failed for
-			// whatever reason.
-			case *chanOpenFailureUpdate:
-				log.Debug("Retrying after previous channel " +
-					"open failure.")
-
-				updateBalance()
-
 			// A new channel has been opened successfully. This was
 			// either opened by the Agent, or an external system
 			// that is able to drive the Lightning Node.
@@ -411,13 +403,6 @@ func (a *Agent) controller() {
 				pendingMtx.Unlock()
 
 				updateBalance()
-
-			// A new channel has been opened by the agent or an
-			// external subsystem, but is still pending
-			// confirmation.
-			case *chanPendingOpenUpdate:
-				updateBalance()
-
 			// A channel has been closed, this may free up an
 			// available slot, triggering a new channel update.
 			case *chanCloseUpdate:
@@ -430,232 +415,241 @@ func (a *Agent) controller() {
 				}
 
 				updateBalance()
-
-			// New nodes have been added to the graph or their node
-			// announcements have been updated. We will consider
-			// opening channels to these nodes if we haven't
-			// stabilized.
-			case *nodeUpdates:
-				log.Infof("Node updates received, assessing " +
-					"need for more channels")
 			}
-
-			pendingMtx.Lock()
-			log.Debugf("Pending channels: %v", spew.Sdump(pendingOpens))
-			pendingMtx.Unlock()
-
-			// With all the updates applied, we'll obtain a set of
-			// the current active channels (confirmed channels),
-			// and also factor in our set of unconfirmed channels.
-			confirmedChans := a.chanState
-			pendingMtx.Lock()
-			totalChans := mergeChanState(pendingOpens, confirmedChans)
-			pendingMtx.Unlock()
-
-			// Now that we've updated our internal state, we'll
-			// consult our channel attachment heuristic to
-			// determine if we should open up any additional
-			// channels or modify existing channels.
-			availableFunds, numChans, needMore := a.cfg.Heuristic.NeedMoreChans(
-				totalChans, a.totalBalance,
-			)
-			if !needMore {
-				continue
-			}
-
-			log.Infof("Triggering attachment directive dispatch, "+
-				"total_funds=%v", a.totalBalance)
-
-			// We're to attempt an attachment so we'll o obtain the
-			// set of nodes that we currently have channels with so
-			// we avoid duplicate edges.
-			connectedNodes := a.chanState.ConnectedNodes()
-			pendingMtx.Lock()
-			nodesToSkip := mergeNodeMaps(connectedNodes, failedNodes, pendingOpens)
-			pendingMtx.Unlock()
-
-			// If we reach this point, then according to our
-			// heuristic we should modify our channel state to tend
-			// towards what it determines to the optimal state. So
-			// we'll call Select to get a fresh batch of attachment
-			// directives, passing in the amount of funds available
-			// for us to use.
-			chanCandidates, err := a.cfg.Heuristic.Select(
-				a.cfg.Self, a.cfg.Graph, availableFunds,
-				numChans, nodesToSkip,
-			)
-			if err != nil {
-				log.Errorf("Unable to select candidates for "+
-					"attachment: %v", err)
-				continue
-			}
-
-			if len(chanCandidates) == 0 {
-				log.Infof("No eligible candidates to connect to")
-				continue
-			}
-
-			log.Infof("Attempting to execute channel attachment "+
-				"directives: %v", spew.Sdump(chanCandidates))
-
-			// For each recommended attachment directive, we'll
-			// launch a new goroutine to attempt to carry out the
-			// directive. If any of these succeed, then we'll
-			// receive a new state update, taking us back to the
-			// top of our controller loop.
-			pendingMtx.Lock()
-			for _, chanCandidate := range chanCandidates {
-				// Before we proceed, we'll check to see if
-				// this attempt would take us past the total
-				// number of allowed pending opens. If so, then
-				// we'll skip this round and wait for an
-				// attempt to either fail or succeed.
-				if uint16(len(pendingOpens))+1 >
-					a.cfg.MaxPendingOpens {
-
-					log.Debugf("Reached cap of %v pending "+
-						"channel opens, will retry "+
-						"after success/failure",
-						a.cfg.MaxPendingOpens)
-					continue
-				}
-
-				go func(directive AttachmentDirective) {
-					// We'll start out by attempting to
-					// connect to the peer in order to begin
-					// the funding workflow.
-					pub := directive.PeerKey
-					alreadyConnected, err := a.cfg.ConnectToPeer(
-						pub, directive.Addrs,
-					)
-					if err != nil {
-						log.Warnf("Unable to connect "+
-							"to %x: %v",
-							pub.SerializeCompressed(),
-							err)
-
-						// Since we failed to connect to
-						// them, we'll mark them as
-						// failed so that we don't
-						// attempt to connect to them
-						// again.
-						nodeID := NewNodeID(pub)
-						pendingMtx.Lock()
-						failedNodes[nodeID] = struct{}{}
-						pendingMtx.Unlock()
-
-						// Finally, we'll trigger the
-						// agent to select new peers to
-						// connect to.
-						a.OnChannelOpenFailure()
-
-						return
-					}
-
-					// If we were succesful, we'll track
-					// this peer in our set of pending
-					// opens. We do this here to ensure we
-					// don't stall on selecting new peers if
-					// the connection attempt happens to
-					// take too long.
-					pendingMtx.Lock()
-					if uint16(len(pendingOpens))+1 >
-						a.cfg.MaxPendingOpens {
-
-						pendingMtx.Unlock()
-
-						// Since we've reached our max
-						// number of pending opens,
-						// we'll disconnect this peer
-						// and exit. However, if we were
-						// previously connected to them,
-						// then we'll make sure to
-						// maintain the connection
-						// alive.
-						if alreadyConnected {
-							return
-						}
-
-						err = a.cfg.DisconnectPeer(
-							pub,
-						)
-						if err != nil {
-							log.Warnf("Unable to "+
-								"disconnect peer "+
-								"%x: %v",
-								pub.SerializeCompressed(),
-								err)
-						}
-
-						return
-					}
-
-					nodeID := NewNodeID(directive.PeerKey)
-					pendingOpens[nodeID] = Channel{
-						Capacity: directive.ChanAmt,
-						Node:     nodeID,
-					}
-					pendingMtx.Unlock()
-
-					// We can then begin the funding
-					// workflow with this peer.
-					err = a.cfg.ChanController.OpenChannel(
-						pub, directive.ChanAmt,
-					)
-					if err != nil {
-						log.Warnf("Unable to open "+
-							"channel to %x of %v: %v",
-							pub.SerializeCompressed(),
-							directive.ChanAmt, err)
-
-						// As the attempt failed, we'll
-						// clear the peer from the set of
-						// pending opens and mark them
-						// as failed so we don't attempt
-						// to open a channel to them
-						// again.
-						pendingMtx.Lock()
-						delete(pendingOpens, nodeID)
-						failedNodes[nodeID] = struct{}{}
-						pendingMtx.Unlock()
-
-						// Trigger the agent to
-						// re-evaluate everything and
-						// possibly retry with a
-						// different node.
-						a.OnChannelOpenFailure()
-
-						// Finally, we should also
-						// disconnect the peer if we
-						// weren't already connected to
-						// them beforehand by an
-						// external subsystem.
-						if alreadyConnected {
-							return
-						}
-
-						err = a.cfg.DisconnectPeer(pub)
-						if err != nil {
-							log.Warnf("Unable to "+
-								"disconnect peer "+
-								"%x: %v",
-								pub.SerializeCompressed(),
-								err)
-						}
-					}
-
-					// Since the channel open was successful
-					// and is currently pending, we'll
-					// trigger the autopilot agent to query
-					// for more peers.
-					a.OnChannelPendingOpen()
-				}(chanCandidate)
-			}
-			pendingMtx.Unlock()
+
+		// A new channel has been opened by the agent or an external
+		// subsystem, but is still pending confirmation.
+		case <-a.pendingOpenUpdates:
+			updateBalance()
+
+		// The balance of the backing wallet has changed, if more funds
+		// are now available, we may attempt to open up an additional
+		// channel, or splice in funds to an existing one.
+		case <-a.balanceUpdates:
+			log.Debug("Applying external balance state update")
+
+			updateBalance()
+
+		// The channel we tried to open previously failed for whatever
+		// reason.
+		case <-a.chanOpenFailures:
+			log.Debug("Retrying after previous channel open " +
+				"failure.")
+
+			updateBalance()
+
+		// New nodes have been added to the graph or their node
+		// announcements have been updated. We will consider opening
+		// channels to these nodes if we haven't stabilized.
+		case <-a.nodeUpdates:
+			log.Infof("Node updates received, assessing " +
+				"need for more channels")
 
 		// The agent has been signalled to exit, so we'll bail out
 		// immediately.
 		case <-a.quit:
 			return
 		}
+
+		pendingMtx.Lock()
+		log.Debugf("Pending channels: %v", spew.Sdump(pendingOpens))
+		pendingMtx.Unlock()
+
+		// With all the updates applied, we'll obtain a set of the
+		// current active channels (confirmed channels), and also
+		// factor in our set of unconfirmed channels.
+		confirmedChans := a.chanState
+		pendingMtx.Lock()
+		totalChans := mergeChanState(pendingOpens, confirmedChans)
+		pendingMtx.Unlock()
+
+		// Now that we've updated our internal state, we'll consult our
+		// channel attachment heuristic to determine if we should open
+		// up any additional channels or modify existing channels.
+		availableFunds, numChans, needMore := a.cfg.Heuristic.NeedMoreChans(
+			totalChans, a.totalBalance,
+		)
+		if !needMore {
+			continue
+		}
+
+		log.Infof("Triggering attachment directive dispatch, "+
+			"total_funds=%v", a.totalBalance)
+
+		// We're to attempt an attachment so we'll o obtain the set of
+		// nodes that we currently have channels with so we avoid
+		// duplicate edges.
+		connectedNodes := a.chanState.ConnectedNodes()
+		pendingMtx.Lock()
+		nodesToSkip := mergeNodeMaps(connectedNodes, failedNodes, pendingOpens)
+		pendingMtx.Unlock()
+
+		// If we reach this point, then according to our heuristic we
+		// should modify our channel state to tend towards what it
+		// determines to the optimal state. So we'll call Select to get
+		// a fresh batch of attachment directives, passing in the
+		// amount of funds available for us to use.
+		chanCandidates, err := a.cfg.Heuristic.Select(
+			a.cfg.Self, a.cfg.Graph, availableFunds,
+			numChans, nodesToSkip,
+		)
+		if err != nil {
+			log.Errorf("Unable to select candidates for "+
+				"attachment: %v", err)
+			continue
+		}
+
+		if len(chanCandidates) == 0 {
+			log.Infof("No eligible candidates to connect to")
+			continue
+		}
+
+		log.Infof("Attempting to execute channel attachment "+
+			"directives: %v", spew.Sdump(chanCandidates))
+
+		// For each recommended attachment directive, we'll launch a
+		// new goroutine to attempt to carry out the directive. If any
+		// of these succeed, then we'll receive a new state update,
+		// taking us back to the top of our controller loop.
+		pendingMtx.Lock()
+		for _, chanCandidate := range chanCandidates {
+			// Before we proceed, we'll check to see if this
+			// attempt would take us past the total number of
+			// allowed pending opens. If so, then we'll skip this
+			// round and wait for an attempt to either fail or
+			// succeed.
+			if uint16(len(pendingOpens))+1 >
+				a.cfg.MaxPendingOpens {
+
+				log.Debugf("Reached cap of %v pending "+
+					"channel opens, will retry "+
+					"after success/failure",
+					a.cfg.MaxPendingOpens)
+				continue
+			}
+
+			go func(directive AttachmentDirective) {
+				// We'll start out by attempting to connect to
+				// the peer in order to begin the funding
+				// workflow.
+				pub := directive.PeerKey
+				alreadyConnected, err := a.cfg.ConnectToPeer(
+					pub, directive.Addrs,
+				)
+				if err != nil {
+					log.Warnf("Unable to connect "+
+						"to %x: %v",
+						pub.SerializeCompressed(),
+						err)
+
+					// Since we failed to connect to them,
+					// we'll mark them as failed so that we
+					// don't attempt to connect to them
+					// again.
+					nodeID := NewNodeID(pub)
+					pendingMtx.Lock()
+					failedNodes[nodeID] = struct{}{}
+					pendingMtx.Unlock()
+
+					// Finally, we'll trigger the agent to
+					// select new peers to connect to.
+					a.OnChannelOpenFailure()
+
+					return
+				}
+
+				// If we were succesful, we'll track this peer
+				// in our set of pending opens. We do this here
+				// to ensure we don't stall on selecting new
+				// peers if the connection attempt happens to
+				// take too long.
+				pendingMtx.Lock()
+				if uint16(len(pendingOpens))+1 >
+					a.cfg.MaxPendingOpens {
+
+					pendingMtx.Unlock()
+
+					// Since we've reached our max number
+					// of pending opens, we'll disconnect
+					// this peer and exit. However, if we
+					// were previously connected to them,
+					// then we'll make sure to maintain the
+					// connection alive.
+					if alreadyConnected {
+						return
+					}
+
+					err = a.cfg.DisconnectPeer(
+						pub,
+					)
+					if err != nil {
+						log.Warnf("Unable to "+
+							"disconnect peer "+
+							"%x: %v",
+							pub.SerializeCompressed(),
+							err)
+					}
+
+					return
+				}
+
+				nodeID := NewNodeID(directive.PeerKey)
+				pendingOpens[nodeID] = Channel{
+					Capacity: directive.ChanAmt,
+					Node:     nodeID,
+				}
+				pendingMtx.Unlock()
+
+				// We can then begin the funding workflow with
+				// this peer.
+				err = a.cfg.ChanController.OpenChannel(
+					pub, directive.ChanAmt,
+				)
+				if err != nil {
+					log.Warnf("Unable to open "+
+						"channel to %x of %v: %v",
+						pub.SerializeCompressed(),
+						directive.ChanAmt, err)
+
+					// As the attempt failed, we'll clear
+					// the peer from the set of pending
+					// opens and mark them as failed so we
+					// don't attempt to open a channel to
+					// them again.
+					pendingMtx.Lock()
+					delete(pendingOpens, nodeID)
+					failedNodes[nodeID] = struct{}{}
+					pendingMtx.Unlock()
+
+					// Trigger the agent to re-evaluate
+					// everything and possibly retry with a
+					// different node.
+					a.OnChannelOpenFailure()
+
+					// Finally, we should also disconnect
+					// the peer if we weren't already
+					// connected to them beforehand by an
+					// external subsystem.
+					if alreadyConnected {
+						return
+					}
+
+					err = a.cfg.DisconnectPeer(pub)
+					if err != nil {
+						log.Warnf("Unable to "+
+							"disconnect peer "+
+							"%x: %v",
+							pub.SerializeCompressed(),
+							err)
+					}
+				}
+
+				// Since the channel open was successful and is
+				// currently pending, we'll trigger the
+				// autopilot agent to query for more peers.
+				a.OnChannelPendingOpen()
+			}(chanCandidate)
+		}
+		pendingMtx.Unlock()
 	}
 }
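The net effect of this hunk: each update type now has a dedicated select case, and the heuristic pass that used to be nested inside the stateUpdates case runs once at the bottom of the loop after any signal. A compressed, runnable sketch of the resulting control flow (illustrative names only, not lnd's actual types):

```go
package main

import (
	"fmt"
	"time"
)

// controller mirrors the shape of the patched loop: one coalescing
// channel per update type, one select to dispatch, and the shared
// re-evaluation logic hoisted below the select so it runs after any
// signal instead of being repeated in every case.
func controller(balance, nodes, failures, quit chan struct{}) {
	for {
		select {
		case <-balance:
			fmt.Println("balance changed")
		case <-nodes:
			fmt.Println("graph changed")
		case <-failures:
			fmt.Println("previous open failed, will retry")
		case <-quit:
			return
		}

		// The heuristic pass: decide whether more channels are
		// needed given current state.
		fmt.Println("running channel-attachment heuristic")
	}
}

func main() {
	balance := make(chan struct{}, 1)
	nodes := make(chan struct{}, 1)
	failures := make(chan struct{}, 1)
	quit := make(chan struct{})

	done := make(chan struct{})
	go func() {
		controller(balance, nodes, failures, quit)
		close(done)
	}()

	balance <- struct{}{}
	nodes <- struct{}{}
	time.Sleep(50 * time.Millisecond) // let the controller drain both

	close(quit)
	<-done
}
```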