From becae748debe6a0d7312b70ba1fac0447fa6af16 Mon Sep 17 00:00:00 2001 From: Roei Erez Date: Mon, 21 Jan 2019 13:11:51 +0200 Subject: [PATCH] server: stop sub systems in case of server failed to start. In this commit every succesfull started sub system is tracked and in case of a failure to complete the Start function all these sub systems are stopped using a generic cleanup function. --- server.go | 140 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 120 insertions(+), 20 deletions(-) diff --git a/server.go b/server.go index e54348f9..294959b5 100644 --- a/server.go +++ b/server.go @@ -1432,17 +1432,45 @@ func (s *server) Started() bool { return atomic.LoadInt32(&s.active) != 0 } +// cleaner is used to aggregate "cleanup" functions during an operation that +// starts several subsystems. In case one of the subsystem fails to start +// and a proper resource cleanup is required, the "run" method achieves this +// by running all these added "cleanup" functions +type cleaner []func() error + +// add is used to add a cleanup function to be called when +// the run function is executed +func (c cleaner) add(cleanup func() error) cleaner { + return append(c, cleanup) +} + +// run is used to run all the previousely added cleanup functions +func (c cleaner) run() { + for i := len(c) - 1; i >= 0; i-- { + if err := c[i](); err != nil { + srvrLog.Infof("Cleanup failed: %v", err) + } + } +} + // Start starts the main daemon server, all requested listeners, and any helper // goroutines. // NOTE: This function is safe for concurrent access. func (s *server) Start() error { var startErr error + + // If one sub system fails to start, the following code ensures that the + // previous started ones are stopped. It also ensures a proper wallet + // shutdown which is important for releasing its resources (boltdb, etc...) + cleanup := cleaner{} + s.start.Do(func() { if s.torController != nil { if err := s.createNewHiddenService(); err != nil { startErr = err return } + cleanup = cleanup.add(s.torController.Stop) } if s.natTraversal != nil { @@ -1455,6 +1483,7 @@ func (s *server) Start() error { startErr = err return } + cleanup = cleanup.add(s.hostAnn.Stop) } if s.livelinessMonitor != nil { @@ -1462,6 +1491,7 @@ func (s *server) Start() error { startErr = err return } + cleanup = cleanup.add(s.livelinessMonitor.Stop) } // Start the notification server. This is used so channel @@ -1473,91 +1503,134 @@ func (s *server) Start() error { startErr = err return } + cleanup = cleanup.add(s.sigPool.Stop) + if err := s.writePool.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.writePool.Stop) + if err := s.readPool.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.readPool.Stop) + if err := s.cc.ChainNotifier.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.cc.ChainNotifier.Stop) + if err := s.channelNotifier.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.channelNotifier.Stop) + if err := s.peerNotifier.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(func() error { + return s.peerNotifier.Stop() + }) if err := s.htlcNotifier.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.htlcNotifier.Stop) + if err := s.sphinx.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.sphinx.Stop) + if s.towerClient != nil { if err := s.towerClient.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.towerClient.Stop) } if s.anchorTowerClient != nil { if err := s.anchorTowerClient.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.anchorTowerClient.Stop) } + if err := s.htlcSwitch.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.htlcSwitch.Stop) + if err := s.sweeper.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.sweeper.Stop) + if err := s.utxoNursery.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.utxoNursery.Stop) + if err := s.chainArb.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.chainArb.Stop) + if err := s.breachArbiter.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.breachArbiter.Stop) + if err := s.authGossiper.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.authGossiper.Stop) + if err := s.chanRouter.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.chanRouter.Stop) + if err := s.fundingMgr.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.fundingMgr.Stop) + if err := s.invoices.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.invoices.Stop) + if err := s.chanStatusMgr.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(s.chanStatusMgr.Stop) if err := s.chanEventStore.Start(); err != nil { startErr = err return } + cleanup = cleanup.add(func() error { + s.chanEventStore.Stop() + return nil + }) // Before we start the connMgr, we'll check to see if we have // any backups to recover. We do this now as we want to ensure @@ -1595,8 +1668,13 @@ func (s *server) Start() error { startErr = err return } + cleanup = cleanup.add(s.chanSubSwapper.Stop) s.connMgr.Start() + cleanup = cleanup.add(func() error { + s.connMgr.Stop() + return nil + }) // With all the relevant sub-systems started, we'll now attempt // to establish persistent connections to our direct channel @@ -1689,6 +1767,9 @@ func (s *server) Start() error { atomic.StoreInt32(&s.active, 1) }) + if startErr != nil { + cleanup.run() + } return startErr } @@ -1707,30 +1788,49 @@ func (s *server) Stop() error { if err := s.cc.ChainNotifier.Stop(); err != nil { srvrLog.Warnf("Unable to stop ChainNotifier: %v", err) } - s.chanRouter.Stop() - s.htlcSwitch.Stop() - s.sphinx.Stop() - s.utxoNursery.Stop() - s.breachArbiter.Stop() - s.authGossiper.Stop() - s.chainArb.Stop() - s.sweeper.Stop() - s.channelNotifier.Stop() - s.peerNotifier.Stop() - s.htlcNotifier.Stop() - if err := s.cc.Wallet.Shutdown(); err != nil { - srvrLog.Warnf("Unable to stop Wallet: %v", err) + if err := s.chanRouter.Stop(); err != nil { + srvrLog.Warnf("failed to stop chanRouter: %v", err) } - if err := s.cc.ChainView.Stop(); err != nil { - srvrLog.Warnf("Unable to stop ChainView: %v", err) + if err := s.htlcSwitch.Stop(); err != nil { + srvrLog.Warnf("failed to stop htlcSwitch: %v", err) + } + if err := s.sphinx.Stop(); err != nil { + srvrLog.Warnf("failed to stop sphinx: %v", err) + } + if err := s.utxoNursery.Stop(); err != nil { + srvrLog.Warnf("failed to stop utxoNursery: %v", err) + } + if err := s.breachArbiter.Stop(); err != nil { + srvrLog.Warnf("failed to stop breachArbiter: %v", err) + } + if err := s.authGossiper.Stop(); err != nil { + srvrLog.Warnf("failed to stop authGossiper: %v", err) + } + if err := s.chainArb.Stop(); err != nil { + srvrLog.Warnf("failed to stop chainArb: %v", err) + } + if err := s.sweeper.Stop(); err != nil { + srvrLog.Warnf("failed to stop sweeper: %v", err) + } + if err := s.channelNotifier.Stop(); err != nil { + srvrLog.Warnf("failed to stop channelNotifier: %v", err) + } + if err := s.peerNotifier.Stop(); err != nil { + srvrLog.Warnf("failed to stop peerNotifier: %v", err) + } + if err := s.htlcNotifier.Stop(); err != nil { + srvrLog.Warnf("failed to stop htlcNotifier: %v", err) } s.connMgr.Stop() - if err := s.cc.FeeEstimator.Stop(); err != nil { - srvrLog.Warnf("Unable to stop FeeEstimator: %v", err) + if err := s.invoices.Stop(); err != nil { + srvrLog.Warnf("failed to stop invoices: %v", err) + } + if err := s.fundingMgr.Stop(); err != nil { + srvrLog.Warnf("failed to stop fundingMgr: %v", err) + } + if err := s.chanSubSwapper.Stop(); err != nil { + srvrLog.Warnf("failed to stop chanSubSwapper: %v", err) } - s.invoices.Stop() - s.fundingMgr.Stop() - s.chanSubSwapper.Stop() s.chanEventStore.Stop() // Disconnect from each active peers to ensure that