lnd: Better error handling in lightningNode.Start().

Currently, if an error occurs in Start() before the LightningClient is
initialized, the lnd process is not killed and the program crashes with
a nil pointer dereference (because Stop() tries to call a method on
the nil LightningClient). This commit handles some of those edge cases
by killing the process when Start() fails early and by making Stop() a
no-op when the client was never created.
This commit is contained in:
Jim Posen 2017-10-24 19:39:50 -07:00 committed by Olaoluwa Osuntokun
parent 643c23f978
commit 923dd9ac30

@ -243,55 +243,19 @@ func (l *lightningNode) Start(lndError chan error) error {
close(l.processExit) close(l.processExit)
}() }()
pid, err := os.Create(filepath.Join(l.cfg.DataDir, // Write process ID to a file.
fmt.Sprintf("%v.pid", l.nodeID))) if err := l.writePidFile(); err != nil {
if err != nil { l.cmd.Process.Kill()
return err
}
l.pidFile = pid.Name()
if _, err = fmt.Fprintf(pid, "%v\n", l.cmd.Process.Pid); err != nil {
return err
}
if err := pid.Close(); err != nil {
return err return err
} }
// Wait until TLS certificate and admin macaroon are created before // Since Stop uses the LightningClient to stop the node, if we fail to get a
// using them, up to 20 sec. // connected client, we have to kill the process.
tlsTimeout := time.After(30 * time.Second) conn, err := l.connectRPC()
for !fileExists(l.cfg.TLSCertPath) || !fileExists(l.cfg.AdminMacPath) {
time.Sleep(100 * time.Millisecond)
select {
case <-tlsTimeout:
panic(fmt.Errorf("timeout waiting for TLS cert file " +
"and admin macaroon file to be created after " +
"20 seconds"))
default:
}
}
tlsCreds, err := credentials.NewClientTLSFromFile(l.cfg.TLSCertPath, "")
if err != nil { if err != nil {
l.cmd.Process.Kill()
return err return err
} }
macBytes, err := ioutil.ReadFile(l.cfg.AdminMacPath)
if err != nil {
return err
}
mac := &macaroon.Macaroon{}
if err = mac.UnmarshalBinary(macBytes); err != nil {
return err
}
opts := []grpc.DialOption{
grpc.WithTransportCredentials(tlsCreds),
grpc.WithPerRPCCredentials(macaroons.NewMacaroonCredential(mac)),
grpc.WithBlock(),
grpc.WithTimeout(time.Second * 20),
}
conn, err := grpc.Dial(l.rpcAddr, opts...)
if err != nil {
return err
}
l.LightningClient = lnrpc.NewLightningClient(conn) l.LightningClient = lnrpc.NewLightningClient(conn)
// Obtain the lnid of this node for quick identification purposes. // Obtain the lnid of this node for quick identification purposes.
@ -317,6 +281,62 @@ func (l *lightningNode) Start(lndError chan error) error {
return nil return nil
} }
// writePidFile writes the process ID of the running lnd process to a .pid
// file inside the node's data directory and records the file's path in
// l.pidFile.
//
// It returns any error from creating, writing or closing the file. The Close
// error is reported as well, because a failed close can mean the PID was never
// flushed to disk.
func (l *lightningNode) writePidFile() error {
	filePath := filepath.Join(l.cfg.DataDir, fmt.Sprintf("%v.pid", l.nodeID))

	pid, err := os.Create(filePath)
	if err != nil {
		return err
	}

	// Record the path as soon as the file exists, so the file is still
	// tracked if the write below fails.
	// NOTE(review): presumably l.pidFile is consumed by the node's cleanup
	// logic — confirm against the rest of the file.
	l.pidFile = filePath

	if _, err := fmt.Fprintf(pid, "%v\n", l.cmd.Process.Pid); err != nil {
		// The write error is the one worth reporting; the Close error is
		// deliberately dropped on this path.
		pid.Close()
		return err
	}

	return pid.Close()
}
// connectRPC uses the TLS certificate and admin macaroon files written by the
// lnd node to create a gRPC client connection.
//
// It polls until fileExists reports both l.cfg.TLSCertPath and
// l.cfg.AdminMacPath as present, giving up with an error once the wait
// timeout elapses. It then loads the TLS credentials and the macaroon and
// dials l.rpcAddr. Errors from loading either file, decoding the macaroon or
// dialing are returned unchanged.
func (l *lightningNode) connectRPC() (*grpc.ClientConn, error) {
	const (
		// credsWaitTimeout bounds how long we wait for lnd to write its
		// TLS certificate and admin macaroon.
		credsWaitTimeout = 30 * time.Second

		// credsPollInterval is the delay between existence checks.
		credsPollInterval = 100 * time.Millisecond
	)

	// Use a single timer and ticker for the whole wait rather than
	// allocating a new timer on every poll iteration.
	timeout := time.NewTimer(credsWaitTimeout)
	defer timeout.Stop()
	poll := time.NewTicker(credsPollInterval)
	defer poll.Stop()

	for !fileExists(l.cfg.TLSCertPath) || !fileExists(l.cfg.AdminMacPath) {
		select {
		case <-timeout.C:
			// The duration is formatted from the constant so the message
			// cannot drift from the actual timeout.
			return nil, fmt.Errorf("timeout waiting for TLS cert file "+
				"and admin macaroon file to be created after %v",
				credsWaitTimeout)
		case <-poll.C:
		}
	}

	tlsCreds, err := credentials.NewClientTLSFromFile(l.cfg.TLSCertPath, "")
	if err != nil {
		return nil, err
	}

	macBytes, err := ioutil.ReadFile(l.cfg.AdminMacPath)
	if err != nil {
		return nil, err
	}
	mac := &macaroon.Macaroon{}
	if err = mac.UnmarshalBinary(macBytes); err != nil {
		return nil, err
	}

	// WithBlock makes Dial wait for the connection to come up; WithTimeout
	// caps that wait at 20 seconds.
	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(tlsCreds),
		grpc.WithPerRPCCredentials(macaroons.NewMacaroonCredential(mac)),
		grpc.WithBlock(),
		grpc.WithTimeout(time.Second * 20),
	}
	return grpc.Dial(l.rpcAddr, opts...)
}
// cleanup cleans up all the temporary files created by the node's process. // cleanup cleans up all the temporary files created by the node's process.
func (l *lightningNode) cleanup() error { func (l *lightningNode) cleanup() error {
dirs := []string{ dirs := []string{
@ -335,10 +355,12 @@ func (l *lightningNode) cleanup() error {
// Stop attempts to stop the active lnd process. // Stop attempts to stop the active lnd process.
func (l *lightningNode) Stop() error { func (l *lightningNode) Stop() error {
// We should skip node stop in case: // Do nothing if the process never started successfully.
// - start of the node wasn't initiated if l.LightningClient == nil {
// - process wasn't spawned return nil
// - process already finished }
// Do nothing if the process already finished.
select { select {
case <-l.quit: case <-l.quit:
return nil return nil
@ -371,6 +393,7 @@ func (l *lightningNode) Restart(errChan chan error, callback func() error) error
<-l.processExit <-l.processExit
l.LightningClient = nil
l.processExit = make(chan struct{}) l.processExit = make(chan struct{})
l.quit = make(chan struct{}) l.quit = make(chan struct{})
l.wg = sync.WaitGroup{} l.wg = sync.WaitGroup{}
@ -880,12 +903,14 @@ func (n *networkHarness) NewNode(extraArgs []string) (*lightningNode, error) {
return nil, err return nil, err
} }
// Put node in activeNodes to ensure Shutdown is called even if Start
// returns an error.
n.activeNodes[node.nodeID] = node
if err := node.Start(n.lndErrorChan); err != nil { if err := node.Start(n.lndErrorChan); err != nil {
return nil, err return nil, err
} }
n.activeNodes[node.nodeID] = node
return node, nil return node, nil
} }