lnd: Better error handling in lightningNode.Start().
There is currently an issue where, if an error occurs in Start() before the LightningClient is initialized, the lnd process is not killed and the program segfaults (because Stop() tries to call a method on the nil LightningClient). This commit handles some of those edge cases.
This commit is contained in:
parent
643c23f978
commit
923dd9ac30
123
networktest.go
123
networktest.go
@ -243,55 +243,19 @@ func (l *lightningNode) Start(lndError chan error) error {
|
|||||||
close(l.processExit)
|
close(l.processExit)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
pid, err := os.Create(filepath.Join(l.cfg.DataDir,
|
// Write process ID to a file.
|
||||||
fmt.Sprintf("%v.pid", l.nodeID)))
|
if err := l.writePidFile(); err != nil {
|
||||||
if err != nil {
|
l.cmd.Process.Kill()
|
||||||
return err
|
|
||||||
}
|
|
||||||
l.pidFile = pid.Name()
|
|
||||||
if _, err = fmt.Fprintf(pid, "%v\n", l.cmd.Process.Pid); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := pid.Close(); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait until TLS certificate and admin macaroon are created before
|
// Since Stop uses the LightningClient to stop the node, if we fail to get a
|
||||||
// using them, up to 20 sec.
|
// connected client, we have to kill the process.
|
||||||
tlsTimeout := time.After(30 * time.Second)
|
conn, err := l.connectRPC()
|
||||||
for !fileExists(l.cfg.TLSCertPath) || !fileExists(l.cfg.AdminMacPath) {
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
select {
|
|
||||||
case <-tlsTimeout:
|
|
||||||
panic(fmt.Errorf("timeout waiting for TLS cert file " +
|
|
||||||
"and admin macaroon file to be created after " +
|
|
||||||
"20 seconds"))
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tlsCreds, err := credentials.NewClientTLSFromFile(l.cfg.TLSCertPath, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
l.cmd.Process.Kill()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
macBytes, err := ioutil.ReadFile(l.cfg.AdminMacPath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
mac := &macaroon.Macaroon{}
|
|
||||||
if err = mac.UnmarshalBinary(macBytes); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
opts := []grpc.DialOption{
|
|
||||||
grpc.WithTransportCredentials(tlsCreds),
|
|
||||||
grpc.WithPerRPCCredentials(macaroons.NewMacaroonCredential(mac)),
|
|
||||||
grpc.WithBlock(),
|
|
||||||
grpc.WithTimeout(time.Second * 20),
|
|
||||||
}
|
|
||||||
conn, err := grpc.Dial(l.rpcAddr, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
l.LightningClient = lnrpc.NewLightningClient(conn)
|
l.LightningClient = lnrpc.NewLightningClient(conn)
|
||||||
|
|
||||||
// Obtain the lnid of this node for quick identification purposes.
|
// Obtain the lnid of this node for quick identification purposes.
|
||||||
@ -317,6 +281,62 @@ func (l *lightningNode) Start(lndError chan error) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writePidFile writes the process ID of the running lnd process to a .pid file.
|
||||||
|
func (l *lightningNode) writePidFile() error {
|
||||||
|
filePath := filepath.Join(l.cfg.DataDir, fmt.Sprintf("%v.pid", l.nodeID))
|
||||||
|
|
||||||
|
pid, err := os.Create(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer pid.Close()
|
||||||
|
|
||||||
|
_, err = fmt.Fprintf(pid, "%v\n", l.cmd.Process.Pid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
l.pidFile = filePath
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// connectRPC uses the TLS certificate and admin macaroon files written by the
|
||||||
|
// lnd node to create a gRPC client connection.
|
||||||
|
func (l *lightningNode) connectRPC() (*grpc.ClientConn, error) {
|
||||||
|
// Wait until TLS certificate and admin macaroon are created before
|
||||||
|
// using them, up to 20 sec.
|
||||||
|
tlsTimeout := time.After(30 * time.Second)
|
||||||
|
for !fileExists(l.cfg.TLSCertPath) || !fileExists(l.cfg.AdminMacPath) {
|
||||||
|
select {
|
||||||
|
case <-tlsTimeout:
|
||||||
|
return nil, fmt.Errorf("timeout waiting for TLS cert file " +
|
||||||
|
"and admin macaroon file to be created after " +
|
||||||
|
"20 seconds")
|
||||||
|
case <-time.After(100 * time.Millisecond):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tlsCreds, err := credentials.NewClientTLSFromFile(l.cfg.TLSCertPath, "")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
macBytes, err := ioutil.ReadFile(l.cfg.AdminMacPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
mac := &macaroon.Macaroon{}
|
||||||
|
if err = mac.UnmarshalBinary(macBytes); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
opts := []grpc.DialOption{
|
||||||
|
grpc.WithTransportCredentials(tlsCreds),
|
||||||
|
grpc.WithPerRPCCredentials(macaroons.NewMacaroonCredential(mac)),
|
||||||
|
grpc.WithBlock(),
|
||||||
|
grpc.WithTimeout(time.Second * 20),
|
||||||
|
}
|
||||||
|
return grpc.Dial(l.rpcAddr, opts...)
|
||||||
|
}
|
||||||
|
|
||||||
// cleanup cleans up all the temporary files created by the node's process.
|
// cleanup cleans up all the temporary files created by the node's process.
|
||||||
func (l *lightningNode) cleanup() error {
|
func (l *lightningNode) cleanup() error {
|
||||||
dirs := []string{
|
dirs := []string{
|
||||||
@ -335,10 +355,12 @@ func (l *lightningNode) cleanup() error {
|
|||||||
|
|
||||||
// Stop attempts to stop the active lnd process.
|
// Stop attempts to stop the active lnd process.
|
||||||
func (l *lightningNode) Stop() error {
|
func (l *lightningNode) Stop() error {
|
||||||
// We should skip node stop in case:
|
// Do nothing if the process never started successfully.
|
||||||
// - start of the node wasn't initiated
|
if l.LightningClient == nil {
|
||||||
// - process wasn't spawned
|
return nil
|
||||||
// - process already finished
|
}
|
||||||
|
|
||||||
|
// Do nothing if the process already finished.
|
||||||
select {
|
select {
|
||||||
case <-l.quit:
|
case <-l.quit:
|
||||||
return nil
|
return nil
|
||||||
@ -371,6 +393,7 @@ func (l *lightningNode) Restart(errChan chan error, callback func() error) error
|
|||||||
|
|
||||||
<-l.processExit
|
<-l.processExit
|
||||||
|
|
||||||
|
l.LightningClient = nil
|
||||||
l.processExit = make(chan struct{})
|
l.processExit = make(chan struct{})
|
||||||
l.quit = make(chan struct{})
|
l.quit = make(chan struct{})
|
||||||
l.wg = sync.WaitGroup{}
|
l.wg = sync.WaitGroup{}
|
||||||
@ -880,12 +903,14 @@ func (n *networkHarness) NewNode(extraArgs []string) (*lightningNode, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Put node in activeNodes to ensure Shutdown is called even if Start
|
||||||
|
// returns an error.
|
||||||
|
n.activeNodes[node.nodeID] = node
|
||||||
|
|
||||||
if err := node.Start(n.lndErrorChan); err != nil {
|
if err := node.Start(n.lndErrorChan); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
n.activeNodes[node.nodeID] = node
|
|
||||||
|
|
||||||
return node, nil
|
return node, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user