From c04773963b33ab68330227916a00ad9cc3ec5a48 Mon Sep 17 00:00:00 2001 From: Marty Jones Date: Tue, 1 Dec 2020 21:34:19 -0500 Subject: [PATCH] healthcheck: add healthcheck to shutdown if cert is expired (#4792) In certain container setups, it's useful to optionally have lnd just shut down if it detects that its certs are expired, as assuming there's a hypervisor to restart the container/pod, then upon restart, lnd will have fully up-to-date certs. --- config.go | 15 +++++++++++++++ lncfg/healthcheck.go | 6 ++++++ sample-lnd.conf | 16 ++++++++++++++++ server.go | 29 ++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/config.go b/config.go index a07eb76a..d7e3aefd 100644 --- a/config.go +++ b/config.go @@ -108,6 +108,15 @@ const ( defaultDiskBackoff = time.Minute defaultDiskAttempts = 0 + // Set defaults for a health check which ensures that the TLS certificate + // is not expired. Although this check is off by default (not all setups + // require it), we still set the other default values so that the health + // check can be easily enabled with sane defaults. + defaultTLSInterval = time.Minute + defaultTLSTimeout = time.Second * 5 + defaultTLSBackoff = time.Minute + defaultTLSAttempts = 0 + // defaultRemoteMaxHtlcs specifies the default limit for maximum // concurrent HTLCs the remote party may add to commitment transactions. // This value can be overridden with --default-remote-max-htlcs. 
@@ -457,6 +466,12 @@ func DefaultConfig() Config { Backoff: defaultDiskBackoff, }, }, + TLSCheck: &lncfg.CheckConfig{ + Interval: defaultTLSInterval, + Timeout: defaultTLSTimeout, + Attempts: defaultTLSAttempts, + Backoff: defaultTLSBackoff, + }, }, MaxOutgoingCltvExpiry: htlcswitch.DefaultMaxOutgoingCltvExpiry, MaxChannelFeeAllocation: htlcswitch.DefaultMaxLinkFeeAllocation, diff --git a/lncfg/healthcheck.go b/lncfg/healthcheck.go index a43505ca..bee569b3 100644 --- a/lncfg/healthcheck.go +++ b/lncfg/healthcheck.go @@ -26,6 +26,8 @@ type HealthCheckConfig struct { ChainCheck *CheckConfig `group:"chainbackend" namespace:"chainbackend"` DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"` + + TLSCheck *CheckConfig `group:"tls" namespace:"tls"` } // Validate checks the values configured for our health checks. @@ -38,6 +40,10 @@ func (h *HealthCheckConfig) Validate() error { return err } + if err := h.TLSCheck.validate("tls"); err != nil { + return err + } + if h.DiskCheck.RequiredRemaining < 0 || h.DiskCheck.RequiredRemaining >= 1 { diff --git a/sample-lnd.conf b/sample-lnd.conf index 760df39e..93f96061 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -846,6 +846,22 @@ litecoin.node=ltcd ; value must be >= 1m. ; healthcheck.diskspace.interval=6h +; The number of times we should attempt to check for certificate expiration before +; gracefully shutting down. Set this value to 0 to disable this health check. +; healthcheck.tls.attempts=2 + +; The amount of time we allow a query for certificate expiration to take +; before we fail the attempt. This value must be >= 1s. +; healthcheck.tls.timeout=5s + +; The amount of time we should backoff between failed attempts to query +; certificate expiration. This value must be >= 1s. +; healthcheck.tls.backoff=1m + +; The amount of time we should wait between certificate expiration health checks. +; This value must be >= 1m. +; healthcheck.tls.interval=1m + [signrpc] ; Path to the signer macaroon. 
diff --git a/server.go b/server.go index db4d6bc3..7b440ec0 100644 --- a/server.go +++ b/server.go @@ -27,6 +27,7 @@ import ( sphinx "github.com/lightningnetwork/lightning-onion" "github.com/lightningnetwork/lnd/autopilot" "github.com/lightningnetwork/lnd/brontide" + "github.com/lightningnetwork/lnd/cert" "github.com/lightningnetwork/lnd/chainreg" "github.com/lightningnetwork/lnd/chanacceptor" "github.com/lightningnetwork/lnd/chanbackup" @@ -1352,12 +1353,38 @@ func newServer(cfg *Config, listenAddrs []net.Addr, cfg.HealthChecks.DiskCheck.Attempts, ) + tlsHealthCheck := healthcheck.NewObservation( + "tls", + func() error { + _, parsedCert, err := cert.LoadCert( + cfg.TLSCertPath, cfg.TLSKeyPath, + ) + if err != nil { + return err + } + + // If the current time is past the certificate's + // expiry time, then it is considered expired + if time.Now().After(parsedCert.NotAfter) { + return fmt.Errorf("TLS certificate is expired as of %v", parsedCert.NotAfter) + } + + // If the certificate is not outdated, no error needs to + // be returned + return nil + }, + cfg.HealthChecks.TLSCheck.Interval, + cfg.HealthChecks.TLSCheck.Timeout, + cfg.HealthChecks.TLSCheck.Backoff, + cfg.HealthChecks.TLSCheck.Attempts, + ) + // If we have not disabled all of our health checks, we create a // liveliness monitor with our configured checks. s.livelinessMonitor = healthcheck.NewMonitor( &healthcheck.Config{ Checks: []*healthcheck.Observation{ - chainHealthCheck, diskCheck, + chainHealthCheck, diskCheck, tlsHealthCheck, }, Shutdown: srvrLog.Criticalf, },