healthcheck: add healthcheck to shutdown if cert is expired (#4792)

In certain container setups, it's useful to optionally have lnd simply shut down if it detects that its certs are expired: assuming there's a hypervisor to restart the container/pod, lnd will have fully up-to-date certs upon restart.
This commit is contained in:
Marty Jones 2020-12-01 21:34:19 -05:00 committed by GitHub
parent e9b5b2d767
commit c04773963b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 1 deletions

@ -108,6 +108,15 @@ const (
defaultDiskBackoff = time.Minute
defaultDiskAttempts = 0
// Set defaults for a health check which ensures that the TLS certificate
// is not expired. Although this check is off by default (not all setups
// require it), we still set the other default values so that the health
// check can be easily enabled with sane defaults.
defaultTLSInterval = time.Minute
defaultTLSTimeout = time.Second * 5
defaultTLSBackoff = time.Minute
defaultTLSAttempts = 0
// defaultRemoteMaxHtlcs specifies the default limit for maximum
// concurrent HTLCs the remote party may add to commitment transactions.
// This value can be overridden with --default-remote-max-htlcs.
@ -457,6 +466,12 @@ func DefaultConfig() Config {
Backoff: defaultDiskBackoff,
},
},
TLSCheck: &lncfg.CheckConfig{
Interval: defaultTLSInterval,
Timeout: defaultTLSTimeout,
Attempts: defaultTLSAttempts,
Backoff: defaultTLSBackoff,
},
},
MaxOutgoingCltvExpiry: htlcswitch.DefaultMaxOutgoingCltvExpiry,
MaxChannelFeeAllocation: htlcswitch.DefaultMaxLinkFeeAllocation,

@ -26,6 +26,8 @@ type HealthCheckConfig struct {
ChainCheck *CheckConfig `group:"chainbackend" namespace:"chainbackend"`
DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"`
TLSCheck *CheckConfig `group:"tls" namespace:"tls"`
}
// Validate checks the values configured for our health checks.
@ -38,6 +40,10 @@ func (h *HealthCheckConfig) Validate() error {
return err
}
if err := h.TLSCheck.validate("tls"); err != nil {
return err
}
if h.DiskCheck.RequiredRemaining < 0 ||
h.DiskCheck.RequiredRemaining >= 1 {

@ -846,6 +846,22 @@ litecoin.node=ltcd
; value must be >= 1m.
; healthcheck.diskspace.interval=6h
; The number of times we should attempt to check for certificate expiration before
; gracefully shutting down. Set this value to 0 to disable this health check.
; healthcheck.tls.attempts=2
; The amount of time we allow a query for certificate expiration to take
; before we fail the attempt. This value must be >= 1s.
; healthcheck.tls.timeout=5s
; The amount of time we should backoff between failed attempts to query
; certificate expiration. This value must be >= 1s.
; healthcheck.tls.backoff=1m
; The amount of time we should wait between certificate expiration health checks.
; This value must be >= 1m.
; healthcheck.tls.interval=1m
[signrpc]
; Path to the signer macaroon.

@ -27,6 +27,7 @@ import (
sphinx "github.com/lightningnetwork/lightning-onion"
"github.com/lightningnetwork/lnd/autopilot"
"github.com/lightningnetwork/lnd/brontide"
"github.com/lightningnetwork/lnd/cert"
"github.com/lightningnetwork/lnd/chainreg"
"github.com/lightningnetwork/lnd/chanacceptor"
"github.com/lightningnetwork/lnd/chanbackup"
@ -1352,12 +1353,38 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
cfg.HealthChecks.DiskCheck.Attempts,
)
tlsHealthCheck := healthcheck.NewObservation(
"tls",
func() error {
_, parsedCert, err := cert.LoadCert(
cfg.TLSCertPath, cfg.TLSKeyPath,
)
if err != nil {
return err
}
// If the current time is past the certificate's expiry
// time, then it is considered expired.
if time.Now().After(parsedCert.NotAfter) {
return fmt.Errorf("TLS certificate is expired as of %v", parsedCert.NotAfter)
}
// If the certificate has not expired, no error needs to be
// returned.
return nil
},
cfg.HealthChecks.TLSCheck.Interval,
cfg.HealthChecks.TLSCheck.Timeout,
cfg.HealthChecks.TLSCheck.Backoff,
cfg.HealthChecks.TLSCheck.Attempts,
)
// If we have not disabled all of our health checks, we create a
// liveliness monitor with our configured checks.
s.livelinessMonitor = healthcheck.NewMonitor(
&healthcheck.Config{
Checks: []*healthcheck.Observation{
chainHealthCheck, diskCheck,
chainHealthCheck, diskCheck, tlsHealthCheck,
},
Shutdown: srvrLog.Criticalf,
},