healthcheck: add healthcheck to shut down if cert is expired (#4792)
In certain container setups, it's useful to optionally have lnd shut down if it detects that its certs are expired. Assuming there's a hypervisor to restart the container/pod, lnd will have fully up-to-date certs upon restart.
This commit is contained in:
parent
e9b5b2d767
commit
c04773963b
15
config.go
15
config.go
@ -108,6 +108,15 @@ const (
|
||||
defaultDiskBackoff = time.Minute
|
||||
defaultDiskAttempts = 0
|
||||
|
||||
// Set defaults for a health check which ensures that the TLS certificate
|
||||
// is not expired. Although this check is off by default (not all setups
|
||||
// require it), we still set the other default values so that the health
|
||||
// check can be easily enabled with sane defaults.
|
||||
defaultTLSInterval = time.Minute
|
||||
defaultTLSTimeout = time.Second * 5
|
||||
defaultTLSBackoff = time.Minute
|
||||
defaultTLSAttempts = 0
|
||||
|
||||
// defaultRemoteMaxHtlcs specifies the default limit for maximum
|
||||
// concurrent HTLCs the remote party may add to commitment transactions.
|
||||
// This value can be overridden with --default-remote-max-htlcs.
|
||||
@ -457,6 +466,12 @@ func DefaultConfig() Config {
|
||||
Backoff: defaultDiskBackoff,
|
||||
},
|
||||
},
|
||||
TLSCheck: &lncfg.CheckConfig{
|
||||
Interval: defaultTLSInterval,
|
||||
Timeout: defaultTLSTimeout,
|
||||
Attempts: defaultTLSAttempts,
|
||||
Backoff: defaultTLSBackoff,
|
||||
},
|
||||
},
|
||||
MaxOutgoingCltvExpiry: htlcswitch.DefaultMaxOutgoingCltvExpiry,
|
||||
MaxChannelFeeAllocation: htlcswitch.DefaultMaxLinkFeeAllocation,
|
||||
|
@ -26,6 +26,8 @@ type HealthCheckConfig struct {
|
||||
ChainCheck *CheckConfig `group:"chainbackend" namespace:"chainbackend"`
|
||||
|
||||
DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"`
|
||||
|
||||
TLSCheck *CheckConfig `group:"tls" namespace:"tls"`
|
||||
}
|
||||
|
||||
// Validate checks the values configured for our health checks.
|
||||
@ -38,6 +40,10 @@ func (h *HealthCheckConfig) Validate() error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := h.TLSCheck.validate("tls"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if h.DiskCheck.RequiredRemaining < 0 ||
|
||||
h.DiskCheck.RequiredRemaining >= 1 {
|
||||
|
||||
|
@ -846,6 +846,22 @@ litecoin.node=ltcd
|
||||
; value must be >= 1m.
|
||||
; healthcheck.diskspace.interval=6h
|
||||
|
||||
; The number of times we should attempt to check for certificate expiration before
|
||||
; gracefully shutting down. Set this value to 0 to disable this health check.
|
||||
; healthcheck.tls.attempts=2
|
||||
|
||||
; The amount of time we allow a query for certificate expiration to take
|
||||
; before we fail the attempt. This value must be >= 1s.
|
||||
; healthcheck.tls.timeout=5s
|
||||
|
||||
; The amount of time we should backoff between failed attempts to query
|
||||
; certificate expiration. This value must be >= 1s.
|
||||
; healthcheck.tls.backoff=1m
|
||||
|
||||
; The amount of time we should wait between certificate expiration health checks.
|
||||
; This value must be >= 1m.
|
||||
; healthcheck.tls.interval=1m
|
||||
|
||||
[signrpc]
|
||||
|
||||
; Path to the signer macaroon.
|
||||
|
29
server.go
29
server.go
@ -27,6 +27,7 @@ import (
|
||||
sphinx "github.com/lightningnetwork/lightning-onion"
|
||||
"github.com/lightningnetwork/lnd/autopilot"
|
||||
"github.com/lightningnetwork/lnd/brontide"
|
||||
"github.com/lightningnetwork/lnd/cert"
|
||||
"github.com/lightningnetwork/lnd/chainreg"
|
||||
"github.com/lightningnetwork/lnd/chanacceptor"
|
||||
"github.com/lightningnetwork/lnd/chanbackup"
|
||||
@ -1352,12 +1353,38 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
|
||||
cfg.HealthChecks.DiskCheck.Attempts,
|
||||
)
|
||||
|
||||
tlsHealthCheck := healthcheck.NewObservation(
|
||||
"tls",
|
||||
func() error {
|
||||
_, parsedCert, err := cert.LoadCert(
|
||||
cfg.TLSCertPath, cfg.TLSKeyPath,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If the current time is past the certificate's
|
||||
// expiry time, then it is considered expired.
|
||||
if time.Now().After(parsedCert.NotAfter) {
|
||||
return fmt.Errorf("TLS certificate is expired as of %v", parsedCert.NotAfter)
|
||||
}
|
||||
|
||||
// If the certificate is not outdated, no error needs to
|
||||
// be returned.
|
||||
return nil
|
||||
},
|
||||
cfg.HealthChecks.TLSCheck.Interval,
|
||||
cfg.HealthChecks.TLSCheck.Timeout,
|
||||
cfg.HealthChecks.TLSCheck.Backoff,
|
||||
cfg.HealthChecks.TLSCheck.Attempts,
|
||||
)
|
||||
|
||||
// If we have not disabled all of our health checks, we create a
|
||||
// liveliness monitor with our configured checks.
|
||||
s.livelinessMonitor = healthcheck.NewMonitor(
|
||||
&healthcheck.Config{
|
||||
Checks: []*healthcheck.Observation{
|
||||
chainHealthCheck, diskCheck,
|
||||
chainHealthCheck, diskCheck, tlsHealthCheck,
|
||||
},
|
||||
Shutdown: srvrLog.Criticalf,
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user