multi: add minimum disk space check
This commit is contained in:
parent
c365a16656
commit
5a73029442
19
config.go
19
config.go
@ -91,6 +91,16 @@ const (
|
|||||||
defaultChainTimeout = time.Second * 10
|
defaultChainTimeout = time.Second * 10
|
||||||
defaultChainBackoff = time.Second * 30
|
defaultChainBackoff = time.Second * 30
|
||||||
defaultChainAttempts = 3
|
defaultChainAttempts = 3
|
||||||
|
|
||||||
|
// By default, we will shut down if less than 10% of disk space is
|
||||||
|
// available. We allow a longer interval for disk space checks, because
|
||||||
|
// this check is less likely to deteriorate quickly. However, we allow
|
||||||
|
// fewer retries because this should not be a flaky check.
|
||||||
|
defaultRequiredDisk = 0.1
|
||||||
|
defaultDiskInterval = time.Hour * 12
|
||||||
|
defaultDiskTimeout = time.Second * 5
|
||||||
|
defaultDiskBackoff = time.Minute
|
||||||
|
defaultDiskAttempts = 2
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -397,6 +407,15 @@ func DefaultConfig() Config {
|
|||||||
Attempts: defaultChainAttempts,
|
Attempts: defaultChainAttempts,
|
||||||
Backoff: defaultChainBackoff,
|
Backoff: defaultChainBackoff,
|
||||||
},
|
},
|
||||||
|
DiskCheck: &lncfg.DiskCheckConfig{
|
||||||
|
RequiredRemaining: defaultRequiredDisk,
|
||||||
|
CheckConfig: &lncfg.CheckConfig{
|
||||||
|
Interval: defaultDiskInterval,
|
||||||
|
Attempts: defaultDiskAttempts,
|
||||||
|
Timeout: defaultDiskTimeout,
|
||||||
|
Backoff: defaultDiskBackoff,
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
MaxOutgoingCltvExpiry: htlcswitch.DefaultMaxOutgoingCltvExpiry,
|
MaxOutgoingCltvExpiry: htlcswitch.DefaultMaxOutgoingCltvExpiry,
|
||||||
MaxChannelFeeAllocation: htlcswitch.DefaultMaxLinkFeeAllocation,
|
MaxChannelFeeAllocation: htlcswitch.DefaultMaxLinkFeeAllocation,
|
||||||
|
1
go.mod
1
go.mod
@ -66,6 +66,7 @@ require (
|
|||||||
go.uber.org/zap v1.14.1 // indirect
|
go.uber.org/zap v1.14.1 // indirect
|
||||||
golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899
|
golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899
|
||||||
golang.org/x/net v0.0.0-20191002035440-2ec189313ef0
|
golang.org/x/net v0.0.0-20191002035440-2ec189313ef0
|
||||||
|
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5
|
||||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2
|
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2
|
||||||
google.golang.org/grpc v1.24.0
|
google.golang.org/grpc v1.24.0
|
||||||
gopkg.in/errgo.v1 v1.0.1 // indirect
|
gopkg.in/errgo.v1 v1.0.1 // indirect
|
||||||
|
1
go.sum
1
go.sum
@ -332,6 +332,7 @@ golang.org/x/sys v0.0.0-20190904154756-749cb33beabd h1:DBH9mDw0zluJT/R+nGuV3jWFW
|
|||||||
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0=
|
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0=
|
||||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20200724161237-0e2f3a69832c h1:UIcGWL6/wpCfyGuJnRFJRurA+yj8RrW7Q6x2YMCXt6c=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 h1:z99zHgr7hKfrUcX/KsoJk5FJfjTceCKIp96+biqP4To=
|
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 h1:z99zHgr7hKfrUcX/KsoJk5FJfjTceCKIp96+biqP4To=
|
||||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
18
healthcheck/diskcheck.go
Normal file
18
healthcheck/diskcheck.go
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
// +build !windows,!solaris
|
||||||
|
|
||||||
|
package healthcheck
|
||||||
|
|
||||||
|
import "syscall"
|
||||||
|
|
||||||
|
// AvailableDiskSpace returns the ratio of free disk space to total capacity
// for the filesystem that contains path.
//
// The ratio is computed from the block counts reported by statfs: free blocks
// divided by total blocks. An error from the statfs call is returned to the
// caller unchanged. If the filesystem reports zero total blocks, a ratio of 0
// is returned rather than the NaN that dividing 0 by 0 would produce.
func AvailableDiskSpace(path string) (float64, error) {
	var stat syscall.Statfs_t
	if err := syscall.Statfs(path, &stat); err != nil {
		return 0, err
	}

	// A filesystem may report no blocks at all. Floating point division
	// would then yield NaN (or Inf), which compares false against any
	// threshold and prints confusingly, so we treat a zero-capacity
	// filesystem as having no free space.
	if stat.Blocks == 0 {
		return 0, nil
	}

	// Calculate our free blocks/total blocks to get our total ratio of
	// free blocks.
	return float64(stat.Bfree) / float64(stat.Blocks), nil
}
|
17
healthcheck/diskcheck_solaris.go
Normal file
17
healthcheck/diskcheck_solaris.go
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
package healthcheck
|
||||||
|
|
||||||
|
import "golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
// AvailableDiskSpace returns ratio of available disk space to total capacity
|
||||||
|
// for solaris.
|
||||||
|
func AvailableDiskSpace(path string) (float64, error) {
|
||||||
|
s := unix.Statvfs_t{}
|
||||||
|
err := unix.Statvfs(path, &s)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate our free blocks/total blocks to get our total ratio of
|
||||||
|
// free blocks.
|
||||||
|
return float64(s.Bfree) / float64(s.Blocks), nil
|
||||||
|
}
|
17
healthcheck/diskcheck_windows.go
Normal file
17
healthcheck/diskcheck_windows.go
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
package healthcheck
|
||||||
|
|
||||||
|
import "golang.org/x/sys/windows"
|
||||||
|
|
||||||
|
// AvailableDiskSpace returns ratio of available disk space to total capacity
|
||||||
|
// for windows.
|
||||||
|
func AvailableDiskSpace(path string) (float64, error) {
|
||||||
|
var free, total, avail uint64
|
||||||
|
|
||||||
|
pathPtr, err := windows.UTF16PtrFromString(path)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = windows.GetDiskFreeSpaceEx(pathPtr, &free, &total, &avail)
|
||||||
|
|
||||||
|
return float64(avail) / float64(total), nil
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
package lncfg
|
package lncfg
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -23,11 +24,27 @@ var (
|
|||||||
// the lnd runs.
|
// the lnd runs.
|
||||||
type HealthCheckConfig struct {
|
type HealthCheckConfig struct {
|
||||||
ChainCheck *CheckConfig `group:"chainbackend" namespace:"chainbackend"`
|
ChainCheck *CheckConfig `group:"chainbackend" namespace:"chainbackend"`
|
||||||
|
|
||||||
|
DiskCheck *DiskCheckConfig `group:"diskspace" namespace:"diskspace"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate checks the values configured for our health checks.
|
// Validate checks the values configured for our health checks.
|
||||||
func (h *HealthCheckConfig) Validate() error {
|
func (h *HealthCheckConfig) Validate() error {
|
||||||
return h.ChainCheck.validate("chain backend")
|
if err := h.ChainCheck.validate("chain backend"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.DiskCheck.validate("disk space"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.DiskCheck.RequiredRemaining < 0 ||
|
||||||
|
h.DiskCheck.RequiredRemaining >= 1 {
|
||||||
|
|
||||||
|
return errors.New("disk required ratio must be in [0:1)")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type CheckConfig struct {
|
type CheckConfig struct {
|
||||||
@ -63,3 +80,11 @@ func (c *CheckConfig) validate(name string) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DiskCheckConfig contains configuration for ensuring that our node has
|
||||||
|
// sufficient disk space.
|
||||||
|
type DiskCheckConfig struct {
|
||||||
|
RequiredRemaining float64 `long:"diskrequired" description:"The minimum ratio of free disk space to total capacity that we allow before shutting lnd down safely."`
|
||||||
|
|
||||||
|
*CheckConfig
|
||||||
|
}
|
||||||
|
@ -418,3 +418,21 @@ litecoin.node=ltcd
|
|||||||
; value must be >= 1m.
|
; value must be >= 1m.
|
||||||
; healthcheck.chainbackend.interval=1m
|
; healthcheck.chainbackend.interval=1m
|
||||||
|
|
||||||
|
; The minimum ratio of free disk space to total capacity that we require.
|
||||||
|
; healthcheck.diskspace.diskrequired=0.1
|
||||||
|
|
||||||
|
; The number of times we should attempt to query our available disk space before
|
||||||
|
; gracefully shutting down. Set this value to 0 to disable this health check.
|
||||||
|
; healthcheck.diskspace.attempts=2
|
||||||
|
|
||||||
|
; The amount of time we allow a query for our available disk space to take
|
||||||
|
; before we fail the attempt. This value must be >= 1s.
|
||||||
|
; healthcheck.diskspace.timeout=5s
|
||||||
|
|
||||||
|
; The amount of time we should backoff between failed attempts to query
|
||||||
|
; available disk space. This value must be >= 1s.
|
||||||
|
; healthcheck.diskspace.backoff=1m
|
||||||
|
|
||||||
|
; The amount of time we should wait between disk space health checks. This
|
||||||
|
; value must be >= 1m.
|
||||||
|
; healthcheck.diskspace.interval=12h
|
||||||
|
26
server.go
26
server.go
@ -1273,12 +1273,36 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
|
|||||||
cfg.HealthChecks.ChainCheck.Attempts,
|
cfg.HealthChecks.ChainCheck.Attempts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
diskCheck := healthcheck.NewObservation(
|
||||||
|
"disk space",
|
||||||
|
func() error {
|
||||||
|
free, err := healthcheck.AvailableDiskSpace(cfg.LndDir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have more free space than we require,
|
||||||
|
// we return a nil error.
|
||||||
|
if free > cfg.HealthChecks.DiskCheck.RequiredRemaining {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("require: %v free space, got: %v",
|
||||||
|
cfg.HealthChecks.DiskCheck.RequiredRemaining,
|
||||||
|
free)
|
||||||
|
},
|
||||||
|
cfg.HealthChecks.DiskCheck.Interval,
|
||||||
|
cfg.HealthChecks.DiskCheck.Timeout,
|
||||||
|
cfg.HealthChecks.DiskCheck.Backoff,
|
||||||
|
cfg.HealthChecks.DiskCheck.Attempts,
|
||||||
|
)
|
||||||
|
|
||||||
// If we have not disabled all of our health checks, we create a
|
// If we have not disabled all of our health checks, we create a
|
||||||
// liveliness monitor with our configured checks.
|
// liveliness monitor with our configured checks.
|
||||||
s.livelinessMonitor = healthcheck.NewMonitor(
|
s.livelinessMonitor = healthcheck.NewMonitor(
|
||||||
&healthcheck.Config{
|
&healthcheck.Config{
|
||||||
Checks: []*healthcheck.Observation{
|
Checks: []*healthcheck.Observation{
|
||||||
chainHealthCheck,
|
chainHealthCheck, diskCheck,
|
||||||
},
|
},
|
||||||
Shutdown: srvrLog.Criticalf,
|
Shutdown: srvrLog.Criticalf,
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user