chanfitness: Add channel event log structure

This commit adds a chanfitness package which will be used to track
channel health and performance metrics. It adds a channel event
structure which will be used to track channel opens/closes and peer
uptime.

The eventLog implements an uptime function which calcualtes uptime
over a given period and a lifespan function which returns the time
when the log began monitoring the channel and, if the channel is
closed, the time when it stopped moitoring it.
This commit is contained in:
carla 2019-08-08 13:39:13 -04:00
parent fe597b6310
commit 744876003d
4 changed files with 647 additions and 0 deletions

218
chanfitness/chanevent.go Normal file
View File

@ -0,0 +1,218 @@
package chanfitness
import (
"fmt"
"time"
"github.com/lightningnetwork/lnd/routing/route"
)
type eventType int
const (
peerOnlineEvent eventType = iota
peerOfflineEvent
)
// String provides string representations of channel events.
func (e eventType) String() string {
switch e {
case peerOnlineEvent:
return "peer_online"
case peerOfflineEvent:
return "peer_offline"
}
return "unknown"
}
// channelEvent is a a timestamped event which is observed on a per channel
// basis.
type channelEvent struct {
timestamp time.Time
eventType eventType
}
// chanEventLog stores all events that have occurred over a channel's lifetime.
type chanEventLog struct {
// id is the uint64 of the short channel ID.
id uint64
// peer is the compressed public key of the peer being monitored.
peer route.Vertex
// events is a log of timestamped events observed for the channel.
events []*channelEvent
// now is expected to return the current time. It is supplied as an
// external function to enable deterministic unit tests.
now func() time.Time
// openedAt tracks the first time this channel was seen. This is not
// necessarily the time that it confirmed on chain because channel events
// are not persisted at present.
openedAt time.Time
// closedAt is the time that the channel was closed. If the channel has not
// been closed yet, it is zero.
closedAt time.Time
}
func newEventLog(id uint64, peer route.Vertex, now func() time.Time) *chanEventLog {
return &chanEventLog{
id: id,
peer: peer,
now: now,
}
}
// close sets the closing time for an event log.
func (e *chanEventLog) close() {
e.closedAt = e.now()
}
// add appends an event with the given type and current time to the event log.
// The open time for the eventLog will be set to the event's timestamp if it is
// not set yet.
func (e *chanEventLog) add(eventType eventType) {
// If the channel is already closed, return early without adding an event.
if !e.closedAt.IsZero() {
return
}
// Add the event to the eventLog with the current timestamp.
event := &channelEvent{
timestamp: e.now(),
eventType: eventType,
}
e.events = append(e.events, event)
// If the eventLog does not have an opened time set, set it to the timestamp
// of the event. This has the effect of setting the eventLog's open time to
// the timestamp of the first event added.
if e.openedAt.IsZero() {
e.openedAt = event.timestamp
}
log.Debugf("Channel %v recording event: %v", e.id, eventType)
}
// onlinePeriod represents a period of time over which a peer was online.
type onlinePeriod struct {
start, end time.Time
}
// getOnlinePeriods returns a list of all the periods that the event log has
// recorded the remote peer as being online. In the unexpected case where there
// are no events, the function returns early. Online periods are defined as a
// peer online event which is terminated by a peer offline event. This function
// expects the event log provided to be ordered by ascending timestamp.
func (e *chanEventLog) getOnlinePeriods() []*onlinePeriod {
// Return early if there are no events, there are no online periods.
if len(e.events) == 0 {
return nil
}
var (
lastOnline time.Time
offline bool
onlinePeriods []*onlinePeriod
)
// Loop through all events to build a list of periods that the peer was
// online. Online periods are added when they are terminated with a peer
// offline event. If the log ends on an online event, the period between
// the online event and the present is not tracked. The type of the most
// recent event is tracked using the offline bool so that we can add a
// final online period if necessary.
for _, event := range e.events {
switch event.eventType {
case peerOnlineEvent:
lastOnline = event.timestamp
offline = false
case peerOfflineEvent:
offline = true
// Do not add to uptime if there is no previous online timestamp,
// the event log has started with an offline event
if lastOnline.IsZero() {
continue
}
// The eventLog has recorded an offline event, having previously
// been online so we add an online period to to set of online periods.
onlinePeriods = append(onlinePeriods, &onlinePeriod{
start: lastOnline,
end: event.timestamp,
})
}
}
// If the last event was an peer offline event, we do not need to calculate
// a final online period and can return online periods as is.
if offline {
return onlinePeriods
}
// The log ended on an online event, so we need to add a final online event.
// If the channel is closed, this period is until channel closure. It it is
// still open, we calculate it until the present.
endTime := e.closedAt
if endTime.IsZero() {
endTime = e.now()
}
// Add the final online period to the set and return.
return append(onlinePeriods, &onlinePeriod{
start: lastOnline,
end: endTime,
})
}
// uptime calculates the total uptime we have recorded for a channel over the
// inclusive range specified. An error is returned if the end of the range is
// before the start or a zero end time is returned.
func (e *chanEventLog) uptime(start, end time.Time) (time.Duration, error) {
// Error if we are provided with an invalid range to calculate uptime for.
if end.Before(start) {
return 0, fmt.Errorf("end time: %v before start time: %v",
end, start)
}
if end.IsZero() {
return 0, fmt.Errorf("zero end time")
}
var uptime time.Duration
for _, p := range e.getOnlinePeriods() {
// The online period ends before the range we're looking at, so we can
// skip over it.
if p.end.Before(start) {
continue
}
// The online period starts after the range we're looking at, so can
// stop calculating uptime.
if p.start.After(end) {
break
}
// If the online period starts before our range, shift the start time up
// so that we only calculate uptime from the start of our range.
if p.start.Before(start) {
p.start = start
}
// If the online period ends before our range, shift the end time
// forward so that we only calculate uptime until the end of the range.
if p.end.After(end) {
p.end = end
}
uptime += p.end.Sub(p.start)
}
return uptime, nil
}

View File

@ -0,0 +1,395 @@
package chanfitness
import (
"testing"
"time"
)
// TestAdd tests adding events to an event log. It tests the case where the
// channel is open, and should have an event added, and the case where it is
// closed and the event should not be added.
func TestAdd(t *testing.T) {
tests := []struct {
name string
eventLog *chanEventLog
event eventType
expected []eventType
}{
{
name: "Channel open",
eventLog: &chanEventLog{
now: time.Now,
},
event: peerOnlineEvent,
expected: []eventType{peerOnlineEvent},
},
{
name: "Channel closed, event not added",
eventLog: &chanEventLog{
now: time.Now,
},
event: peerOnlineEvent,
expected: []eventType{},
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
test.eventLog.add(test.event)
for i, e := range test.expected {
if test.eventLog.events[i].eventType != e {
t.Fatalf("Expected event type: %v, got: %v",
e, test.eventLog.events[i].eventType)
}
}
})
}
}
// TestGetOnlinePeriod tests the getOnlinePeriod function. It tests the case
// where no events present, and the case where an additional online period
// must be added because the event log ends on an online event.
func TestGetOnlinePeriod(t *testing.T) {
// Set time for consistent testing.
now := time.Now()
fourHoursAgo := now.Add(time.Hour * -4)
threeHoursAgo := now.Add(time.Hour * -3)
twoHoursAgo := now.Add(time.Hour * -2)
oneHourAgo := now.Add(time.Hour * -1)
tests := []struct {
name string
events []*channelEvent
expectedOnline []*onlinePeriod
openedAt time.Time
closedAt time.Time
}{
{
name: "No events",
},
{
name: "Start on online period",
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: twoHoursAgo,
eventType: peerOfflineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: threeHoursAgo,
end: twoHoursAgo,
},
},
},
{
name: "Start on offline period",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOfflineEvent,
},
},
},
{
name: "End on an online period, channel not closed",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: fourHoursAgo,
end: now,
},
},
},
{
name: "End on an online period, channel closed",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: fourHoursAgo,
end: oneHourAgo,
},
},
closedAt: oneHourAgo,
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
score := &chanEventLog{
events: test.events,
now: func() time.Time {
return now
},
openedAt: test.openedAt,
closedAt: test.closedAt,
}
online := score.getOnlinePeriods()
if len(online) != len(test.expectedOnline) {
t.Fatalf("Expectd: %v online periods, got: %v",
len(test.expectedOnline), len(online))
}
for i, o := range test.expectedOnline {
if online[i].start != o.start {
t.Errorf("Expected start: %v, got %v", o.start,
online[i].start)
}
if online[i].end != o.end {
t.Errorf("Expected end: %v, got %v", o.end,
online[i].end)
}
}
})
}
}
// TestUptime tests channel uptime calculation based on its event log.
func TestUptime(t *testing.T) {
// Set time for consistent testing.
now := time.Now()
fourHoursAgo := now.Add(time.Hour * -4)
threeHoursAgo := now.Add(time.Hour * -3)
twoHoursAgo := now.Add(time.Hour * -2)
oneHourAgo := now.Add(time.Hour * -1)
tests := []struct {
name string
// opened at is the time the channel was recorded as being open, and is
// never expected to be zero.
openedAt time.Time
// closed at is the tim the channel was recorded as being closed, and
// can have a zero value if the.
closedAt time.Time
// events is the set of event log that we are calculating uptime for.
events []*channelEvent
// startTime is the beginning of the period that we are calculating
// uptime for, it cannot have a zero value.
startTime time.Time
// endTime is the end of the period that we are calculating uptime for,
// it cannot have a zero value.
endTime time.Time
// expectedUptime is the amount of uptime we expect to be calculated
// over the period specified by startTime and endTime.
expectedUptime time.Duration
// expectErr is set to true if we expect an error to be returned when
// calling the uptime function
expectErr bool
}{
{
name: "End before start",
endTime: threeHoursAgo,
startTime: now,
expectErr: true,
},
{
name: "Zero end time",
expectErr: true,
},
{
name: "Online event and closed",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour * 3,
},
{
name: "Online event and not closed",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour * 4,
},
{
name: "Offline event and closed",
openedAt: fourHoursAgo,
closedAt: threeHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOfflineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
},
{
name: "Online event before close",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: twoHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Online then offline event",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: twoHoursAgo,
eventType: peerOfflineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Online event before uptime period",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: twoHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Offline event after uptime period",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour),
eventType: peerOfflineEvent,
},
},
startTime: twoHoursAgo,
endTime: now,
expectedUptime: time.Hour * 2,
},
{
name: "All events within period",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: twoHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: threeHoursAgo,
endTime: oneHourAgo,
expectedUptime: time.Hour,
},
{
name: "Multiple online and offline",
openedAt: now.Add(time.Hour * -8),
events: []*channelEvent{
{
timestamp: now.Add(time.Hour * -7),
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour * -6),
eventType: peerOfflineEvent,
},
{
timestamp: now.Add(time.Hour * -5),
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour * -4),
eventType: peerOfflineEvent,
},
{
timestamp: now.Add(time.Hour * -3),
eventType: peerOnlineEvent,
},
},
startTime: now.Add(time.Hour * -8),
endTime: oneHourAgo,
expectedUptime: time.Hour * 4,
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
score := &chanEventLog{
events: test.events,
now: func() time.Time {
return now
},
openedAt: test.openedAt,
closedAt: test.closedAt,
}
uptime, err := score.uptime(test.startTime, test.endTime)
if test.expectErr && err == nil {
t.Fatal("Expected an error, got nil")
}
if !test.expectErr && err != nil {
t.Fatalf("Expcted no error, got: %v", err)
}
if uptime != test.expectedUptime {
t.Errorf("Expected uptime: %v, got: %v",
test.expectedUptime, uptime)
}
})
}
}

32
chanfitness/log.go Normal file
View File

@ -0,0 +1,32 @@
package chanfitness
import (
"github.com/btcsuite/btclog"
"github.com/lightningnetwork/lnd/build"
)
// Subsystem defines the logging code for this subsystem.
const Subsystem = "CHFT"
// log is a logger that is initialized with no output filters. This
// means the package will not perform any logging by default until the caller
// requests it.
var log btclog.Logger
// The default amount of logging is none.
func init() {
UseLogger(build.NewSubLogger(Subsystem, nil))
}
// DisableLog disables all library log output. Logging output is disabled
// by default until UseLogger is called.
func DisableLog() {
UseLogger(btclog.Disabled)
}
// UseLogger uses a specified Logger to output package logging info.
// This should be used in preference to SetLogWriter if the caller is also
// using btclog.
func UseLogger(logger btclog.Logger) {
log = logger
}

2
log.go
View File

@ -11,6 +11,7 @@ import (
"github.com/lightningnetwork/lnd/build"
"github.com/lightningnetwork/lnd/chainntnfs"
"github.com/lightningnetwork/lnd/chanbackup"
"github.com/lightningnetwork/lnd/chanfitness"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/channelnotifier"
"github.com/lightningnetwork/lnd/contractcourt"
@ -98,6 +99,7 @@ func init() {
addSubLogger(routerrpc.Subsystem, routerrpc.UseLogger)
addSubLogger(wtclientrpc.Subsystem, wtclientrpc.UseLogger)
addSubLogger(chanfitness.Subsystem, chanfitness.UseLogger)
}
// addSubLogger is a helper method to conveniently create and register the