Browse Source

chanfitness: Add channel event log structure

This commit adds a chanfitness package which will be used to track
channel health and performance metrics. It adds a channel event
structure which will be used to track channel opens/closes and peer
uptime.

The eventLog implements an uptime function which calcualtes uptime
over a given period and a lifespan function which returns the time
when the log began monitoring the channel and, if the channel is
closed, the time when it stopped moitoring it.
master
carla 5 years ago
parent
commit
744876003d
  1. 218
      chanfitness/chanevent.go
  2. 395
      chanfitness/chanevent_test.go
  3. 32
      chanfitness/log.go
  4. 2
      log.go

218
chanfitness/chanevent.go

@ -0,0 +1,218 @@
package chanfitness
import (
"fmt"
"time"
"github.com/lightningnetwork/lnd/routing/route"
)
type eventType int
const (
peerOnlineEvent eventType = iota
peerOfflineEvent
)
// String provides string representations of channel events.
func (e eventType) String() string {
switch e {
case peerOnlineEvent:
return "peer_online"
case peerOfflineEvent:
return "peer_offline"
}
return "unknown"
}
// channelEvent is a a timestamped event which is observed on a per channel
// basis.
type channelEvent struct {
timestamp time.Time
eventType eventType
}
// chanEventLog stores all events that have occurred over a channel's lifetime.
type chanEventLog struct {
// id is the uint64 of the short channel ID.
id uint64
// peer is the compressed public key of the peer being monitored.
peer route.Vertex
// events is a log of timestamped events observed for the channel.
events []*channelEvent
// now is expected to return the current time. It is supplied as an
// external function to enable deterministic unit tests.
now func() time.Time
// openedAt tracks the first time this channel was seen. This is not
// necessarily the time that it confirmed on chain because channel events
// are not persisted at present.
openedAt time.Time
// closedAt is the time that the channel was closed. If the channel has not
// been closed yet, it is zero.
closedAt time.Time
}
func newEventLog(id uint64, peer route.Vertex, now func() time.Time) *chanEventLog {
return &chanEventLog{
id: id,
peer: peer,
now: now,
}
}
// close sets the closing time for an event log.
func (e *chanEventLog) close() {
e.closedAt = e.now()
}
// add appends an event with the given type and current time to the event log.
// The open time for the eventLog will be set to the event's timestamp if it is
// not set yet.
func (e *chanEventLog) add(eventType eventType) {
// If the channel is already closed, return early without adding an event.
if !e.closedAt.IsZero() {
return
}
// Add the event to the eventLog with the current timestamp.
event := &channelEvent{
timestamp: e.now(),
eventType: eventType,
}
e.events = append(e.events, event)
// If the eventLog does not have an opened time set, set it to the timestamp
// of the event. This has the effect of setting the eventLog's open time to
// the timestamp of the first event added.
if e.openedAt.IsZero() {
e.openedAt = event.timestamp
}
log.Debugf("Channel %v recording event: %v", e.id, eventType)
}
// onlinePeriod represents a period of time over which a peer was online.
type onlinePeriod struct {
start, end time.Time
}
// getOnlinePeriods returns a list of all the periods that the event log has
// recorded the remote peer as being online. In the unexpected case where there
// are no events, the function returns early. Online periods are defined as a
// peer online event which is terminated by a peer offline event. This function
// expects the event log provided to be ordered by ascending timestamp.
func (e *chanEventLog) getOnlinePeriods() []*onlinePeriod {
// Return early if there are no events, there are no online periods.
if len(e.events) == 0 {
return nil
}
var (
lastOnline time.Time
offline bool
onlinePeriods []*onlinePeriod
)
// Loop through all events to build a list of periods that the peer was
// online. Online periods are added when they are terminated with a peer
// offline event. If the log ends on an online event, the period between
// the online event and the present is not tracked. The type of the most
// recent event is tracked using the offline bool so that we can add a
// final online period if necessary.
for _, event := range e.events {
switch event.eventType {
case peerOnlineEvent:
lastOnline = event.timestamp
offline = false
case peerOfflineEvent:
offline = true
// Do not add to uptime if there is no previous online timestamp,
// the event log has started with an offline event
if lastOnline.IsZero() {
continue
}
// The eventLog has recorded an offline event, having previously
// been online so we add an online period to to set of online periods.
onlinePeriods = append(onlinePeriods, &onlinePeriod{
start: lastOnline,
end: event.timestamp,
})
}
}
// If the last event was an peer offline event, we do not need to calculate
// a final online period and can return online periods as is.
if offline {
return onlinePeriods
}
// The log ended on an online event, so we need to add a final online event.
// If the channel is closed, this period is until channel closure. It it is
// still open, we calculate it until the present.
endTime := e.closedAt
if endTime.IsZero() {
endTime = e.now()
}
// Add the final online period to the set and return.
return append(onlinePeriods, &onlinePeriod{
start: lastOnline,
end: endTime,
})
}
// uptime calculates the total uptime we have recorded for a channel over the
// inclusive range specified. An error is returned if the end of the range is
// before the start or a zero end time is returned.
func (e *chanEventLog) uptime(start, end time.Time) (time.Duration, error) {
// Error if we are provided with an invalid range to calculate uptime for.
if end.Before(start) {
return 0, fmt.Errorf("end time: %v before start time: %v",
end, start)
}
if end.IsZero() {
return 0, fmt.Errorf("zero end time")
}
var uptime time.Duration
for _, p := range e.getOnlinePeriods() {
// The online period ends before the range we're looking at, so we can
// skip over it.
if p.end.Before(start) {
continue
}
// The online period starts after the range we're looking at, so can
// stop calculating uptime.
if p.start.After(end) {
break
}
// If the online period starts before our range, shift the start time up
// so that we only calculate uptime from the start of our range.
if p.start.Before(start) {
p.start = start
}
// If the online period ends before our range, shift the end time
// forward so that we only calculate uptime until the end of the range.
if p.end.After(end) {
p.end = end
}
uptime += p.end.Sub(p.start)
}
return uptime, nil
}

395
chanfitness/chanevent_test.go

@ -0,0 +1,395 @@
package chanfitness
import (
"testing"
"time"
)
// TestAdd tests adding events to an event log. It tests the case where the
// channel is open, and should have an event added, and the case where it is
// closed and the event should not be added.
func TestAdd(t *testing.T) {
tests := []struct {
name string
eventLog *chanEventLog
event eventType
expected []eventType
}{
{
name: "Channel open",
eventLog: &chanEventLog{
now: time.Now,
},
event: peerOnlineEvent,
expected: []eventType{peerOnlineEvent},
},
{
name: "Channel closed, event not added",
eventLog: &chanEventLog{
now: time.Now,
},
event: peerOnlineEvent,
expected: []eventType{},
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
test.eventLog.add(test.event)
for i, e := range test.expected {
if test.eventLog.events[i].eventType != e {
t.Fatalf("Expected event type: %v, got: %v",
e, test.eventLog.events[i].eventType)
}
}
})
}
}
// TestGetOnlinePeriod tests the getOnlinePeriod function. It tests the case
// where no events present, and the case where an additional online period
// must be added because the event log ends on an online event.
func TestGetOnlinePeriod(t *testing.T) {
// Set time for consistent testing.
now := time.Now()
fourHoursAgo := now.Add(time.Hour * -4)
threeHoursAgo := now.Add(time.Hour * -3)
twoHoursAgo := now.Add(time.Hour * -2)
oneHourAgo := now.Add(time.Hour * -1)
tests := []struct {
name string
events []*channelEvent
expectedOnline []*onlinePeriod
openedAt time.Time
closedAt time.Time
}{
{
name: "No events",
},
{
name: "Start on online period",
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: twoHoursAgo,
eventType: peerOfflineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: threeHoursAgo,
end: twoHoursAgo,
},
},
},
{
name: "Start on offline period",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOfflineEvent,
},
},
},
{
name: "End on an online period, channel not closed",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: fourHoursAgo,
end: now,
},
},
},
{
name: "End on an online period, channel closed",
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
expectedOnline: []*onlinePeriod{
{
start: fourHoursAgo,
end: oneHourAgo,
},
},
closedAt: oneHourAgo,
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
score := &chanEventLog{
events: test.events,
now: func() time.Time {
return now
},
openedAt: test.openedAt,
closedAt: test.closedAt,
}
online := score.getOnlinePeriods()
if len(online) != len(test.expectedOnline) {
t.Fatalf("Expectd: %v online periods, got: %v",
len(test.expectedOnline), len(online))
}
for i, o := range test.expectedOnline {
if online[i].start != o.start {
t.Errorf("Expected start: %v, got %v", o.start,
online[i].start)
}
if online[i].end != o.end {
t.Errorf("Expected end: %v, got %v", o.end,
online[i].end)
}
}
})
}
}
// TestUptime tests channel uptime calculation based on its event log.
func TestUptime(t *testing.T) {
// Set time for consistent testing.
now := time.Now()
fourHoursAgo := now.Add(time.Hour * -4)
threeHoursAgo := now.Add(time.Hour * -3)
twoHoursAgo := now.Add(time.Hour * -2)
oneHourAgo := now.Add(time.Hour * -1)
tests := []struct {
name string
// opened at is the time the channel was recorded as being open, and is
// never expected to be zero.
openedAt time.Time
// closed at is the tim the channel was recorded as being closed, and
// can have a zero value if the.
closedAt time.Time
// events is the set of event log that we are calculating uptime for.
events []*channelEvent
// startTime is the beginning of the period that we are calculating
// uptime for, it cannot have a zero value.
startTime time.Time
// endTime is the end of the period that we are calculating uptime for,
// it cannot have a zero value.
endTime time.Time
// expectedUptime is the amount of uptime we expect to be calculated
// over the period specified by startTime and endTime.
expectedUptime time.Duration
// expectErr is set to true if we expect an error to be returned when
// calling the uptime function
expectErr bool
}{
{
name: "End before start",
endTime: threeHoursAgo,
startTime: now,
expectErr: true,
},
{
name: "Zero end time",
expectErr: true,
},
{
name: "Online event and closed",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour * 3,
},
{
name: "Online event and not closed",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour * 4,
},
{
name: "Offline event and closed",
openedAt: fourHoursAgo,
closedAt: threeHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOfflineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
},
{
name: "Online event before close",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: twoHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Online then offline event",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: twoHoursAgo,
eventType: peerOfflineEvent,
},
},
startTime: fourHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Online event before uptime period",
openedAt: fourHoursAgo,
closedAt: oneHourAgo,
events: []*channelEvent{
{
timestamp: threeHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: twoHoursAgo,
endTime: now,
expectedUptime: time.Hour,
},
{
name: "Offline event after uptime period",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: fourHoursAgo,
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour),
eventType: peerOfflineEvent,
},
},
startTime: twoHoursAgo,
endTime: now,
expectedUptime: time.Hour * 2,
},
{
name: "All events within period",
openedAt: fourHoursAgo,
events: []*channelEvent{
{
timestamp: twoHoursAgo,
eventType: peerOnlineEvent,
},
},
startTime: threeHoursAgo,
endTime: oneHourAgo,
expectedUptime: time.Hour,
},
{
name: "Multiple online and offline",
openedAt: now.Add(time.Hour * -8),
events: []*channelEvent{
{
timestamp: now.Add(time.Hour * -7),
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour * -6),
eventType: peerOfflineEvent,
},
{
timestamp: now.Add(time.Hour * -5),
eventType: peerOnlineEvent,
},
{
timestamp: now.Add(time.Hour * -4),
eventType: peerOfflineEvent,
},
{
timestamp: now.Add(time.Hour * -3),
eventType: peerOnlineEvent,
},
},
startTime: now.Add(time.Hour * -8),
endTime: oneHourAgo,
expectedUptime: time.Hour * 4,
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
score := &chanEventLog{
events: test.events,
now: func() time.Time {
return now
},
openedAt: test.openedAt,
closedAt: test.closedAt,
}
uptime, err := score.uptime(test.startTime, test.endTime)
if test.expectErr && err == nil {
t.Fatal("Expected an error, got nil")
}
if !test.expectErr && err != nil {
t.Fatalf("Expcted no error, got: %v", err)
}
if uptime != test.expectedUptime {
t.Errorf("Expected uptime: %v, got: %v",
test.expectedUptime, uptime)
}
})
}
}

32
chanfitness/log.go

@ -0,0 +1,32 @@
package chanfitness
import (
"github.com/btcsuite/btclog"
"github.com/lightningnetwork/lnd/build"
)
// Subsystem defines the logging code for this subsystem.
const Subsystem = "CHFT"
// log is a logger that is initialized with no output filters. This
// means the package will not perform any logging by default until the caller
// requests it.
var log btclog.Logger
// The default amount of logging is none.
func init() {
UseLogger(build.NewSubLogger(Subsystem, nil))
}
// DisableLog disables all library log output. Logging output is disabled
// by default until UseLogger is called.
func DisableLog() {
UseLogger(btclog.Disabled)
}
// UseLogger uses a specified Logger to output package logging info.
// This should be used in preference to SetLogWriter if the caller is also
// using btclog.
func UseLogger(logger btclog.Logger) {
log = logger
}

2
log.go

@ -11,6 +11,7 @@ import (
"github.com/lightningnetwork/lnd/build"
"github.com/lightningnetwork/lnd/chainntnfs"
"github.com/lightningnetwork/lnd/chanbackup"
"github.com/lightningnetwork/lnd/chanfitness"
"github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/channelnotifier"
"github.com/lightningnetwork/lnd/contractcourt"
@ -98,6 +99,7 @@ func init() {
addSubLogger(routerrpc.Subsystem, routerrpc.UseLogger)
addSubLogger(wtclientrpc.Subsystem, wtclientrpc.UseLogger)
addSubLogger(chanfitness.Subsystem, chanfitness.UseLogger)
}
// addSubLogger is a helper method to conveniently create and register the

Loading…
Cancel
Save