Browse Source
This commit adds a chanfitness package which will be used to track channel health and performance metrics. It adds a channel event structure which will be used to track channel opens/closes and peer uptime. The eventLog implements an uptime function which calcualtes uptime over a given period and a lifespan function which returns the time when the log began monitoring the channel and, if the channel is closed, the time when it stopped moitoring it.master
carla
5 years ago
4 changed files with 647 additions and 0 deletions
@ -0,0 +1,218 @@
|
||||
package chanfitness |
||||
|
||||
import ( |
||||
"fmt" |
||||
"time" |
||||
|
||||
"github.com/lightningnetwork/lnd/routing/route" |
||||
) |
||||
|
||||
type eventType int |
||||
|
||||
const ( |
||||
peerOnlineEvent eventType = iota |
||||
peerOfflineEvent |
||||
) |
||||
|
||||
// String provides string representations of channel events.
|
||||
func (e eventType) String() string { |
||||
switch e { |
||||
case peerOnlineEvent: |
||||
return "peer_online" |
||||
|
||||
case peerOfflineEvent: |
||||
return "peer_offline" |
||||
} |
||||
|
||||
return "unknown" |
||||
} |
||||
|
||||
// channelEvent is a a timestamped event which is observed on a per channel
|
||||
// basis.
|
||||
type channelEvent struct { |
||||
timestamp time.Time |
||||
eventType eventType |
||||
} |
||||
|
||||
// chanEventLog stores all events that have occurred over a channel's lifetime.
|
||||
type chanEventLog struct { |
||||
// id is the uint64 of the short channel ID.
|
||||
id uint64 |
||||
|
||||
// peer is the compressed public key of the peer being monitored.
|
||||
peer route.Vertex |
||||
|
||||
// events is a log of timestamped events observed for the channel.
|
||||
events []*channelEvent |
||||
|
||||
// now is expected to return the current time. It is supplied as an
|
||||
// external function to enable deterministic unit tests.
|
||||
now func() time.Time |
||||
|
||||
// openedAt tracks the first time this channel was seen. This is not
|
||||
// necessarily the time that it confirmed on chain because channel events
|
||||
// are not persisted at present.
|
||||
openedAt time.Time |
||||
|
||||
// closedAt is the time that the channel was closed. If the channel has not
|
||||
// been closed yet, it is zero.
|
||||
closedAt time.Time |
||||
} |
||||
|
||||
func newEventLog(id uint64, peer route.Vertex, now func() time.Time) *chanEventLog { |
||||
return &chanEventLog{ |
||||
id: id, |
||||
peer: peer, |
||||
now: now, |
||||
} |
||||
} |
||||
|
||||
// close sets the closing time for an event log.
|
||||
func (e *chanEventLog) close() { |
||||
e.closedAt = e.now() |
||||
} |
||||
|
||||
// add appends an event with the given type and current time to the event log.
|
||||
// The open time for the eventLog will be set to the event's timestamp if it is
|
||||
// not set yet.
|
||||
func (e *chanEventLog) add(eventType eventType) { |
||||
// If the channel is already closed, return early without adding an event.
|
||||
if !e.closedAt.IsZero() { |
||||
return |
||||
} |
||||
|
||||
// Add the event to the eventLog with the current timestamp.
|
||||
event := &channelEvent{ |
||||
timestamp: e.now(), |
||||
eventType: eventType, |
||||
} |
||||
e.events = append(e.events, event) |
||||
|
||||
// If the eventLog does not have an opened time set, set it to the timestamp
|
||||
// of the event. This has the effect of setting the eventLog's open time to
|
||||
// the timestamp of the first event added.
|
||||
if e.openedAt.IsZero() { |
||||
e.openedAt = event.timestamp |
||||
} |
||||
|
||||
log.Debugf("Channel %v recording event: %v", e.id, eventType) |
||||
} |
||||
|
||||
// onlinePeriod represents a period of time over which a peer was online.
|
||||
type onlinePeriod struct { |
||||
start, end time.Time |
||||
} |
||||
|
||||
// getOnlinePeriods returns a list of all the periods that the event log has
|
||||
// recorded the remote peer as being online. In the unexpected case where there
|
||||
// are no events, the function returns early. Online periods are defined as a
|
||||
// peer online event which is terminated by a peer offline event. This function
|
||||
// expects the event log provided to be ordered by ascending timestamp.
|
||||
func (e *chanEventLog) getOnlinePeriods() []*onlinePeriod { |
||||
// Return early if there are no events, there are no online periods.
|
||||
if len(e.events) == 0 { |
||||
return nil |
||||
} |
||||
|
||||
var ( |
||||
lastOnline time.Time |
||||
offline bool |
||||
onlinePeriods []*onlinePeriod |
||||
) |
||||
|
||||
// Loop through all events to build a list of periods that the peer was
|
||||
// online. Online periods are added when they are terminated with a peer
|
||||
// offline event. If the log ends on an online event, the period between
|
||||
// the online event and the present is not tracked. The type of the most
|
||||
// recent event is tracked using the offline bool so that we can add a
|
||||
// final online period if necessary.
|
||||
for _, event := range e.events { |
||||
|
||||
switch event.eventType { |
||||
case peerOnlineEvent: |
||||
lastOnline = event.timestamp |
||||
offline = false |
||||
|
||||
case peerOfflineEvent: |
||||
offline = true |
||||
|
||||
// Do not add to uptime if there is no previous online timestamp,
|
||||
// the event log has started with an offline event
|
||||
if lastOnline.IsZero() { |
||||
continue |
||||
} |
||||
|
||||
// The eventLog has recorded an offline event, having previously
|
||||
// been online so we add an online period to to set of online periods.
|
||||
onlinePeriods = append(onlinePeriods, &onlinePeriod{ |
||||
start: lastOnline, |
||||
end: event.timestamp, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
// If the last event was an peer offline event, we do not need to calculate
|
||||
// a final online period and can return online periods as is.
|
||||
if offline { |
||||
return onlinePeriods |
||||
} |
||||
|
||||
// The log ended on an online event, so we need to add a final online event.
|
||||
// If the channel is closed, this period is until channel closure. It it is
|
||||
// still open, we calculate it until the present.
|
||||
endTime := e.closedAt |
||||
if endTime.IsZero() { |
||||
endTime = e.now() |
||||
} |
||||
|
||||
// Add the final online period to the set and return.
|
||||
return append(onlinePeriods, &onlinePeriod{ |
||||
start: lastOnline, |
||||
end: endTime, |
||||
}) |
||||
} |
||||
|
||||
// uptime calculates the total uptime we have recorded for a channel over the
|
||||
// inclusive range specified. An error is returned if the end of the range is
|
||||
// before the start or a zero end time is returned.
|
||||
func (e *chanEventLog) uptime(start, end time.Time) (time.Duration, error) { |
||||
// Error if we are provided with an invalid range to calculate uptime for.
|
||||
if end.Before(start) { |
||||
return 0, fmt.Errorf("end time: %v before start time: %v", |
||||
end, start) |
||||
} |
||||
if end.IsZero() { |
||||
return 0, fmt.Errorf("zero end time") |
||||
} |
||||
|
||||
var uptime time.Duration |
||||
|
||||
for _, p := range e.getOnlinePeriods() { |
||||
// The online period ends before the range we're looking at, so we can
|
||||
// skip over it.
|
||||
if p.end.Before(start) { |
||||
continue |
||||
} |
||||
// The online period starts after the range we're looking at, so can
|
||||
// stop calculating uptime.
|
||||
if p.start.After(end) { |
||||
break |
||||
} |
||||
|
||||
// If the online period starts before our range, shift the start time up
|
||||
// so that we only calculate uptime from the start of our range.
|
||||
if p.start.Before(start) { |
||||
p.start = start |
||||
} |
||||
|
||||
// If the online period ends before our range, shift the end time
|
||||
// forward so that we only calculate uptime until the end of the range.
|
||||
if p.end.After(end) { |
||||
p.end = end |
||||
} |
||||
|
||||
uptime += p.end.Sub(p.start) |
||||
} |
||||
|
||||
return uptime, nil |
||||
} |
@ -0,0 +1,395 @@
|
||||
package chanfitness |
||||
|
||||
import ( |
||||
"testing" |
||||
"time" |
||||
) |
||||
|
||||
// TestAdd tests adding events to an event log. It tests the case where the
|
||||
// channel is open, and should have an event added, and the case where it is
|
||||
// closed and the event should not be added.
|
||||
func TestAdd(t *testing.T) { |
||||
tests := []struct { |
||||
name string |
||||
eventLog *chanEventLog |
||||
event eventType |
||||
expected []eventType |
||||
}{ |
||||
{ |
||||
name: "Channel open", |
||||
eventLog: &chanEventLog{ |
||||
now: time.Now, |
||||
}, |
||||
event: peerOnlineEvent, |
||||
expected: []eventType{peerOnlineEvent}, |
||||
}, |
||||
{ |
||||
name: "Channel closed, event not added", |
||||
eventLog: &chanEventLog{ |
||||
now: time.Now, |
||||
}, |
||||
event: peerOnlineEvent, |
||||
expected: []eventType{}, |
||||
}, |
||||
} |
||||
|
||||
for _, test := range tests { |
||||
test := test |
||||
|
||||
t.Run(test.name, func(t *testing.T) { |
||||
test.eventLog.add(test.event) |
||||
|
||||
for i, e := range test.expected { |
||||
if test.eventLog.events[i].eventType != e { |
||||
t.Fatalf("Expected event type: %v, got: %v", |
||||
e, test.eventLog.events[i].eventType) |
||||
} |
||||
} |
||||
}) |
||||
} |
||||
} |
||||
|
||||
// TestGetOnlinePeriod tests the getOnlinePeriod function. It tests the case
|
||||
// where no events present, and the case where an additional online period
|
||||
// must be added because the event log ends on an online event.
|
||||
func TestGetOnlinePeriod(t *testing.T) { |
||||
// Set time for consistent testing.
|
||||
now := time.Now() |
||||
|
||||
fourHoursAgo := now.Add(time.Hour * -4) |
||||
threeHoursAgo := now.Add(time.Hour * -3) |
||||
twoHoursAgo := now.Add(time.Hour * -2) |
||||
oneHourAgo := now.Add(time.Hour * -1) |
||||
|
||||
tests := []struct { |
||||
name string |
||||
events []*channelEvent |
||||
expectedOnline []*onlinePeriod |
||||
openedAt time.Time |
||||
closedAt time.Time |
||||
}{ |
||||
{ |
||||
name: "No events", |
||||
}, |
||||
{ |
||||
name: "Start on online period", |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: threeHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
{ |
||||
timestamp: twoHoursAgo, |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
}, |
||||
expectedOnline: []*onlinePeriod{ |
||||
{ |
||||
start: threeHoursAgo, |
||||
end: twoHoursAgo, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "Start on offline period", |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "End on an online period, channel not closed", |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
expectedOnline: []*onlinePeriod{ |
||||
{ |
||||
start: fourHoursAgo, |
||||
end: now, |
||||
}, |
||||
}, |
||||
}, |
||||
{ |
||||
name: "End on an online period, channel closed", |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
expectedOnline: []*onlinePeriod{ |
||||
{ |
||||
start: fourHoursAgo, |
||||
end: oneHourAgo, |
||||
}, |
||||
}, |
||||
closedAt: oneHourAgo, |
||||
}, |
||||
} |
||||
|
||||
for _, test := range tests { |
||||
test := test |
||||
|
||||
t.Run(test.name, func(t *testing.T) { |
||||
score := &chanEventLog{ |
||||
events: test.events, |
||||
now: func() time.Time { |
||||
return now |
||||
}, |
||||
openedAt: test.openedAt, |
||||
closedAt: test.closedAt, |
||||
} |
||||
|
||||
online := score.getOnlinePeriods() |
||||
|
||||
if len(online) != len(test.expectedOnline) { |
||||
t.Fatalf("Expectd: %v online periods, got: %v", |
||||
len(test.expectedOnline), len(online)) |
||||
} |
||||
|
||||
for i, o := range test.expectedOnline { |
||||
if online[i].start != o.start { |
||||
t.Errorf("Expected start: %v, got %v", o.start, |
||||
online[i].start) |
||||
} |
||||
|
||||
if online[i].end != o.end { |
||||
t.Errorf("Expected end: %v, got %v", o.end, |
||||
online[i].end) |
||||
} |
||||
} |
||||
}) |
||||
|
||||
} |
||||
} |
||||
|
||||
// TestUptime tests channel uptime calculation based on its event log.
|
||||
func TestUptime(t *testing.T) { |
||||
// Set time for consistent testing.
|
||||
now := time.Now() |
||||
|
||||
fourHoursAgo := now.Add(time.Hour * -4) |
||||
threeHoursAgo := now.Add(time.Hour * -3) |
||||
twoHoursAgo := now.Add(time.Hour * -2) |
||||
oneHourAgo := now.Add(time.Hour * -1) |
||||
|
||||
tests := []struct { |
||||
name string |
||||
|
||||
// opened at is the time the channel was recorded as being open, and is
|
||||
// never expected to be zero.
|
||||
openedAt time.Time |
||||
|
||||
// closed at is the tim the channel was recorded as being closed, and
|
||||
// can have a zero value if the.
|
||||
closedAt time.Time |
||||
|
||||
// events is the set of event log that we are calculating uptime for.
|
||||
events []*channelEvent |
||||
|
||||
// startTime is the beginning of the period that we are calculating
|
||||
// uptime for, it cannot have a zero value.
|
||||
startTime time.Time |
||||
|
||||
// endTime is the end of the period that we are calculating uptime for,
|
||||
// it cannot have a zero value.
|
||||
endTime time.Time |
||||
|
||||
// expectedUptime is the amount of uptime we expect to be calculated
|
||||
// over the period specified by startTime and endTime.
|
||||
expectedUptime time.Duration |
||||
|
||||
// expectErr is set to true if we expect an error to be returned when
|
||||
// calling the uptime function
|
||||
expectErr bool |
||||
}{ |
||||
{ |
||||
name: "End before start", |
||||
endTime: threeHoursAgo, |
||||
startTime: now, |
||||
expectErr: true, |
||||
}, |
||||
{ |
||||
name: "Zero end time", |
||||
expectErr: true, |
||||
}, |
||||
{ |
||||
name: "Online event and closed", |
||||
openedAt: fourHoursAgo, |
||||
closedAt: oneHourAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: fourHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour * 3, |
||||
}, |
||||
{ |
||||
name: "Online event and not closed", |
||||
openedAt: fourHoursAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: fourHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour * 4, |
||||
}, |
||||
{ |
||||
name: "Offline event and closed", |
||||
openedAt: fourHoursAgo, |
||||
closedAt: threeHoursAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
}, |
||||
startTime: fourHoursAgo, |
||||
endTime: now, |
||||
}, |
||||
{ |
||||
name: "Online event before close", |
||||
openedAt: fourHoursAgo, |
||||
closedAt: oneHourAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: twoHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: fourHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour, |
||||
}, |
||||
{ |
||||
name: "Online then offline event", |
||||
openedAt: fourHoursAgo, |
||||
closedAt: oneHourAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: threeHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
{ |
||||
timestamp: twoHoursAgo, |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
}, |
||||
startTime: fourHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour, |
||||
}, |
||||
{ |
||||
name: "Online event before uptime period", |
||||
openedAt: fourHoursAgo, |
||||
closedAt: oneHourAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: threeHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: twoHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour, |
||||
}, |
||||
{ |
||||
name: "Offline event after uptime period", |
||||
openedAt: fourHoursAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: fourHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
{ |
||||
timestamp: now.Add(time.Hour), |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
}, |
||||
startTime: twoHoursAgo, |
||||
endTime: now, |
||||
expectedUptime: time.Hour * 2, |
||||
}, |
||||
{ |
||||
name: "All events within period", |
||||
openedAt: fourHoursAgo, |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: twoHoursAgo, |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: threeHoursAgo, |
||||
endTime: oneHourAgo, |
||||
expectedUptime: time.Hour, |
||||
}, |
||||
{ |
||||
name: "Multiple online and offline", |
||||
openedAt: now.Add(time.Hour * -8), |
||||
events: []*channelEvent{ |
||||
{ |
||||
timestamp: now.Add(time.Hour * -7), |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
{ |
||||
timestamp: now.Add(time.Hour * -6), |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
{ |
||||
timestamp: now.Add(time.Hour * -5), |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
{ |
||||
timestamp: now.Add(time.Hour * -4), |
||||
eventType: peerOfflineEvent, |
||||
}, |
||||
{ |
||||
timestamp: now.Add(time.Hour * -3), |
||||
eventType: peerOnlineEvent, |
||||
}, |
||||
}, |
||||
startTime: now.Add(time.Hour * -8), |
||||
endTime: oneHourAgo, |
||||
expectedUptime: time.Hour * 4, |
||||
}, |
||||
} |
||||
|
||||
for _, test := range tests { |
||||
test := test |
||||
|
||||
t.Run(test.name, func(t *testing.T) { |
||||
score := &chanEventLog{ |
||||
events: test.events, |
||||
now: func() time.Time { |
||||
return now |
||||
}, |
||||
openedAt: test.openedAt, |
||||
closedAt: test.closedAt, |
||||
} |
||||
|
||||
uptime, err := score.uptime(test.startTime, test.endTime) |
||||
if test.expectErr && err == nil { |
||||
t.Fatal("Expected an error, got nil") |
||||
} |
||||
if !test.expectErr && err != nil { |
||||
t.Fatalf("Expcted no error, got: %v", err) |
||||
} |
||||
|
||||
if uptime != test.expectedUptime { |
||||
t.Errorf("Expected uptime: %v, got: %v", |
||||
test.expectedUptime, uptime) |
||||
} |
||||
}) |
||||
} |
||||
} |
@ -0,0 +1,32 @@
|
||||
package chanfitness |
||||
|
||||
import ( |
||||
"github.com/btcsuite/btclog" |
||||
"github.com/lightningnetwork/lnd/build" |
||||
) |
||||
|
||||
// Subsystem defines the logging code for this subsystem.
|
||||
const Subsystem = "CHFT" |
||||
|
||||
// log is a logger that is initialized with no output filters. This
|
||||
// means the package will not perform any logging by default until the caller
|
||||
// requests it.
|
||||
var log btclog.Logger |
||||
|
||||
// The default amount of logging is none.
|
||||
func init() { |
||||
UseLogger(build.NewSubLogger(Subsystem, nil)) |
||||
} |
||||
|
||||
// DisableLog disables all library log output. Logging output is disabled
|
||||
// by default until UseLogger is called.
|
||||
func DisableLog() { |
||||
UseLogger(btclog.Disabled) |
||||
} |
||||
|
||||
// UseLogger uses a specified Logger to output package logging info.
|
||||
// This should be used in preference to SetLogWriter if the caller is also
|
||||
// using btclog.
|
||||
func UseLogger(logger btclog.Logger) { |
||||
log = logger |
||||
} |
Loading…
Reference in new issue