From 744876003dac8298790df9718ea5e337c2564685 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 8 Aug 2019 13:39:13 -0400 Subject: [PATCH] chanfitness: Add channel event log structure This commit adds a chanfitness package which will be used to track channel health and performance metrics. It adds a channel event structure which will be used to track channel opens/closes and peer uptime. The eventLog implements an uptime function which calcualtes uptime over a given period and a lifespan function which returns the time when the log began monitoring the channel and, if the channel is closed, the time when it stopped moitoring it. --- chanfitness/chanevent.go | 218 +++++++++++++++++++ chanfitness/chanevent_test.go | 395 ++++++++++++++++++++++++++++++++++ chanfitness/log.go | 32 +++ log.go | 2 + 4 files changed, 647 insertions(+) create mode 100644 chanfitness/chanevent.go create mode 100644 chanfitness/chanevent_test.go create mode 100644 chanfitness/log.go diff --git a/chanfitness/chanevent.go b/chanfitness/chanevent.go new file mode 100644 index 00000000..b91ada03 --- /dev/null +++ b/chanfitness/chanevent.go @@ -0,0 +1,218 @@ +package chanfitness + +import ( + "fmt" + "time" + + "github.com/lightningnetwork/lnd/routing/route" +) + +type eventType int + +const ( + peerOnlineEvent eventType = iota + peerOfflineEvent +) + +// String provides string representations of channel events. +func (e eventType) String() string { + switch e { + case peerOnlineEvent: + return "peer_online" + + case peerOfflineEvent: + return "peer_offline" + } + + return "unknown" +} + +// channelEvent is a a timestamped event which is observed on a per channel +// basis. +type channelEvent struct { + timestamp time.Time + eventType eventType +} + +// chanEventLog stores all events that have occurred over a channel's lifetime. +type chanEventLog struct { + // id is the uint64 of the short channel ID. + id uint64 + + // peer is the compressed public key of the peer being monitored. + peer route.Vertex + + // events is a log of timestamped events observed for the channel. + events []*channelEvent + + // now is expected to return the current time. It is supplied as an + // external function to enable deterministic unit tests. + now func() time.Time + + // openedAt tracks the first time this channel was seen. This is not + // necessarily the time that it confirmed on chain because channel events + // are not persisted at present. + openedAt time.Time + + // closedAt is the time that the channel was closed. If the channel has not + // been closed yet, it is zero. + closedAt time.Time +} + +func newEventLog(id uint64, peer route.Vertex, now func() time.Time) *chanEventLog { + return &chanEventLog{ + id: id, + peer: peer, + now: now, + } +} + +// close sets the closing time for an event log. +func (e *chanEventLog) close() { + e.closedAt = e.now() +} + +// add appends an event with the given type and current time to the event log. +// The open time for the eventLog will be set to the event's timestamp if it is +// not set yet. +func (e *chanEventLog) add(eventType eventType) { + // If the channel is already closed, return early without adding an event. + if !e.closedAt.IsZero() { + return + } + + // Add the event to the eventLog with the current timestamp. + event := &channelEvent{ + timestamp: e.now(), + eventType: eventType, + } + e.events = append(e.events, event) + + // If the eventLog does not have an opened time set, set it to the timestamp + // of the event. This has the effect of setting the eventLog's open time to + // the timestamp of the first event added. + if e.openedAt.IsZero() { + e.openedAt = event.timestamp + } + + log.Debugf("Channel %v recording event: %v", e.id, eventType) +} + +// onlinePeriod represents a period of time over which a peer was online. +type onlinePeriod struct { + start, end time.Time +} + +// getOnlinePeriods returns a list of all the periods that the event log has +// recorded the remote peer as being online. In the unexpected case where there +// are no events, the function returns early. Online periods are defined as a +// peer online event which is terminated by a peer offline event. This function +// expects the event log provided to be ordered by ascending timestamp. +func (e *chanEventLog) getOnlinePeriods() []*onlinePeriod { + // Return early if there are no events, there are no online periods. + if len(e.events) == 0 { + return nil + } + + var ( + lastOnline time.Time + offline bool + onlinePeriods []*onlinePeriod + ) + + // Loop through all events to build a list of periods that the peer was + // online. Online periods are added when they are terminated with a peer + // offline event. If the log ends on an online event, the period between + // the online event and the present is not tracked. The type of the most + // recent event is tracked using the offline bool so that we can add a + // final online period if necessary. + for _, event := range e.events { + + switch event.eventType { + case peerOnlineEvent: + lastOnline = event.timestamp + offline = false + + case peerOfflineEvent: + offline = true + + // Do not add to uptime if there is no previous online timestamp, + // the event log has started with an offline event + if lastOnline.IsZero() { + continue + } + + // The eventLog has recorded an offline event, having previously + // been online so we add an online period to to set of online periods. + onlinePeriods = append(onlinePeriods, &onlinePeriod{ + start: lastOnline, + end: event.timestamp, + }) + } + } + + // If the last event was an peer offline event, we do not need to calculate + // a final online period and can return online periods as is. + if offline { + return onlinePeriods + } + + // The log ended on an online event, so we need to add a final online event. + // If the channel is closed, this period is until channel closure. It it is + // still open, we calculate it until the present. + endTime := e.closedAt + if endTime.IsZero() { + endTime = e.now() + } + + // Add the final online period to the set and return. + return append(onlinePeriods, &onlinePeriod{ + start: lastOnline, + end: endTime, + }) +} + +// uptime calculates the total uptime we have recorded for a channel over the +// inclusive range specified. An error is returned if the end of the range is +// before the start or a zero end time is returned. +func (e *chanEventLog) uptime(start, end time.Time) (time.Duration, error) { + // Error if we are provided with an invalid range to calculate uptime for. + if end.Before(start) { + return 0, fmt.Errorf("end time: %v before start time: %v", + end, start) + } + if end.IsZero() { + return 0, fmt.Errorf("zero end time") + } + + var uptime time.Duration + + for _, p := range e.getOnlinePeriods() { + // The online period ends before the range we're looking at, so we can + // skip over it. + if p.end.Before(start) { + continue + } + // The online period starts after the range we're looking at, so can + // stop calculating uptime. + if p.start.After(end) { + break + } + + // If the online period starts before our range, shift the start time up + // so that we only calculate uptime from the start of our range. + if p.start.Before(start) { + p.start = start + } + + // If the online period ends before our range, shift the end time + // forward so that we only calculate uptime until the end of the range. + if p.end.After(end) { + p.end = end + } + + uptime += p.end.Sub(p.start) + } + + return uptime, nil +} diff --git a/chanfitness/chanevent_test.go b/chanfitness/chanevent_test.go new file mode 100644 index 00000000..72733dbe --- /dev/null +++ b/chanfitness/chanevent_test.go @@ -0,0 +1,395 @@ +package chanfitness + +import ( + "testing" + "time" +) + +// TestAdd tests adding events to an event log. It tests the case where the +// channel is open, and should have an event added, and the case where it is +// closed and the event should not be added. +func TestAdd(t *testing.T) { + tests := []struct { + name string + eventLog *chanEventLog + event eventType + expected []eventType + }{ + { + name: "Channel open", + eventLog: &chanEventLog{ + now: time.Now, + }, + event: peerOnlineEvent, + expected: []eventType{peerOnlineEvent}, + }, + { + name: "Channel closed, event not added", + eventLog: &chanEventLog{ + now: time.Now, + }, + event: peerOnlineEvent, + expected: []eventType{}, + }, + } + + for _, test := range tests { + test := test + + t.Run(test.name, func(t *testing.T) { + test.eventLog.add(test.event) + + for i, e := range test.expected { + if test.eventLog.events[i].eventType != e { + t.Fatalf("Expected event type: %v, got: %v", + e, test.eventLog.events[i].eventType) + } + } + }) + } +} + +// TestGetOnlinePeriod tests the getOnlinePeriod function. It tests the case +// where no events present, and the case where an additional online period +// must be added because the event log ends on an online event. +func TestGetOnlinePeriod(t *testing.T) { + // Set time for consistent testing. + now := time.Now() + + fourHoursAgo := now.Add(time.Hour * -4) + threeHoursAgo := now.Add(time.Hour * -3) + twoHoursAgo := now.Add(time.Hour * -2) + oneHourAgo := now.Add(time.Hour * -1) + + tests := []struct { + name string + events []*channelEvent + expectedOnline []*onlinePeriod + openedAt time.Time + closedAt time.Time + }{ + { + name: "No events", + }, + { + name: "Start on online period", + events: []*channelEvent{ + { + timestamp: threeHoursAgo, + eventType: peerOnlineEvent, + }, + { + timestamp: twoHoursAgo, + eventType: peerOfflineEvent, + }, + }, + expectedOnline: []*onlinePeriod{ + { + start: threeHoursAgo, + end: twoHoursAgo, + }, + }, + }, + { + name: "Start on offline period", + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOfflineEvent, + }, + }, + }, + { + name: "End on an online period, channel not closed", + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOnlineEvent, + }, + }, + expectedOnline: []*onlinePeriod{ + { + start: fourHoursAgo, + end: now, + }, + }, + }, + { + name: "End on an online period, channel closed", + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOnlineEvent, + }, + }, + expectedOnline: []*onlinePeriod{ + { + start: fourHoursAgo, + end: oneHourAgo, + }, + }, + closedAt: oneHourAgo, + }, + } + + for _, test := range tests { + test := test + + t.Run(test.name, func(t *testing.T) { + score := &chanEventLog{ + events: test.events, + now: func() time.Time { + return now + }, + openedAt: test.openedAt, + closedAt: test.closedAt, + } + + online := score.getOnlinePeriods() + + if len(online) != len(test.expectedOnline) { + t.Fatalf("Expectd: %v online periods, got: %v", + len(test.expectedOnline), len(online)) + } + + for i, o := range test.expectedOnline { + if online[i].start != o.start { + t.Errorf("Expected start: %v, got %v", o.start, + online[i].start) + } + + if online[i].end != o.end { + t.Errorf("Expected end: %v, got %v", o.end, + online[i].end) + } + } + }) + + } +} + +// TestUptime tests channel uptime calculation based on its event log. +func TestUptime(t *testing.T) { + // Set time for consistent testing. + now := time.Now() + + fourHoursAgo := now.Add(time.Hour * -4) + threeHoursAgo := now.Add(time.Hour * -3) + twoHoursAgo := now.Add(time.Hour * -2) + oneHourAgo := now.Add(time.Hour * -1) + + tests := []struct { + name string + + // opened at is the time the channel was recorded as being open, and is + // never expected to be zero. + openedAt time.Time + + // closed at is the tim the channel was recorded as being closed, and + // can have a zero value if the. + closedAt time.Time + + // events is the set of event log that we are calculating uptime for. + events []*channelEvent + + // startTime is the beginning of the period that we are calculating + // uptime for, it cannot have a zero value. + startTime time.Time + + // endTime is the end of the period that we are calculating uptime for, + // it cannot have a zero value. + endTime time.Time + + // expectedUptime is the amount of uptime we expect to be calculated + // over the period specified by startTime and endTime. + expectedUptime time.Duration + + // expectErr is set to true if we expect an error to be returned when + // calling the uptime function + expectErr bool + }{ + { + name: "End before start", + endTime: threeHoursAgo, + startTime: now, + expectErr: true, + }, + { + name: "Zero end time", + expectErr: true, + }, + { + name: "Online event and closed", + openedAt: fourHoursAgo, + closedAt: oneHourAgo, + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOnlineEvent, + }, + }, + startTime: fourHoursAgo, + endTime: now, + expectedUptime: time.Hour * 3, + }, + { + name: "Online event and not closed", + openedAt: fourHoursAgo, + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOnlineEvent, + }, + }, + startTime: fourHoursAgo, + endTime: now, + expectedUptime: time.Hour * 4, + }, + { + name: "Offline event and closed", + openedAt: fourHoursAgo, + closedAt: threeHoursAgo, + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOfflineEvent, + }, + }, + startTime: fourHoursAgo, + endTime: now, + }, + { + name: "Online event before close", + openedAt: fourHoursAgo, + closedAt: oneHourAgo, + events: []*channelEvent{ + { + timestamp: twoHoursAgo, + eventType: peerOnlineEvent, + }, + }, + startTime: fourHoursAgo, + endTime: now, + expectedUptime: time.Hour, + }, + { + name: "Online then offline event", + openedAt: fourHoursAgo, + closedAt: oneHourAgo, + events: []*channelEvent{ + { + timestamp: threeHoursAgo, + eventType: peerOnlineEvent, + }, + { + timestamp: twoHoursAgo, + eventType: peerOfflineEvent, + }, + }, + startTime: fourHoursAgo, + endTime: now, + expectedUptime: time.Hour, + }, + { + name: "Online event before uptime period", + openedAt: fourHoursAgo, + closedAt: oneHourAgo, + events: []*channelEvent{ + { + timestamp: threeHoursAgo, + eventType: peerOnlineEvent, + }, + }, + startTime: twoHoursAgo, + endTime: now, + expectedUptime: time.Hour, + }, + { + name: "Offline event after uptime period", + openedAt: fourHoursAgo, + events: []*channelEvent{ + { + timestamp: fourHoursAgo, + eventType: peerOnlineEvent, + }, + { + timestamp: now.Add(time.Hour), + eventType: peerOfflineEvent, + }, + }, + startTime: twoHoursAgo, + endTime: now, + expectedUptime: time.Hour * 2, + }, + { + name: "All events within period", + openedAt: fourHoursAgo, + events: []*channelEvent{ + { + timestamp: twoHoursAgo, + eventType: peerOnlineEvent, + }, + }, + startTime: threeHoursAgo, + endTime: oneHourAgo, + expectedUptime: time.Hour, + }, + { + name: "Multiple online and offline", + openedAt: now.Add(time.Hour * -8), + events: []*channelEvent{ + { + timestamp: now.Add(time.Hour * -7), + eventType: peerOnlineEvent, + }, + { + timestamp: now.Add(time.Hour * -6), + eventType: peerOfflineEvent, + }, + { + timestamp: now.Add(time.Hour * -5), + eventType: peerOnlineEvent, + }, + { + timestamp: now.Add(time.Hour * -4), + eventType: peerOfflineEvent, + }, + { + timestamp: now.Add(time.Hour * -3), + eventType: peerOnlineEvent, + }, + }, + startTime: now.Add(time.Hour * -8), + endTime: oneHourAgo, + expectedUptime: time.Hour * 4, + }, + } + + for _, test := range tests { + test := test + + t.Run(test.name, func(t *testing.T) { + score := &chanEventLog{ + events: test.events, + now: func() time.Time { + return now + }, + openedAt: test.openedAt, + closedAt: test.closedAt, + } + + uptime, err := score.uptime(test.startTime, test.endTime) + if test.expectErr && err == nil { + t.Fatal("Expected an error, got nil") + } + if !test.expectErr && err != nil { + t.Fatalf("Expcted no error, got: %v", err) + } + + if uptime != test.expectedUptime { + t.Errorf("Expected uptime: %v, got: %v", + test.expectedUptime, uptime) + } + }) + } +} diff --git a/chanfitness/log.go b/chanfitness/log.go new file mode 100644 index 00000000..626f7538 --- /dev/null +++ b/chanfitness/log.go @@ -0,0 +1,32 @@ +package chanfitness + +import ( + "github.com/btcsuite/btclog" + "github.com/lightningnetwork/lnd/build" +) + +// Subsystem defines the logging code for this subsystem. +const Subsystem = "CHFT" + +// log is a logger that is initialized with no output filters. This +// means the package will not perform any logging by default until the caller +// requests it. +var log btclog.Logger + +// The default amount of logging is none. +func init() { + UseLogger(build.NewSubLogger(Subsystem, nil)) +} + +// DisableLog disables all library log output. Logging output is disabled +// by default until UseLogger is called. +func DisableLog() { + UseLogger(btclog.Disabled) +} + +// UseLogger uses a specified Logger to output package logging info. +// This should be used in preference to SetLogWriter if the caller is also +// using btclog. +func UseLogger(logger btclog.Logger) { + log = logger +} diff --git a/log.go b/log.go index dc4d0247..a6d81f26 100644 --- a/log.go +++ b/log.go @@ -11,6 +11,7 @@ import ( "github.com/lightningnetwork/lnd/build" "github.com/lightningnetwork/lnd/chainntnfs" "github.com/lightningnetwork/lnd/chanbackup" + "github.com/lightningnetwork/lnd/chanfitness" "github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/channelnotifier" "github.com/lightningnetwork/lnd/contractcourt" @@ -98,6 +99,7 @@ func init() { addSubLogger(routerrpc.Subsystem, routerrpc.UseLogger) addSubLogger(wtclientrpc.Subsystem, wtclientrpc.UseLogger) + addSubLogger(chanfitness.Subsystem, chanfitness.UseLogger) } // addSubLogger is a helper method to conveniently create and register the