You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
418 lines
13 KiB
418 lines
13 KiB
package chanfitness |
|
|
|
import ( |
|
"fmt" |
|
"time" |
|
|
|
"github.com/btcsuite/btcd/wire" |
|
"github.com/lightningnetwork/lnd/clock" |
|
) |
|
|
|
// eventType identifies the kind of event recorded in a peer's event log.
type eventType int

const (
	// peerOnlineEvent is recorded when a peer is observed as online.
	peerOnlineEvent eventType = iota

	// peerOfflineEvent is recorded when a peer is observed as offline.
	peerOfflineEvent
)

// String provides string representations of channel events. Values that are
// not one of the declared event types are rendered as "unknown".
func (e eventType) String() string {
	switch e {
	case peerOnlineEvent:
		return "peer_online"

	case peerOfflineEvent:
		return "peer_offline"

	default:
		return "unknown"
	}
}
|
|
|
type event struct { |
|
timestamp time.Time |
|
eventType eventType |
|
} |
|
|
|
// peerLog tracks events for a peer and its channels. If we currently have no |
|
// channels with the peer, it will simply track its current online state. If we |
|
// do have channels open with the peer, it will track the peer's online and |
|
// offline events so that we can calculate uptime for our channels. A single |
|
// event log is used for these online and offline events, and uptime for a |
|
// channel is calculated by examining a subsection of this log. |
|
type peerLog struct { |
|
// online stores whether the peer is currently online. |
|
online bool |
|
|
|
// onlineEvents is a log of timestamped events observed for the peer |
|
// that we have committed to allocating memory to. |
|
onlineEvents []*event |
|
|
|
// stagedEvent represents an event that is pending addition to the |
|
// events list. It has not yet been added because we rate limit the |
|
// frequency that we store events at. We need to store this value |
|
// in the log (rather than just ignore events) so that we can flush the |
|
// aggregate outcome to our event log once the rate limiting period has |
|
// ended. |
|
// |
|
// Take the following example: |
|
// - Peer online event recorded |
|
// - Peer offline event, not recorded due to rate limit |
|
// - No more events, we incorrectly believe our peer to be online |
|
// Instead of skipping events, we stage the most recent event during the |
|
// rate limited period so that we know what happened (on aggregate) |
|
// while we were rate limiting events. |
|
// |
|
// Note that we currently only store offline/online events so we can |
|
// use this field to track our online state. With the addition of other |
|
// event types, we need to only stage online/offline events, or split |
|
// them out. |
|
stagedEvent *event |
|
|
|
// flapCount is the number of times this peer has been observed as |
|
// going offline. |
|
flapCount int |
|
|
|
// lastFlap is the timestamp of the last flap we recorded for the peer. |
|
// This value will be nil if we have never recorded a flap for the peer. |
|
lastFlap *time.Time |
|
|
|
// clock allows creation of deterministic unit tests. |
|
clock clock.Clock |
|
|
|
// channels contains a set of currently open channels. Channels will be |
|
// added and removed from this map as they are opened and closed. |
|
channels map[wire.OutPoint]*channelInfo |
|
} |
|
|
|
// newPeerLog creates a log for a peer, taking its historical flap count and |
|
// last flap time as parameters. These values may be zero/nil if we have no |
|
// record of historical flap count for the peer. |
|
func newPeerLog(clock clock.Clock, flapCount int, |
|
lastFlap *time.Time) *peerLog { |
|
|
|
return &peerLog{ |
|
clock: clock, |
|
flapCount: flapCount, |
|
lastFlap: lastFlap, |
|
channels: make(map[wire.OutPoint]*channelInfo), |
|
} |
|
} |
|
|
|
// channelInfo contains information about a channel.
type channelInfo struct {
	// openedAt tracks the first time this channel was seen. This is not
	// necessarily the time that it confirmed on chain because channel
	// events are not persisted at present.
	openedAt time.Time
}

// newChannelInfo creates a channelInfo whose openedAt field is set to the
// time provided.
func newChannelInfo(openedAt time.Time) *channelInfo {
	info := &channelInfo{}
	info.openedAt = openedAt

	return info
}
|
|
|
// onlineEvent records a peer online or offline event in the log and increments |
|
// the peer's flap count. |
|
func (p *peerLog) onlineEvent(online bool) { |
|
eventTime := p.clock.Now() |
|
|
|
// If we have a non-nil last flap time, potentially apply a cooldown |
|
// factor to the peer's flap count before we rate limit it. This allows |
|
// us to decrease the penalty for historical flaps over time, provided |
|
// the peer has not flapped for a while. |
|
if p.lastFlap != nil { |
|
p.flapCount = cooldownFlapCount( |
|
p.clock.Now(), p.flapCount, *p.lastFlap, |
|
) |
|
} |
|
|
|
// Record flap count information and online state regardless of whether |
|
// we have any channels open with this peer. |
|
p.flapCount++ |
|
p.lastFlap = &eventTime |
|
p.online = online |
|
|
|
// If we have no channels currently open with the peer, we do not want |
|
// to commit resources to tracking their online state beyond a simple |
|
// online boolean, so we exit early. |
|
if p.channelCount() == 0 { |
|
return |
|
} |
|
|
|
p.addEvent(online, eventTime) |
|
} |
|
|
|
// addEvent records an online or offline event in our event log. and increments |
|
// the peer's flap count. |
|
func (p *peerLog) addEvent(online bool, time time.Time) { |
|
eventType := peerOnlineEvent |
|
if !online { |
|
eventType = peerOfflineEvent |
|
} |
|
|
|
event := &event{ |
|
timestamp: time, |
|
eventType: eventType, |
|
} |
|
|
|
// If we have no staged events, we can just stage this event and return. |
|
if p.stagedEvent == nil { |
|
p.stagedEvent = event |
|
return |
|
} |
|
|
|
// We get the amount of time we require between events according to |
|
// peer flap count. |
|
aggregation := getRateLimit(p.flapCount) |
|
nextRecordTime := p.stagedEvent.timestamp.Add(aggregation) |
|
flushEvent := nextRecordTime.Before(event.timestamp) |
|
|
|
// If enough time has passed since our last staged event, we add our |
|
// event to our in-memory list. |
|
if flushEvent { |
|
p.onlineEvents = append(p.onlineEvents, p.stagedEvent) |
|
} |
|
|
|
// Finally, we replace our staged event with the new event we received. |
|
p.stagedEvent = event |
|
} |
|
|
|
// addChannel adds a channel to our log. If we have not tracked any online |
|
// events for our peer yet, we create one with our peer's current online state |
|
// so that we know the state that the peer had at channel start, which is |
|
// required to calculate uptime over the channel's lifetime. |
|
func (p *peerLog) addChannel(channelPoint wire.OutPoint) error { |
|
_, ok := p.channels[channelPoint] |
|
if ok { |
|
return fmt.Errorf("channel: %v already present", channelPoint) |
|
} |
|
|
|
openTime := p.clock.Now() |
|
p.channels[channelPoint] = newChannelInfo(openTime) |
|
|
|
// If we do not have any online events tracked for our peer (which is |
|
// the case when we have no other channels open with the peer), we add |
|
// an event with the peer's current online state so that we know that |
|
// starting state for this peer when a channel was connected (which |
|
// allows us to calculate uptime over the lifetime of the channel). |
|
if len(p.onlineEvents) == 0 { |
|
p.addEvent(p.online, openTime) |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// removeChannel removes a channel from our log. If we have no more channels |
|
// with the peer after removing this one, we clear our list of events. |
|
func (p *peerLog) removeChannel(channelPoint wire.OutPoint) error { |
|
_, ok := p.channels[channelPoint] |
|
if !ok { |
|
return fmt.Errorf("channel: %v not present", channelPoint) |
|
} |
|
|
|
delete(p.channels, channelPoint) |
|
|
|
// If we have no more channels in our event log, we can discard all of |
|
// our online events in memory, since we don't need them anymore. |
|
// TODO(carla): this could be done on a per channel basis. |
|
if p.channelCount() == 0 { |
|
p.onlineEvents = nil |
|
p.stagedEvent = nil |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// channelCount returns the number of channels that we currently have |
|
// with the peer. |
|
func (p *peerLog) channelCount() int { |
|
return len(p.channels) |
|
} |
|
|
|
// channelUptime looks up a channel and returns the amount of time that the |
|
// channel has been monitored for and its uptime over this period. |
|
func (p *peerLog) channelUptime(channelPoint wire.OutPoint) (time.Duration, |
|
time.Duration, error) { |
|
|
|
channel, ok := p.channels[channelPoint] |
|
if !ok { |
|
return 0, 0, ErrChannelNotFound |
|
} |
|
|
|
now := p.clock.Now() |
|
|
|
uptime, err := p.uptime(channel.openedAt, now) |
|
if err != nil { |
|
return 0, 0, err |
|
} |
|
|
|
return now.Sub(channel.openedAt), uptime, nil |
|
} |
|
|
|
// getFlapCount returns the peer's flap count and the timestamp that we last |
|
// recorded a flap. |
|
func (p *peerLog) getFlapCount() (int, *time.Time) { |
|
return p.flapCount, p.lastFlap |
|
} |
|
|
|
// listEvents returns all of the events that our event log has tracked, |
|
// including events that are staged for addition to our set of events but have |
|
// not yet been committed to (because we rate limit and store only the aggregate |
|
// outcome over a period). |
|
func (p *peerLog) listEvents() []*event { |
|
if p.stagedEvent == nil { |
|
return p.onlineEvents |
|
} |
|
|
|
return append(p.onlineEvents, p.stagedEvent) |
|
} |
|
|
|
// onlinePeriod represents a period of time over which a peer was online.
type onlinePeriod struct {
	// start is the time at which the online period began.
	start time.Time

	// end is the time at which the online period finished.
	end time.Time
}
|
|
|
// getOnlinePeriods returns a list of all the periods that the event log has |
|
// recorded the remote peer as being online. In the unexpected case where there |
|
// are no events, the function returns early. Online periods are defined as a |
|
// peer online event which is terminated by a peer offline event. If the event |
|
// log ends on a peer online event, it appends a final period which is |
|
// calculated until the present. This function expects the event log provided |
|
// to be ordered by ascending timestamp, and can tolerate multiple consecutive |
|
// online or offline events. |
|
func (p *peerLog) getOnlinePeriods() []*onlinePeriod { |
|
events := p.listEvents() |
|
|
|
// Return early if there are no events, there are no online periods. |
|
if len(events) == 0 { |
|
return nil |
|
} |
|
|
|
var ( |
|
// lastEvent tracks the last event that we had that was of |
|
// a different type to our own. It is used to determine the |
|
// start time of our online periods when we experience an |
|
// offline event, and to track our last recorded state. |
|
lastEvent *event |
|
onlinePeriods []*onlinePeriod |
|
) |
|
|
|
// Loop through all events to build a list of periods that the peer was |
|
// online. Online periods are added when they are terminated with a peer |
|
// offline event. If the log ends on an online event, the period between |
|
// the online event and the present is not tracked. The type of the most |
|
// recent event is tracked using the offline bool so that we can add a |
|
// final online period if necessary. |
|
for _, event := range events { |
|
switch event.eventType { |
|
case peerOnlineEvent: |
|
// If our previous event is nil, we just set it and |
|
// break out of the switch. |
|
if lastEvent == nil { |
|
lastEvent = event |
|
break |
|
} |
|
|
|
// If our previous event was an offline event, we update |
|
// it to this event. We do not do this if it was an |
|
// online event because duplicate online events would |
|
// progress our online timestamp forward (rather than |
|
// keep it at our earliest online event timestamp). |
|
if lastEvent.eventType == peerOfflineEvent { |
|
lastEvent = event |
|
} |
|
|
|
case peerOfflineEvent: |
|
// If our previous event is nil, we just set it and |
|
// break out of the switch since we cannot record an |
|
// online period from this single event. |
|
if lastEvent == nil { |
|
lastEvent = event |
|
break |
|
} |
|
|
|
// If the last event we saw was an online event, we |
|
// add an online period to our set and progress our |
|
// previous event to this offline event. We do not |
|
// do this if we have had duplicate offline events |
|
// because we would be tracking the most recent offline |
|
// event (rather than keep it at our earliest offline |
|
// event timestamp). |
|
if lastEvent.eventType == peerOnlineEvent { |
|
onlinePeriods = append( |
|
onlinePeriods, &onlinePeriod{ |
|
start: lastEvent.timestamp, |
|
end: event.timestamp, |
|
}, |
|
) |
|
|
|
lastEvent = event |
|
} |
|
} |
|
} |
|
|
|
// If the last event was an peer offline event, we do not need to |
|
// calculate a final online period and can return online periods as is. |
|
if lastEvent.eventType == peerOfflineEvent { |
|
return onlinePeriods |
|
} |
|
|
|
// The log ended on an online event, so we need to add a final online |
|
// period which terminates at the present. |
|
finalEvent := &onlinePeriod{ |
|
start: lastEvent.timestamp, |
|
end: p.clock.Now(), |
|
} |
|
|
|
// Add the final online period to the set and return. |
|
return append(onlinePeriods, finalEvent) |
|
} |
|
|
|
// uptime calculates the total uptime we have recorded for a peer over the |
|
// inclusive range specified. An error is returned if the end of the range is |
|
// before the start or a zero end time is returned. |
|
func (p *peerLog) uptime(start, end time.Time) (time.Duration, error) { |
|
// Error if we are provided with an invalid range to calculate uptime |
|
// for. |
|
if end.Before(start) { |
|
return 0, fmt.Errorf("end time: %v before start time: %v", |
|
end, start) |
|
} |
|
if end.IsZero() { |
|
return 0, fmt.Errorf("zero end time") |
|
} |
|
|
|
var uptime time.Duration |
|
|
|
for _, p := range p.getOnlinePeriods() { |
|
// The online period ends before the range we're looking at, so |
|
// we can skip over it. |
|
if p.end.Before(start) { |
|
continue |
|
} |
|
// The online period starts after the range we're looking at, so |
|
// can stop calculating uptime. |
|
if p.start.After(end) { |
|
break |
|
} |
|
|
|
// If the online period starts before our range, shift the start |
|
// time up so that we only calculate uptime from the start of |
|
// our range. |
|
if p.start.Before(start) { |
|
p.start = start |
|
} |
|
|
|
// If the online period ends before our range, shift the end |
|
// time forward so that we only calculate uptime until the end |
|
// of the range. |
|
if p.end.After(end) { |
|
p.end = end |
|
} |
|
|
|
uptime += p.end.Sub(p.start) |
|
} |
|
|
|
return uptime, nil |
|
}
|
|
|