Merge pull request #3253 from Roasbeef/second-dns-seed
multi: add secondary DNS seed for bootstrap, add exponential back off to initial bootstrap
This commit is contained in:
commit
6a4179e224
@ -576,6 +576,9 @@ var (
|
||||
"nodes.lightning.directory",
|
||||
"soa.nodes.lightning.directory",
|
||||
},
|
||||
{
|
||||
"lseed.bitcoinstats.com",
|
||||
},
|
||||
},
|
||||
|
||||
bitcoinTestnetGenesis: {
|
||||
|
@ -379,135 +379,146 @@ func (d *DNSSeedBootstrapper) SampleNodeAddrs(numAddrs uint32,
|
||||
|
||||
var netAddrs []*lnwire.NetAddress
|
||||
|
||||
// We'll continue this loop until we reach our target address limit.
|
||||
// Each SRV query to the seed will return 25 random nodes, so we can
|
||||
// continue to query until we reach our target.
|
||||
// We'll try all the registered DNS seeds, exiting early if one of them
|
||||
// gives us all the peers we need.
|
||||
//
|
||||
// TODO(roasbeef): should combine results from both
|
||||
search:
|
||||
for uint32(len(netAddrs)) < numAddrs {
|
||||
for _, dnsSeedTuple := range d.dnsSeeds {
|
||||
// We'll first query the seed with an SRV record so we
|
||||
// can obtain a random sample of the encoded public
|
||||
// keys of nodes. We use the lndLookupSRV function for
|
||||
// this task.
|
||||
primarySeed := dnsSeedTuple[0]
|
||||
_, addrs, err := d.net.LookupSRV("nodes", "tcp", primarySeed)
|
||||
if err != nil {
|
||||
log.Tracef("Unable to lookup SRV records via "+
|
||||
"primary seed: %v", err)
|
||||
for _, dnsSeedTuple := range d.dnsSeeds {
|
||||
// We'll first query the seed with an SRV record so we can
|
||||
// obtain a random sample of the encoded public keys of nodes.
|
||||
// We use the lndLookupSRV function for this task.
|
||||
primarySeed := dnsSeedTuple[0]
|
||||
_, addrs, err := d.net.LookupSRV("nodes", "tcp", primarySeed)
|
||||
if err != nil {
|
||||
log.Tracef("Unable to lookup SRV records via "+
|
||||
"primary seed (%v): %v", primarySeed, err)
|
||||
|
||||
log.Trace("Falling back to secondary")
|
||||
log.Trace("Falling back to secondary")
|
||||
|
||||
// If the host of the secondary seed is blank,
|
||||
// then we'll bail here as we can't proceed.
|
||||
if dnsSeedTuple[1] == "" {
|
||||
return nil, fmt.Errorf("Secondary seed is blank")
|
||||
}
|
||||
|
||||
// If we get an error when trying to query via
|
||||
// the primary seed, we'll fallback to the
|
||||
// secondary seed before concluding failure.
|
||||
soaShim := dnsSeedTuple[1]
|
||||
addrs, err = d.fallBackSRVLookup(
|
||||
soaShim, primarySeed,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Tracef("Successfully queried fallback DNS seed")
|
||||
// If the host of the secondary seed is blank, then
|
||||
// we'll bail here as we can't proceed.
|
||||
if dnsSeedTuple[1] == "" {
|
||||
log.Tracef("DNS seed %v has no secondary, "+
|
||||
"skipping fallback", primarySeed)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Tracef("Retrieved SRV records from dns seed: %v",
|
||||
spew.Sdump(addrs))
|
||||
// If we get an error when trying to query via the
|
||||
// primary seed, we'll fallback to the secondary seed
|
||||
// before concluding failure.
|
||||
soaShim := dnsSeedTuple[1]
|
||||
addrs, err = d.fallBackSRVLookup(
|
||||
soaShim, primarySeed,
|
||||
)
|
||||
if err != nil {
|
||||
log.Tracef("Unable to query fall "+
|
||||
"back dns seed (%v): %v", soaShim, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Next, we'll need to issue an A record request for
|
||||
// each of the nodes, skipping it if nothing comes
|
||||
// back.
|
||||
for _, nodeSrv := range addrs {
|
||||
if uint32(len(netAddrs)) >= numAddrs {
|
||||
break search
|
||||
}
|
||||
log.Tracef("Successfully queried fallback DNS seed")
|
||||
}
|
||||
|
||||
// With the SRV target obtained, we'll now
|
||||
// perform another query to obtain the IP
|
||||
// address for the matching bech32 encoded node
|
||||
// key. We use the lndLookup function for this
|
||||
// task.
|
||||
bechNodeHost := nodeSrv.Target
|
||||
addrs, err := d.net.LookupHost(bechNodeHost)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Tracef("Retrieved SRV records from dns seed: %v",
|
||||
newLogClosure(func() string {
|
||||
return spew.Sdump(addrs)
|
||||
}),
|
||||
)
|
||||
|
||||
if len(addrs) == 0 {
|
||||
log.Tracef("No addresses for %v, skipping",
|
||||
bechNodeHost)
|
||||
// Next, we'll need to issue an A record request for each of
|
||||
// the nodes, skipping it if nothing comes back.
|
||||
for _, nodeSrv := range addrs {
|
||||
if uint32(len(netAddrs)) >= numAddrs {
|
||||
break search
|
||||
}
|
||||
|
||||
// With the SRV target obtained, we'll now perform
|
||||
// another query to obtain the IP address for the
|
||||
// matching bech32 encoded node key. We use the
|
||||
// lndLookup function for this task.
|
||||
bechNodeHost := nodeSrv.Target
|
||||
addrs, err := d.net.LookupHost(bechNodeHost)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(addrs) == 0 {
|
||||
log.Tracef("No addresses for %v, skipping",
|
||||
bechNodeHost)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Tracef("Attempting to convert: %v", bechNodeHost)
|
||||
|
||||
// If the host isn't correctly formatted, then we'll
|
||||
// skip it.
|
||||
if len(bechNodeHost) == 0 ||
|
||||
!strings.Contains(bechNodeHost, ".") {
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// If we have a set of valid addresses, then we'll need
|
||||
// to parse the public key from the original bech32
|
||||
// encoded string.
|
||||
bechNode := strings.Split(bechNodeHost, ".")
|
||||
_, nodeBytes5Bits, err := bech32.Decode(bechNode[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Once we have the bech32 decoded pubkey, we'll need
|
||||
// to convert the 5-bit word grouping into our regular
|
||||
// 8-bit word grouping so we can convert it into a
|
||||
// public key.
|
||||
nodeBytes, err := bech32.ConvertBits(
|
||||
nodeBytes5Bits, 5, 8, false,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeKey, err := btcec.ParsePubKey(
|
||||
nodeBytes, btcec.S256(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If we have an ignore list, and this node is in the
|
||||
// ignore list, then we'll go to the next candidate.
|
||||
if ignore != nil {
|
||||
nID := autopilot.NewNodeID(nodeKey)
|
||||
if _, ok := ignore[nID]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
log.Tracef("Attempting to convert: %v", bechNodeHost)
|
||||
|
||||
// If we have a set of valid addresses, then
|
||||
// we'll need to parse the public key from the
|
||||
// original bech32 encoded string.
|
||||
bechNode := strings.Split(bechNodeHost, ".")
|
||||
_, nodeBytes5Bits, err := bech32.Decode(bechNode[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Once we have the bech32 decoded pubkey,
|
||||
// we'll need to convert the 5-bit word
|
||||
// grouping into our regular 8-bit word
|
||||
// grouping so we can convert it into a public
|
||||
// key.
|
||||
nodeBytes, err := bech32.ConvertBits(
|
||||
nodeBytes5Bits, 5, 8, false,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeKey, err := btcec.ParsePubKey(
|
||||
nodeBytes, btcec.S256(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If we have an ignore list, and this node is
|
||||
// in the ignore list, then we'll go to the
|
||||
// next candidate.
|
||||
if ignore != nil {
|
||||
nID := autopilot.NewNodeID(nodeKey)
|
||||
if _, ok := ignore[nID]; ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Finally we'll convert the host:port peer to
|
||||
// a proper TCP address to use within the
|
||||
// lnwire.NetAddress. We don't need to use
|
||||
// the lndResolveTCP function here because we
|
||||
// already have the host:port peer.
|
||||
addr := net.JoinHostPort(addrs[0],
|
||||
strconv.FormatUint(uint64(nodeSrv.Port), 10))
|
||||
tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Finally, with all the information parsed,
|
||||
// we'll return this fully valid address as a
|
||||
// connection attempt.
|
||||
lnAddr := &lnwire.NetAddress{
|
||||
IdentityKey: nodeKey,
|
||||
Address: tcpAddr,
|
||||
}
|
||||
|
||||
log.Tracef("Obtained %v as valid reachable "+
|
||||
"node", lnAddr)
|
||||
|
||||
netAddrs = append(netAddrs, lnAddr)
|
||||
}
|
||||
|
||||
// Finally we'll convert the host:port peer to a proper
|
||||
// TCP address to use within the lnwire.NetAddress. We
|
||||
// don't need to use the lndResolveTCP function here
|
||||
// because we already have the host:port peer.
|
||||
addr := net.JoinHostPort(
|
||||
addrs[0],
|
||||
strconv.FormatUint(uint64(nodeSrv.Port), 10),
|
||||
)
|
||||
tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Finally, with all the information parsed, we'll
|
||||
// return this fully valid address as a connection
|
||||
// attempt.
|
||||
lnAddr := &lnwire.NetAddress{
|
||||
IdentityKey: nodeKey,
|
||||
Address: tcpAddr,
|
||||
}
|
||||
|
||||
log.Tracef("Obtained %v as valid reachable "+
|
||||
"node", lnAddr)
|
||||
|
||||
netAddrs = append(netAddrs, lnAddr)
|
||||
}
|
||||
}
|
||||
|
||||
|
49
server.go
49
server.go
@ -1600,7 +1600,6 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
||||
//
|
||||
// We'll use a 15 second backoff, and double the time every time an
|
||||
// epoch fails up to a ceiling.
|
||||
const backOffCeiling = time.Minute * 5
|
||||
backOff := time.Second * 15
|
||||
|
||||
// We'll create a new ticker to wake us up every 15 seconds so we can
|
||||
@ -1643,8 +1642,8 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
||||
sampleTicker.Stop()
|
||||
|
||||
backOff *= 2
|
||||
if backOff > backOffCeiling {
|
||||
backOff = backOffCeiling
|
||||
if backOff > bootstrapBackOffCeiling {
|
||||
backOff = bootstrapBackOffCeiling
|
||||
}
|
||||
|
||||
srvrLog.Debugf("Backing off peer bootstrapper to "+
|
||||
@ -1713,15 +1712,27 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
||||
}
|
||||
}
|
||||
|
||||
// bootstrapBackOffCeiling is the maximum amount of time we'll wait between
|
||||
// failed attempts to locate a set of bootstrap peers. We'll slowly double our
|
||||
// query back off each time we encounter a failure.
|
||||
const bootstrapBackOffCeiling = time.Minute * 5
|
||||
|
||||
// initialPeerBootstrap attempts to continuously connect to peers on startup
|
||||
// until the target number of peers has been reached. This ensures that nodes
|
||||
// receive an up to date network view as soon as possible.
|
||||
func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
||||
numTargetPeers uint32, bootstrappers []discovery.NetworkPeerBootstrapper) {
|
||||
|
||||
var wg sync.WaitGroup
|
||||
// We'll start off by waiting 2 seconds between failed attempts, then
|
||||
// double each time we fail until we hit the bootstrapBackOffCeiling.
|
||||
var delaySignal <-chan time.Time
|
||||
delayTime := time.Second * 2
|
||||
|
||||
for {
|
||||
// As we want to be more aggressive, we'll use a lower back off ceiling
|
||||
// than the main peer bootstrap logic.
|
||||
backOffCeiling := bootstrapBackOffCeiling / 5
|
||||
|
||||
for attempts := 0; ; attempts++ {
|
||||
// Check if the server has been requested to shut down in order
|
||||
// to prevent blocking.
|
||||
if s.Stopped() {
|
||||
@ -1738,8 +1749,31 @@ func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, we'll request for the remaining number of peers in
|
||||
// order to reach our target.
|
||||
if attempts > 0 {
|
||||
srvrLog.Debugf("Waiting %v before trying to locate "+
|
||||
"bootstrap peers (attempt #%v)", delayTime,
|
||||
attempts)
|
||||
|
||||
// We've completed at least one iteration and haven't
|
||||
// finished, so we'll start to insert a delay period
|
||||
// between each attempt.
|
||||
delaySignal = time.After(delayTime)
|
||||
select {
|
||||
case <-delaySignal:
|
||||
case <-s.quit:
|
||||
return
|
||||
}
|
||||
|
||||
// After our delay, we'll double the time we wait up to
|
||||
// the max back off period.
|
||||
delayTime *= 2
|
||||
if delayTime > backOffCeiling {
|
||||
delayTime = backOffCeiling
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, we'll request for the remaining number of peers
|
||||
// in order to reach our target.
|
||||
peersNeeded := numTargetPeers - numActivePeers
|
||||
bootstrapAddrs, err := discovery.MultiSourceBootstrap(
|
||||
ignore, peersNeeded, bootstrappers...,
|
||||
@ -1752,6 +1786,7 @@ func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
||||
|
||||
// Then, we'll attempt to establish a connection to the
|
||||
// different peer addresses retrieved by our bootstrappers.
|
||||
var wg sync.WaitGroup
|
||||
for _, bootstrapAddr := range bootstrapAddrs {
|
||||
wg.Add(1)
|
||||
go func(addr *lnwire.NetAddress) {
|
||||
|
@ -316,8 +316,8 @@ func (q *sessionQueue) drainBackups() {
|
||||
// before attempting to dequeue any pending updates.
|
||||
stateUpdate, isPending, backupID, err := q.nextStateUpdate()
|
||||
if err != nil {
|
||||
log.Errorf("SessionQueue(%s) unable to get next state "+
|
||||
"update: %v", err)
|
||||
log.Errorf("SessionQueue(%v) unable to get next state "+
|
||||
"update: %v", q.ID(), err)
|
||||
return
|
||||
}
|
||||
|
||||
@ -557,7 +557,7 @@ func (q *sessionQueue) sendStateUpdate(conn wtserver.Peer,
|
||||
// TODO(conner): borked watchtower
|
||||
err = fmt.Errorf("unable to ack seqnum=%d: %v",
|
||||
stateUpdate.SeqNum, err)
|
||||
log.Errorf("SessionQueue(%s) failed to ack update: %v", err)
|
||||
log.Errorf("SessionQueue(%v) failed to ack update: %v", q.ID(), err)
|
||||
return err
|
||||
|
||||
case err == wtdb.ErrLastAppliedReversion:
|
||||
|
Loading…
Reference in New Issue
Block a user