Merge pull request #3253 from Roasbeef/second-dns-seed
multi: add secondary DNS seed for bootstrap, add exponential back off to initial bootstrap
This commit is contained in:
commit
6a4179e224
@ -576,6 +576,9 @@ var (
|
|||||||
"nodes.lightning.directory",
|
"nodes.lightning.directory",
|
||||||
"soa.nodes.lightning.directory",
|
"soa.nodes.lightning.directory",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"lseed.bitcoinstats.com",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
bitcoinTestnetGenesis: {
|
bitcoinTestnetGenesis: {
|
||||||
|
@ -379,135 +379,146 @@ func (d *DNSSeedBootstrapper) SampleNodeAddrs(numAddrs uint32,
|
|||||||
|
|
||||||
var netAddrs []*lnwire.NetAddress
|
var netAddrs []*lnwire.NetAddress
|
||||||
|
|
||||||
// We'll continue this loop until we reach our target address limit.
|
// We'll try all the registered DNS seeds, exiting early if one of them
|
||||||
// Each SRV query to the seed will return 25 random nodes, so we can
|
// gives us all the peers we need.
|
||||||
// continue to query until we reach our target.
|
//
|
||||||
|
// TODO(roasbeef): should combine results from both
|
||||||
search:
|
search:
|
||||||
for uint32(len(netAddrs)) < numAddrs {
|
for _, dnsSeedTuple := range d.dnsSeeds {
|
||||||
for _, dnsSeedTuple := range d.dnsSeeds {
|
// We'll first query the seed with an SRV record so we can
|
||||||
// We'll first query the seed with an SRV record so we
|
// obtain a random sample of the encoded public keys of nodes.
|
||||||
// can obtain a random sample of the encoded public
|
// We use the lndLookupSRV function for this task.
|
||||||
// keys of nodes. We use the lndLookupSRV function for
|
primarySeed := dnsSeedTuple[0]
|
||||||
// this task.
|
_, addrs, err := d.net.LookupSRV("nodes", "tcp", primarySeed)
|
||||||
primarySeed := dnsSeedTuple[0]
|
if err != nil {
|
||||||
_, addrs, err := d.net.LookupSRV("nodes", "tcp", primarySeed)
|
log.Tracef("Unable to lookup SRV records via "+
|
||||||
if err != nil {
|
"primary seed (%v): %v", primarySeed, err)
|
||||||
log.Tracef("Unable to lookup SRV records via "+
|
|
||||||
"primary seed: %v", err)
|
|
||||||
|
|
||||||
log.Trace("Falling back to secondary")
|
log.Trace("Falling back to secondary")
|
||||||
|
|
||||||
// If the host of the secondary seed is blank,
|
// If the host of the secondary seed is blank, then
|
||||||
// then we'll bail here as we can't proceed.
|
// we'll bail here as we can't proceed.
|
||||||
if dnsSeedTuple[1] == "" {
|
if dnsSeedTuple[1] == "" {
|
||||||
return nil, fmt.Errorf("Secondary seed is blank")
|
log.Tracef("DNS seed %v has no secondary, "+
|
||||||
}
|
"skipping fallback", primarySeed)
|
||||||
|
continue
|
||||||
// If we get an error when trying to query via
|
|
||||||
// the primary seed, we'll fallback to the
|
|
||||||
// secondary seed before concluding failure.
|
|
||||||
soaShim := dnsSeedTuple[1]
|
|
||||||
addrs, err = d.fallBackSRVLookup(
|
|
||||||
soaShim, primarySeed,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
log.Tracef("Successfully queried fallback DNS seed")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Tracef("Retrieved SRV records from dns seed: %v",
|
// If we get an error when trying to query via the
|
||||||
spew.Sdump(addrs))
|
// primary seed, we'll fallback to the secondary seed
|
||||||
|
// before concluding failure.
|
||||||
|
soaShim := dnsSeedTuple[1]
|
||||||
|
addrs, err = d.fallBackSRVLookup(
|
||||||
|
soaShim, primarySeed,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Tracef("Unable to query fall "+
|
||||||
|
"back dns seed (%v): %v", soaShim, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Next, we'll need to issue an A record request for
|
log.Tracef("Successfully queried fallback DNS seed")
|
||||||
// each of the nodes, skipping it if nothing comes
|
}
|
||||||
// back.
|
|
||||||
for _, nodeSrv := range addrs {
|
|
||||||
if uint32(len(netAddrs)) >= numAddrs {
|
|
||||||
break search
|
|
||||||
}
|
|
||||||
|
|
||||||
// With the SRV target obtained, we'll now
|
log.Tracef("Retrieved SRV records from dns seed: %v",
|
||||||
// perform another query to obtain the IP
|
newLogClosure(func() string {
|
||||||
// address for the matching bech32 encoded node
|
return spew.Sdump(addrs)
|
||||||
// key. We use the lndLookup function for this
|
}),
|
||||||
// task.
|
)
|
||||||
bechNodeHost := nodeSrv.Target
|
|
||||||
addrs, err := d.net.LookupHost(bechNodeHost)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(addrs) == 0 {
|
// Next, we'll need to issue an A record request for each of
|
||||||
log.Tracef("No addresses for %v, skipping",
|
// the nodes, skipping it if nothing comes back.
|
||||||
bechNodeHost)
|
for _, nodeSrv := range addrs {
|
||||||
|
if uint32(len(netAddrs)) >= numAddrs {
|
||||||
|
break search
|
||||||
|
}
|
||||||
|
|
||||||
|
// With the SRV target obtained, we'll now perform
|
||||||
|
// another query to obtain the IP address for the
|
||||||
|
// matching bech32 encoded node key. We use the
|
||||||
|
// lndLookup function for this task.
|
||||||
|
bechNodeHost := nodeSrv.Target
|
||||||
|
addrs, err := d.net.LookupHost(bechNodeHost)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(addrs) == 0 {
|
||||||
|
log.Tracef("No addresses for %v, skipping",
|
||||||
|
bechNodeHost)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Tracef("Attempting to convert: %v", bechNodeHost)
|
||||||
|
|
||||||
|
// If the host isn't correctly formatted, then we'll
|
||||||
|
// skip it.
|
||||||
|
if len(bechNodeHost) == 0 ||
|
||||||
|
!strings.Contains(bechNodeHost, ".") {
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have a set of valid addresses, then we'll need
|
||||||
|
// to parse the public key from the original bech32
|
||||||
|
// encoded string.
|
||||||
|
bechNode := strings.Split(bechNodeHost, ".")
|
||||||
|
_, nodeBytes5Bits, err := bech32.Decode(bechNode[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once we have the bech32 decoded pubkey, we'll need
|
||||||
|
// to convert the 5-bit word grouping into our regular
|
||||||
|
// 8-bit word grouping so we can convert it into a
|
||||||
|
// public key.
|
||||||
|
nodeBytes, err := bech32.ConvertBits(
|
||||||
|
nodeBytes5Bits, 5, 8, false,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
nodeKey, err := btcec.ParsePubKey(
|
||||||
|
nodeBytes, btcec.S256(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have an ignore list, and this node is in the
|
||||||
|
// ignore list, then we'll go to the next candidate.
|
||||||
|
if ignore != nil {
|
||||||
|
nID := autopilot.NewNodeID(nodeKey)
|
||||||
|
if _, ok := ignore[nID]; ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Tracef("Attempting to convert: %v", bechNodeHost)
|
|
||||||
|
|
||||||
// If we have a set of valid addresses, then
|
|
||||||
// we'll need to parse the public key from the
|
|
||||||
// original bech32 encoded string.
|
|
||||||
bechNode := strings.Split(bechNodeHost, ".")
|
|
||||||
_, nodeBytes5Bits, err := bech32.Decode(bechNode[0])
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Once we have the bech32 decoded pubkey,
|
|
||||||
// we'll need to convert the 5-bit word
|
|
||||||
// grouping into our regular 8-bit word
|
|
||||||
// grouping so we can convert it into a public
|
|
||||||
// key.
|
|
||||||
nodeBytes, err := bech32.ConvertBits(
|
|
||||||
nodeBytes5Bits, 5, 8, false,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
nodeKey, err := btcec.ParsePubKey(
|
|
||||||
nodeBytes, btcec.S256(),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have an ignore list, and this node is
|
|
||||||
// in the ignore list, then we'll go to the
|
|
||||||
// next candidate.
|
|
||||||
if ignore != nil {
|
|
||||||
nID := autopilot.NewNodeID(nodeKey)
|
|
||||||
if _, ok := ignore[nID]; ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finally we'll convert the host:port peer to
|
|
||||||
// a proper TCP address to use within the
|
|
||||||
// lnwire.NetAddress. We don't need to use
|
|
||||||
// the lndResolveTCP function here because we
|
|
||||||
// already have the host:port peer.
|
|
||||||
addr := net.JoinHostPort(addrs[0],
|
|
||||||
strconv.FormatUint(uint64(nodeSrv.Port), 10))
|
|
||||||
tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finally, with all the information parsed,
|
|
||||||
// we'll return this fully valid address as a
|
|
||||||
// connection attempt.
|
|
||||||
lnAddr := &lnwire.NetAddress{
|
|
||||||
IdentityKey: nodeKey,
|
|
||||||
Address: tcpAddr,
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Tracef("Obtained %v as valid reachable "+
|
|
||||||
"node", lnAddr)
|
|
||||||
|
|
||||||
netAddrs = append(netAddrs, lnAddr)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finally we'll convert the host:port peer to a proper
|
||||||
|
// TCP address to use within the lnwire.NetAddress. We
|
||||||
|
// don't need to use the lndResolveTCP function here
|
||||||
|
// because we already have the host:port peer.
|
||||||
|
addr := net.JoinHostPort(
|
||||||
|
addrs[0],
|
||||||
|
strconv.FormatUint(uint64(nodeSrv.Port), 10),
|
||||||
|
)
|
||||||
|
tcpAddr, err := net.ResolveTCPAddr("tcp", addr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally, with all the information parsed, we'll
|
||||||
|
// return this fully valid address as a connection
|
||||||
|
// attempt.
|
||||||
|
lnAddr := &lnwire.NetAddress{
|
||||||
|
IdentityKey: nodeKey,
|
||||||
|
Address: tcpAddr,
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Tracef("Obtained %v as valid reachable "+
|
||||||
|
"node", lnAddr)
|
||||||
|
|
||||||
|
netAddrs = append(netAddrs, lnAddr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
49
server.go
49
server.go
@ -1600,7 +1600,6 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
|||||||
//
|
//
|
||||||
// We'll use a 15 second backoff, and double the time every time an
|
// We'll use a 15 second backoff, and double the time every time an
|
||||||
// epoch fails up to a ceiling.
|
// epoch fails up to a ceiling.
|
||||||
const backOffCeiling = time.Minute * 5
|
|
||||||
backOff := time.Second * 15
|
backOff := time.Second * 15
|
||||||
|
|
||||||
// We'll create a new ticker to wake us up every 15 seconds so we can
|
// We'll create a new ticker to wake us up every 15 seconds so we can
|
||||||
@ -1643,8 +1642,8 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
|||||||
sampleTicker.Stop()
|
sampleTicker.Stop()
|
||||||
|
|
||||||
backOff *= 2
|
backOff *= 2
|
||||||
if backOff > backOffCeiling {
|
if backOff > bootstrapBackOffCeiling {
|
||||||
backOff = backOffCeiling
|
backOff = bootstrapBackOffCeiling
|
||||||
}
|
}
|
||||||
|
|
||||||
srvrLog.Debugf("Backing off peer bootstrapper to "+
|
srvrLog.Debugf("Backing off peer bootstrapper to "+
|
||||||
@ -1713,15 +1712,27 @@ func (s *server) peerBootstrapper(numTargetPeers uint32,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bootstrapBackOffCeiling is the maximum amount of time we'll wait between
|
||||||
|
// failed attempts to locate a set of bootstrap peers. We'll slowly double our
|
||||||
|
// query back off each time we encounter a failure.
|
||||||
|
const bootstrapBackOffCeiling = time.Minute * 5
|
||||||
|
|
||||||
// initialPeerBootstrap attempts to continuously connect to peers on startup
|
// initialPeerBootstrap attempts to continuously connect to peers on startup
|
||||||
// until the target number of peers has been reached. This ensures that nodes
|
// until the target number of peers has been reached. This ensures that nodes
|
||||||
// receive an up to date network view as soon as possible.
|
// receive an up to date network view as soon as possible.
|
||||||
func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
||||||
numTargetPeers uint32, bootstrappers []discovery.NetworkPeerBootstrapper) {
|
numTargetPeers uint32, bootstrappers []discovery.NetworkPeerBootstrapper) {
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
// We'll start off by waiting 2 seconds between failed attempts, then
|
||||||
|
// double each time we fail until we hit the bootstrapBackOffCeiling.
|
||||||
|
var delaySignal <-chan time.Time
|
||||||
|
delayTime := time.Second * 2
|
||||||
|
|
||||||
for {
|
// As we want to be more aggressive, we'll use a lower back off ceiling
|
||||||
|
// than the main peer bootstrap logic.
|
||||||
|
backOffCeiling := bootstrapBackOffCeiling / 5
|
||||||
|
|
||||||
|
for attempts := 0; ; attempts++ {
|
||||||
// Check if the server has been requested to shut down in order
|
// Check if the server has been requested to shut down in order
|
||||||
// to prevent blocking.
|
// to prevent blocking.
|
||||||
if s.Stopped() {
|
if s.Stopped() {
|
||||||
@ -1738,8 +1749,31 @@ func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, we'll request for the remaining number of peers in
|
if attempts > 0 {
|
||||||
// order to reach our target.
|
srvrLog.Debugf("Waiting %v before trying to locate "+
|
||||||
|
"bootstrap peers (attempt #%v)", delayTime,
|
||||||
|
attempts)
|
||||||
|
|
||||||
|
// We've completed at least one iteration and haven't
|
||||||
|
// finished, so we'll start to insert a delay period
|
||||||
|
// between each attempt.
|
||||||
|
delaySignal = time.After(delayTime)
|
||||||
|
select {
|
||||||
|
case <-delaySignal:
|
||||||
|
case <-s.quit:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// After our delay, we'll double the time we wait up to
|
||||||
|
// the max back off period.
|
||||||
|
delayTime *= 2
|
||||||
|
if delayTime > backOffCeiling {
|
||||||
|
delayTime = backOffCeiling
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we'll request for the remaining number of peers
|
||||||
|
// in order to reach our target.
|
||||||
peersNeeded := numTargetPeers - numActivePeers
|
peersNeeded := numTargetPeers - numActivePeers
|
||||||
bootstrapAddrs, err := discovery.MultiSourceBootstrap(
|
bootstrapAddrs, err := discovery.MultiSourceBootstrap(
|
||||||
ignore, peersNeeded, bootstrappers...,
|
ignore, peersNeeded, bootstrappers...,
|
||||||
@ -1752,6 +1786,7 @@ func (s *server) initialPeerBootstrap(ignore map[autopilot.NodeID]struct{},
|
|||||||
|
|
||||||
// Then, we'll attempt to establish a connection to the
|
// Then, we'll attempt to establish a connection to the
|
||||||
// different peer addresses retrieved by our bootstrappers.
|
// different peer addresses retrieved by our bootstrappers.
|
||||||
|
var wg sync.WaitGroup
|
||||||
for _, bootstrapAddr := range bootstrapAddrs {
|
for _, bootstrapAddr := range bootstrapAddrs {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(addr *lnwire.NetAddress) {
|
go func(addr *lnwire.NetAddress) {
|
||||||
|
@ -316,8 +316,8 @@ func (q *sessionQueue) drainBackups() {
|
|||||||
// before attempting to dequeue any pending updates.
|
// before attempting to dequeue any pending updates.
|
||||||
stateUpdate, isPending, backupID, err := q.nextStateUpdate()
|
stateUpdate, isPending, backupID, err := q.nextStateUpdate()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("SessionQueue(%s) unable to get next state "+
|
log.Errorf("SessionQueue(%v) unable to get next state "+
|
||||||
"update: %v", err)
|
"update: %v", q.ID(), err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -557,7 +557,7 @@ func (q *sessionQueue) sendStateUpdate(conn wtserver.Peer,
|
|||||||
// TODO(conner): borked watchtower
|
// TODO(conner): borked watchtower
|
||||||
err = fmt.Errorf("unable to ack seqnum=%d: %v",
|
err = fmt.Errorf("unable to ack seqnum=%d: %v",
|
||||||
stateUpdate.SeqNum, err)
|
stateUpdate.SeqNum, err)
|
||||||
log.Errorf("SessionQueue(%s) failed to ack update: %v", err)
|
log.Errorf("SessionQueue(%v) failed to ack update: %v", q.ID(), err)
|
||||||
return err
|
return err
|
||||||
|
|
||||||
case err == wtdb.ErrLastAppliedReversion:
|
case err == wtdb.ErrLastAppliedReversion:
|
||||||
|
Loading…
Reference in New Issue
Block a user