Merge pull request #648 from halseth/fix-lost-announcement

gossiper/tests: Integration test bug/flake fixes.
This commit is contained in:
Olaoluwa Osuntokun 2018-01-27 17:25:51 -08:00 committed by GitHub
commit a808f76a60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 173 additions and 111 deletions

@ -16,6 +16,7 @@ import (
"github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/channeldb"
"github.com/lightningnetwork/lnd/lnwallet" "github.com/lightningnetwork/lnd/lnwallet"
"github.com/lightningnetwork/lnd/lnwire" "github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/multimutex"
"github.com/lightningnetwork/lnd/routing" "github.com/lightningnetwork/lnd/routing"
"github.com/roasbeef/btcd/btcec" "github.com/roasbeef/btcd/btcec"
"github.com/roasbeef/btcd/chaincfg/chainhash" "github.com/roasbeef/btcd/chaincfg/chainhash"
@ -186,6 +187,12 @@ type AuthenticatedGossiper struct {
// selfKey is the identity public key of the backing Lighting node. // selfKey is the identity public key of the backing Lighting node.
selfKey *btcec.PublicKey selfKey *btcec.PublicKey
// channelMtx is used to restrict the database access to one
// goroutine per channel ID. This is done to ensure that when
// the gossiper is handling an announcement, the db state stays
// consistent between when the DB is first read to it's written.
channelMtx *multimutex.Mutex
sync.Mutex sync.Mutex
} }
@ -206,6 +213,7 @@ func New(cfg Config, selfKey *btcec.PublicKey) (*AuthenticatedGossiper, error) {
prematureAnnouncements: make(map[uint32][]*networkMsg), prematureAnnouncements: make(map[uint32][]*networkMsg),
prematureChannelUpdates: make(map[uint64][]*networkMsg), prematureChannelUpdates: make(map[uint64][]*networkMsg),
waitingProofs: storage, waitingProofs: storage,
channelMtx: multimutex.NewMutex(),
}, nil }, nil
} }
@ -972,11 +980,6 @@ func (d *AuthenticatedGossiper) networkHandler() {
} }
} }
// If we're able to broadcast the current batch
// successfully, then we reset the batch for a new
// round of announcements.
announcements.Reset()
// The retransmission timer has ticked which indicates that we // The retransmission timer has ticked which indicates that we
// should check if we need to prune or re-broadcast any of our // should check if we need to prune or re-broadcast any of our
// personal channels. This addresses the case of "zombie" channels and // personal channels. This addresses the case of "zombie" channels and
@ -1376,6 +1379,14 @@ func (d *AuthenticatedGossiper) processNetworkAnnouncement(nMsg *networkMsg) []n
// present in this channel are not present in the database, a // present in this channel are not present in the database, a
// partial node will be added to represent each node while we // partial node will be added to represent each node while we
// wait for a node announcement. // wait for a node announcement.
//
// Before we add the edge to the database, we obtain
// the mutex for this channel ID. We do this to ensure
// no other goroutine has read the database and is now
// making decisions based on this DB state, before it
// writes to the DB.
d.channelMtx.Lock(msg.ShortChannelID.ToUint64())
defer d.channelMtx.Unlock(msg.ShortChannelID.ToUint64())
if err := d.cfg.Router.AddEdge(edge); err != nil { if err := d.cfg.Router.AddEdge(edge); err != nil {
// If the edge was rejected due to already being known, // If the edge was rejected due to already being known,
// then it may be that case that this new message has a // then it may be that case that this new message has a
@ -1516,6 +1527,13 @@ func (d *AuthenticatedGossiper) processNetworkAnnouncement(nMsg *networkMsg) []n
// Get the node pub key as far as we don't have it in channel // Get the node pub key as far as we don't have it in channel
// update announcement message. We'll need this to properly // update announcement message. We'll need this to properly
// verify message signature. // verify message signature.
//
// We make sure to obtain the mutex for this channel ID
// before we acces the database. This ensures the state
// we read from the database has not changed between this
// point and when we call UpdateEdge() later.
d.channelMtx.Lock(msg.ShortChannelID.ToUint64())
defer d.channelMtx.Unlock(msg.ShortChannelID.ToUint64())
chanInfo, _, _, err := d.cfg.Router.GetChannelByID(msg.ShortChannelID) chanInfo, _, _, err := d.cfg.Router.GetChannelByID(msg.ShortChannelID)
if err != nil { if err != nil {
switch err { switch err {
@ -1679,6 +1697,12 @@ func (d *AuthenticatedGossiper) processNetworkAnnouncement(nMsg *networkMsg) []n
// Ensure that we know of a channel with the target channel ID // Ensure that we know of a channel with the target channel ID
// before proceeding further. // before proceeding further.
//
// We must acquire the mutex for this channel ID before getting
// the channel from the database, to ensure what we read does
// not change before we call AddProof() later.
d.channelMtx.Lock(msg.ShortChannelID.ToUint64())
defer d.channelMtx.Unlock(msg.ShortChannelID.ToUint64())
chanInfo, e1, e2, err := d.cfg.Router.GetChannelByID( chanInfo, e1, e2, err := d.cfg.Router.GetChannelByID(
msg.ShortChannelID) msg.ShortChannelID)
if err != nil { if err != nil {

@ -2366,12 +2366,25 @@ func testMultiHopPayments(net *lntest.NetworkHarness, t *harnessTest) {
} }
// Wait for all nodes to have seen all channels. // Wait for all nodes to have seen all channels.
nodes := []*lntest.HarnessNode{net.Alice, net.Bob, carol, dave}
nodeNames := []string{"Alice", "Bob", "Carol", "Dave"}
for _, chanPoint := range networkChans { for _, chanPoint := range networkChans {
for _, node := range []*lntest.HarnessNode{net.Alice, net.Bob, carol, dave} { for i, node := range nodes {
txid, e := chainhash.NewHash(chanPoint.FundingTxid)
if e != nil {
t.Fatalf("unable to create sha hash: %v", e)
}
point := wire.OutPoint{
Hash: *txid,
Index: chanPoint.OutputIndex,
}
ctxt, _ = context.WithTimeout(ctxb, timeout) ctxt, _ = context.WithTimeout(ctxb, timeout)
err = node.WaitForNetworkChannelOpen(ctxt, chanPoint) err = node.WaitForNetworkChannelOpen(ctxt, chanPoint)
if err != nil { if err != nil {
t.Fatalf("timeout waiting for channel open: %v", err) t.Fatalf("%s(%d): timeout waiting for "+
"channel(%s) open: %v", nodeNames[i],
node.NodeID, point, err)
} }
} }
} }
@ -2551,17 +2564,27 @@ func testPrivateChannels(net *lntest.NetworkHarness, t *harnessTest) {
// Wait for all nodes to have seen all these channels, as they // Wait for all nodes to have seen all these channels, as they
// are all public. // are all public.
nodes := []*lntest.HarnessNode{net.Alice, net.Bob, carol, dave} nodes := []*lntest.HarnessNode{net.Alice, net.Bob, carol, dave}
nodeNames := []string{"Alice", "Bob", "Carol", "Dave"}
for _, chanPoint := range networkChans { for _, chanPoint := range networkChans {
for _, node := range nodes { for i, node := range nodes {
txid, e := chainhash.NewHash(chanPoint.FundingTxid)
if e != nil {
t.Fatalf("unable to create sha hash: %v", e)
}
point := wire.OutPoint{
Hash: *txid,
Index: chanPoint.OutputIndex,
}
ctxt, _ = context.WithTimeout(ctxb, timeout) ctxt, _ = context.WithTimeout(ctxb, timeout)
err = node.WaitForNetworkChannelOpen(ctxt, chanPoint) err = node.WaitForNetworkChannelOpen(ctxt, chanPoint)
if err != nil { if err != nil {
t.Fatalf("timeout waiting for channel open: %v", t.Fatalf("%s(%d): timeout waiting for "+
err) "channel(%s) open: %v", nodeNames[i],
node.NodeID, point, err)
} }
} }
} }
// Now create a _private_ channel directly between Carol and // Now create a _private_ channel directly between Carol and
// Alice of 100k. // Alice of 100k.
if err := net.ConnectNodes(ctxb, carol, net.Alice); err != nil { if err := net.ConnectNodes(ctxb, carol, net.Alice); err != nil {
@ -3150,14 +3173,23 @@ func testRevokedCloseRetribution(net *lntest.NetworkHarness, t *harnessTest) {
// Next query for Bob's channel state, as we sent 3 payments of 10k // Next query for Bob's channel state, as we sent 3 payments of 10k
// satoshis each, Bob should now see his balance as being 30k satoshis. // satoshis each, Bob should now see his balance as being 30k satoshis.
time.Sleep(time.Millisecond * 200) var bobChan *lnrpc.ActiveChannel
bobChan, err := getBobChanInfo() var predErr error
err = lntest.WaitPredicate(func() bool {
bobChan, err = getBobChanInfo()
if err != nil {
t.Fatalf("unable to get bob's channel info: %v", err)
}
if bobChan.LocalBalance != 30000 {
predErr = fmt.Errorf("bob's balance is incorrect, "+
"got %v, expected %v", bobChan.LocalBalance,
30000)
return false
}
return true
}, time.Second*15)
if err != nil { if err != nil {
t.Fatalf("unable to get bob's channel info: %v", err) t.Fatalf("%v", predErr)
}
if bobChan.LocalBalance != 30000 {
t.Fatalf("bob's balance is incorrect, got %v, expected %v",
bobChan.LocalBalance, 30000)
} }
// Grab Bob's current commitment height (update number), we'll later // Grab Bob's current commitment height (update number), we'll later

96
multimutex/multimutex.go Normal file

@ -0,0 +1,96 @@
package multimutex
import (
"fmt"
"sync"
)
// cntMutex is a struct that wraps a counter and a mutex, and is used
// to keep track of the number of goroutines waiting for access to the
// mutex, such that we can forget about it when the counter is zero.
type cntMutex struct {
cnt int
sync.Mutex
}
// Mutex is a struct that keeps track of a set of mutexes with
// a given ID. It can be used for making sure only one goroutine
// gets given the mutex per ID.
type Mutex struct {
// mutexes is a map of IDs to a cntMutex. The cntMutex for
// a given ID will hold the mutex to be used by all
// callers requesting access for the ID, in addition to
// the count of callers.
mutexes map[uint64]*cntMutex
// mapMtx is used to give synchronize concurrent access
// to the mutexes map.
mapMtx sync.Mutex
}
// NewMutex creates a new Mutex.
func NewMutex() *Mutex {
return &Mutex{
mutexes: make(map[uint64]*cntMutex),
}
}
// Lock locks the mutex by the given ID. If the mutex is already
// locked by this ID, Lock blocks until the mutex is available.
func (c *Mutex) Lock(id uint64) {
c.mapMtx.Lock()
mtx, ok := c.mutexes[id]
if ok {
// If the mutex already existed in the map, we
// increment its counter, to indicate that there
// now is one more goroutine waiting for it.
mtx.cnt++
} else {
// If it was not in the map, it means no other
// goroutine has locked the mutex for this ID,
// and we can create a new mutex with count 1
// and add it to the map.
mtx = &cntMutex{
cnt: 1,
}
c.mutexes[id] = mtx
}
c.mapMtx.Unlock()
// Acquire the mutex for this ID.
mtx.Lock()
}
// Unlock unlocks the mutex by the given ID. It is a run-time
// error if the mutex is not locked by the ID on entry to Unlock.
func (c *Mutex) Unlock(id uint64) {
// Since we are done with all the work for this
// update, we update the map to reflect that.
c.mapMtx.Lock()
mtx, ok := c.mutexes[id]
if !ok {
// The mutex not existing in the map means
// an unlock for an ID not currently locked
// was attempted.
panic(fmt.Sprintf("double unlock for id %v",
id))
}
// Decrement the counter. If the count goes to
// zero, it means this caller was the last one
// to wait for the mutex, and we can delete it
// from the map. We can do this safely since we
// are under the mapMtx, meaning that all other
// goroutines waiting for the mutex already
// have incremented it, or will create a new
// mutex when they get the mapMtx.
mtx.cnt--
if mtx.cnt == 0 {
delete(c.mutexes, id)
}
c.mapMtx.Unlock()
// Unlock the mutex for this ID.
mtx.Unlock()
}

@ -15,6 +15,7 @@ import (
"github.com/lightningnetwork/lnd/htlcswitch" "github.com/lightningnetwork/lnd/htlcswitch"
"github.com/lightningnetwork/lnd/lnwallet" "github.com/lightningnetwork/lnd/lnwallet"
"github.com/lightningnetwork/lnd/lnwire" "github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/multimutex"
"github.com/lightningnetwork/lnd/routing/chainview" "github.com/lightningnetwork/lnd/routing/chainview"
"github.com/roasbeef/btcd/btcec" "github.com/roasbeef/btcd/btcec"
"github.com/roasbeef/btcd/wire" "github.com/roasbeef/btcd/wire"
@ -162,97 +163,6 @@ func newRouteTuple(amt lnwire.MilliSatoshi, dest []byte) routeTuple {
return r return r
} }
// cntMutex is a struct that wraps a counter and a mutex, and is used
// to keep track of the number of goroutines waiting for access to the
// mutex, such that we can forget about it when the counter is zero.
type cntMutex struct {
cnt int
sync.Mutex
}
// mutexForID is a struct that keeps track of a set of mutexes with
// a given ID. It can be used for making sure only one goroutine
// gets given the mutex per ID. Here it is currently used to making
// sure we only process one ChannelEdgePolicy per channelID at a
// given time.
type mutexForID struct {
// mutexes is a map of IDs to a cntMutex. The cntMutex for
// a given ID will hold the mutex to be used by all
// callers requesting access for the ID, in addition to
// the count of callers.
mutexes map[uint64]*cntMutex
// mapMtx is used to give synchronize concurrent access
// to the mutexes map.
mapMtx sync.Mutex
}
func newMutexForID() *mutexForID {
return &mutexForID{
mutexes: make(map[uint64]*cntMutex),
}
}
// Lock locks the mutex by the given ID. If the mutex is already
// locked by this ID, Lock blocks until the mutex is available.
func (c *mutexForID) Lock(id uint64) {
c.mapMtx.Lock()
mtx, ok := c.mutexes[id]
if ok {
// If the mutex already existed in the map, we
// increment its counter, to indicate that there
// now is one more goroutine waiting for it.
mtx.cnt++
} else {
// If it was not in the map, it means no other
// goroutine has locked the mutex for this ID,
// and we can create a new mutex with count 1
// and add it to the map.
mtx = &cntMutex{
cnt: 1,
}
c.mutexes[id] = mtx
}
c.mapMtx.Unlock()
// Acquire the mutex for this ID.
mtx.Lock()
}
// Unlock unlocks the mutex by the given ID. It is a run-time
// error if the mutex is not locked by the ID on entry to Unlock.
func (c *mutexForID) Unlock(id uint64) {
// Since we are done with all the work for this
// update, we update the map to reflect that.
c.mapMtx.Lock()
mtx, ok := c.mutexes[id]
if !ok {
// The mutex not existing in the map means
// an unlock for an ID not currently locked
// was attempted.
panic(fmt.Sprintf("double unlock for id %v",
id))
}
// Decrement the counter. If the count goes to
// zero, it means this caller was the last one
// to wait for the mutex, and we can delete it
// from the map. We can do this safely since we
// are under the mapMtx, meaning that all other
// goroutines waiting for the mutex already
// have incremented it, or will create a new
// mutex when they get the mapMtx.
mtx.cnt--
if mtx.cnt == 0 {
delete(c.mutexes, id)
}
c.mapMtx.Unlock()
// Unlock the mutex for this ID.
mtx.Unlock()
}
// ChannelRouter is the layer 3 router within the Lightning stack. Below the // ChannelRouter is the layer 3 router within the Lightning stack. Below the
// ChannelRouter is the HtlcSwitch, and below that is the Bitcoin blockchain // ChannelRouter is the HtlcSwitch, and below that is the Bitcoin blockchain
// itself. The primary role of the ChannelRouter is to respond to queries for // itself. The primary role of the ChannelRouter is to respond to queries for
@ -325,7 +235,7 @@ type ChannelRouter struct {
// channelEdgeMtx is a mutex we use to make sure we process only one // channelEdgeMtx is a mutex we use to make sure we process only one
// ChannelEdgePolicy at a time for a given channelID, to ensure // ChannelEdgePolicy at a time for a given channelID, to ensure
// consistency between the various database accesses. // consistency between the various database accesses.
channelEdgeMtx *mutexForID channelEdgeMtx *multimutex.Mutex
sync.RWMutex sync.RWMutex
@ -355,7 +265,7 @@ func New(cfg Config) (*ChannelRouter, error) {
topologyClients: make(map[uint64]*topologyClient), topologyClients: make(map[uint64]*topologyClient),
ntfnClientUpdates: make(chan *topologyClientUpdate), ntfnClientUpdates: make(chan *topologyClientUpdate),
missionControl: newMissionControl(cfg.Graph, selfNode), missionControl: newMissionControl(cfg.Graph, selfNode),
channelEdgeMtx: newMutexForID(), channelEdgeMtx: multimutex.NewMutex(),
selfNode: selfNode, selfNode: selfNode,
routeCache: make(map[routeTuple][]*Route), routeCache: make(map[routeTuple][]*Route),
quit: make(chan struct{}), quit: make(chan struct{}),