etcd: integrate the commitQueue to the STM commit loop

This commit integrates an externally passed commitQueue instance with the STM to reduce retries for conflicting transactions.
2020-08-10 16:50:06 +02:00 · 2020-08-10 16:50:06 +02:00 · 9c47392dfa
commit 9c47392dfa
parent 6f3a45b75f
4 changed files with 118 additions and 62 deletions
--- a/channeldb/kvdb/etcd/db.go
+++ b/channeldb/kvdb/etcd/db.go
@ -16,8 +16,8 @@ import (
 )

 const (
-	// etcdConnectionTimeout is the timeout until successful connection to the
-	// etcd instance.
+	// etcdConnectionTimeout is the timeout until successful connection to
+	// the etcd instance.
 	etcdConnectionTimeout = 10 * time.Second

 	// etcdLongTimeout is a timeout for longer taking etcd operatons.
@ -34,7 +34,8 @@ type callerStats struct {

 func (s callerStats) String() string {
 	return fmt.Sprintf("count: %d, retries: %d, rset: %d, wset: %d",
-		s.count, s.commitStats.Retries, s.commitStats.Rset, s.commitStats.Wset)
+		s.count, s.commitStats.Retries, s.commitStats.Rset,
+		s.commitStats.Wset)
 }

 // commitStatsCollector collects commit stats for commits succeeding
@ -117,6 +118,7 @@ type db struct {
 	config               BackendConfig
 	cli                  *clientv3.Client
 	commitStatsCollector *commitStatsCollector
+	txQueue              *commitQueue
 }

 // Enforce db implements the walletdb.DB interface.
@ -188,8 +190,9 @@ func newEtcdBackend(config BackendConfig) (*db, error) {
 	}

 	backend := &db{
-		cli:    cli,
-		config: config,
+		cli:     cli,
+		config:  config,
+		txQueue: NewCommitQueue(config.Ctx),
 	}

 	if config.CollectCommitStats {
@ -201,7 +204,9 @@ func newEtcdBackend(config BackendConfig) (*db, error) {

 // getSTMOptions creats all STM options based on the backend config.
 func (db *db) getSTMOptions() []STMOptionFunc {
-	opts := []STMOptionFunc{WithAbortContext(db.config.Ctx)}
+	opts := []STMOptionFunc{
+		WithAbortContext(db.config.Ctx),
+	}

 	if db.config.CollectCommitStats {
 		opts = append(opts,
@ -221,7 +226,7 @@ func (db *db) View(f func(tx walletdb.ReadTx) error) error {
 		return f(newReadWriteTx(stm, db.config.Prefix))
 	}

-	return RunSTM(db.cli, apply, db.getSTMOptions()...)
+	return RunSTM(db.cli, apply, db.txQueue, db.getSTMOptions()...)
 }

 // Update opens a database read/write transaction and executes the function f
@ -235,7 +240,7 @@ func (db *db) Update(f func(tx walletdb.ReadWriteTx) error) error {
 		return f(newReadWriteTx(stm, db.config.Prefix))
 	}

-	return RunSTM(db.cli, apply, db.getSTMOptions()...)
+	return RunSTM(db.cli, apply, db.txQueue, db.getSTMOptions()...)
 }

 // PrintStats returns all collected stats pretty printed into a string.
@ -247,18 +252,18 @@ func (db *db) PrintStats() string {
 	return ""
 }

-// BeginReadTx opens a database read transaction.
+// BeginReadWriteTx opens a database read+write transaction.
 func (db *db) BeginReadWriteTx() (walletdb.ReadWriteTx, error) {
 	return newReadWriteTx(
-		NewSTM(db.cli, db.getSTMOptions()...),
+		NewSTM(db.cli, db.txQueue, db.getSTMOptions()...),
 		db.config.Prefix,
 	), nil
 }

-// BeginReadWriteTx opens a database read+write transaction.
+// BeginReadTx opens a database read transaction.
 func (db *db) BeginReadTx() (walletdb.ReadTx, error) {
 	return newReadWriteTx(
-		NewSTM(db.cli, db.getSTMOptions()...),
+		NewSTM(db.cli, db.txQueue, db.getSTMOptions()...),
 		db.config.Prefix,
 	), nil
 }
--- a/channeldb/kvdb/etcd/db_test.go
+++ b/channeldb/kvdb/etcd/db_test.go
@ -63,7 +63,7 @@ func TestAbortContext(t *testing.T) {
 	// Expect that the update will fail.
 	err = db.Update(func(tx walletdb.ReadWriteTx) error {
 		_, err := tx.CreateTopLevelBucket([]byte("bucket"))
-		require.NoError(t, err)
+		require.Error(t, err, "context canceled")

 		return nil
 	})
--- a/channeldb/kvdb/etcd/stm.go
+++ b/channeldb/kvdb/etcd/stm.go
@ -134,6 +134,10 @@ type stm struct {
 	// execute in the STM run loop.
 	manual bool

+	// txQueue is lightweight contention manager, which is used to detect
+	// transaction conflicts and reduce retries.
+	txQueue *commitQueue
+
 	// options stores optional settings passed by the user.
 	options *STMOptions

@ -183,18 +187,22 @@ func WithCommitStatsCallback(cb func(bool, CommitStats)) STMOptionFunc {

 // RunSTM runs the apply function by creating an STM using serializable snapshot
 // isolation, passing it to the apply and handling commit errors and retries.
-func RunSTM(cli *v3.Client, apply func(STM) error, so ...STMOptionFunc) error {
-	return runSTM(makeSTM(cli, false, so...), apply)
+func RunSTM(cli *v3.Client, apply func(STM) error, txQueue *commitQueue,
+	so ...STMOptionFunc) error {
+
+	return runSTM(makeSTM(cli, false, txQueue, so...), apply)
 }

 // NewSTM creates a new STM instance, using serializable snapshot isolation.
-func NewSTM(cli *v3.Client, so ...STMOptionFunc) STM {
-	return makeSTM(cli, true, so...)
+func NewSTM(cli *v3.Client, txQueue *commitQueue, so ...STMOptionFunc) STM {
+	return makeSTM(cli, true, txQueue, so...)
 }

 // makeSTM is the actual constructor of the stm. It first apply all passed
 // options then creates the stm object and resets it before returning.
-func makeSTM(cli *v3.Client, manual bool, so ...STMOptionFunc) *stm {
+func makeSTM(cli *v3.Client, manual bool, txQueue *commitQueue,
+	so ...STMOptionFunc) *stm {
+
 	opts := &STMOptions{
 		ctx: cli.Ctx(),
 	}
@ -207,6 +215,7 @@ func makeSTM(cli *v3.Client, manual bool, so ...STMOptionFunc) *stm {
 	s := &stm{
 		client:   cli,
 		manual:   manual,
+		txQueue:  txQueue,
 		options:  opts,
 		prefetch: make(map[string]stmGet),
 	}
@ -222,50 +231,72 @@ func makeSTM(cli *v3.Client, manual bool, so ...STMOptionFunc) *stm {
 // CommitError which is used to indicate a necessary retry.
 func runSTM(s *stm, apply func(STM) error) error {
 	var (
-		retries int
-		stats   CommitStats
-		err     error
+		retries    int
+		stats      CommitStats
+		executeErr error
 	)

-loop:
-	// In a loop try to apply and commit and roll back if the database has
-	// changed (CommitError).
-	for {
-		select {
-		// Check if the STM is aborted and break the retry loop if it is.
-		case <-s.options.ctx.Done():
-			err = fmt.Errorf("aborted")
-			break loop
+	done := make(chan struct{})

-		default:
+	execute := func() {
+		defer close(done)
+
+		for {
+			select {
+			// Check if the STM is aborted and break the retry loop
+			// if it is.
+			case <-s.options.ctx.Done():
+				executeErr = fmt.Errorf("aborted")
+				return
+
+			default:
+			}
+
+			stats, executeErr = s.commit()
+
+			// Re-apply only upon commit error (meaning the
+			// keys were changed).
+			if _, ok := executeErr.(CommitError); !ok {
+				// Anything that's not a CommitError
+				// aborts the transaction.
+				return
+			}
+
+			// Rollback before trying to re-apply.
+			s.Rollback()
+			retries++
+
+			// Re-apply the transaction closure.
+			if executeErr = apply(s); executeErr != nil {
+				return
+			}
 		}
-		// Apply the transaction closure and abort the STM if there was
-		// an application error.
-		if err = apply(s); err != nil {
-			break loop
-		}
-
-		stats, err = s.commit()
-
-		// Retry the apply closure only upon commit error (meaning the
-		// database was changed).
-		if _, ok := err.(CommitError); !ok {
-			// Anything that's not a CommitError aborts the STM
-			// run loop.
-			break loop
-		}
-
-		// Rollback before trying to re-apply.
-		s.Rollback()
-		retries++
 	}

+	// Run the tx closure to construct the read and write sets.
+	// Also we expect that if there are no conflicting transactions
+	// in the queue, then we only run apply once.
+	if preApplyErr := apply(s); preApplyErr != nil {
+		return preApplyErr
+	}
+
+	// Queue up the transaction for execution.
+	s.txQueue.Add(execute, s.rset, s.wset)
+
+	// Wait for the transaction to execute, or break if aborted.
+	select {
+	case <-done:
+	case <-s.options.ctx.Done():
+	}
+
+	s.txQueue.Done(s.rset, s.wset)
+
 	if s.options.commitStatsCallback != nil {
 		stats.Retries = retries
-		s.options.commitStatsCallback(err == nil, stats)
+		s.options.commitStatsCallback(executeErr == nil, stats)
 	}

-	return err
+	return executeErr
 }

 // add inserts a txn response to the read set. This is useful when the txn
--- a/channeldb/kvdb/etcd/stm_test.go
+++ b/channeldb/kvdb/etcd/stm_test.go
@ -21,7 +21,11 @@ func TestPutToEmpty(t *testing.T) {
 	t.Parallel()

 	f := NewEtcdTestFixture(t)
-	defer f.Cleanup()
+	txQueue := NewCommitQueue(f.config.Ctx)
+	defer func() {
+		f.Cleanup()
+		txQueue.Wait()
+	}()

 	db, err := newEtcdBackend(f.BackendConfig())
 	require.NoError(t, err)
@ -31,7 +35,7 @@ func TestPutToEmpty(t *testing.T) {
 		return nil
 	}

-	err = RunSTM(db.cli, apply)
+	err = RunSTM(db.cli, apply, txQueue)
 	require.NoError(t, err)

 	require.Equal(t, "abc", f.Get("123"))
@ -41,7 +45,11 @@ func TestGetPutDel(t *testing.T) {
 	t.Parallel()

 	f := NewEtcdTestFixture(t)
-	defer f.cleanup()
+	txQueue := NewCommitQueue(f.config.Ctx)
+	defer func() {
+		f.Cleanup()
+		txQueue.Wait()
+	}()

 	testKeyValues := []KV{
 		{"a", "1"},
@ -105,7 +113,7 @@ func TestGetPutDel(t *testing.T) {
 		return nil
 	}

-	err = RunSTM(db.cli, apply)
+	err = RunSTM(db.cli, apply, txQueue)
 	require.NoError(t, err)

 	require.Equal(t, "1", f.Get("a"))
@ -120,7 +128,11 @@ func TestFirstLastNextPrev(t *testing.T) {
 	t.Parallel()

 	f := NewEtcdTestFixture(t)
-	defer f.Cleanup()
+	txQueue := NewCommitQueue(f.config.Ctx)
+	defer func() {
+		f.Cleanup()
+		txQueue.Wait()
+	}()

 	testKeyValues := []KV{
 		{"kb", "1"},
@ -255,7 +267,7 @@ func TestFirstLastNextPrev(t *testing.T) {
 		return nil
 	}

-	err = RunSTM(db.cli, apply)
+	err = RunSTM(db.cli, apply, txQueue)
 	require.NoError(t, err)

 	require.Equal(t, "0", f.Get("ka"))
@ -271,7 +283,11 @@ func TestCommitError(t *testing.T) {
 	t.Parallel()

 	f := NewEtcdTestFixture(t)
-	defer f.Cleanup()
+	txQueue := NewCommitQueue(f.config.Ctx)
+	defer func() {
+		f.Cleanup()
+		txQueue.Wait()
+	}()

 	db, err := newEtcdBackend(f.BackendConfig())
 	require.NoError(t, err)
@ -301,7 +317,7 @@ func TestCommitError(t *testing.T) {
 		return nil
 	}

-	err = RunSTM(db.cli, apply)
+	err = RunSTM(db.cli, apply, txQueue)
 	require.NoError(t, err)
 	require.Equal(t, 2, cnt)

@ -312,7 +328,11 @@ func TestManualTxError(t *testing.T) {
 	t.Parallel()

 	f := NewEtcdTestFixture(t)
-	defer f.Cleanup()
+	txQueue := NewCommitQueue(f.config.Ctx)
+	defer func() {
+		f.Cleanup()
+		txQueue.Wait()
+	}()

 	db, err := newEtcdBackend(f.BackendConfig())
 	require.NoError(t, err)
@ -320,7 +340,7 @@ func TestManualTxError(t *testing.T) {
 	// Preset DB state.
 	f.Put("123", "xyz")

-	stm := NewSTM(db.cli)
+	stm := NewSTM(db.cli, txQueue)

 	val, err := stm.Get("123")
 	require.NoError(t, err)