You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
150 lines
4.0 KiB
150 lines
4.0 KiB
// +build kvdb_etcd |
|
|
|
package etcd |
|
|
|
import ( |
|
"context" |
|
"sync" |
|
) |
|
|
|
// commitQueueSize is the capacity of the buffered channel that holds commits
// waiting for serialized execution (see NewCommitQueue). Once that many
// conflicting commits are pending, further conflicting calls to Add block
// until a slot frees up or the queue context is canceled.
const commitQueueSize = 100
|
|
|
// commitQueue is a simple execution queue that manages conflicts between
// transactions in order to reduce how often conflicting transactions have to
// be retried. When a transaction is added, the queue first consults and
// updates its own per-key read/write counts to decide whether the new
// transaction conflicts with any transaction it is already tracking. A
// transaction without conflicts is committed immediately; otherwise it is
// queued up for later execution.
//
// The algorithm is described in:
// http://www.cs.umd.edu/~abadi/papers/vll-vldb13.pdf
type commitQueue struct {
	// ctx aborts the queue: once it is done, mainLoop exits and Add stops
	// waiting for queue capacity.
	ctx context.Context

	// mx guards readerMap and writerMap.
	mx sync.Mutex

	// readerMap counts, per key, the transactions passed to Add with the
	// key in their read set and not yet released through Done.
	readerMap map[string]int

	// writerMap is the write-set counterpart of readerMap.
	writerMap map[string]int

	// commitMutex is read-locked by Add around commits that run
	// immediately and write-locked by mainLoop around each queued commit.
	commitMutex sync.RWMutex

	// queue carries the commit closures of conflicting transactions from
	// Add to mainLoop.
	queue chan func()

	// wg tracks the mainLoop goroutine so that Wait can block until it
	// has exited.
	wg sync.WaitGroup
}
|
|
|
// NewCommitQueue creates a new commit queue, with the passed abort context. |
|
func NewCommitQueue(ctx context.Context) *commitQueue { |
|
q := &commitQueue{ |
|
ctx: ctx, |
|
readerMap: make(map[string]int), |
|
writerMap: make(map[string]int), |
|
queue: make(chan func(), commitQueueSize), |
|
} |
|
|
|
// Start the queue consumer loop. |
|
q.wg.Add(1) |
|
go q.mainLoop() |
|
|
|
return q |
|
} |
|
|
|
// Wait waits for the queue to stop (after the queue context has been canceled). |
|
func (c *commitQueue) Wait() { |
|
c.wg.Wait() |
|
} |
|
|
|
// Add increases lock counts and queues up tx commit closure for execution. |
|
// Transactions that don't have any conflicts are executed immediately by |
|
// "downgrading" the count mutex to allow concurrency. |
|
func (c *commitQueue) Add(commitLoop func(), rset readSet, wset writeSet) { |
|
c.mx.Lock() |
|
blocked := false |
|
|
|
// Mark as blocked if there's any writer changing any of the keys in |
|
// the read set. Do not increment the reader counts yet as we'll need to |
|
// use the original reader counts when scanning through the write set. |
|
for key := range rset { |
|
if c.writerMap[key] > 0 { |
|
blocked = true |
|
break |
|
} |
|
} |
|
|
|
// Mark as blocked if there's any writer or reader for any of the keys |
|
// in the write set. |
|
for key := range wset { |
|
blocked = blocked || c.readerMap[key] > 0 || c.writerMap[key] > 0 |
|
|
|
// Increment the writer count. |
|
c.writerMap[key] += 1 |
|
} |
|
|
|
// Finally we can increment the reader counts for keys in the read set. |
|
for key := range rset { |
|
c.readerMap[key] += 1 |
|
} |
|
|
|
if blocked { |
|
// Add the transaction to the queue if conflicts with an already |
|
// queued one. |
|
c.mx.Unlock() |
|
|
|
select { |
|
case c.queue <- commitLoop: |
|
case <-c.ctx.Done(): |
|
} |
|
} else { |
|
// To make sure we don't add a new tx to the queue that depends |
|
// on this "unblocked" tx, grab the commitMutex before lifting |
|
// the mutex guarding the lock maps. |
|
c.commitMutex.RLock() |
|
c.mx.Unlock() |
|
|
|
// At this point we're safe to execute the "unblocked" tx, as |
|
// we cannot execute blocked tx that may have been read from the |
|
// queue until the commitMutex is held. |
|
commitLoop() |
|
|
|
c.commitMutex.RUnlock() |
|
} |
|
} |
|
|
|
// Done decreases lock counts of the keys in the read/write sets. |
|
func (c *commitQueue) Done(rset readSet, wset writeSet) { |
|
c.mx.Lock() |
|
defer c.mx.Unlock() |
|
|
|
for key := range rset { |
|
c.readerMap[key] -= 1 |
|
if c.readerMap[key] == 0 { |
|
delete(c.readerMap, key) |
|
} |
|
} |
|
|
|
for key := range wset { |
|
c.writerMap[key] -= 1 |
|
if c.writerMap[key] == 0 { |
|
delete(c.writerMap, key) |
|
} |
|
} |
|
} |
|
|
|
// mainLoop executes queued transaction commits for transactions that have |
|
// dependencies. The queue ensures that the top element doesn't conflict with |
|
// any other transactions and therefore can be executed freely. |
|
func (c *commitQueue) mainLoop() { |
|
defer c.wg.Done() |
|
|
|
for { |
|
select { |
|
case top := <-c.queue: |
|
// Execute the next blocked transaction. As it is |
|
// the top element in the queue it means that it doesn't |
|
// depend on any other transactions anymore. |
|
c.commitMutex.Lock() |
|
top() |
|
c.commitMutex.Unlock() |
|
|
|
case <-c.ctx.Done(): |
|
return |
|
} |
|
} |
|
}
|
|
|