You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
539 lines
11 KiB
539 lines
11 KiB
/* |
|
* Copyright 2017 Dgraph Labs, Inc. and Contributors |
|
* |
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
* you may not use this file except in compliance with the License. |
|
* You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
|
|
package table |
|
|
|
import ( |
|
"bytes" |
|
"io" |
|
"math" |
|
"sort" |
|
|
|
"github.com/dgraph-io/badger/y" |
|
"github.com/pkg/errors" |
|
) |
|
|
|
type blockIterator struct { |
|
data []byte |
|
pos uint32 |
|
err error |
|
baseKey []byte |
|
|
|
key []byte |
|
val []byte |
|
init bool |
|
|
|
last header // The last header we saw. |
|
} |
|
|
|
func (itr *blockIterator) Reset() { |
|
itr.pos = 0 |
|
itr.err = nil |
|
itr.baseKey = []byte{} |
|
itr.key = []byte{} |
|
itr.val = []byte{} |
|
itr.init = false |
|
itr.last = header{} |
|
} |
|
|
|
func (itr *blockIterator) Init() { |
|
if !itr.init { |
|
itr.Next() |
|
} |
|
} |
|
|
|
func (itr *blockIterator) Valid() bool { |
|
return itr != nil && itr.err == nil |
|
} |
|
|
|
func (itr *blockIterator) Error() error { |
|
return itr.err |
|
} |
|
|
|
func (itr *blockIterator) Close() {} |
|
|
|
var ( |
|
origin = 0 |
|
current = 1 |
|
) |
|
|
|
// Seek brings us to the first block element that is >= input key. |
|
func (itr *blockIterator) Seek(key []byte, whence int) { |
|
itr.err = nil |
|
|
|
switch whence { |
|
case origin: |
|
itr.Reset() |
|
case current: |
|
} |
|
|
|
var done bool |
|
for itr.Init(); itr.Valid(); itr.Next() { |
|
k := itr.Key() |
|
if y.CompareKeys(k, key) >= 0 { |
|
// We are done as k is >= key. |
|
done = true |
|
break |
|
} |
|
} |
|
if !done { |
|
itr.err = io.EOF |
|
} |
|
} |
|
|
|
func (itr *blockIterator) SeekToFirst() { |
|
itr.err = nil |
|
itr.Init() |
|
} |
|
|
|
// SeekToLast brings us to the last element. Valid should return true. |
|
func (itr *blockIterator) SeekToLast() { |
|
itr.err = nil |
|
for itr.Init(); itr.Valid(); itr.Next() { |
|
} |
|
itr.Prev() |
|
} |
|
|
|
// parseKV would allocate a new byte slice for key and for value. |
|
func (itr *blockIterator) parseKV(h header) { |
|
if cap(itr.key) < int(h.plen+h.klen) { |
|
sz := int(h.plen) + int(h.klen) // Convert to int before adding to avoid uint16 overflow. |
|
itr.key = make([]byte, 2*sz) |
|
} |
|
itr.key = itr.key[:h.plen+h.klen] |
|
copy(itr.key, itr.baseKey[:h.plen]) |
|
copy(itr.key[h.plen:], itr.data[itr.pos:itr.pos+uint32(h.klen)]) |
|
itr.pos += uint32(h.klen) |
|
|
|
if itr.pos+uint32(h.vlen) > uint32(len(itr.data)) { |
|
itr.err = errors.Errorf("Value exceeded size of block: %d %d %d %d %v", |
|
itr.pos, h.klen, h.vlen, len(itr.data), h) |
|
return |
|
} |
|
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:itr.pos+uint32(h.vlen)]) |
|
itr.pos += uint32(h.vlen) |
|
} |
|
|
|
func (itr *blockIterator) Next() { |
|
itr.init = true |
|
itr.err = nil |
|
if itr.pos >= uint32(len(itr.data)) { |
|
itr.err = io.EOF |
|
return |
|
} |
|
|
|
var h header |
|
itr.pos += uint32(h.Decode(itr.data[itr.pos:])) |
|
itr.last = h // Store the last header. |
|
|
|
if h.klen == 0 && h.plen == 0 { |
|
// Last entry in the table. |
|
itr.err = io.EOF |
|
return |
|
} |
|
|
|
// Populate baseKey if it isn't set yet. This would only happen for the first Next. |
|
if len(itr.baseKey) == 0 { |
|
// This should be the first Next() for this block. Hence, prefix length should be zero. |
|
y.AssertTrue(h.plen == 0) |
|
itr.baseKey = itr.data[itr.pos : itr.pos+uint32(h.klen)] |
|
} |
|
itr.parseKV(h) |
|
} |
|
|
|
func (itr *blockIterator) Prev() { |
|
if !itr.init { |
|
return |
|
} |
|
itr.err = nil |
|
if itr.last.prev == math.MaxUint32 { |
|
// This is the first element of the block! |
|
itr.err = io.EOF |
|
itr.pos = 0 |
|
return |
|
} |
|
|
|
// Move back using current header's prev. |
|
itr.pos = itr.last.prev |
|
|
|
var h header |
|
y.AssertTruef(itr.pos < uint32(len(itr.data)), "%d %d", itr.pos, len(itr.data)) |
|
itr.pos += uint32(h.Decode(itr.data[itr.pos:])) |
|
itr.parseKV(h) |
|
itr.last = h |
|
} |
|
|
|
func (itr *blockIterator) Key() []byte { |
|
if itr.err != nil { |
|
return nil |
|
} |
|
return itr.key |
|
} |
|
|
|
func (itr *blockIterator) Value() []byte { |
|
if itr.err != nil { |
|
return nil |
|
} |
|
return itr.val |
|
} |
|
|
|
// Iterator is an iterator for a Table. |
|
type Iterator struct { |
|
t *Table |
|
bpos int |
|
bi *blockIterator |
|
err error |
|
|
|
// Internally, Iterator is bidirectional. However, we only expose the |
|
// unidirectional functionality for now. |
|
reversed bool |
|
} |
|
|
|
// NewIterator returns a new iterator of the Table |
|
func (t *Table) NewIterator(reversed bool) *Iterator { |
|
t.IncrRef() // Important. |
|
ti := &Iterator{t: t, reversed: reversed} |
|
ti.next() |
|
return ti |
|
} |
|
|
|
// Close closes the iterator (and it must be called). |
|
func (itr *Iterator) Close() error { |
|
return itr.t.DecrRef() |
|
} |
|
|
|
func (itr *Iterator) reset() { |
|
itr.bpos = 0 |
|
itr.err = nil |
|
} |
|
|
|
// Valid follows the y.Iterator interface |
|
func (itr *Iterator) Valid() bool { |
|
return itr.err == nil |
|
} |
|
|
|
func (itr *Iterator) seekToFirst() { |
|
numBlocks := len(itr.t.blockIndex) |
|
if numBlocks == 0 { |
|
itr.err = io.EOF |
|
return |
|
} |
|
itr.bpos = 0 |
|
block, err := itr.t.block(itr.bpos) |
|
if err != nil { |
|
itr.err = err |
|
return |
|
} |
|
itr.bi = block.NewIterator() |
|
itr.bi.SeekToFirst() |
|
itr.err = itr.bi.Error() |
|
} |
|
|
|
func (itr *Iterator) seekToLast() { |
|
numBlocks := len(itr.t.blockIndex) |
|
if numBlocks == 0 { |
|
itr.err = io.EOF |
|
return |
|
} |
|
itr.bpos = numBlocks - 1 |
|
block, err := itr.t.block(itr.bpos) |
|
if err != nil { |
|
itr.err = err |
|
return |
|
} |
|
itr.bi = block.NewIterator() |
|
itr.bi.SeekToLast() |
|
itr.err = itr.bi.Error() |
|
} |
|
|
|
func (itr *Iterator) seekHelper(blockIdx int, key []byte) { |
|
itr.bpos = blockIdx |
|
block, err := itr.t.block(blockIdx) |
|
if err != nil { |
|
itr.err = err |
|
return |
|
} |
|
itr.bi = block.NewIterator() |
|
itr.bi.Seek(key, origin) |
|
itr.err = itr.bi.Error() |
|
} |
|
|
|
// seekFrom brings us to a key that is >= input key. |
|
func (itr *Iterator) seekFrom(key []byte, whence int) { |
|
itr.err = nil |
|
switch whence { |
|
case origin: |
|
itr.reset() |
|
case current: |
|
} |
|
|
|
idx := sort.Search(len(itr.t.blockIndex), func(idx int) bool { |
|
ko := itr.t.blockIndex[idx] |
|
return y.CompareKeys(ko.key, key) > 0 |
|
}) |
|
if idx == 0 { |
|
// The smallest key in our table is already strictly > key. We can return that. |
|
// This is like a SeekToFirst. |
|
itr.seekHelper(0, key) |
|
return |
|
} |
|
|
|
// block[idx].smallest is > key. |
|
// Since idx>0, we know block[idx-1].smallest is <= key. |
|
// There are two cases. |
|
// 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first |
|
// element of block[idx]. |
|
// 2) Some element in block[idx-1] is >= key. We should go to that element. |
|
itr.seekHelper(idx-1, key) |
|
if itr.err == io.EOF { |
|
// Case 1. Need to visit block[idx]. |
|
if idx == len(itr.t.blockIndex) { |
|
// If idx == len(itr.t.blockIndex), then input key is greater than ANY element of table. |
|
// There's nothing we can do. Valid() should return false as we seek to end of table. |
|
return |
|
} |
|
// Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst. |
|
itr.seekHelper(idx, key) |
|
} |
|
// Case 2: No need to do anything. We already did the seek in block[idx-1]. |
|
} |
|
|
|
// seek will reset iterator and seek to >= key. |
|
func (itr *Iterator) seek(key []byte) { |
|
itr.seekFrom(key, origin) |
|
} |
|
|
|
// seekForPrev will reset iterator and seek to <= key. |
|
func (itr *Iterator) seekForPrev(key []byte) { |
|
// TODO: Optimize this. We shouldn't have to take a Prev step. |
|
itr.seekFrom(key, origin) |
|
if !bytes.Equal(itr.Key(), key) { |
|
itr.prev() |
|
} |
|
} |
|
|
|
func (itr *Iterator) next() { |
|
itr.err = nil |
|
|
|
if itr.bpos >= len(itr.t.blockIndex) { |
|
itr.err = io.EOF |
|
return |
|
} |
|
|
|
if itr.bi == nil { |
|
block, err := itr.t.block(itr.bpos) |
|
if err != nil { |
|
itr.err = err |
|
return |
|
} |
|
itr.bi = block.NewIterator() |
|
itr.bi.SeekToFirst() |
|
itr.err = itr.bi.Error() |
|
return |
|
} |
|
|
|
itr.bi.Next() |
|
if !itr.bi.Valid() { |
|
itr.bpos++ |
|
itr.bi = nil |
|
itr.next() |
|
return |
|
} |
|
} |
|
|
|
func (itr *Iterator) prev() { |
|
itr.err = nil |
|
if itr.bpos < 0 { |
|
itr.err = io.EOF |
|
return |
|
} |
|
|
|
if itr.bi == nil { |
|
block, err := itr.t.block(itr.bpos) |
|
if err != nil { |
|
itr.err = err |
|
return |
|
} |
|
itr.bi = block.NewIterator() |
|
itr.bi.SeekToLast() |
|
itr.err = itr.bi.Error() |
|
return |
|
} |
|
|
|
itr.bi.Prev() |
|
if !itr.bi.Valid() { |
|
itr.bpos-- |
|
itr.bi = nil |
|
itr.prev() |
|
return |
|
} |
|
} |
|
|
|
// Key follows the y.Iterator interface |
|
func (itr *Iterator) Key() []byte { |
|
return itr.bi.Key() |
|
} |
|
|
|
// Value follows the y.Iterator interface |
|
func (itr *Iterator) Value() (ret y.ValueStruct) { |
|
ret.Decode(itr.bi.Value()) |
|
return |
|
} |
|
|
|
// Next follows the y.Iterator interface |
|
func (itr *Iterator) Next() { |
|
if !itr.reversed { |
|
itr.next() |
|
} else { |
|
itr.prev() |
|
} |
|
} |
|
|
|
// Rewind follows the y.Iterator interface |
|
func (itr *Iterator) Rewind() { |
|
if !itr.reversed { |
|
itr.seekToFirst() |
|
} else { |
|
itr.seekToLast() |
|
} |
|
} |
|
|
|
// Seek follows the y.Iterator interface |
|
func (itr *Iterator) Seek(key []byte) { |
|
if !itr.reversed { |
|
itr.seek(key) |
|
} else { |
|
itr.seekForPrev(key) |
|
} |
|
} |
|
|
|
// ConcatIterator concatenates the sequences defined by several iterators. (It only works with |
|
// TableIterators, probably just because it's faster to not be so generic.) |
|
type ConcatIterator struct { |
|
idx int // Which iterator is active now. |
|
cur *Iterator |
|
iters []*Iterator // Corresponds to tables. |
|
tables []*Table // Disregarding reversed, this is in ascending order. |
|
reversed bool |
|
} |
|
|
|
// NewConcatIterator creates a new concatenated iterator |
|
func NewConcatIterator(tbls []*Table, reversed bool) *ConcatIterator { |
|
iters := make([]*Iterator, len(tbls)) |
|
for i := 0; i < len(tbls); i++ { |
|
iters[i] = tbls[i].NewIterator(reversed) |
|
} |
|
return &ConcatIterator{ |
|
reversed: reversed, |
|
iters: iters, |
|
tables: tbls, |
|
idx: -1, // Not really necessary because s.it.Valid()=false, but good to have. |
|
} |
|
} |
|
|
|
func (s *ConcatIterator) setIdx(idx int) { |
|
s.idx = idx |
|
if idx < 0 || idx >= len(s.iters) { |
|
s.cur = nil |
|
} else { |
|
s.cur = s.iters[s.idx] |
|
} |
|
} |
|
|
|
// Rewind implements y.Interface |
|
func (s *ConcatIterator) Rewind() { |
|
if len(s.iters) == 0 { |
|
return |
|
} |
|
if !s.reversed { |
|
s.setIdx(0) |
|
} else { |
|
s.setIdx(len(s.iters) - 1) |
|
} |
|
s.cur.Rewind() |
|
} |
|
|
|
// Valid implements y.Interface |
|
func (s *ConcatIterator) Valid() bool { |
|
return s.cur != nil && s.cur.Valid() |
|
} |
|
|
|
// Key implements y.Interface |
|
func (s *ConcatIterator) Key() []byte { |
|
return s.cur.Key() |
|
} |
|
|
|
// Value implements y.Interface |
|
func (s *ConcatIterator) Value() y.ValueStruct { |
|
return s.cur.Value() |
|
} |
|
|
|
// Seek brings us to element >= key if reversed is false. Otherwise, <= key. |
|
func (s *ConcatIterator) Seek(key []byte) { |
|
var idx int |
|
if !s.reversed { |
|
idx = sort.Search(len(s.tables), func(i int) bool { |
|
return y.CompareKeys(s.tables[i].Biggest(), key) >= 0 |
|
}) |
|
} else { |
|
n := len(s.tables) |
|
idx = n - 1 - sort.Search(n, func(i int) bool { |
|
return y.CompareKeys(s.tables[n-1-i].Smallest(), key) <= 0 |
|
}) |
|
} |
|
if idx >= len(s.tables) || idx < 0 { |
|
s.setIdx(-1) |
|
return |
|
} |
|
// For reversed=false, we know s.tables[i-1].Biggest() < key. Thus, the |
|
// previous table cannot possibly contain key. |
|
s.setIdx(idx) |
|
s.cur.Seek(key) |
|
} |
|
|
|
// Next advances our concat iterator. |
|
func (s *ConcatIterator) Next() { |
|
s.cur.Next() |
|
if s.cur.Valid() { |
|
// Nothing to do. Just stay with the current table. |
|
return |
|
} |
|
for { // In case there are empty tables. |
|
if !s.reversed { |
|
s.setIdx(s.idx + 1) |
|
} else { |
|
s.setIdx(s.idx - 1) |
|
} |
|
if s.cur == nil { |
|
// End of list. Valid will become false. |
|
return |
|
} |
|
s.cur.Rewind() |
|
if s.cur.Valid() { |
|
break |
|
} |
|
} |
|
} |
|
|
|
// Close implements y.Interface. |
|
func (s *ConcatIterator) Close() error { |
|
for _, it := range s.iters { |
|
if err := it.Close(); err != nil { |
|
return errors.Wrap(err, "ConcatIterator") |
|
} |
|
} |
|
return nil |
|
}
|
|
|