You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
235 lines
6.8 KiB
235 lines
6.8 KiB
/* |
|
* Copyright 2017 Dgraph Labs, Inc. and Contributors |
|
* |
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
* you may not use this file except in compliance with the License. |
|
* You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
|
|
package table |
|
|
|
import ( |
|
"bytes" |
|
"encoding/binary" |
|
"io" |
|
"math" |
|
|
|
"github.com/AndreasBriese/bbloom" |
|
"github.com/dgraph-io/badger/y" |
|
) |
|
|
|
// restartInterval is the number of keys written to a block before Add starts a new one.
// Might want to change this to be based on total size instead of numKeys.
var restartInterval = 100
|
|
|
// newBuffer returns an empty bytes.Buffer whose capacity has been grown to hold at least sz
// bytes without further allocation.
func newBuffer(sz int) *bytes.Buffer {
	var buf bytes.Buffer
	buf.Grow(sz)
	return &buf
}
|
|
|
// header is the fixed-size record written in front of every key-value entry of a block.
type header struct {
	// plen is the number of leading key bytes shared with the block's base key.
	plen uint16
	// klen is the length of the key suffix (the diff) stored after the header.
	klen uint16
	// vlen is the length of the encoded value.
	vlen uint16
	// prev is the offset of the previous key-value pair, relative to the block base offset.
	prev uint32
}

// Encode writes the header into the first 10 bytes of b in big-endian order:
// plen, klen, vlen (2 bytes each) followed by prev (4 bytes).
func (h header) Encode(b []byte) {
	be := binary.BigEndian
	be.PutUint16(b[0:2], h.plen)
	be.PutUint16(b[2:4], h.klen)
	be.PutUint16(b[4:6], h.vlen)
	be.PutUint32(b[6:10], h.prev)
}

// Decode fills the header from the first 10 bytes of buf (the layout written by Encode)
// and returns the number of bytes consumed.
func (h *header) Decode(buf []byte) int {
	be := binary.BigEndian
	h.plen = be.Uint16(buf[0:2])
	h.klen = be.Uint16(buf[2:4])
	h.vlen = be.Uint16(buf[4:6])
	h.prev = be.Uint32(buf[6:10])
	return h.Size()
}

// Size returns the encoded size of the header in bytes. Currently it is a constant.
func (h header) Size() int {
	return 10
}
|
|
|
// Builder accumulates key-value pairs and serialises them into the byte image of one table.
type Builder struct {
	// counter is the number of keys written to the current block.
	counter int

	// buf holds the table being built. Typically tens or hundreds of meg; one single file.
	buf *bytes.Buffer

	// baseKey is the base key of the current block; later keys store only their diff to it.
	baseKey []byte
	// baseOffset is the offset in buf at which the current block starts.
	baseOffset uint32

	// restarts lists the base offsets of every block.
	restarts []uint32

	// prevOffset is the offset of the previous key-value pair, relative to baseOffset.
	prevOffset uint32

	// keyBuf queues length-prefixed keys that Finish feeds into the bloom filter.
	keyBuf *bytes.Buffer
	// keyCount is the number of keys queued in keyBuf.
	keyCount int
}
|
|
|
// NewTableBuilder makes a new TableBuilder. |
|
func NewTableBuilder() *Builder { |
|
return &Builder{ |
|
keyBuf: newBuffer(1 << 20), |
|
buf: newBuffer(1 << 20), |
|
prevOffset: math.MaxUint32, // Used for the first element! |
|
} |
|
} |
|
|
|
// Close closes the TableBuilder. |
|
func (b *Builder) Close() {} |
|
|
|
// Empty returns whether it's empty. |
|
func (b *Builder) Empty() bool { return b.buf.Len() == 0 } |
|
|
|
// keyDiff returns a suffix of newKey that is different from b.baseKey. |
|
func (b Builder) keyDiff(newKey []byte) []byte { |
|
var i int |
|
for i = 0; i < len(newKey) && i < len(b.baseKey); i++ { |
|
if newKey[i] != b.baseKey[i] { |
|
break |
|
} |
|
} |
|
return newKey[i:] |
|
} |
|
|
|
func (b *Builder) addHelper(key []byte, v y.ValueStruct) { |
|
// Add key to bloom filter. |
|
if len(key) > 0 { |
|
var klen [2]byte |
|
keyNoTs := y.ParseKey(key) |
|
binary.BigEndian.PutUint16(klen[:], uint16(len(keyNoTs))) |
|
b.keyBuf.Write(klen[:]) |
|
b.keyBuf.Write(keyNoTs) |
|
b.keyCount++ |
|
} |
|
|
|
// diffKey stores the difference of key with baseKey. |
|
var diffKey []byte |
|
if len(b.baseKey) == 0 { |
|
// Make a copy. Builder should not keep references. Otherwise, caller has to be very careful |
|
// and will have to make copies of keys every time they add to builder, which is even worse. |
|
b.baseKey = append(b.baseKey[:0], key...) |
|
diffKey = key |
|
} else { |
|
diffKey = b.keyDiff(key) |
|
} |
|
|
|
h := header{ |
|
plen: uint16(len(key) - len(diffKey)), |
|
klen: uint16(len(diffKey)), |
|
vlen: uint16(v.EncodedSize()), |
|
prev: b.prevOffset, // prevOffset is the location of the last key-value added. |
|
} |
|
b.prevOffset = uint32(b.buf.Len()) - b.baseOffset // Remember current offset for the next Add call. |
|
|
|
// Layout: header, diffKey, value. |
|
var hbuf [10]byte |
|
h.Encode(hbuf[:]) |
|
b.buf.Write(hbuf[:]) |
|
b.buf.Write(diffKey) // We only need to store the key difference. |
|
|
|
v.EncodeTo(b.buf) |
|
b.counter++ // Increment number of keys added for this current block. |
|
} |
|
|
|
func (b *Builder) finishBlock() { |
|
// When we are at the end of the block and Valid=false, and the user wants to do a Prev, |
|
// we need a dummy header to tell us the offset of the previous key-value pair. |
|
b.addHelper([]byte{}, y.ValueStruct{}) |
|
} |
|
|
|
// Add adds a key-value pair to the block. |
|
// If doNotRestart is true, we will not restart even if b.counter >= restartInterval. |
|
func (b *Builder) Add(key []byte, value y.ValueStruct) error { |
|
if b.counter >= restartInterval { |
|
b.finishBlock() |
|
// Start a new block. Initialize the block. |
|
b.restarts = append(b.restarts, uint32(b.buf.Len())) |
|
b.counter = 0 |
|
b.baseKey = []byte{} |
|
b.baseOffset = uint32(b.buf.Len()) |
|
b.prevOffset = math.MaxUint32 // First key-value pair of block has header.prev=MaxInt. |
|
} |
|
b.addHelper(key, value) |
|
return nil // Currently, there is no meaningful error. |
|
} |
|
|
|
// TODO: The comment below was originally the documentation of ReachedCapacity.
// FinalSize returns the *rough* final size of the array, counting the header which is not yet
// written.
// TODO: Look into why there is a discrepancy. I suspect it is because of Write(empty, empty)
// at the end. The diff can vary.
|
|
|
// ReachedCapacity returns true if we... roughly (?) reached capacity? |
|
func (b *Builder) ReachedCapacity(cap int64) bool { |
|
estimateSz := b.buf.Len() + 8 /* empty header */ + 4*len(b.restarts) + 8 // 8 = end of buf offset + len(restarts). |
|
return int64(estimateSz) > cap |
|
} |
|
|
|
// blockIndex generates the block index for the table. |
|
// It is mainly a list of all the block base offsets. |
|
func (b *Builder) blockIndex() []byte { |
|
// Store the end offset, so we know the length of the final block. |
|
b.restarts = append(b.restarts, uint32(b.buf.Len())) |
|
|
|
// Add 4 because we want to write out number of restarts at the end. |
|
sz := 4*len(b.restarts) + 4 |
|
out := make([]byte, sz) |
|
buf := out |
|
for _, r := range b.restarts { |
|
binary.BigEndian.PutUint32(buf[:4], r) |
|
buf = buf[4:] |
|
} |
|
binary.BigEndian.PutUint32(buf[:4], uint32(len(b.restarts))) |
|
return out |
|
} |
|
|
|
// Finish finishes the table by appending the index. |
|
func (b *Builder) Finish() []byte { |
|
bf := bbloom.New(float64(b.keyCount), 0.01) |
|
var klen [2]byte |
|
key := make([]byte, 1024) |
|
for { |
|
if _, err := b.keyBuf.Read(klen[:]); err == io.EOF { |
|
break |
|
} else if err != nil { |
|
y.Check(err) |
|
} |
|
kl := int(binary.BigEndian.Uint16(klen[:])) |
|
if cap(key) < kl { |
|
key = make([]byte, 2*int(kl)) // 2 * uint16 will overflow |
|
} |
|
key = key[:kl] |
|
y.Check2(b.keyBuf.Read(key)) |
|
bf.Add(key) |
|
} |
|
|
|
b.finishBlock() // This will never start a new block. |
|
index := b.blockIndex() |
|
b.buf.Write(index) |
|
|
|
// Write bloom filter. |
|
bdata := bf.JSONMarshal() |
|
n, err := b.buf.Write(bdata) |
|
y.Check(err) |
|
var buf [4]byte |
|
binary.BigEndian.PutUint32(buf[:], uint32(n)) |
|
b.buf.Write(buf[:]) |
|
|
|
return b.buf.Bytes() |
|
}
|
|
|