You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
302 lines
8.3 KiB
302 lines
8.3 KiB
// Copyright (c) 2012, Suryandaru Triandana <[email protected]> |
|
// All rights reserved. |
|
// |
|
// Use of this source code is governed by a BSD-style license that can be |
|
// found in the LICENSE file. |
|
|
|
package leveldb |
|
|
|
import ( |
|
"sync/atomic" |
|
|
|
"github.com/syndtr/goleveldb/leveldb/iterator" |
|
"github.com/syndtr/goleveldb/leveldb/memdb" |
|
"github.com/syndtr/goleveldb/leveldb/opt" |
|
) |
|
|
|
func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int { |
|
v := s.version() |
|
defer v.release() |
|
return v.pickMemdbLevel(umin, umax, maxLevel) |
|
} |
|
|
|
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) { |
|
// Create sorted table. |
|
iter := mdb.NewIterator(nil) |
|
defer iter.Release() |
|
t, n, err := s.tops.createFrom(iter) |
|
if err != nil { |
|
return 0, err |
|
} |
|
|
|
// Pick level other than zero can cause compaction issue with large |
|
// bulk insert and delete on strictly incrementing key-space. The |
|
// problem is that the small deletion markers trapped at lower level, |
|
// while key/value entries keep growing at higher level. Since the |
|
// key-space is strictly incrementing it will not overlaps with |
|
// higher level, thus maximum possible level is always picked, while |
|
// overlapping deletion marker pushed into lower level. |
|
// See: https://github.com/syndtr/goleveldb/issues/127. |
|
flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel) |
|
rec.addTableFile(flushLevel, t) |
|
|
|
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) |
|
return flushLevel, nil |
|
} |
|
|
|
// Pick a compaction based on current state; need external synchronization. |
|
func (s *session) pickCompaction() *compaction { |
|
v := s.version() |
|
|
|
var sourceLevel int |
|
var t0 tFiles |
|
if v.cScore >= 1 { |
|
sourceLevel = v.cLevel |
|
cptr := s.getCompPtr(sourceLevel) |
|
tables := v.levels[sourceLevel] |
|
for _, t := range tables { |
|
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { |
|
t0 = append(t0, t) |
|
break |
|
} |
|
} |
|
if len(t0) == 0 { |
|
t0 = append(t0, tables[0]) |
|
} |
|
} else { |
|
if p := atomic.LoadPointer(&v.cSeek); p != nil { |
|
ts := (*tSet)(p) |
|
sourceLevel = ts.level |
|
t0 = append(t0, ts.table) |
|
} else { |
|
v.release() |
|
return nil |
|
} |
|
} |
|
|
|
return newCompaction(s, v, sourceLevel, t0) |
|
} |
|
|
|
// Create compaction from given level and range; need external synchronization. |
|
func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction { |
|
v := s.version() |
|
|
|
if sourceLevel >= len(v.levels) { |
|
v.release() |
|
return nil |
|
} |
|
|
|
t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0) |
|
if len(t0) == 0 { |
|
v.release() |
|
return nil |
|
} |
|
|
|
// Avoid compacting too much in one shot in case the range is large. |
|
// But we cannot do this for level-0 since level-0 files can overlap |
|
// and we must not pick one file and drop another older file if the |
|
// two files overlap. |
|
if !noLimit && sourceLevel > 0 { |
|
limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel)) |
|
total := int64(0) |
|
for i, t := range t0 { |
|
total += t.size |
|
if total >= limit { |
|
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1) |
|
t0 = t0[:i+1] |
|
break |
|
} |
|
} |
|
} |
|
|
|
return newCompaction(s, v, sourceLevel, t0) |
|
} |
|
|
|
func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction { |
|
c := &compaction{ |
|
s: s, |
|
v: v, |
|
sourceLevel: sourceLevel, |
|
levels: [2]tFiles{t0, nil}, |
|
maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)), |
|
tPtrs: make([]int, len(v.levels)), |
|
} |
|
c.expand() |
|
c.save() |
|
return c |
|
} |
|
|
|
// compaction represent a compaction state. |
|
type compaction struct { |
|
s *session |
|
v *version |
|
|
|
sourceLevel int |
|
levels [2]tFiles |
|
maxGPOverlaps int64 |
|
|
|
gp tFiles |
|
gpi int |
|
seenKey bool |
|
gpOverlappedBytes int64 |
|
imin, imax internalKey |
|
tPtrs []int |
|
released bool |
|
|
|
snapGPI int |
|
snapSeenKey bool |
|
snapGPOverlappedBytes int64 |
|
snapTPtrs []int |
|
} |
|
|
|
func (c *compaction) save() { |
|
c.snapGPI = c.gpi |
|
c.snapSeenKey = c.seenKey |
|
c.snapGPOverlappedBytes = c.gpOverlappedBytes |
|
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...) |
|
} |
|
|
|
func (c *compaction) restore() { |
|
c.gpi = c.snapGPI |
|
c.seenKey = c.snapSeenKey |
|
c.gpOverlappedBytes = c.snapGPOverlappedBytes |
|
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...) |
|
} |
|
|
|
func (c *compaction) release() { |
|
if !c.released { |
|
c.released = true |
|
c.v.release() |
|
} |
|
} |
|
|
|
// Expand compacted tables; need external synchronization. |
|
func (c *compaction) expand() { |
|
limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel)) |
|
vt0 := c.v.levels[c.sourceLevel] |
|
vt1 := tFiles{} |
|
if level := c.sourceLevel + 1; level < len(c.v.levels) { |
|
vt1 = c.v.levels[level] |
|
} |
|
|
|
t0, t1 := c.levels[0], c.levels[1] |
|
imin, imax := t0.getRange(c.s.icmp) |
|
// We expand t0 here just incase ukey hop across tables. |
|
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0) |
|
if len(t0) != len(c.levels[0]) { |
|
imin, imax = t0.getRange(c.s.icmp) |
|
} |
|
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) |
|
// Get entire range covered by compaction. |
|
amin, amax := append(t0, t1...).getRange(c.s.icmp) |
|
|
|
// See if we can grow the number of inputs in "sourceLevel" without |
|
// changing the number of "sourceLevel+1" files we pick up. |
|
if len(t1) > 0 { |
|
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0) |
|
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { |
|
xmin, xmax := exp0.getRange(c.s.icmp) |
|
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) |
|
if len(exp1) == len(t1) { |
|
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", |
|
c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), |
|
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) |
|
imin, imax = xmin, xmax |
|
t0, t1 = exp0, exp1 |
|
amin, amax = append(t0, t1...).getRange(c.s.icmp) |
|
} |
|
} |
|
} |
|
|
|
// Compute the set of grandparent files that overlap this compaction |
|
// (parent == sourceLevel+1; grandparent == sourceLevel+2) |
|
if level := c.sourceLevel + 2; level < len(c.v.levels) { |
|
c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) |
|
} |
|
|
|
c.levels[0], c.levels[1] = t0, t1 |
|
c.imin, c.imax = imin, imax |
|
} |
|
|
|
// Check whether compaction is trivial. |
|
func (c *compaction) trivial() bool { |
|
return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps |
|
} |
|
|
|
func (c *compaction) baseLevelForKey(ukey []byte) bool { |
|
for level := c.sourceLevel + 2; level < len(c.v.levels); level++ { |
|
tables := c.v.levels[level] |
|
for c.tPtrs[level] < len(tables) { |
|
t := tables[c.tPtrs[level]] |
|
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 { |
|
// We've advanced far enough. |
|
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { |
|
// Key falls in this file's range, so definitely not base level. |
|
return false |
|
} |
|
break |
|
} |
|
c.tPtrs[level]++ |
|
} |
|
} |
|
return true |
|
} |
|
|
|
func (c *compaction) shouldStopBefore(ikey internalKey) bool { |
|
for ; c.gpi < len(c.gp); c.gpi++ { |
|
gp := c.gp[c.gpi] |
|
if c.s.icmp.Compare(ikey, gp.imax) <= 0 { |
|
break |
|
} |
|
if c.seenKey { |
|
c.gpOverlappedBytes += gp.size |
|
} |
|
} |
|
c.seenKey = true |
|
|
|
if c.gpOverlappedBytes > c.maxGPOverlaps { |
|
// Too much overlap for current output; start new output. |
|
c.gpOverlappedBytes = 0 |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
// Creates an iterator. |
|
func (c *compaction) newIterator() iterator.Iterator { |
|
// Creates iterator slice. |
|
icap := len(c.levels) |
|
if c.sourceLevel == 0 { |
|
// Special case for level-0. |
|
icap = len(c.levels[0]) + 1 |
|
} |
|
its := make([]iterator.Iterator, 0, icap) |
|
|
|
// Options. |
|
ro := &opt.ReadOptions{ |
|
DontFillCache: true, |
|
Strict: opt.StrictOverride, |
|
} |
|
strict := c.s.o.GetStrict(opt.StrictCompaction) |
|
if strict { |
|
ro.Strict |= opt.StrictReader |
|
} |
|
|
|
for i, tables := range c.levels { |
|
if len(tables) == 0 { |
|
continue |
|
} |
|
|
|
// Level-0 is not sorted and may overlaps each other. |
|
if c.sourceLevel+i == 0 { |
|
for _, t := range tables { |
|
its = append(its, c.s.tops.newIterator(t, nil, ro)) |
|
} |
|
} else { |
|
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict) |
|
its = append(its, it) |
|
} |
|
} |
|
|
|
return iterator.NewMergedIterator(its, c.s.icmp, strict) |
|
}
|
|
|