You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
10 KiB
333 lines
10 KiB
// Copyright (C) 2015 The GoHBase Authors. All rights reserved. |
|
// This file is part of GoHBase. |
|
// Use of this source code is governed by the Apache License 2.0 |
|
// that can be found in the COPYING file. |
|
|
|
package hrpc |
|
|
|
import ( |
|
"context" |
|
"errors" |
|
"fmt" |
|
"math" |
|
|
|
"github.com/golang/protobuf/proto" |
|
"github.com/tsuna/gohbase/pb" |
|
) |
|
|
|
const ( |
|
// DefaultMaxVersions defualt value for maximum versions to return for scan queries |
|
DefaultMaxVersions uint32 = 1 |
|
// MinTimestamp default value for minimum timestamp for scan queries |
|
MinTimestamp uint64 = 0 |
|
// MaxTimestamp default value for maximum timestamp for scan queries |
|
MaxTimestamp = math.MaxUint64 |
|
// DefaultMaxResultSize Maximum number of bytes fetched when calling a scanner's |
|
// next method. The default value is 2MB, which is good for 1ge networks. |
|
// With faster and/or high latency networks this value should be increased. |
|
DefaultMaxResultSize = 2097152 |
|
// DefaultNumberOfRows is default maximum number of rows fetched by scanner |
|
DefaultNumberOfRows = math.MaxInt32 |
|
// DefaultMaxResultsPerColumnFamily is the default max number of cells fetched |
|
// per column family for each row |
|
DefaultMaxResultsPerColumnFamily = math.MaxInt32 |
|
) |
|
|
|
// Scanner is used to read data sequentially from HBase. |
|
// Scanner will be automatically closed if there's no more data to read, |
|
// otherwise Close method should be called. |
|
type Scanner interface { |
|
// Next returns a row at a time. |
|
// Once all rows are returned, subsequent calls will return io.EOF error. |
|
// |
|
// In case of an error, only the first call to Next() will return partial |
|
// result (could be not a complete row) and the actual error, |
|
// the subsequent calls will return io.EOF error. |
|
Next() (*Result, error) |
|
|
|
// Close should be called if it is desired to stop scanning before getting all of results. |
|
// If you call Next() after calling Close() you might still get buffered results. |
|
// Othwerwise, in case all results have been delivered or in case of an error, the Scanner |
|
// will be closed automatically. |
|
Close() error |
|
} |
|
|
|
// Scan represents a scanner on an HBase table. |
|
type Scan struct { |
|
base |
|
baseQuery |
|
|
|
startRow []byte |
|
stopRow []byte |
|
|
|
scannerID uint64 |
|
|
|
maxResultSize uint64 |
|
numberOfRows uint32 |
|
reversed bool |
|
|
|
closeScanner bool |
|
allowPartialResults bool |
|
} |
|
|
|
// baseScan returns a Scan struct with default values set. |
|
func baseScan(ctx context.Context, table []byte, |
|
options ...func(Call) error) (*Scan, error) { |
|
s := &Scan{ |
|
base: base{ |
|
table: table, |
|
ctx: ctx, |
|
resultch: make(chan RPCResult, 1), |
|
}, |
|
baseQuery: newBaseQuery(), |
|
scannerID: math.MaxUint64, |
|
maxResultSize: DefaultMaxResultSize, |
|
numberOfRows: DefaultNumberOfRows, |
|
reversed: false, |
|
} |
|
err := applyOptions(s, options...) |
|
if err != nil { |
|
return nil, err |
|
} |
|
return s, nil |
|
} |
|
|
|
func (s *Scan) String() string { |
|
return fmt.Sprintf("Scan{Table=%q StartRow=%q StopRow=%q TimeRange=(%d, %d) "+ |
|
"MaxVersions=%d NumberOfRows=%d MaxResultSize=%d Familes=%v Filter=%v "+ |
|
"StoreLimit=%d StoreOffset=%d ScannerID=%d Close=%v}", |
|
s.table, s.startRow, s.stopRow, s.fromTimestamp, s.toTimestamp, |
|
s.maxVersions, s.numberOfRows, s.maxResultSize, s.families, s.filter, |
|
s.storeLimit, s.storeOffset, s.scannerID, s.closeScanner) |
|
} |
|
|
|
// NewScan creates a scanner for the given table. |
|
func NewScan(ctx context.Context, table []byte, options ...func(Call) error) (*Scan, error) { |
|
return baseScan(ctx, table, options...) |
|
} |
|
|
|
// NewScanRange creates a scanner for the given table and key range. |
|
// The range is half-open, i.e. [startRow; stopRow[ -- stopRow is not |
|
// included in the range. |
|
func NewScanRange(ctx context.Context, table, startRow, stopRow []byte, |
|
options ...func(Call) error) (*Scan, error) { |
|
scan, err := baseScan(ctx, table, options...) |
|
if err != nil { |
|
return nil, err |
|
} |
|
scan.startRow = startRow |
|
scan.stopRow = stopRow |
|
scan.key = startRow |
|
return scan, nil |
|
} |
|
|
|
// NewScanStr creates a scanner for the given table. |
|
func NewScanStr(ctx context.Context, table string, options ...func(Call) error) (*Scan, error) { |
|
return NewScan(ctx, []byte(table), options...) |
|
} |
|
|
|
// NewScanRangeStr creates a scanner for the given table and key range. |
|
// The range is half-open, i.e. [startRow; stopRow[ -- stopRow is not |
|
// included in the range. |
|
func NewScanRangeStr(ctx context.Context, table, startRow, stopRow string, |
|
options ...func(Call) error) (*Scan, error) { |
|
return NewScanRange(ctx, []byte(table), []byte(startRow), []byte(stopRow), options...) |
|
} |
|
|
|
// Name returns the name of this RPC call. |
|
func (s *Scan) Name() string { |
|
return "Scan" |
|
} |
|
|
|
// StopRow returns the end key (exclusive) of this scanner. |
|
func (s *Scan) StopRow() []byte { |
|
return s.stopRow |
|
} |
|
|
|
// StartRow returns the start key (inclusive) of this scanner. |
|
func (s *Scan) StartRow() []byte { |
|
return s.startRow |
|
} |
|
|
|
// IsClosing returns wether this scan closes scanner prematurely |
|
func (s *Scan) IsClosing() bool { |
|
return s.closeScanner |
|
} |
|
|
|
// AllowPartialResults returns true if client handles partials. |
|
func (s *Scan) AllowPartialResults() bool { |
|
return s.allowPartialResults |
|
} |
|
|
|
// Reversed returns true if scanner scans in reverse. |
|
func (s *Scan) Reversed() bool { |
|
return s.reversed |
|
} |
|
|
|
// NumberOfRows returns how many rows this scan |
|
// fetches from regionserver in a single response. |
|
func (s *Scan) NumberOfRows() uint32 { |
|
return s.numberOfRows |
|
} |
|
|
|
// ToProto converts this Scan into a protobuf message |
|
func (s *Scan) ToProto() proto.Message { |
|
scan := &pb.ScanRequest{ |
|
Region: s.regionSpecifier(), |
|
CloseScanner: &s.closeScanner, |
|
NumberOfRows: &s.numberOfRows, |
|
// tell server that we can process results that are only part of a row |
|
ClientHandlesPartials: proto.Bool(true), |
|
// tell server that we "handle" heartbeats by ignoring them |
|
// since we don't really time out our scans (unless context was cancelled) |
|
ClientHandlesHeartbeats: proto.Bool(true), |
|
} |
|
if s.scannerID != math.MaxUint64 { |
|
scan.ScannerId = &s.scannerID |
|
return scan |
|
} |
|
scan.Scan = &pb.Scan{ |
|
Column: familiesToColumn(s.families), |
|
StartRow: s.startRow, |
|
StopRow: s.stopRow, |
|
TimeRange: &pb.TimeRange{}, |
|
MaxResultSize: &s.maxResultSize, |
|
} |
|
if s.maxVersions != DefaultMaxVersions { |
|
scan.Scan.MaxVersions = &s.maxVersions |
|
} |
|
|
|
/* added support for limit number of cells per row */ |
|
if s.storeLimit != DefaultMaxResultsPerColumnFamily { |
|
scan.Scan.StoreLimit = &s.storeLimit |
|
} |
|
if s.storeOffset != 0 { |
|
scan.Scan.StoreOffset = &s.storeOffset |
|
} |
|
|
|
if s.fromTimestamp != MinTimestamp { |
|
scan.Scan.TimeRange.From = &s.fromTimestamp |
|
} |
|
if s.toTimestamp != MaxTimestamp { |
|
scan.Scan.TimeRange.To = &s.toTimestamp |
|
} |
|
if s.reversed { |
|
scan.Scan.Reversed = &s.reversed |
|
} |
|
scan.Scan.Filter = s.filter |
|
return scan |
|
} |
|
|
|
// NewResponse creates an empty protobuf message to read the response |
|
// of this RPC. |
|
func (s *Scan) NewResponse() proto.Message { |
|
return &pb.ScanResponse{} |
|
} |
|
|
|
// DeserializeCellBlocks deserializes scan results from cell blocks |
|
func (s *Scan) DeserializeCellBlocks(m proto.Message, b []byte) (uint32, error) { |
|
scanResp := m.(*pb.ScanResponse) |
|
partials := scanResp.GetPartialFlagPerResult() |
|
scanResp.Results = make([]*pb.Result, len(partials)) |
|
var readLen uint32 |
|
for i, numCells := range scanResp.GetCellsPerResult() { |
|
cells, l, err := deserializeCellBlocks(b[readLen:], numCells) |
|
if err != nil { |
|
return 0, err |
|
} |
|
scanResp.Results[i] = &pb.Result{ |
|
Cell: cells, |
|
Partial: proto.Bool(partials[i]), |
|
} |
|
readLen += l |
|
} |
|
return readLen, nil |
|
} |
|
|
|
// ScannerID is an option for scan requests. |
|
// This is an internal option to fetch the next set of results for an ongoing scan. |
|
func ScannerID(id uint64) func(Call) error { |
|
return func(s Call) error { |
|
scan, ok := s.(*Scan) |
|
if !ok { |
|
return errors.New("'ScannerID' option can only be used with Scan queries") |
|
} |
|
scan.scannerID = id |
|
return nil |
|
} |
|
} |
|
|
|
// CloseScanner is an option for scan requests. |
|
// Closes scanner after the first result is returned. This is an internal option |
|
// but could be useful if you know that your scan result fits into one response |
|
// in order to save an extra request. |
|
func CloseScanner() func(Call) error { |
|
return func(s Call) error { |
|
scan, ok := s.(*Scan) |
|
if !ok { |
|
return errors.New("'Close' option can only be used with Scan queries") |
|
} |
|
scan.closeScanner = true |
|
return nil |
|
} |
|
} |
|
|
|
// MaxResultSize is an option for scan requests. |
|
// Maximum number of bytes fetched when calling a scanner's next method. |
|
// MaxResultSize takes priority over NumberOfRows. |
|
func MaxResultSize(n uint64) func(Call) error { |
|
return func(g Call) error { |
|
scan, ok := g.(*Scan) |
|
if !ok { |
|
return errors.New("'MaxResultSize' option can only be used with Scan queries") |
|
} |
|
if n == 0 { |
|
return errors.New("'MaxResultSize' option must be greater than 0") |
|
} |
|
scan.maxResultSize = n |
|
return nil |
|
} |
|
} |
|
|
|
// NumberOfRows is an option for scan requests. |
|
// Specifies how many rows are fetched with each request to regionserver. |
|
// Should be > 0, avoid extremely low values such as 1 because a request |
|
// to regionserver will be made for every row. |
|
func NumberOfRows(n uint32) func(Call) error { |
|
return func(g Call) error { |
|
scan, ok := g.(*Scan) |
|
if !ok { |
|
return errors.New("'NumberOfRows' option can only be used with Scan queries") |
|
} |
|
scan.numberOfRows = n |
|
return nil |
|
} |
|
} |
|
|
|
// AllowPartialResults is an option for scan requests. |
|
// This option should be provided if the client has really big rows and |
|
// wants to avoid OOM errors on her side. With this option provided, Next() |
|
// will return partial rows. |
|
func AllowPartialResults() func(Call) error { |
|
return func(g Call) error { |
|
scan, ok := g.(*Scan) |
|
if !ok { |
|
return errors.New("'AllowPartialResults' option can only be used with Scan queries") |
|
} |
|
scan.allowPartialResults = true |
|
return nil |
|
} |
|
} |
|
|
|
// Reversed is a Scan-only option which allows you to scan in reverse key order |
|
// To use it the startKey would be greater than the end key |
|
func Reversed() func(Call) error { |
|
return func(g Call) error { |
|
scan, ok := g.(*Scan) |
|
if !ok { |
|
return errors.New("'Reversed' option can only be used with Scan queries") |
|
} |
|
scan.reversed = true |
|
return nil |
|
} |
|
}
|
|
|