vsphere-influxdb-go/vendor/github.com/influxdata/influxdb/cmd/influx_tsm/b1/reader.go

271 lines
5.9 KiB
Go

// Package b1 reads data from b1 shards.
package b1 // import "github.com/influxdata/influxdb/cmd/influx_tsm/b1"
import (
"encoding/binary"
"math"
"sort"
"time"
"github.com/boltdb/bolt"
"github.com/influxdata/influxdb/cmd/influx_tsm/stats"
"github.com/influxdata/influxdb/cmd/influx_tsm/tsdb"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
// DefaultChunkSize is the size of chunks read from the b1 shard
const DefaultChunkSize int = 1000
var excludedBuckets = map[string]bool{
"fields": true,
"meta": true,
"series": true,
"wal": true,
}
// Reader is used to read all data from a b1 shard.
type Reader struct {
path string
db *bolt.DB
tx *bolt.Tx
cursors []*cursor
currCursor int
keyBuf string
values []tsm1.Value
valuePos int
fields map[string]*tsdb.MeasurementFields
codecs map[string]*tsdb.FieldCodec
stats *stats.Stats
}
// NewReader returns a reader for the b1 shard at path.
func NewReader(path string, stats *stats.Stats, chunkSize int) *Reader {
r := &Reader{
path: path,
fields: make(map[string]*tsdb.MeasurementFields),
codecs: make(map[string]*tsdb.FieldCodec),
stats: stats,
}
if chunkSize <= 0 {
chunkSize = DefaultChunkSize
}
r.values = make([]tsm1.Value, chunkSize)
return r
}
// Open opens the reader.
func (r *Reader) Open() error {
// Open underlying storage.
db, err := bolt.Open(r.path, 0666, &bolt.Options{Timeout: 1 * time.Second})
if err != nil {
return err
}
r.db = db
// Load fields.
if err := r.db.View(func(tx *bolt.Tx) error {
meta := tx.Bucket([]byte("fields"))
c := meta.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
mf := &tsdb.MeasurementFields{}
if err := mf.UnmarshalBinary(v); err != nil {
return err
}
r.fields[string(k)] = mf
r.codecs[string(k)] = tsdb.NewFieldCodec(mf.Fields)
}
return nil
}); err != nil {
return err
}
seriesSet := make(map[string]bool)
// ignore series index and find all series in this shard
if err := r.db.View(func(tx *bolt.Tx) error {
tx.ForEach(func(name []byte, _ *bolt.Bucket) error {
key := string(name)
if !excludedBuckets[key] {
seriesSet[key] = true
}
return nil
})
return nil
}); err != nil {
return err
}
r.tx, err = r.db.Begin(false)
if err != nil {
return err
}
// Create cursor for each field of each series.
for s := range seriesSet {
measurement := tsdb.MeasurementFromSeriesKey(s)
fields := r.fields[measurement]
if fields == nil {
r.stats.IncrFiltered()
continue
}
for _, f := range fields.Fields {
c := newCursor(r.tx, s, f.Name, r.codecs[measurement])
c.SeekTo(0)
r.cursors = append(r.cursors, c)
}
}
sort.Sort(cursors(r.cursors))
return nil
}
// Next returns whether any data remains to be read. It must be called before
// the next call to Read().
func (r *Reader) Next() bool {
r.valuePos = 0
OUTER:
for {
if r.currCursor >= len(r.cursors) {
// All cursors drained. No more data remains.
return false
}
cc := r.cursors[r.currCursor]
r.keyBuf = tsm1.SeriesFieldKey(cc.series, cc.field)
for {
k, v := cc.Next()
if k == -1 {
// Go to next cursor and try again.
r.currCursor++
if r.valuePos == 0 {
// The previous cursor had no data. Instead of returning
// just go immediately to the next cursor.
continue OUTER
}
// There is some data available. Indicate that it should be read.
return true
}
if f, ok := v.(float64); ok {
if math.IsInf(f, 0) {
r.stats.AddPointsRead(1)
r.stats.IncrInf()
continue
}
if math.IsNaN(f) {
r.stats.AddPointsRead(1)
r.stats.IncrNaN()
continue
}
}
r.values[r.valuePos] = tsm1.NewValue(k, v)
r.valuePos++
if r.valuePos >= len(r.values) {
return true
}
}
}
}
// Read returns the next chunk of data in the shard, converted to tsm1 values. Data is
// emitted completely for every field, in every series, before the next field is processed.
// Data from Read() adheres to the requirements for writing to tsm1 shards
func (r *Reader) Read() (string, []tsm1.Value, error) {
return r.keyBuf, r.values[:r.valuePos], nil
}
// Close closes the reader.
func (r *Reader) Close() error {
r.tx.Rollback()
return r.db.Close()
}
// cursor provides ordered iteration across a series.
type cursor struct {
// Bolt cursor and readahead buffer.
cursor *bolt.Cursor
keyBuf int64
valBuf interface{}
series string
field string
dec *tsdb.FieldCodec
}
// Cursor returns an iterator for a key over a single field.
func newCursor(tx *bolt.Tx, series string, field string, dec *tsdb.FieldCodec) *cursor {
cur := &cursor{
keyBuf: -2,
series: series,
field: field,
dec: dec,
}
// Retrieve series bucket.
b := tx.Bucket([]byte(series))
if b != nil {
cur.cursor = b.Cursor()
}
return cur
}
// Seek moves the cursor to a position.
func (c *cursor) SeekTo(seek int64) {
var seekBytes [8]byte
binary.BigEndian.PutUint64(seekBytes[:], uint64(seek))
k, v := c.cursor.Seek(seekBytes[:])
c.keyBuf, c.valBuf = tsdb.DecodeKeyValue(c.field, c.dec, k, v)
}
// Next returns the next key/value pair from the cursor.
func (c *cursor) Next() (key int64, value interface{}) {
for {
k, v := func() (int64, interface{}) {
if c.keyBuf != -2 {
k, v := c.keyBuf, c.valBuf
c.keyBuf = -2
return k, v
}
k, v := c.cursor.Next()
if k == nil {
return -1, nil
}
return tsdb.DecodeKeyValue(c.field, c.dec, k, v)
}()
if k != -1 && v == nil {
// There is a point in the series at the next timestamp,
// but not for this cursor's field. Go to the next point.
continue
}
return k, v
}
}
// Sort b1 cursors in correct order for writing to TSM files.
type cursors []*cursor
func (a cursors) Len() int { return len(a) }
func (a cursors) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a cursors) Less(i, j int) bool {
if a[i].series == a[j].series {
return a[i].field < a[j].field
}
return a[i].series < a[j].series
}