mirror of https://github.com/Oxalide/vsphere-influxdb-go.git synced 2023-10-10 11:36:51 +00:00

add vendoring with go dep

This commit is contained in:
Adrian Todorov
2017-10-25 20:52:40 +00:00
parent 704f4d20d1
commit a59409f16b
1627 changed files with 489673 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
package index // import "github.com/influxdata/influxdb/tsdb/index"
import (
_ "github.com/influxdata/influxdb/tsdb/index/inmem"
_ "github.com/influxdata/influxdb/tsdb/index/tsi1"
)

View File

@@ -0,0 +1,988 @@
/*
Package inmem implements a shared, in-memory index for each database.
The in-memory index is the original index implementation and provides fast
access to index data. However, it also forces high memory usage for large
datasets and can cause OOM errors.
Index is the shared index structure that provides most of the functionality.
However, ShardIndex is a light per-shard wrapper that adapts this original
shared index format to the new per-shard format.
*/
package inmem
import (
"errors"
"fmt"
"regexp"
"sort"
"sync"
// "sync/atomic"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bytesutil"
"github.com/influxdata/influxdb/pkg/escape"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
// IndexName is the name of this index.
const IndexName = "inmem"
func init() {
tsdb.NewInmemIndex = func(name string) (interface{}, error) { return NewIndex(name), nil }
tsdb.RegisterIndex(IndexName, func(id uint64, database, path string, opt tsdb.EngineOptions) tsdb.Index {
return NewShardIndex(id, database, path, opt)
})
}
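// A hedged note on the registration above: the blank imports in
// tsdb/index (the first file in this diff) pull in this package for its
// side effects, so an engine configured with the "inmem" index name can
// construct shard indexes via tsdb.RegisterIndex without importing
// inmem directly.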
// Index is the in-memory index of a collection of measurements, time
// series, and their tags. Exported functions are goroutine-safe while
// unexported functions assume the caller will use the appropriate locks.
type Index struct {
mu sync.RWMutex
database string
// In-memory metadata index, built on load and updated when new series come in
measurements map[string]*Measurement // measurement name to object and index
series map[string]*Series // map series key to the Series object
lastID uint64 // last used series ID. They're in memory only for this shard
seriesSketch, seriesTSSketch *hll.Plus
measurementsSketch, measurementsTSSketch *hll.Plus
}
// NewIndex returns a new initialized Index.
func NewIndex(database string) *Index {
index := &Index{
database: database,
measurements: make(map[string]*Measurement),
series: make(map[string]*Series),
}
index.seriesSketch = hll.NewDefaultPlus()
index.seriesTSSketch = hll.NewDefaultPlus()
index.measurementsSketch = hll.NewDefaultPlus()
index.measurementsTSSketch = hll.NewDefaultPlus()
return index
}
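// A minimal usage sketch (hypothetical, not part of the vendored code):
// build a database index and register one series for shard 0. With a
// zero-valued EngineOptions, MaxSeriesPerDatabase is 0 and no limit applies.
//
//	idx := NewIndex("db0")
//	opt := tsdb.EngineOptions{}
//	tags := models.NewTags(map[string]string{"host": "a"})
//	if err := idx.CreateSeriesIfNotExists(0, []byte("cpu,host=a"), []byte("cpu"), tags, &opt, false); err != nil {
//		// handle error
//	}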
func (i *Index) Type() string { return IndexName }
func (i *Index) Open() (err error) { return nil }
func (i *Index) Close() error { return nil }
func (i *Index) WithLogger(zap.Logger) {}
// Series returns a series by key.
func (i *Index) Series(key []byte) (*Series, error) {
i.mu.RLock()
s := i.series[string(key)]
i.mu.RUnlock()
return s, nil
}
// SeriesSketches returns the sketches for the series.
func (i *Index) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) {
i.mu.RLock()
defer i.mu.RUnlock()
return i.seriesSketch.Clone(), i.seriesTSSketch.Clone(), nil
}
// SeriesN returns the number of unique non-tombstoned series in the index.
// Since indexes are not shared across shards, the count returned by SeriesN
// cannot be combined with other shards' counts.
func (i *Index) SeriesN() int64 {
i.mu.RLock()
n := int64(len(i.series))
i.mu.RUnlock()
return n
}
// Measurement returns the measurement object from the index by name.
func (i *Index) Measurement(name []byte) (*Measurement, error) {
i.mu.RLock()
defer i.mu.RUnlock()
return i.measurements[string(name)], nil
}
// MeasurementExists returns true if the measurement exists.
func (i *Index) MeasurementExists(name []byte) (bool, error) {
i.mu.RLock()
defer i.mu.RUnlock()
return i.measurements[string(name)] != nil, nil
}
// MeasurementsSketches returns the sketches for the measurements.
func (i *Index) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) {
i.mu.RLock()
defer i.mu.RUnlock()
return i.measurementsSketch.Clone(), i.measurementsTSSketch.Clone(), nil
}
// MeasurementsByName returns a list of measurements.
func (i *Index) MeasurementsByName(names [][]byte) ([]*Measurement, error) {
i.mu.RLock()
defer i.mu.RUnlock()
a := make([]*Measurement, 0, len(names))
for _, name := range names {
if m := i.measurements[string(name)]; m != nil {
a = append(a, m)
}
}
return a, nil
}
// CreateSeriesIfNotExists adds the series for the given measurement to the
// index and sets its ID, or returns the existing series object.
func (i *Index) CreateSeriesIfNotExists(shardID uint64, key, name []byte, tags models.Tags, opt *tsdb.EngineOptions, ignoreLimits bool) error {
i.mu.RLock()
// if there is a series for this id, it's already been added
ss := i.series[string(key)]
i.mu.RUnlock()
if ss != nil {
ss.AssignShard(shardID)
return nil
}
// get or create the measurement index
m := i.CreateMeasurementIndexIfNotExists(name)
i.mu.Lock()
// Check for the series again under a write lock
ss = i.series[string(key)]
if ss != nil {
i.mu.Unlock()
ss.AssignShard(shardID)
return nil
}
// Verify that the series will not exceed limit.
if !ignoreLimits {
if max := opt.Config.MaxSeriesPerDatabase; max > 0 && len(i.series)+1 > max {
i.mu.Unlock()
return errMaxSeriesPerDatabaseExceeded
}
}
// set the in-memory ID for query processing on this shard.
// The series key and tags are cloned to prevent a memory leak.
series := NewSeries([]byte(string(key)), tags.Clone())
series.ID = i.lastID + 1
i.lastID++
series.SetMeasurement(m)
i.series[string(key)] = series
m.AddSeries(series)
series.AssignShard(shardID)
// Add the series to the series sketch.
i.seriesSketch.Add(key)
i.mu.Unlock()
return nil
}
// CreateMeasurementIndexIfNotExists creates or retrieves an in-memory index
// object for the measurement.
func (i *Index) CreateMeasurementIndexIfNotExists(name []byte) *Measurement {
name = escape.Unescape(name)
// See if the measurement exists using a read-lock
i.mu.RLock()
m := i.measurements[string(name)]
if m != nil {
i.mu.RUnlock()
return m
}
i.mu.RUnlock()
// Doesn't exist, so lock the index to create it
i.mu.Lock()
defer i.mu.Unlock()
// Make sure it wasn't created in between the time we released our read lock
// and acquired the write lock.
m = i.measurements[string(name)]
if m == nil {
m = NewMeasurement(i.database, string(name))
i.measurements[string(name)] = m
// Add the measurement to the measurements sketch.
i.measurementsSketch.Add([]byte(name))
}
return m
}
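// The lookup above is the classic double-checked locking pattern: an
// optimistic read under RLock, then a re-check under the write lock so
// that two racing goroutines cannot both create the measurement. A
// standalone sketch of the same idea (hypothetical names):
//
//	mu.RLock()
//	v := cache[k]
//	mu.RUnlock()
//	if v == nil {
//		mu.Lock()
//		if v = cache[k]; v == nil { // re-check: another goroutine may have won
//			v = newValue()
//			cache[k] = v
//		}
//		mu.Unlock()
//	}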
// HasTagKey returns true if tag key exists.
func (i *Index) HasTagKey(name, key []byte) (bool, error) {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return false, nil
}
return mm.HasTagKey(string(key)), nil
}
// HasTagValue returns true if tag value exists.
func (i *Index) HasTagValue(name, key, value []byte) bool {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return false
}
return mm.HasTagKeyValue(key, value)
}
// TagValueN returns the number of distinct values for the given tag key.
func (i *Index) TagValueN(name, key []byte) int {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return 0
}
return mm.CardinalityBytes(key)
}
// MeasurementTagKeysByExpr returns an ordered set of tag keys filtered by an expression.
func (i *Index) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return nil, nil
}
return mm.TagKeysByExpr(expr)
}
// MeasurementTagKeyValuesByExpr returns a set of tag values filtered by an expression.
//
// See tsm1.Engine.MeasurementTagKeyValuesByExpr for a fuller description of this
// method.
func (i *Index) MeasurementTagKeyValuesByExpr(name []byte, keys []string, expr influxql.Expr, keysSorted bool) ([][]string, error) {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil || len(keys) == 0 {
return nil, nil
}
results := make([][]string, len(keys))
// If we haven't been provided sorted keys, then we need to sort them.
if !keysSorted {
sort.Sort(sort.StringSlice(keys))
}
ids, _, _ := mm.WalkWhereForSeriesIds(expr)
if ids.Len() == 0 && expr == nil {
for ki, key := range keys {
values := mm.TagValues(key)
sort.Sort(sort.StringSlice(values))
results[ki] = values
}
return results, nil
}
// This is the case where we have filtered series by some WHERE condition.
// We only care about the tag values for the keys given the
// filtered set of series ids.
keyIdxs := make(map[string]int, len(keys))
for ki, key := range keys {
keyIdxs[key] = ki
}
resultSet := make([]stringSet, len(keys))
for i := 0; i < len(resultSet); i++ {
resultSet[i] = newStringSet()
}
// Iterate all series to collect tag values.
for _, id := range ids {
s := mm.SeriesByID(id)
if s == nil {
continue
}
// Iterate the tag keys we're interested in and collect values
// from this series, if they exist.
for _, t := range s.Tags() {
if idx, ok := keyIdxs[string(t.Key)]; ok {
resultSet[idx].add(string(t.Value))
} else if string(t.Key) > keys[len(keys)-1] {
// The tag key is > the largest key we're interested in.
break
}
}
}
for i, s := range resultSet {
results[i] = s.list()
}
return results, nil
}
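// A hedged usage sketch (hypothetical measurement and keys): with a nil
// expression the method returns every value for each requested key, in
// sorted order, indexed by the key's position in the input slice.
//
//	vals, err := idx.MeasurementTagKeyValuesByExpr([]byte("cpu"), []string{"host", "region"}, nil, true)
//	// vals[0] holds the sorted values of "host"; vals[1] those of "region".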
// ForEachMeasurementTagKey iterates over all tag keys for a measurement.
func (i *Index) ForEachMeasurementTagKey(name []byte, fn func(key []byte) error) error {
// Ensure we do not hold a lock on the index while fn executes in case fn tries
// to acquire a lock on the index again. If another goroutine has Lock, this will
// deadlock.
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return nil
}
for _, key := range mm.TagKeys() {
if err := fn([]byte(key)); err != nil {
return err
}
}
return nil
}
// TagKeyCardinality returns the number of values for a measurement/tag key.
func (i *Index) TagKeyCardinality(name, key []byte) int {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return 0
}
return mm.CardinalityBytes(key)
}
// TagsForSeries returns the tag map for the passed-in series key.
func (i *Index) TagsForSeries(key string) (models.Tags, error) {
i.mu.RLock()
ss := i.series[key]
i.mu.RUnlock()
if ss == nil {
return nil, nil
}
return ss.Tags(), nil
}
// MeasurementNamesByExpr takes an expression containing only tags and returns a
// list of matching measurement names.
func (i *Index) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) {
i.mu.RLock()
defer i.mu.RUnlock()
// Return all measurement names if no expression is provided.
if expr == nil {
a := make([][]byte, 0, len(i.measurements))
for name := range i.measurements {
a = append(a, []byte(name))
}
bytesutil.Sort(a)
return a, nil
}
return i.measurementNamesByExpr(expr)
}
func (i *Index) measurementNamesByExpr(expr influxql.Expr) ([][]byte, error) {
if expr == nil {
return nil, nil
}
switch e := expr.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok {
return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String())
}
tf := &TagFilter{
Op: e.Op,
Key: tag.Val,
}
if influxql.IsRegexOp(e.Op) {
re, ok := e.RHS.(*influxql.RegexLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
}
tf.Regex = re.Val
} else {
s, ok := e.RHS.(*influxql.StringLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
}
tf.Value = s.Val
}
// Match on name, if specified.
if tag.Val == "_name" {
return i.measurementNamesByNameFilter(tf.Op, tf.Value, tf.Regex), nil
} else if influxql.IsSystemName(tag.Val) {
return nil, nil
}
return i.measurementNamesByTagFilters(tf), nil
case influxql.OR, influxql.AND:
lhs, err := i.measurementNamesByExpr(e.LHS)
if err != nil {
return nil, err
}
rhs, err := i.measurementNamesByExpr(e.RHS)
if err != nil {
return nil, err
}
if e.Op == influxql.OR {
return bytesutil.Union(lhs, rhs), nil
}
return bytesutil.Intersect(lhs, rhs), nil
default:
return nil, fmt.Errorf("invalid tag comparison operator")
}
case *influxql.ParenExpr:
return i.measurementNamesByExpr(e.Expr)
}
return nil, fmt.Errorf("%#v", expr)
}
// measurementNamesByNameFilter returns the sorted measurement names matching a name filter.
func (i *Index) measurementNamesByNameFilter(op influxql.Token, val string, regex *regexp.Regexp) [][]byte {
var names [][]byte
for _, m := range i.measurements {
var matched bool
switch op {
case influxql.EQ:
matched = m.Name == val
case influxql.NEQ:
matched = m.Name != val
case influxql.EQREGEX:
matched = regex.MatchString(m.Name)
case influxql.NEQREGEX:
matched = !regex.MatchString(m.Name)
}
if !matched {
continue
}
names = append(names, []byte(m.Name))
}
bytesutil.Sort(names)
return names
}
// measurementNamesByTagFilters returns the sorted measurements matching the filters on tag values.
func (i *Index) measurementNamesByTagFilters(filter *TagFilter) [][]byte {
// Build a list of measurements matching the filters.
var names [][]byte
var tagMatch bool
// Iterate through all measurements in the database.
for _, m := range i.measurements {
tagVals := m.SeriesByTagKeyValue(filter.Key)
if tagVals == nil {
continue
}
tagMatch = false
// If the operator is non-regex, only check the specified value.
if filter.Op == influxql.EQ || filter.Op == influxql.NEQ {
if _, ok := tagVals[filter.Value]; ok {
tagMatch = true
}
} else {
// Else, the operator is a regex and we have to check all tag
// values against the regular expression.
for tagVal := range tagVals {
if filter.Regex.MatchString(tagVal) {
tagMatch = true
continue
}
}
}
//
// XNOR gate
//
// tags match | operation is EQ | measurement matches
// --------------------------------------------------
// True | True | True
// True | False | False
// False | True | False
// False | False | True
if tagMatch == (filter.Op == influxql.EQ || filter.Op == influxql.EQREGEX) {
names = append(names, []byte(m.Name))
continue
}
}
bytesutil.Sort(names)
return names
}
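// The XNOR table above collapses to the single comparison used in the
// loop. Restated as a standalone predicate (hypothetical helper, not in
// the vendored code):
//
//	func measurementMatches(tagMatch, opIsEqual bool) bool {
//		return tagMatch == opIsEqual // true only when both agree
//	}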
// MeasurementNamesByRegex returns the measurements that match the regex.
func (i *Index) MeasurementNamesByRegex(re *regexp.Regexp) ([][]byte, error) {
i.mu.RLock()
defer i.mu.RUnlock()
var matches [][]byte
for _, m := range i.measurements {
if re.MatchString(m.Name) {
matches = append(matches, []byte(m.Name))
}
}
return matches, nil
}
// DropMeasurement removes the measurement and all of its underlying
// series from the database index
func (i *Index) DropMeasurement(name []byte) error {
i.mu.Lock()
defer i.mu.Unlock()
return i.dropMeasurement(string(name))
}
func (i *Index) dropMeasurement(name string) error {
// Update the tombstone sketch.
i.measurementsTSSketch.Add([]byte(name))
m := i.measurements[name]
if m == nil {
return nil
}
delete(i.measurements, name)
for _, s := range m.SeriesByIDMap() {
delete(i.series, s.Key)
i.seriesTSSketch.Add([]byte(s.Key))
}
return nil
}
// DropSeries removes the series key and its tags from the index.
func (i *Index) DropSeries(key []byte) error {
if key == nil {
return nil
}
i.mu.Lock()
k := string(key)
series := i.series[k]
if series == nil {
i.mu.Unlock()
return nil
}
// Update the tombstone sketch.
i.seriesTSSketch.Add([]byte(k))
// Remove from the index.
delete(i.series, k)
// Remove the measurement's reference.
series.Measurement().DropSeries(series)
// If the measurement no longer has any series, remove it as well.
if !series.Measurement().HasSeries() {
i.dropMeasurement(series.Measurement().Name)
}
i.mu.Unlock()
return nil
}
// ForEachMeasurementSeriesByExpr iterates over all series in a measurement filtered by an expression.
func (i *Index) ForEachMeasurementSeriesByExpr(name []byte, expr influxql.Expr, fn func(tags models.Tags) error) error {
i.mu.RLock()
mm := i.measurements[string(name)]
i.mu.RUnlock()
if mm == nil {
return nil
}
if err := mm.ForEachSeriesByExpr(expr, fn); err != nil {
return err
}
return nil
}
// TagSets returns a list of tag sets.
func (i *Index) TagSets(shardID uint64, name []byte, opt influxql.IteratorOptions) ([]*influxql.TagSet, error) {
i.mu.RLock()
defer i.mu.RUnlock()
mm := i.measurements[string(name)]
if mm == nil {
return nil, nil
}
tagSets, err := mm.TagSets(shardID, opt)
if err != nil {
return nil, err
}
return tagSets, nil
}
func (i *Index) SeriesKeys() []string {
i.mu.RLock()
s := make([]string, 0, len(i.series))
for k := range i.series {
s = append(s, k)
}
i.mu.RUnlock()
return s
}
// SetFieldSet sets a shared field set from the engine.
func (i *Index) SetFieldSet(*tsdb.MeasurementFieldSet) {}
// SetFieldName adds a field name to a measurement.
func (i *Index) SetFieldName(measurement []byte, name string) {
m := i.CreateMeasurementIndexIfNotExists(measurement)
m.SetFieldName(name)
}
// ForEachMeasurementName iterates over each measurement name.
func (i *Index) ForEachMeasurementName(fn func(name []byte) error) error {
i.mu.RLock()
defer i.mu.RUnlock()
mms := make(Measurements, 0, len(i.measurements))
for _, m := range i.measurements {
mms = append(mms, m)
}
sort.Sort(mms)
for _, m := range mms {
if err := fn([]byte(m.Name)); err != nil {
return err
}
}
return nil
}
func (i *Index) MeasurementSeriesKeysByExpr(name []byte, condition influxql.Expr) ([][]byte, error) {
i.mu.RLock()
defer i.mu.RUnlock()
m := i.measurements[string(name)]
if m == nil {
return nil, nil
}
// Return all series if no condition specified.
if condition == nil {
return m.SeriesKeys(), nil
}
// Get series IDs that match the WHERE clause.
ids, filters, err := m.WalkWhereForSeriesIds(condition)
if err != nil {
return nil, err
}
// Delete boolean literal true filter expressions.
// These are returned for `WHERE tagKey = 'tagVal'` type expressions and are okay.
filters.DeleteBoolLiteralTrues()
// Check for unsupported field filters.
// Any remaining filters means there were fields (e.g., `WHERE value = 1.2`).
if filters.Len() > 0 {
return nil, errors.New("fields not supported in WHERE clause during deletion")
}
return m.SeriesKeysByID(ids), nil
}
// SeriesPointIterator returns an influxql iterator over all series.
func (i *Index) SeriesPointIterator(opt influxql.IteratorOptions) (influxql.Iterator, error) {
// Read and sort all measurements.
mms := make(Measurements, 0, len(i.measurements))
for _, mm := range i.measurements {
mms = append(mms, mm)
}
sort.Sort(mms)
return &seriesPointIterator{
mms: mms,
point: influxql.FloatPoint{
Aux: make([]interface{}, len(opt.Aux)),
},
opt: opt,
}, nil
}
// SnapshotTo is a no-op since this is an in-memory index.
func (i *Index) SnapshotTo(path string) error { return nil }
// AssignShard updates the index to indicate that series k exists in the given shardID.
func (i *Index) AssignShard(k string, shardID uint64) {
ss, _ := i.Series([]byte(k))
if ss != nil {
ss.AssignShard(shardID)
}
}
// UnassignShard updates the index to indicate that series k does not exist in
// the given shardID.
func (i *Index) UnassignShard(k string, shardID uint64) error {
ss, _ := i.Series([]byte(k))
if ss != nil {
if ss.Assigned(shardID) {
// Remove the shard from any series
ss.UnassignShard(shardID)
// If this series no longer has shards assigned, remove the series
if ss.ShardN() == 0 {
// Remove the series key from the index.
return i.DropSeries([]byte(k))
}
}
}
return nil
}
// RemoveShard removes all references to shardID from any series or measurements
// in the index. If the shard was the only owner of data for the series, the series
// is removed from the index.
func (i *Index) RemoveShard(shardID uint64) {
for _, k := range i.SeriesKeys() {
i.UnassignShard(k, shardID)
}
}
// assignExistingSeries assigns the existing series to shardID and returns the
// keys, names, and tags of series that do not yet exist.
func (i *Index) assignExistingSeries(shardID uint64, keys, names [][]byte, tagsSlice []models.Tags) ([][]byte, [][]byte, []models.Tags) {
i.mu.RLock()
var n int
for j, key := range keys {
if ss, ok := i.series[string(key)]; !ok {
keys[n] = keys[j]
names[n] = names[j]
tagsSlice[n] = tagsSlice[j]
n++
} else {
ss.AssignShard(shardID)
}
}
i.mu.RUnlock()
return keys[:n], names[:n], tagsSlice[:n]
}
// Ensure index implements interface.
var _ tsdb.Index = &ShardIndex{}
// ShardIndex represents a shim between the TSDB index interface and the shared
// in-memory index. This is required because per-shard in-memory indexes will
// grow the heap size too large.
type ShardIndex struct {
*Index
id uint64 // shard id
opt tsdb.EngineOptions
}
// CreateSeriesListIfNotExists creates a list of series in bulk if they don't already exist.
func (idx *ShardIndex) CreateSeriesListIfNotExists(keys, names [][]byte, tagsSlice []models.Tags) error {
keys, names, tagsSlice = idx.assignExistingSeries(idx.id, keys, names, tagsSlice)
if len(keys) == 0 {
return nil
}
var reason string
var dropped int
var droppedKeys map[string]struct{}
// Ensure that no tags go over the maximum cardinality.
if maxValuesPerTag := idx.opt.Config.MaxValuesPerTag; maxValuesPerTag > 0 {
var n int
outer:
for i, name := range names {
tags := tagsSlice[i]
for _, tag := range tags {
// Skip if the tag value already exists.
if idx.HasTagValue(name, tag.Key, tag.Value) {
continue
}
// Read cardinality. Skip if we're below the threshold.
n := idx.TagValueN(name, tag.Key)
if n < maxValuesPerTag {
continue
}
dropped++
reason = fmt.Sprintf("max-values-per-tag limit exceeded (%d/%d): measurement=%q tag=%q value=%q",
n, maxValuesPerTag, name, string(tag.Key), string(tag.Value))
if droppedKeys == nil {
droppedKeys = make(map[string]struct{})
}
droppedKeys[string(keys[i])] = struct{}{}
continue outer
}
// Increment success count if all checks complete.
keys[n], names[n], tagsSlice[n] = keys[i], names[i], tagsSlice[i]
n++
}
// Slice to only include successful points.
keys, names, tagsSlice = keys[:n], names[:n], tagsSlice[:n]
}
// Write
for i := range keys {
if err := idx.CreateSeriesIfNotExists(keys[i], names[i], tagsSlice[i]); err == errMaxSeriesPerDatabaseExceeded {
dropped++
reason = fmt.Sprintf("max-series-per-database limit exceeded: (%d)", idx.opt.Config.MaxSeriesPerDatabase)
if droppedKeys == nil {
droppedKeys = make(map[string]struct{})
}
droppedKeys[string(keys[i])] = struct{}{}
continue
} else if err != nil {
return err
}
}
// Report partial writes back to shard.
if dropped > 0 {
return &tsdb.PartialWriteError{
Reason: reason,
Dropped: dropped,
DroppedKeys: droppedKeys,
}
}
return nil
}
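// A hedged sketch of caller-side handling (assumed caller code): a
// partial write surfaces as *tsdb.PartialWriteError carrying the drop
// count and reason, while the remaining series are still created.
//
//	if err := idx.CreateSeriesListIfNotExists(keys, names, tagsSlice); err != nil {
//		if pwe, ok := err.(*tsdb.PartialWriteError); ok {
//			log.Printf("dropped %d series: %s", pwe.Dropped, pwe.Reason)
//		} else {
//			return err
//		}
//	}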
// InitializeSeries is called during startup.
// This works the same as CreateSeriesIfNotExists except it ignores limit errors.
func (i *ShardIndex) InitializeSeries(key, name []byte, tags models.Tags) error {
return i.Index.CreateSeriesIfNotExists(i.id, key, name, tags, &i.opt, true)
}
func (i *ShardIndex) CreateSeriesIfNotExists(key, name []byte, tags models.Tags) error {
return i.Index.CreateSeriesIfNotExists(i.id, key, name, tags, &i.opt, false)
}
// TagSets returns a list of tag sets based on series filtering.
func (i *ShardIndex) TagSets(name []byte, opt influxql.IteratorOptions) ([]*influxql.TagSet, error) {
return i.Index.TagSets(i.id, name, opt)
}
// NewShardIndex returns a new index for a shard.
func NewShardIndex(id uint64, database, path string, opt tsdb.EngineOptions) tsdb.Index {
return &ShardIndex{
Index: opt.InmemIndex.(*Index),
id: id,
opt: opt,
}
}
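// A minimal wiring sketch (assumptions: the caller owns the shared
// *Index and passes it through EngineOptions.InmemIndex, as the type
// assertion above requires):
//
//	shared := NewIndex("db0")
//	opt := tsdb.EngineOptions{InmemIndex: shared}
//	si := NewShardIndex(1, "db0", "/unused/path", opt) // path is unused by this constructor
//
// Every shard of the database wraps the same shared index, which is the
// point of the shim described on ShardIndex.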
// seriesPointIterator emits series as influxql points.
type seriesPointIterator struct {
mms Measurements
keys struct {
buf []string
i int
}
point influxql.FloatPoint // reusable point
opt influxql.IteratorOptions
}
// Stats returns stats about the points processed.
func (itr *seriesPointIterator) Stats() influxql.IteratorStats { return influxql.IteratorStats{} }
// Close closes the iterator.
func (itr *seriesPointIterator) Close() error { return nil }
// Next emits the next point in the iterator.
func (itr *seriesPointIterator) Next() (*influxql.FloatPoint, error) {
for {
// Load next measurement's keys if there are no more remaining.
if itr.keys.i >= len(itr.keys.buf) {
if err := itr.nextKeys(); err != nil {
return nil, err
}
if len(itr.keys.buf) == 0 {
return nil, nil
}
}
// Read the next key.
key := itr.keys.buf[itr.keys.i]
itr.keys.i++
// Write auxiliary fields.
for i, f := range itr.opt.Aux {
switch f.Val {
case "key":
itr.point.Aux[i] = key
}
}
return &itr.point, nil
}
}
// nextKeys reads all keys for the next measurement.
func (itr *seriesPointIterator) nextKeys() error {
for {
// Ensure previous keys are cleared out.
itr.keys.i, itr.keys.buf = 0, itr.keys.buf[:0]
// Read next measurement.
if len(itr.mms) == 0 {
return nil
}
mm := itr.mms[0]
itr.mms = itr.mms[1:]
// Read all series keys.
ids, err := mm.SeriesIDsAllOrByExpr(itr.opt.Condition)
if err != nil {
return err
} else if len(ids) == 0 {
continue
}
itr.keys.buf = mm.AppendSeriesKeysByID(itr.keys.buf, ids)
sort.Strings(itr.keys.buf)
return nil
}
}
// errMaxSeriesPerDatabaseExceeded is a marker error returned during series creation
// to indicate that a new series would exceed the limits of the database.
var errMaxSeriesPerDatabaseExceeded = errors.New("max series per database exceeded")

File diff suppressed because it is too large

View File

@@ -0,0 +1,258 @@
package inmem_test
import (
"fmt"
"strings"
"testing"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/inmem"
)
// Test comparing SeriesIDs for equality.
func TestSeriesIDs_Equals(t *testing.T) {
ids1 := inmem.SeriesIDs([]uint64{1, 2, 3})
ids2 := inmem.SeriesIDs([]uint64{1, 2, 3})
ids3 := inmem.SeriesIDs([]uint64{4, 5, 6})
if !ids1.Equals(ids2) {
t.Fatal("expected ids1 == ids2")
} else if ids1.Equals(ids3) {
t.Fatal("expected ids1 != ids3")
}
}
// Test intersecting sets of SeriesIDs.
func TestSeriesIDs_Intersect(t *testing.T) {
// Test swapping l & r, all branches of if-else, and exit loop when 'j < len(r)'
ids1 := inmem.SeriesIDs([]uint64{1, 3, 4, 5, 6})
ids2 := inmem.SeriesIDs([]uint64{1, 2, 3, 7})
exp := inmem.SeriesIDs([]uint64{1, 3})
got := ids1.Intersect(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit for loop when 'i < len(l)'
ids1 = inmem.SeriesIDs([]uint64{1})
ids2 = inmem.SeriesIDs([]uint64{1, 2})
exp = inmem.SeriesIDs([]uint64{1})
got = ids1.Intersect(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
// Test union sets of SeriesIDs.
func TestSeriesIDs_Union(t *testing.T) {
// Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left.
ids1 := inmem.SeriesIDs([]uint64{1, 2, 3, 7})
ids2 := inmem.SeriesIDs([]uint64{1, 3, 4, 5, 6})
exp := inmem.SeriesIDs([]uint64{1, 2, 3, 4, 5, 6, 7})
got := ids1.Union(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit because of 'i < len(l)' and append remainder from right.
ids1 = inmem.SeriesIDs([]uint64{1})
ids2 = inmem.SeriesIDs([]uint64{1, 2})
exp = inmem.SeriesIDs([]uint64{1, 2})
got = ids1.Union(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
// Test removing one set of SeriesIDs from another.
func TestSeriesIDs_Reject(t *testing.T) {
// Test all branches of if-else, exit loop because of 'j < len(r)', and append remainder from left.
ids1 := inmem.SeriesIDs([]uint64{1, 2, 3, 7})
ids2 := inmem.SeriesIDs([]uint64{1, 3, 4, 5, 6})
exp := inmem.SeriesIDs([]uint64{2, 7})
got := ids1.Reject(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
// Test exit because of 'i < len(l)'.
ids1 = inmem.SeriesIDs([]uint64{1})
ids2 = inmem.SeriesIDs([]uint64{1, 2})
exp = inmem.SeriesIDs{}
got = ids1.Reject(ids2)
if !exp.Equals(got) {
t.Fatalf("exp=%v, got=%v", exp, got)
}
}
func TestMeasurement_AppendSeriesKeysByID_Missing(t *testing.T) {
m := inmem.NewMeasurement("foo", "cpu")
var dst []string
dst = m.AppendSeriesKeysByID(dst, []uint64{1})
if exp, got := 0, len(dst); exp != got {
t.Fatalf("series len mismatch: exp %v, got %v", exp, got)
}
}
func TestMeasurement_AppendSeriesKeysByID_Exists(t *testing.T) {
m := inmem.NewMeasurement("foo", "cpu")
s := inmem.NewSeries([]byte("cpu,host=foo"), models.Tags{models.NewTag([]byte("host"), []byte("foo"))})
s.ID = 1
m.AddSeries(s)
var dst []string
dst = m.AppendSeriesKeysByID(dst, []uint64{1})
if exp, got := 1, len(dst); exp != got {
t.Fatalf("series len mismatch: exp %v, got %v", exp, got)
}
if exp, got := "cpu,host=foo", dst[0]; exp != got {
t.Fatalf("series mismatch: exp %v, got %v", exp, got)
}
}
func TestMeasurement_TagsSet_Deadlock(t *testing.T) {
m := inmem.NewMeasurement("foo", "cpu")
s1 := inmem.NewSeries([]byte("cpu,host=foo"), models.Tags{models.NewTag([]byte("host"), []byte("foo"))})
s1.ID = 1
m.AddSeries(s1)
s2 := inmem.NewSeries([]byte("cpu,host=bar"), models.Tags{models.NewTag([]byte("host"), []byte("bar"))})
s2.ID = 2
m.AddSeries(s2)
m.DropSeries(s1)
// This was deadlocking
m.TagSets(1, influxql.IteratorOptions{})
if got, exp := len(m.SeriesIDs()), 1; got != exp {
t.Fatalf("series count mismatch: got %v, exp %v", got, exp)
}
}
func TestMeasurement_ForEachSeriesByExpr_Deadlock(t *testing.T) {
m := inmem.NewMeasurement("foo", "cpu")
s1 := inmem.NewSeries([]byte("cpu,host=foo"), models.Tags{models.NewTag([]byte("host"), []byte("foo"))})
s1.ID = 1
m.AddSeries(s1)
s2 := inmem.NewSeries([]byte("cpu,host=bar"), models.Tags{models.NewTag([]byte("host"), []byte("bar"))})
s2.ID = 2
m.AddSeries(s2)
m.DropSeries(s1)
// This was deadlocking
m.ForEachSeriesByExpr(nil, func(tags models.Tags) error {
return nil
})
if got, exp := len(m.SeriesIDs()), 1; got != exp {
t.Fatalf("series count mismatch: got %v, exp %v", got, exp)
}
}
func BenchmarkMeasurement_SeriesIDForExp_EQRegex(b *testing.B) {
m := inmem.NewMeasurement("foo", "cpu")
for i := 0; i < 100000; i++ {
s := inmem.NewSeries([]byte("cpu"), models.Tags{models.NewTag(
[]byte("host"),
[]byte(fmt.Sprintf("host%d", i)))})
s.ID = uint64(i)
m.AddSeries(s)
}
if exp, got := 100000, len(m.SeriesKeys()); exp != got {
b.Fatalf("series count mismatch: exp %v got %v", exp, got)
}
stmt, err := influxql.NewParser(strings.NewReader(`SELECT * FROM cpu WHERE host =~ /host\d+/`)).ParseStatement()
if err != nil {
b.Fatalf("invalid statement: %s", err)
}
selectStmt := stmt.(*influxql.SelectStatement)
b.ResetTimer()
for i := 0; i < b.N; i++ {
ids := m.IDsForExpr(selectStmt.Condition.(*influxql.BinaryExpr))
if exp, got := 100000, len(ids); exp != got {
b.Fatalf("series count mismatch: exp %v got %v", exp, got)
}
}
}
func BenchmarkMeasurement_SeriesIDForExp_NERegex(b *testing.B) {
m := inmem.NewMeasurement("foo", "cpu")
for i := 0; i < 100000; i++ {
s := inmem.NewSeries([]byte("cpu"), models.Tags{models.Tag{
Key: []byte("host"),
Value: []byte(fmt.Sprintf("host%d", i))}})
s.ID = uint64(i)
m.AddSeries(s)
}
if exp, got := 100000, len(m.SeriesKeys()); exp != got {
b.Fatalf("series count mismatch: exp %v got %v", exp, got)
}
stmt, err := influxql.NewParser(strings.NewReader(`SELECT * FROM cpu WHERE host !~ /foo\d+/`)).ParseStatement()
if err != nil {
b.Fatalf("invalid statement: %s", err)
}
selectStmt := stmt.(*influxql.SelectStatement)
b.ResetTimer()
for i := 0; i < b.N; i++ {
ids := m.IDsForExpr(selectStmt.Condition.(*influxql.BinaryExpr))
if exp, got := 100000, len(ids); exp != got {
b.Fatalf("series count mismatch: exp %v got %v", exp, got)
}
}
}
func benchmarkTagSets(b *testing.B, n int, opt influxql.IteratorOptions) {
m := inmem.NewMeasurement("foo", "m")
for i := 0; i < n; i++ {
tags := map[string]string{"tag1": "value1", "tag2": "value2"}
s := inmem.NewSeries([]byte("m,tag1=value1,tag2=value2"), models.NewTags(tags))
s.ID = uint64(i)
s.AssignShard(0)
m.AddSeries(s)
}
// warm caches
m.TagSets(0, opt)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
m.TagSets(0, opt)
}
}
func BenchmarkMeasurement_TagSetsNoDimensions_1000(b *testing.B) {
benchmarkTagSets(b, 1000, influxql.IteratorOptions{})
}
func BenchmarkMeasurement_TagSetsDimensions_1000(b *testing.B) {
benchmarkTagSets(b, 1000, influxql.IteratorOptions{Dimensions: []string{"tag1", "tag2"}})
}
func BenchmarkMeasurement_TagSetsNoDimensions_100000(b *testing.B) {
benchmarkTagSets(b, 100000, influxql.IteratorOptions{})
}
func BenchmarkMeasurement_TagSetsDimensions_100000(b *testing.B) {
benchmarkTagSets(b, 100000, influxql.IteratorOptions{Dimensions: []string{"tag1", "tag2"}})
}

View File

@@ -0,0 +1,71 @@
package internal
import (
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// File is a mock implementation of a tsi1.File.
type File struct {
Closef func() error
Pathf func() string
IDf func() int
Levelf func() int
Measurementf func(name []byte) tsi1.MeasurementElem
MeasurementIteratorf func() tsi1.MeasurementIterator
HasSeriesf func(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool)
Seriesf func(name []byte, tags models.Tags) tsi1.SeriesElem
SeriesNf func() uint64
TagKeyf func(name, key []byte) tsi1.TagKeyElem
TagKeyIteratorf func(name []byte) tsi1.TagKeyIterator
TagValuef func(name, key, value []byte) tsi1.TagValueElem
TagValueIteratorf func(name, key []byte) tsi1.TagValueIterator
SeriesIteratorf func() tsi1.SeriesIterator
MeasurementSeriesIteratorf func(name []byte) tsi1.SeriesIterator
TagKeySeriesIteratorf func(name, key []byte) tsi1.SeriesIterator
TagValueSeriesIteratorf func(name, key, value []byte) tsi1.SeriesIterator
MergeSeriesSketchesf func(s, t estimator.Sketch) error
MergeMeasurementsSketchesf func(s, t estimator.Sketch) error
Retainf func()
Releasef func()
Filterf func() *bloom.Filter
}
func (f *File) Close() error { return f.Closef() }
func (f *File) Path() string { return f.Pathf() }
func (f *File) ID() int { return f.IDf() }
func (f *File) Level() int { return f.Levelf() }
func (f *File) Measurement(name []byte) tsi1.MeasurementElem { return f.Measurementf(name) }
func (f *File) MeasurementIterator() tsi1.MeasurementIterator { return f.MeasurementIteratorf() }
func (f *File) HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) {
return f.HasSeriesf(name, tags, buf)
}
func (f *File) Series(name []byte, tags models.Tags) tsi1.SeriesElem { return f.Seriesf(name, tags) }
func (f *File) SeriesN() uint64 { return f.SeriesNf() }
func (f *File) TagKey(name, key []byte) tsi1.TagKeyElem { return f.TagKeyf(name, key) }
func (f *File) TagKeyIterator(name []byte) tsi1.TagKeyIterator { return f.TagKeyIteratorf(name) }
func (f *File) TagValue(name, key, value []byte) tsi1.TagValueElem {
return f.TagValuef(name, key, value)
}
func (f *File) TagValueIterator(name, key []byte) tsi1.TagValueIterator {
return f.TagValueIteratorf(name, key)
}
func (f *File) SeriesIterator() tsi1.SeriesIterator { return f.SeriesIteratorf() }
func (f *File) MeasurementSeriesIterator(name []byte) tsi1.SeriesIterator {
return f.MeasurementSeriesIteratorf(name)
}
func (f *File) TagKeySeriesIterator(name, key []byte) tsi1.SeriesIterator {
return f.TagKeySeriesIteratorf(name, key)
}
func (f *File) TagValueSeriesIterator(name, key, value []byte) tsi1.SeriesIterator {
return f.TagValueSeriesIteratorf(name, key, value)
}
func (f *File) MergeSeriesSketches(s, t estimator.Sketch) error { return f.MergeSeriesSketchesf(s, t) }
func (f *File) MergeMeasurementsSketches(s, t estimator.Sketch) error {
return f.MergeMeasurementsSketchesf(s, t)
}
func (f *File) Retain() { f.Retainf() }
func (f *File) Release() { f.Releasef() }
func (f *File) Filter() *bloom.Filter { return f.Filterf() }
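// A hedged usage sketch (hypothetical test): populate only the stubs
// the code under test calls; invoking an unset stub panics on the nil
// function, which quickly flags a missing expectation.
//
//	f := &internal.File{
//		IDf:    func() int { return 42 },
//		Levelf: func() int { return 1 },
//	}
//	_ = f.ID()    // 42
//	_ = f.Level() // 1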

View File

@@ -0,0 +1,238 @@
/*
Package tsi1 provides a memory-mapped index implementation that supports
high cardinality series.
Overview
The top-level object in tsi1 is the Index. It is the primary access point from
the rest of the system. The Index is composed of LogFile and IndexFile objects.
Log files are small write-ahead log files that record new series immediately
in the order that they are received. The data within the file is indexed
in-memory so it can be quickly accessed. When the system is restarted, this log
file is replayed and the in-memory representation is rebuilt.
Index files also contain series information, however, they are highly indexed
so that reads can be performed quickly. Index files are built through a process
called compaction where a log file or multiple index files are merged together.
Operations
The index can perform many tasks related to series, measurement, & tag data.
All data is inserted by adding a series to the index. When adding a series,
the measurement, tag keys, and tag values are all extracted and indexed
separately.
Once a series has been added, it can be removed in several ways. First, the
individual series can be removed. Second, it can be removed as part of a bulk
operation by deleting the entire measurement.
The query engine needs to be able to look up series in a variety of ways such
as by measurement name, by tag value, or by using regular expressions. The
index provides an API to iterate over subsets of series and perform set
operations such as unions and intersections.
Log File Layout
The write-ahead file that series are initially inserted into appends all new
operations sequentially. It is composed of a series of log
entries. An entry contains a flag to specify the operation type, the measurement
name, the tag set, and a checksum.
┏━━━━━━━━━LogEntry━━━━━━━━━┓
┃ ┌──────────────────────┐ ┃
┃ │ Flag │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Measurement │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Key/Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Key/Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Key/Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Checksum │ ┃
┃ └──────────────────────┘ ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛
When the log file is replayed, if the checksum is incorrect or the entry is
incomplete (because of a partially failed write) then the log is truncated.
Index File Layout
The index file is composed of 3 main block types: one series block, one or more
tag blocks, and one measurement block. At the end of the index file is a
trailer that records metadata such as the offsets to these blocks.
Series Block Layout
The series block stores raw series keys in sorted order. It also provides hash
indexes so that series can be looked up quickly. Hash indexes are inserted
periodically so that memory size is limited at write time. Once all the series
and hash indexes have been written, a list of index entries is written
so that hash indexes can be looked up via binary search.
The end of the block contains two HyperLogLog++ sketches which track the
estimated number of created series and deleted series. After the sketches is
a trailer which contains metadata about the block.
┏━━━━━━━SeriesBlock━━━━━━━━┓
┃ ┌──────────────────────┐ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Series Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Index Entries │ ┃
┃ ├──────────────────────┤ ┃
┃ │ HLL Sketches │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Trailer │ ┃
┃ └──────────────────────┘ ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Tag Block Layout
After the series block is one or more tag blocks. One of these blocks exists
for every measurement in the index file. The block is structured as a sorted
list of values for each key and then a sorted list of keys. Each of these lists
has its own hash index for fast direct lookups.
┏━━━━━━━━Tag Block━━━━━━━━━┓
┃ ┌──────────────────────┐ ┃
┃ │ Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ └──────────────────────┘ ┃
┃ ┌──────────────────────┐ ┃
┃ │ Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Value │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ └──────────────────────┘ ┃
┃ ┌──────────────────────┐ ┃
┃ │ Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Key │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ └──────────────────────┘ ┃
┃ ┌──────────────────────┐ ┃
┃ │ Trailer │ ┃
┃ └──────────────────────┘ ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Each entry for values contains a sorted list of offsets for series keys that use
that value. Series iterators can be built around a single tag key value or
multiple iterators can be merged with set operators such as union or
intersection.
Measurement block
The measurement block stores a sorted list of measurements, their associated
series offsets, and the offset to their tag block. This allows all series for
a measurement to be traversed quickly and it allows fast direct lookups of
measurements and their tags.
This block also contains HyperLogLog++ sketches for new and deleted
measurements.
┏━━━━Measurement Block━━━━━┓
┃ ┌──────────────────────┐ ┃
┃ │ Measurement │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Measurement │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Measurement │ ┃
┃ ├──────────────────────┤ ┃
┃ │ │ ┃
┃ │ Hash Index │ ┃
┃ │ │ ┃
┃ ├──────────────────────┤ ┃
┃ │ HLL Sketches │ ┃
┃ ├──────────────────────┤ ┃
┃ │ Trailer │ ┃
┃ └──────────────────────┘ ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛
Manifest file
The index is simply an ordered set of log and index files. These files can be
merged together or rewritten but their order must always be the same. This is
because series, measurements, & tags can be marked as deleted (aka tombstoned)
and this action needs to be tracked in time order.
Whenever the set of active files is changed, a manifest file is written to
track the set. The manifest specifies the ordering of files and, on startup,
all files not in the manifest are removed from the index directory.
Compacting index files
Compaction is the process of taking files and merging them together into a
single file. There are two stages of compaction within TSI.
First, once log files exceed a size threshold, they are compacted into an
index file. This threshold is relatively small because log files must maintain
their index in the heap, which TSI tries to avoid. Small log files are also very
quick to convert into an index file, so this is done aggressively.
Second, once a contiguous set of index files exceeds a factor (e.g. 10x),
they are all merged together into a single index file and the old files are
discarded. Because all blocks are written in sorted order, the new index file
can be streamed, minimizing memory use.
Concurrency
Index files are immutable, so they do not require fine-grained locks; however,
compactions require that we track which files are in use so they are not
discarded too soon. This is done by using reference counting with file sets.
A file set is simply an ordered list of index files. When the current file set
is obtained from the index, a counter is incremented to track its usage. Once
the user is done with the file set, it is released and the counter is
decremented. A file cannot be removed from the file system until this counter
returns to zero.
Besides the reference counting, there are no other locking mechanisms when
reading or writing index files. Log files, however, do require a lock whenever
they are accessed. This is another reason to minimize log file size.
*/
package tsi1
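// A minimal sketch of the reference-counting contract described in the
// Concurrency section above (assumed caller-side usage; the accessor
// name retainCurrentFileSet is illustrative):
//
//	fs := retainCurrentFileSet() // increments each file's reference count
//	defer fs.Release()           // decrement when done; files may then be deleted
//	itr := fs.MeasurementIterator()
//	for e := itr.Next(); e != nil; e = itr.Next() {
//		// read-only access is safe while the reference is held
//	}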

View File

@@ -0,0 +1,998 @@
package tsi1
import (
"bytes"
"errors"
"fmt"
"regexp"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/pkg/bytesutil"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/tsdb"
)
// FileSet represents a collection of files.
type FileSet struct {
levels []CompactionLevel
files []File
filters []*bloom.Filter // per-level filters
}
// NewFileSet returns a new instance of FileSet.
func NewFileSet(levels []CompactionLevel, files []File) (*FileSet, error) {
fs := &FileSet{levels: levels, files: files}
if err := fs.buildFilters(); err != nil {
return nil, err
}
return fs, nil
}
// Close closes all the files in the file set.
func (fs *FileSet) Close() error {
var err error
for _, f := range fs.files {
if e := f.Close(); e != nil && err == nil {
err = e
}
}
return err
}
// Retain adds a reference count to all files.
func (fs *FileSet) Retain() {
for _, f := range fs.files {
f.Retain()
}
}
// Release removes a reference count from all files.
func (fs *FileSet) Release() {
for _, f := range fs.files {
f.Release()
}
}
// Prepend returns a new file set with f added at the beginning.
func (fs *FileSet) Prepend(f File) (*FileSet, error) {
return NewFileSet(fs.levels, append([]File{f}, fs.files...))
}
// MustReplace swaps a list of files for a single file and returns a new file set.
// The caller should always guarantee that the files exist and are contiguous.
func (fs *FileSet) MustReplace(oldFiles []File, newFile File) *FileSet {
assert(len(oldFiles) > 0, "cannot replace empty files")
// Find index of first old file.
var i int
for ; i < len(fs.files); i++ {
if fs.files[i] == oldFiles[0] {
break
} else if i == len(fs.files)-1 {
panic("first replacement file not found")
}
}
// Ensure all old files are contiguous.
for j := range oldFiles {
if fs.files[i+j] != oldFiles[j] {
panic(fmt.Sprintf("cannot replace non-contiguous files: subset=%+v, fileset=%+v", Files(oldFiles).IDs(), Files(fs.files).IDs()))
}
}
// Copy to new fileset.
other := make([]File, len(fs.files)-len(oldFiles)+1)
copy(other[:i], fs.files[:i])
other[i] = newFile
copy(other[i+1:], fs.files[i+len(oldFiles):])
fs, err := NewFileSet(fs.levels, other)
if err != nil {
panic("cannot build file set: " + err.Error())
}
return fs
}
// MaxID returns the highest file identifier.
func (fs *FileSet) MaxID() int {
var max int
for _, f := range fs.files {
if i := f.ID(); i > max {
max = i
}
}
return max
}
// Files returns all files in the set.
func (fs *FileSet) Files() []File {
return fs.files
}
// LogFiles returns all log files from the file set.
func (fs *FileSet) LogFiles() []*LogFile {
var a []*LogFile
for _, f := range fs.files {
if f, ok := f.(*LogFile); ok {
a = append(a, f)
}
}
return a
}
// IndexFiles returns all index files from the file set.
func (fs *FileSet) IndexFiles() []*IndexFile {
var a []*IndexFile
for _, f := range fs.files {
if f, ok := f.(*IndexFile); ok {
a = append(a, f)
}
}
return a
}
// LastContiguousIndexFilesByLevel returns the last contiguous run of index files at the given level.
// These can be used by the compaction scheduler.
func (fs *FileSet) LastContiguousIndexFilesByLevel(level int) []*IndexFile {
if level == 0 {
return nil
}
var a []*IndexFile
for i := len(fs.files) - 1; i >= 0; i-- {
f := fs.files[i]
// Ignore files above level, stop on files below level.
if level < f.Level() {
continue
} else if level > f.Level() {
break
}
a = append([]*IndexFile{f.(*IndexFile)}, a...)
}
return a
}
// SeriesIterator returns an iterator over all series in the index.
func (fs *FileSet) SeriesIterator() SeriesIterator {
a := make([]SeriesIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.SeriesIterator()
if itr == nil {
continue
}
a = append(a, itr)
}
return FilterUndeletedSeriesIterator(MergeSeriesIterators(a...))
}
// Measurement returns a measurement by name.
func (fs *FileSet) Measurement(name []byte) MeasurementElem {
for _, f := range fs.files {
if e := f.Measurement(name); e == nil {
continue
} else if e.Deleted() {
return nil
} else {
return e
}
}
return nil
}
// MeasurementIterator returns an iterator over all measurements in the index.
func (fs *FileSet) MeasurementIterator() MeasurementIterator {
a := make([]MeasurementIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.MeasurementIterator()
if itr != nil {
a = append(a, itr)
}
}
return FilterUndeletedMeasurementIterator(MergeMeasurementIterators(a...))
}
// MeasurementSeriesIterator returns an iterator over all non-tombstoned series
// in the index for the provided measurement.
func (fs *FileSet) MeasurementSeriesIterator(name []byte) SeriesIterator {
a := make([]SeriesIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.MeasurementSeriesIterator(name)
if itr != nil {
a = append(a, itr)
}
}
return FilterUndeletedSeriesIterator(MergeSeriesIterators(a...))
}
// TagKeyIterator returns an iterator over all tag keys for a measurement.
func (fs *FileSet) TagKeyIterator(name []byte) TagKeyIterator {
a := make([]TagKeyIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.TagKeyIterator(name)
if itr != nil {
a = append(a, itr)
}
}
return MergeTagKeyIterators(a...)
}
// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression.
func (fs *FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) {
switch e := expr.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok {
return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String())
} else if tag.Val != "_tagKey" {
return nil, nil
}
if influxql.IsRegexOp(e.Op) {
re, ok := e.RHS.(*influxql.RegexLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
}
return fs.tagKeysByFilter(name, e.Op, nil, re.Val), nil
}
s, ok := e.RHS.(*influxql.StringLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
}
return fs.tagKeysByFilter(name, e.Op, []byte(s.Val), nil), nil
case influxql.AND, influxql.OR:
lhs, err := fs.MeasurementTagKeysByExpr(name, e.LHS)
if err != nil {
return nil, err
}
rhs, err := fs.MeasurementTagKeysByExpr(name, e.RHS)
if err != nil {
return nil, err
}
if lhs != nil && rhs != nil {
if e.Op == influxql.OR {
return unionStringSets(lhs, rhs), nil
}
return intersectStringSets(lhs, rhs), nil
} else if lhs != nil {
return lhs, nil
} else if rhs != nil {
return rhs, nil
}
return nil, nil
default:
return nil, fmt.Errorf("invalid operator")
}
case *influxql.ParenExpr:
return fs.MeasurementTagKeysByExpr(name, e.Expr)
}
return nil, fmt.Errorf("%#v", expr)
}
// tagValuesByKeyAndExpr retrieves tag values for the provided tag keys.
//
// tagValuesByKeyAndExpr returns sets of values for each key, indexable by the
// position of the tag key in the keys argument.
//
// N.B. tagValuesByKeyAndExpr relies on keys being sorted in ascending
// lexicographic order.
func (fs *FileSet) tagValuesByKeyAndExpr(name []byte, keys []string, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) ([]map[string]struct{}, error) {
itr, err := fs.seriesByExprIterator(name, expr, fieldset.Fields(string(name)))
if err != nil {
return nil, err
} else if itr == nil {
return nil, nil
}
keyIdxs := make(map[string]int, len(keys))
for ki, key := range keys {
keyIdxs[key] = ki
// Check that keys are in order.
if ki > 0 && key < keys[ki-1] {
return nil, fmt.Errorf("keys %v are not in ascending order", keys)
}
}
resultSet := make([]map[string]struct{}, len(keys))
for i := 0; i < len(resultSet); i++ {
resultSet[i] = make(map[string]struct{})
}
// Iterate all series to collect tag values.
for e := itr.Next(); e != nil; e = itr.Next() {
for _, t := range e.Tags() {
if idx, ok := keyIdxs[string(t.Key)]; ok {
resultSet[idx][string(t.Value)] = struct{}{}
} else if string(t.Key) > keys[len(keys)-1] {
// The tag key is > the largest key we're interested in.
break
}
}
}
return resultSet, nil
}
// tagKeysByFilter will filter the tag keys for the measurement.
func (fs *FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} {
ss := make(map[string]struct{})
itr := fs.TagKeyIterator(name)
for e := itr.Next(); e != nil; e = itr.Next() {
var matched bool
switch op {
case influxql.EQ:
matched = bytes.Equal(e.Key(), val)
case influxql.NEQ:
matched = !bytes.Equal(e.Key(), val)
case influxql.EQREGEX:
matched = regex.Match(e.Key())
case influxql.NEQREGEX:
matched = !regex.Match(e.Key())
}
if !matched {
continue
}
ss[string(e.Key())] = struct{}{}
}
return ss
}
// TagKeySeriesIterator returns a series iterator for all values across a single key.
func (fs *FileSet) TagKeySeriesIterator(name, key []byte) SeriesIterator {
a := make([]SeriesIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.TagKeySeriesIterator(name, key)
if itr != nil {
a = append(a, itr)
}
}
return FilterUndeletedSeriesIterator(MergeSeriesIterators(a...))
}
// HasTagKey returns true if the tag key exists.
func (fs *FileSet) HasTagKey(name, key []byte) bool {
for _, f := range fs.files {
if e := f.TagKey(name, key); e != nil {
return !e.Deleted()
}
}
return false
}
// HasTagValue returns true if the tag value exists.
func (fs *FileSet) HasTagValue(name, key, value []byte) bool {
for _, f := range fs.files {
if e := f.TagValue(name, key, value); e != nil {
return !e.Deleted()
}
}
return false
}
// TagValueIterator returns a value iterator for a tag key.
func (fs *FileSet) TagValueIterator(name, key []byte) TagValueIterator {
a := make([]TagValueIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.TagValueIterator(name, key)
if itr != nil {
a = append(a, itr)
}
}
return MergeTagValueIterators(a...)
}
// TagValueSeriesIterator returns a series iterator for a single tag value.
func (fs *FileSet) TagValueSeriesIterator(name, key, value []byte) SeriesIterator {
a := make([]SeriesIterator, 0, len(fs.files))
for _, f := range fs.files {
itr := f.TagValueSeriesIterator(name, key, value)
if itr != nil {
a = append(a, itr)
}
}
return FilterUndeletedSeriesIterator(MergeSeriesIterators(a...))
}
// MatchTagValueSeriesIterator returns a series iterator for tags which match value.
// If matches is false, returns iterators which do not match value.
func (fs *FileSet) MatchTagValueSeriesIterator(name, key []byte, value *regexp.Regexp, matches bool) SeriesIterator {
matchEmpty := value.MatchString("")
if matches {
if matchEmpty {
return FilterUndeletedSeriesIterator(fs.matchTagValueEqualEmptySeriesIterator(name, key, value))
}
return FilterUndeletedSeriesIterator(fs.matchTagValueEqualNotEmptySeriesIterator(name, key, value))
}
if matchEmpty {
return FilterUndeletedSeriesIterator(fs.matchTagValueNotEqualEmptySeriesIterator(name, key, value))
}
return FilterUndeletedSeriesIterator(fs.matchTagValueNotEqualNotEmptySeriesIterator(name, key, value))
}
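// Dispatch summary for the four helpers below (restating the branches above):
//
//	matches  value matches ""  strategy
//	true     true              measurement series minus series whose value does NOT match
//	true     false             union of series for values that match
//	false    true              union of series for values that do NOT match
//	false    false             measurement series minus series whose value matches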
func (fs *FileSet) matchTagValueEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator {
vitr := fs.TagValueIterator(name, key)
if vitr == nil {
return fs.MeasurementSeriesIterator(name)
}
var itrs []SeriesIterator
for e := vitr.Next(); e != nil; e = vitr.Next() {
if !value.Match(e.Value()) {
itrs = append(itrs, fs.TagValueSeriesIterator(name, key, e.Value()))
}
}
return DifferenceSeriesIterators(
fs.MeasurementSeriesIterator(name),
MergeSeriesIterators(itrs...),
)
}
func (fs *FileSet) matchTagValueEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator {
vitr := fs.TagValueIterator(name, key)
if vitr == nil {
return nil
}
var itrs []SeriesIterator
for e := vitr.Next(); e != nil; e = vitr.Next() {
if value.Match(e.Value()) {
itrs = append(itrs, fs.TagValueSeriesIterator(name, key, e.Value()))
}
}
return MergeSeriesIterators(itrs...)
}
func (fs *FileSet) matchTagValueNotEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator {
vitr := fs.TagValueIterator(name, key)
if vitr == nil {
return nil
}
var itrs []SeriesIterator
for e := vitr.Next(); e != nil; e = vitr.Next() {
if !value.Match(e.Value()) {
itrs = append(itrs, fs.TagValueSeriesIterator(name, key, e.Value()))
}
}
return MergeSeriesIterators(itrs...)
}
func (fs *FileSet) matchTagValueNotEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator {
vitr := fs.TagValueIterator(name, key)
if vitr == nil {
return fs.MeasurementSeriesIterator(name)
}
var itrs []SeriesIterator
for e := vitr.Next(); e != nil; e = vitr.Next() {
if value.Match(e.Value()) {
itrs = append(itrs, fs.TagValueSeriesIterator(name, key, e.Value()))
}
}
return DifferenceSeriesIterators(
fs.MeasurementSeriesIterator(name),
MergeSeriesIterators(itrs...),
)
}
func (fs *FileSet) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) {
// Return filtered list if expression exists.
if expr != nil {
return fs.measurementNamesByExpr(expr)
}
// Iterate over all measurements if no condition exists.
var names [][]byte
itr := fs.MeasurementIterator()
for e := itr.Next(); e != nil; e = itr.Next() {
names = append(names, e.Name())
}
return names, nil
}
func (fs *FileSet) measurementNamesByExpr(expr influxql.Expr) ([][]byte, error) {
if expr == nil {
return nil, nil
}
switch e := expr.(type) {
case *influxql.BinaryExpr:
switch e.Op {
case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX:
tag, ok := e.LHS.(*influxql.VarRef)
if !ok {
return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String())
}
// Retrieve value or regex expression from RHS.
var value string
var regex *regexp.Regexp
if influxql.IsRegexOp(e.Op) {
re, ok := e.RHS.(*influxql.RegexLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String())
}
regex = re.Val
} else {
s, ok := e.RHS.(*influxql.StringLiteral)
if !ok {
return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String())
}
value = s.Val
}
// Match on name, if specified.
if tag.Val == "_name" {
return fs.measurementNamesByNameFilter(e.Op, value, regex), nil
} else if influxql.IsSystemName(tag.Val) {
return nil, nil
}
return fs.measurementNamesByTagFilter(e.Op, tag.Val, value, regex), nil
case influxql.OR, influxql.AND:
lhs, err := fs.measurementNamesByExpr(e.LHS)
if err != nil {
return nil, err
}
rhs, err := fs.measurementNamesByExpr(e.RHS)
if err != nil {
return nil, err
}
if e.Op == influxql.OR {
return bytesutil.Union(lhs, rhs), nil
}
return bytesutil.Intersect(lhs, rhs), nil
default:
return nil, fmt.Errorf("invalid tag comparison operator")
}
case *influxql.ParenExpr:
return fs.measurementNamesByExpr(e.Expr)
default:
return nil, fmt.Errorf("%#v", expr)
}
}
// measurementNamesByNameFilter returns matching measurement names in sorted order.
func (fs *FileSet) measurementNamesByNameFilter(op influxql.Token, val string, regex *regexp.Regexp) [][]byte {
var names [][]byte
itr := fs.MeasurementIterator()
for e := itr.Next(); e != nil; e = itr.Next() {
var matched bool
switch op {
case influxql.EQ:
matched = string(e.Name()) == val
case influxql.NEQ:
matched = string(e.Name()) != val
case influxql.EQREGEX:
matched = regex.Match(e.Name())
case influxql.NEQREGEX:
matched = !regex.Match(e.Name())
}
if matched {
names = append(names, e.Name())
}
}
bytesutil.Sort(names)
return names
}
func (fs *FileSet) measurementNamesByTagFilter(op influxql.Token, key, val string, regex *regexp.Regexp) [][]byte {
var names [][]byte
mitr := fs.MeasurementIterator()
for me := mitr.Next(); me != nil; me = mitr.Next() {
// If the operator is non-regex, only check the specified value.
var tagMatch bool
if op == influxql.EQ || op == influxql.NEQ {
if fs.HasTagValue(me.Name(), []byte(key), []byte(val)) {
tagMatch = true
}
} else {
// Else, the operator is a regex and we have to check all tag
// values against the regular expression.
vitr := fs.TagValueIterator(me.Name(), []byte(key))
if vitr != nil {
for ve := vitr.Next(); ve != nil; ve = vitr.Next() {
if regex.Match(ve.Value()) {
tagMatch = true
break
}
}
}
}
//
// XNOR gate
//
// tags match | operation is EQ | measurement matches
// --------------------------------------------------
// True | True | True
// True | False | False
// False | True | False
// False | False | True
if tagMatch == (op == influxql.EQ || op == influxql.EQREGEX) {
names = append(names, me.Name())
continue
}
}
bytesutil.Sort(names)
return names
}
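// exampleXNORKeep distills the XNOR rule above into a standalone predicate
// (an illustrative sketch, not part of the original API): a measurement is
// kept when its tag-match status agrees with the operator being an equality.
func exampleXNORKeep(tagMatch bool, op influxql.Token) bool {
	return tagMatch == (op == influxql.EQ || op == influxql.EQREGEX)
}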
// HasSeries returns true if the series exists and is not tombstoned.
func (fs *FileSet) HasSeries(name []byte, tags models.Tags, buf []byte) bool {
for _, f := range fs.files {
if exists, tombstoned := f.HasSeries(name, tags, buf); exists {
return !tombstoned
}
}
return false
}
// FilterNamesTags filters out any series which already exist. It modifies the
// provided slices of names and tags.
func (fs *FileSet) FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) {
buf := make([]byte, 4096)
// Filter across all log files.
// Log files obtain a read lock and should be done in bulk for performance.
for _, f := range fs.LogFiles() {
names, tagsSlice = f.FilterNamesTags(names, tagsSlice)
}
// Filter across remaining index files.
indexFiles := fs.IndexFiles()
newNames, newTagsSlice := names[:0], tagsSlice[:0]
for i := range names {
name, tags := names[i], tagsSlice[i]
currentLevel, skipLevel := -1, false
var exists, tombstoned bool
for j := 0; j < len(indexFiles); j++ {
f := indexFiles[j]
// Check for existence on the level when it changes.
if level := f.Level(); currentLevel != level {
currentLevel, skipLevel = level, false
if filter := fs.filters[level]; filter != nil {
if !filter.Contains(AppendSeriesKey(buf[:0], name, tags)) {
skipLevel = true
}
}
}
// Skip file if in level where it doesn't exist.
if skipLevel {
continue
}
// Stop once we find the series in a file.
if exists, tombstoned = f.HasSeries(name, tags, buf); exists {
break
}
}
// If the series doesn't exist or it has been tombstoned then add it.
if !exists || tombstoned {
newNames = append(newNames, name)
newTagsSlice = append(newTagsSlice, tags)
}
}
return newNames, newTagsSlice
}
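// exampleFilterNewSeries is an illustrative sketch (not part of the original
// API) of pre-filtering a write batch: series already present in the FileSet
// are dropped so only genuinely new series are indexed. The names and tags
// below are hypothetical.
func exampleFilterNewSeries(fs *FileSet) ([][]byte, []models.Tags) {
	names := [][]byte{[]byte("cpu"), []byte("mem")}
	tagsSlice := []models.Tags{
		models.NewTags(map[string]string{"host": "serverA"}),
		models.NewTags(map[string]string{"host": "serverB"}),
	}
	return fs.FilterNamesTags(names, tagsSlice)
}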
// SeriesSketches returns the merged series sketches for the FileSet.
func (fs *FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) {
sketch, tsketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
// Iterate over all the files and merge the sketches into the result.
for _, f := range fs.files {
if err := f.MergeSeriesSketches(sketch, tsketch); err != nil {
return nil, nil, err
}
}
return sketch, tsketch, nil
}
// MeasurementsSketches returns the merged measurement sketches for the FileSet.
func (fs *FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) {
sketch, tsketch := hll.NewDefaultPlus(), hll.NewDefaultPlus()
// Iterate over all the files and merge the sketches into the result.
for _, f := range fs.files {
if err := f.MergeMeasurementsSketches(sketch, tsketch); err != nil {
return nil, nil, err
}
}
return sketch, tsketch, nil
}
// MeasurementSeriesByExprIterator returns a series iterator for a measurement
// that is filtered by expr. If expr only contains time expressions then this
// call is equivalent to MeasurementSeriesIterator().
func (fs *FileSet) MeasurementSeriesByExprIterator(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) (SeriesIterator, error) {
// Return all series for the measurement if there are no tag expressions.
if expr == nil || influxql.OnlyTimeExpr(expr) {
return fs.MeasurementSeriesIterator(name), nil
}
return fs.seriesByExprIterator(name, expr, fieldset.CreateFieldsIfNotExists(name))
}
// MeasurementSeriesKeysByExpr returns a list of series keys matching expr.
func (fs *FileSet) MeasurementSeriesKeysByExpr(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) ([][]byte, error) {
// Create iterator for all matching series.
itr, err := fs.MeasurementSeriesByExprIterator(name, expr, fieldset)
if err != nil {
return nil, err
} else if itr == nil {
return nil, nil
}
// Iterate over all series and generate keys.
var keys [][]byte
for e := itr.Next(); e != nil; e = itr.Next() {
// Check for unsupported field filters.
// Any remaining filter means fields were referenced (e.g., `WHERE value = 1.2`).
if e.Expr() != nil {
return nil, errors.New("fields not supported in WHERE clause during deletion")
}
keys = append(keys, models.MakeKey(e.Name(), e.Tags()))
}
return keys, nil
}
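// exampleSeriesKeysForDelete is a hedged sketch (not part of the original API)
// of resolving the series keys touched by a DELETE-style predicate; the
// measurement and expression are hypothetical, and the field set comes from
// the caller.
func exampleSeriesKeysForDelete(fs *FileSet, fieldset *tsdb.MeasurementFieldSet) ([][]byte, error) {
	expr, err := influxql.ParseExpr(`region = 'west'`)
	if err != nil {
		return nil, err
	}
	return fs.MeasurementSeriesKeysByExpr([]byte("cpu"), expr, fieldset)
}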
func (fs *FileSet) seriesByExprIterator(name []byte, expr influxql.Expr, mf *tsdb.MeasurementFields) (SeriesIterator, error) {
switch expr := expr.(type) {
case *influxql.BinaryExpr:
switch expr.Op {
case influxql.AND, influxql.OR:
// Get the series IDs and filter expressions for the LHS.
litr, err := fs.seriesByExprIterator(name, expr.LHS, mf)
if err != nil {
return nil, err
}
// Get the series IDs and filter expressions for the RHS.
ritr, err := fs.seriesByExprIterator(name, expr.RHS, mf)
if err != nil {
return nil, err
}
// Intersect iterators if expression is "AND".
if expr.Op == influxql.AND {
return IntersectSeriesIterators(litr, ritr), nil
}
// Union iterators if expression is "OR".
return UnionSeriesIterators(litr, ritr), nil
default:
return fs.seriesByBinaryExprIterator(name, expr, mf)
}
case *influxql.ParenExpr:
return fs.seriesByExprIterator(name, expr.Expr, mf)
default:
return nil, nil
}
}
// seriesByBinaryExprIterator returns a series iterator, attaching any residual
// filter expression to the returned series.
func (fs *FileSet) seriesByBinaryExprIterator(name []byte, n *influxql.BinaryExpr, mf *tsdb.MeasurementFields) (SeriesIterator, error) {
// If this binary expression has another binary expression, then this
// is some expression math and we should just pass it to the underlying query.
if _, ok := n.LHS.(*influxql.BinaryExpr); ok {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), n), nil
} else if _, ok := n.RHS.(*influxql.BinaryExpr); ok {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), n), nil
}
// Retrieve the variable reference from the correct side of the expression.
key, ok := n.LHS.(*influxql.VarRef)
value := n.RHS
if !ok {
key, ok = n.RHS.(*influxql.VarRef)
if !ok {
return nil, fmt.Errorf("invalid expression: %s", n.String())
}
value = n.LHS
}
// For time literals, return all series and "true" as the filter.
if _, ok := value.(*influxql.TimeLiteral); ok || key.Val == "time" {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), &influxql.BooleanLiteral{Val: true}), nil
}
// For fields, return all series from this measurement.
if key.Val != "_name" && ((key.Type == influxql.Unknown && mf.HasField(key.Val)) || key.Type == influxql.AnyField || (key.Type != influxql.Tag && key.Type != influxql.Unknown)) {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), n), nil
} else if value, ok := value.(*influxql.VarRef); ok {
// Check if the RHS is a variable and if it is a field.
if value.Val != "_name" && ((value.Type == influxql.Unknown && mf.HasField(value.Val)) || key.Type == influxql.AnyField || (value.Type != influxql.Tag && value.Type != influxql.Unknown)) {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), n), nil
}
}
// Create iterator based on value type.
switch value := value.(type) {
case *influxql.StringLiteral:
return fs.seriesByBinaryExprStringIterator(name, []byte(key.Val), []byte(value.Val), n.Op)
case *influxql.RegexLiteral:
return fs.seriesByBinaryExprRegexIterator(name, []byte(key.Val), value.Val, n.Op)
case *influxql.VarRef:
return fs.seriesByBinaryExprVarRefIterator(name, []byte(key.Val), value, n.Op)
default:
if n.Op == influxql.NEQ || n.Op == influxql.NEQREGEX {
return fs.MeasurementSeriesIterator(name), nil
}
return nil, nil
}
}
func (fs *FileSet) seriesByBinaryExprStringIterator(name, key, value []byte, op influxql.Token) (SeriesIterator, error) {
// Special handling for "_name" to match measurement name.
if bytes.Equal(key, []byte("_name")) {
if (op == influxql.EQ && bytes.Equal(value, name)) || (op == influxql.NEQ && !bytes.Equal(value, name)) {
return fs.MeasurementSeriesIterator(name), nil
}
return nil, nil
}
if op == influxql.EQ {
// Match a specific value.
if len(value) != 0 {
return fs.TagValueSeriesIterator(name, key, value), nil
}
// Return all measurement series that have no values from this tag key.
return DifferenceSeriesIterators(
fs.MeasurementSeriesIterator(name),
fs.TagKeySeriesIterator(name, key),
), nil
}
// Return all measurement series without this tag value.
if len(value) != 0 {
return DifferenceSeriesIterators(
fs.MeasurementSeriesIterator(name),
fs.TagValueSeriesIterator(name, key, value),
), nil
}
// Return all series across all values of this tag key.
return fs.TagKeySeriesIterator(name, key), nil
}
func (fs *FileSet) seriesByBinaryExprRegexIterator(name, key []byte, value *regexp.Regexp, op influxql.Token) (SeriesIterator, error) {
// Special handling for "_name" to match measurement name.
if bytes.Equal(key, []byte("_name")) {
match := value.Match(name)
if (op == influxql.EQREGEX && match) || (op == influxql.NEQREGEX && !match) {
return newSeriesExprIterator(fs.MeasurementSeriesIterator(name), &influxql.BooleanLiteral{Val: true}), nil
}
return nil, nil
}
return fs.MatchTagValueSeriesIterator(name, key, value, op == influxql.EQREGEX), nil
}
func (fs *FileSet) seriesByBinaryExprVarRefIterator(name, key []byte, value *influxql.VarRef, op influxql.Token) (SeriesIterator, error) {
if op == influxql.EQ {
return IntersectSeriesIterators(
fs.TagKeySeriesIterator(name, key),
fs.TagKeySeriesIterator(name, []byte(value.Val)),
), nil
}
return DifferenceSeriesIterators(
fs.TagKeySeriesIterator(name, key),
fs.TagKeySeriesIterator(name, []byte(value.Val)),
), nil
}
// buildFilters builds a series existence filter for each compaction level.
func (fs *FileSet) buildFilters() error {
if len(fs.levels) == 0 {
fs.filters = nil
return nil
}
// Generate filters for each level.
fs.filters = make([]*bloom.Filter, len(fs.levels))
// Merge filters at each level.
for _, f := range fs.files {
level := f.Level()
// Skip if file has no bloom filter.
if f.Filter() == nil {
continue
}
// Initialize a filter if it doesn't exist.
if fs.filters[level] == nil {
lvl := fs.levels[level]
fs.filters[level] = bloom.NewFilter(lvl.M, lvl.K)
}
// Merge filter.
if err := fs.filters[level].Merge(f.Filter()); err != nil {
return err
}
}
return nil
}
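// exampleLevelFilterCheck is a minimal sketch (not part of the original API)
// of the per-level existence check that buildFilters enables: a negative
// bloom-filter answer lets a whole compaction level be skipped.
func exampleLevelFilterCheck(filter *bloom.Filter, name []byte, tags models.Tags) bool {
	if filter == nil {
		return true // no filter for this level; the files must be consulted
	}
	var buf []byte
	return filter.Contains(AppendSeriesKey(buf[:0], name, tags))
}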
// File represents a log or index file.
type File interface {
Close() error
Path() string
ID() int
Level() int
Measurement(name []byte) MeasurementElem
MeasurementIterator() MeasurementIterator
HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool)
Series(name []byte, tags models.Tags) SeriesElem
SeriesN() uint64
TagKey(name, key []byte) TagKeyElem
TagKeyIterator(name []byte) TagKeyIterator
TagValue(name, key, value []byte) TagValueElem
TagValueIterator(name, key []byte) TagValueIterator
// Series iteration.
SeriesIterator() SeriesIterator
MeasurementSeriesIterator(name []byte) SeriesIterator
TagKeySeriesIterator(name, key []byte) SeriesIterator
TagValueSeriesIterator(name, key, value []byte) SeriesIterator
// Sketches for cardinality estimation
MergeSeriesSketches(s, t estimator.Sketch) error
MergeMeasurementsSketches(s, t estimator.Sketch) error
// Series existence bloom filter.
Filter() *bloom.Filter
// Reference counting.
Retain()
Release()
}
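// exampleWithFile is an illustrative sketch (not part of the original API) of
// the reference-counting contract: retain a file before reading from it and
// release it afterwards so compactions can safely reclaim it.
func exampleWithFile(f File, fn func(File)) {
	f.Retain()
	defer f.Release()
	fn(f)
}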
type Files []File
func (a Files) IDs() []int {
ids := make([]int, len(a))
for i := range a {
ids[i] = a[i].ID()
}
return ids
}

View File

@@ -0,0 +1,324 @@
package tsi1_test
import (
"fmt"
"testing"
"github.com/influxdata/influxdb/models"
)
// Ensure fileset can return an iterator over all series in the index.
func TestFileSet_SeriesIterator(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Create initial set of series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
}); err != nil {
t.Fatal(err)
}
// Verify initial set of series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.SeriesIterator()
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region west}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `mem` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil series: %s/%s", e.Name(), e.Tags().String())
}
})
// Add more series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("disk")},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
}); err != nil {
t.Fatal(err)
}
// Verify additional series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.SeriesIterator()
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region north}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region west}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `disk` || len(e.Tags()) != 0 {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `mem` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil series: %s/%s", e.Name(), e.Tags().String())
}
})
}
// Ensure fileset can return an iterator over all series for one measurement.
func TestFileSet_MeasurementSeriesIterator(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Create initial set of series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
}); err != nil {
t.Fatal(err)
}
// Verify initial set of series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.MeasurementSeriesIterator([]byte("cpu"))
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region west}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil series: %s/%s", e.Name(), e.Tags().String())
}
})
// Add more series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("disk")},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north"})},
}); err != nil {
t.Fatal(err)
}
// Verify additional series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.MeasurementSeriesIterator([]byte("cpu"))
if itr == nil {
t.Fatalf("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region east}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region north}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); string(e.Name()) != `cpu` || e.Tags().String() != `[{region west}]` {
t.Fatalf("unexpected series: %s/%s", e.Name(), e.Tags().String())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil series: %s/%s", e.Name(), e.Tags().String())
}
})
}
// Ensure fileset can return an iterator over all measurements for the index.
func TestFileSet_MeasurementIterator(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Create initial set of series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu")},
{Name: []byte("mem")},
}); err != nil {
t.Fatal(err)
}
// Verify initial set of series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.MeasurementIterator()
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` {
t.Fatalf("unexpected measurement: %s", e.Name())
} else if e := itr.Next(); string(e.Name()) != `mem` {
t.Fatalf("unexpected measurement: %s", e.Name())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil measurement: %s", e.Name())
}
})
// Add more series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("disk"), Tags: models.NewTags(map[string]string{"foo": "bar"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north", "x": "y"})},
}); err != nil {
t.Fatal(err)
}
// Verify additional series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.MeasurementIterator()
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Name()) != `cpu` {
t.Fatalf("unexpected measurement: %s", e.Name())
} else if e := itr.Next(); string(e.Name()) != `disk` {
t.Fatalf("unexpected measurement: %s", e.Name())
} else if e := itr.Next(); string(e.Name()) != `mem` {
t.Fatalf("unexpected measurement: %s", e.Name())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil measurement: %s", e.Name())
}
})
}
// Ensure fileset can return an iterator over all keys for one measurement.
func TestFileSet_TagKeyIterator(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Create initial set of series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west", "type": "gpu"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east", "misc": "other"})},
}); err != nil {
t.Fatal(err)
}
// Verify initial set of series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.TagKeyIterator([]byte("cpu"))
if itr == nil {
t.Fatalf("expected iterator")
}
if e := itr.Next(); string(e.Key()) != `region` {
t.Fatalf("unexpected key: %s", e.Key())
} else if e := itr.Next(); string(e.Key()) != `type` {
t.Fatalf("unexpected key: %s", e.Key())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil key: %s", e.Key())
}
})
// Add more series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("disk"), Tags: models.NewTags(map[string]string{"foo": "bar"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north", "x": "y"})},
}); err != nil {
t.Fatal(err)
}
// Verify additional series.
idx.Run(t, func(t *testing.T) {
fs := idx.RetainFileSet()
defer fs.Release()
itr := fs.TagKeyIterator([]byte("cpu"))
if itr == nil {
t.Fatal("expected iterator")
}
if e := itr.Next(); string(e.Key()) != `region` {
t.Fatalf("unexpected key: %s", e.Key())
} else if e := itr.Next(); string(e.Key()) != `type` {
t.Fatalf("unexpected key: %s", e.Key())
} else if e := itr.Next(); string(e.Key()) != `x` {
t.Fatalf("unexpected key: %s", e.Key())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil key: %s", e.Key())
}
})
}
var (
byteSliceResult [][]byte
tagsSliceResult []models.Tags
)
func BenchmarkFileset_FilterNamesTags(b *testing.B) {
idx := MustOpenIndex()
defer idx.Close()
allNames := make([][]byte, 0, 2000*1000)
allTags := make([]models.Tags, 0, 2000*1000)
for i := 0; i < 2000; i++ {
for j := 0; j < 1000; j++ {
name := []byte(fmt.Sprintf("measurement-%d", i))
tags := models.NewTags(map[string]string{"host": fmt.Sprintf("server-%d", j)})
allNames = append(allNames, name)
allTags = append(allTags, tags)
}
}
if err := idx.CreateSeriesListIfNotExists(nil, allNames, allTags); err != nil {
b.Fatal(err)
}
// idx.CheckFastCompaction()
fs := idx.RetainFileSet()
defer fs.Release()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
names := [][]byte{
[]byte("foo"),
[]byte("measurement-222"), // filtered
[]byte("measurement-222"), // kept (tags won't match)
[]byte("measurements-1"),
[]byte("measurement-900"), // filtered
[]byte("measurement-44444"),
[]byte("bar"),
}
tags := []models.Tags{
nil,
models.NewTags(map[string]string{"host": "server-297"}), // filtered
models.NewTags(map[string]string{"host": "wrong"}),
nil,
models.NewTags(map[string]string{"host": "server-1026"}), // filtered
models.NewTags(map[string]string{"host": "server-23"}), // kept (measurement won't match)
models.NewTags(map[string]string{"host": "zoo"}),
}
b.StartTimer()
byteSliceResult, tagsSliceResult = fs.FilterNamesTags(names, tags)
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,439 @@
package tsi1
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"sync"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/mmap"
)
// IndexFileVersion is the current TSI1 index file version.
const IndexFileVersion = 1
// FileSignature represents a magic number at the header of the index file.
const FileSignature = "TSI1"
// IndexFile field size constants.
const (
// IndexFile trailer fields
IndexFileVersionSize = 2
SeriesBlockOffsetSize = 8
SeriesBlockSizeSize = 8
MeasurementBlockOffsetSize = 8
MeasurementBlockSizeSize = 8
IndexFileTrailerSize = IndexFileVersionSize +
SeriesBlockOffsetSize +
SeriesBlockSizeSize +
MeasurementBlockOffsetSize +
MeasurementBlockSizeSize
)
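// Worked example: the trailer packs two 8-byte offset/size pairs plus a 2-byte
// version, so IndexFileTrailerSize is 8+8+8+8+2 = 34 bytes and always occupies
// the final 34 bytes of an index file. The assignment below is a compile-time
// sanity check of that arithmetic (a sketch, not part of the original file).
var _ [34]byte = [IndexFileTrailerSize]byte{}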
// IndexFile errors.
var (
ErrInvalidIndexFile = errors.New("invalid index file")
ErrUnsupportedIndexFileVersion = errors.New("unsupported index file version")
)
// IndexFile represents a collection of measurement, tag, and series data.
type IndexFile struct {
wg sync.WaitGroup // ref count
data []byte
// Components
sblk SeriesBlock
tblks map[string]*TagBlock // tag blocks by measurement name
mblk MeasurementBlock
// Sortable identifier & filepath to the log file.
level int
id int
// Counters
seriesN int64 // Number of unique series in this index file.
// Compaction tracking.
mu sync.RWMutex
compacting bool
// Path to data file.
path string
}
// NewIndexFile returns a new instance of IndexFile.
func NewIndexFile() *IndexFile {
return &IndexFile{}
}
// Open memory maps the data file at the file's path.
func (f *IndexFile) Open() error {
// Extract identifier from path name.
f.id, f.level = ParseFilename(f.Path())
data, err := mmap.Map(f.Path())
if err != nil {
return err
}
return f.UnmarshalBinary(data)
}
// Close unmaps the data file.
func (f *IndexFile) Close() error {
// Wait until all references are released.
f.wg.Wait()
f.sblk = SeriesBlock{}
f.tblks = nil
f.mblk = MeasurementBlock{}
f.seriesN = 0
return mmap.Unmap(f.data)
}
// ID returns the file sequence identifier.
func (f *IndexFile) ID() int { return f.id }
// Path returns the file path.
func (f *IndexFile) Path() string { return f.path }
// SetPath sets the file's path.
func (f *IndexFile) SetPath(path string) { f.path = path }
// Level returns the compaction level for the file.
func (f *IndexFile) Level() int { return f.level }
// Filter returns the series existence filter for the file.
func (f *IndexFile) Filter() *bloom.Filter { return f.sblk.filter }
// Retain adds a reference count to the file.
func (f *IndexFile) Retain() { f.wg.Add(1) }
// Release removes a reference count from the file.
func (f *IndexFile) Release() { f.wg.Done() }
// Size returns the size of the index file, in bytes.
func (f *IndexFile) Size() int64 { return int64(len(f.data)) }
// Compacting returns true if the file is being compacted.
func (f *IndexFile) Compacting() bool {
f.mu.RLock()
v := f.compacting
f.mu.RUnlock()
return v
}
// setCompacting sets whether the index file is being compacted.
func (f *IndexFile) setCompacting(v bool) {
f.mu.Lock()
f.compacting = v
f.mu.Unlock()
}
// UnmarshalBinary opens an index from data.
// The byte slice is retained, so the underlying mapping must remain valid for
// the lifetime of the IndexFile.
func (f *IndexFile) UnmarshalBinary(data []byte) error {
// Ensure magic number exists at the beginning.
if len(data) < len(FileSignature) {
return io.ErrShortBuffer
} else if !bytes.Equal(data[:len(FileSignature)], []byte(FileSignature)) {
return ErrInvalidIndexFile
}
// Read index file trailer.
t, err := ReadIndexFileTrailer(data)
if err != nil {
return err
}
// Slice measurement block data.
buf := data[t.MeasurementBlock.Offset:]
buf = buf[:t.MeasurementBlock.Size]
// Unmarshal measurement block.
if err := f.mblk.UnmarshalBinary(buf); err != nil {
return err
}
// Unmarshal each tag block.
f.tblks = make(map[string]*TagBlock)
itr := f.mblk.Iterator()
for m := itr.Next(); m != nil; m = itr.Next() {
e := m.(*MeasurementBlockElem)
// Slice tag block data.
buf := data[e.tagBlock.offset:]
buf = buf[:e.tagBlock.size]
// Unmarshal tag block.
var tblk TagBlock
if err := tblk.UnmarshalBinary(buf); err != nil {
return err
}
f.tblks[string(e.name)] = &tblk
}
// Slice series list data.
buf = data[t.SeriesBlock.Offset:]
buf = buf[:t.SeriesBlock.Size]
// Unmarshal series list.
if err := f.sblk.UnmarshalBinary(buf); err != nil {
return err
}
// Save reference to entire data block.
f.data = data
return nil
}
// Measurement returns a measurement element.
func (f *IndexFile) Measurement(name []byte) MeasurementElem {
e, ok := f.mblk.Elem(name)
if !ok {
return nil
}
return &e
}
// MeasurementN returns the number of measurements in the file.
func (f *IndexFile) MeasurementN() (n uint64) {
mitr := f.mblk.Iterator()
for me := mitr.Next(); me != nil; me = mitr.Next() {
n++
}
return n
}
// TagValueIterator returns a value iterator for a tag key, or nil if the
// measurement or tag key does not exist.
func (f *IndexFile) TagValueIterator(name, key []byte) TagValueIterator {
tblk := f.tblks[string(name)]
if tblk == nil {
return nil
}
// Find key element.
ke := tblk.TagKeyElem(key)
if ke == nil {
return nil
}
// Return an iterator over the key's values.
return ke.TagValueIterator()
}
// TagKeySeriesIterator returns a series iterator over all series across all
// values of a tag key, or nil if the measurement or key does not exist.
func (f *IndexFile) TagKeySeriesIterator(name, key []byte) SeriesIterator {
tblk := f.tblks[string(name)]
if tblk == nil {
return nil
}
// Find key element.
ke := tblk.TagKeyElem(key)
if ke == nil {
return nil
}
// Merge all value series iterators together.
vitr := ke.TagValueIterator()
var itrs []SeriesIterator
for ve := vitr.Next(); ve != nil; ve = vitr.Next() {
sitr := &rawSeriesIDIterator{data: ve.(*TagBlockValueElem).series.data}
itrs = append(itrs, newSeriesDecodeIterator(&f.sblk, sitr))
}
return MergeSeriesIterators(itrs...)
}
// TagValueSeriesIterator returns a series iterator for a single tag value, or
// nil if the measurement, key, or value does not exist.
func (f *IndexFile) TagValueSeriesIterator(name, key, value []byte) SeriesIterator {
tblk := f.tblks[string(name)]
if tblk == nil {
return nil
}
// Find value element.
ve := tblk.TagValueElem(key, value)
if ve == nil {
return nil
}
// Create an iterator over value's series.
return newSeriesDecodeIterator(
&f.sblk,
&rawSeriesIDIterator{
n: ve.(*TagBlockValueElem).series.n,
data: ve.(*TagBlockValueElem).series.data,
},
)
}
// TagKey returns a tag key.
func (f *IndexFile) TagKey(name, key []byte) TagKeyElem {
tblk := f.tblks[string(name)]
if tblk == nil {
return nil
}
return tblk.TagKeyElem(key)
}
// TagValue returns a tag value.
func (f *IndexFile) TagValue(name, key, value []byte) TagValueElem {
tblk := f.tblks[string(name)]
if tblk == nil {
return nil
}
return tblk.TagValueElem(key, value)
}
// HasSeries returns flags indicating if the series exists and if it is tombstoned.
func (f *IndexFile) HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) {
return f.sblk.HasSeries(name, tags, buf)
}
// Series returns the series element for a name/tag set, or nil if the series
// does not exist.
func (f *IndexFile) Series(name []byte, tags models.Tags) SeriesElem {
return f.sblk.Series(name, tags)
}
// TagValueElem returns an element for a measurement/tag/value.
func (f *IndexFile) TagValueElem(name, key, value []byte) TagValueElem {
tblk, ok := f.tblks[string(name)]
if !ok {
return nil
}
return tblk.TagValueElem(key, value)
}
// MeasurementIterator returns an iterator over all measurements.
func (f *IndexFile) MeasurementIterator() MeasurementIterator {
return f.mblk.Iterator()
}
// TagKeyIterator returns an iterator over all tag keys for a measurement.
func (f *IndexFile) TagKeyIterator(name []byte) TagKeyIterator {
blk := f.tblks[string(name)]
if blk == nil {
return nil
}
return blk.TagKeyIterator()
}
// MeasurementSeriesIterator returns an iterator over a measurement's series.
func (f *IndexFile) MeasurementSeriesIterator(name []byte) SeriesIterator {
return &seriesDecodeIterator{
itr: f.mblk.seriesIDIterator(name),
sblk: &f.sblk,
}
}
// MergeMeasurementsSketches merges the index file's measurement sketches into
// the provided sketches.
func (f *IndexFile) MergeMeasurementsSketches(s, t estimator.Sketch) error {
if err := s.Merge(f.mblk.sketch); err != nil {
return err
}
return t.Merge(f.mblk.tSketch)
}
// SeriesN returns the total number of non-tombstoned series for the index file.
func (f *IndexFile) SeriesN() uint64 {
return uint64(f.sblk.seriesN - f.sblk.tombstoneN)
}
// SeriesIterator returns an iterator over all series.
func (f *IndexFile) SeriesIterator() SeriesIterator {
return f.sblk.SeriesIterator()
}
// MergeSeriesSketches merges the index file's series sketches into the provided
// sketches.
func (f *IndexFile) MergeSeriesSketches(s, t estimator.Sketch) error {
if err := s.Merge(f.sblk.sketch); err != nil {
return err
}
return t.Merge(f.sblk.tsketch)
}
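// exampleEstimateSeries is a hedged sketch (not part of the original API):
// merge this file's sketches into caller-owned ones and estimate the number
// of live series as total minus tombstoned.
func exampleEstimateSeries(f *IndexFile, s, t estimator.Sketch) (uint64, error) {
	if err := f.MergeSeriesSketches(s, t); err != nil {
		return 0, err
	}
	total, dead := s.Count(), t.Count()
	if dead > total {
		return 0, nil // estimates are approximate; clamp at zero
	}
	return total - dead, nil
}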
// ReadIndexFileTrailer returns the index file trailer from data.
func ReadIndexFileTrailer(data []byte) (IndexFileTrailer, error) {
var t IndexFileTrailer
// Read version.
t.Version = int(binary.BigEndian.Uint16(data[len(data)-IndexFileVersionSize:]))
if t.Version != IndexFileVersion {
return t, ErrUnsupportedIndexFileVersion
}
// Slice trailer data.
buf := data[len(data)-IndexFileTrailerSize:]
// Read series list info.
t.SeriesBlock.Offset = int64(binary.BigEndian.Uint64(buf[0:SeriesBlockOffsetSize]))
buf = buf[SeriesBlockOffsetSize:]
t.SeriesBlock.Size = int64(binary.BigEndian.Uint64(buf[0:SeriesBlockSizeSize]))
buf = buf[SeriesBlockSizeSize:]
// Read measurement block info.
t.MeasurementBlock.Offset = int64(binary.BigEndian.Uint64(buf[0:MeasurementBlockOffsetSize]))
buf = buf[MeasurementBlockOffsetSize:]
t.MeasurementBlock.Size = int64(binary.BigEndian.Uint64(buf[0:MeasurementBlockSizeSize]))
buf = buf[MeasurementBlockSizeSize:]
return t, nil
}
// IndexFileTrailer represents meta data written to the end of the index file.
type IndexFileTrailer struct {
Version int
SeriesBlock struct {
Offset int64
Size int64
}
MeasurementBlock struct {
Offset int64
Size int64
}
}
// WriteTo writes the trailer to w.
func (t *IndexFileTrailer) WriteTo(w io.Writer) (n int64, err error) {
// Write series list info.
if err := writeUint64To(w, uint64(t.SeriesBlock.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.SeriesBlock.Size), &n); err != nil {
return n, err
}
// Write measurement block info.
if err := writeUint64To(w, uint64(t.MeasurementBlock.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.MeasurementBlock.Size), &n); err != nil {
return n, err
}
// Write index file encoding version.
if err := writeUint16To(w, IndexFileVersion, &n); err != nil {
return n, err
}
return n, nil
}
// FormatIndexFileName generates an index filename for the given index.
func FormatIndexFileName(id, level int) string {
return fmt.Sprintf("L%d-%08d%s", level, id, IndexFileExt)
}
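// exampleIndexFileName is a usage sketch (not part of the original API). It
// assumes IndexFileExt is the package's index-file extension, e.g. ".tsi".
func exampleIndexFileName() string {
	return FormatIndexFileName(3, 1) // yields "L1-00000003" + IndexFileExt
}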

View File

@@ -0,0 +1,154 @@
package tsi1_test
import (
"bytes"
"testing"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure a simple index file can be built and opened.
func TestCreateIndexFile(t *testing.T) {
f, err := CreateIndexFile([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
})
if err != nil {
t.Fatal(err)
}
if e := f.TagValueElem([]byte("cpu"), []byte("region"), []byte("west")); e == nil {
t.Fatal("expected element")
} else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 {
t.Fatalf("unexpected series count: %d", n)
}
}
// Ensure index file generation can be successfully built.
func TestGenerateIndexFile(t *testing.T) {
// Build generated index file.
f, err := GenerateIndexFile(10, 3, 4)
if err != nil {
t.Fatal(err)
}
// Verify that tag/value series can be fetched.
if e := f.TagValueElem([]byte("measurement0"), []byte("key0"), []byte("value0")); e == nil {
t.Fatal("expected element")
} else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n == 0 {
t.Fatal("expected series")
}
}
func BenchmarkIndexFile_TagValueSeries(b *testing.B) {
b.Run("M=1,K=2,V=3", func(b *testing.B) {
benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(1, 2, 3))
})
b.Run("M=10,K=5,V=5", func(b *testing.B) {
benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(10, 5, 5))
})
b.Run("M=10,K=7,V=5", func(b *testing.B) {
benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(10, 7, 7))
})
}
func benchmarkIndexFile_TagValueSeries(b *testing.B, idx *tsi1.IndexFile) {
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
if e := idx.TagValueElem([]byte("measurement0"), []byte("key0"), []byte("value0")); e == nil {
b.Fatal("expected element")
} else if e.(*tsi1.TagBlockValueElem).SeriesN() == 0 {
b.Fatal("expected series")
}
}
}
// CreateIndexFile creates an index file with a given set of series.
func CreateIndexFile(series []Series) (*tsi1.IndexFile, error) {
lf, err := CreateLogFile(series)
if err != nil {
return nil, err
}
// Write index file to buffer.
var buf bytes.Buffer
if _, err := lf.CompactTo(&buf, M, K); err != nil {
return nil, err
}
// Load index file from buffer.
var f tsi1.IndexFile
if err := f.UnmarshalBinary(buf.Bytes()); err != nil {
return nil, err
}
return &f, nil
}
// GenerateIndexFile generates an index file from a set of series based on the count arguments.
// Total series returned will equal measurementN * (valueN ^ tagN).
func GenerateIndexFile(measurementN, tagN, valueN int) (*tsi1.IndexFile, error) {
// Generate a new log file first.
lf, err := GenerateLogFile(measurementN, tagN, valueN)
if err != nil {
return nil, err
}
// Compact log file to buffer.
var buf bytes.Buffer
if _, err := lf.CompactTo(&buf, M, K); err != nil {
return nil, err
}
// Load index file from buffer.
var f tsi1.IndexFile
if err := f.UnmarshalBinary(buf.Bytes()); err != nil {
return nil, err
}
return &f, nil
}
func MustGenerateIndexFile(measurementN, tagN, valueN int) *tsi1.IndexFile {
f, err := GenerateIndexFile(measurementN, tagN, valueN)
if err != nil {
panic(err)
}
return f
}
var indexFileCache struct {
MeasurementN int
TagN int
ValueN int
IndexFile *tsi1.IndexFile
}
// MustFindOrGenerateIndexFile returns a cached index file or generates one if it doesn't exist.
func MustFindOrGenerateIndexFile(measurementN, tagN, valueN int) *tsi1.IndexFile {
// Use cache if fields match and the index file has been generated.
if indexFileCache.MeasurementN == measurementN &&
indexFileCache.TagN == tagN &&
indexFileCache.ValueN == valueN &&
indexFileCache.IndexFile != nil {
return indexFileCache.IndexFile
}
// Generate and cache.
indexFileCache.MeasurementN = measurementN
indexFileCache.TagN = tagN
indexFileCache.ValueN = valueN
indexFileCache.IndexFile = MustGenerateIndexFile(measurementN, tagN, valueN)
return indexFileCache.IndexFile
}
func pow(x, y int) int {
r := 1
for i := 0; i < y; i++ {
r *= x
}
return r
}

View File

@@ -0,0 +1,362 @@
package tsi1
import (
"bufio"
"fmt"
"io"
"os"
"sort"
"time"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/pkg/mmap"
)
// IndexFiles represents a layered set of index files.
type IndexFiles []*IndexFile
// IDs returns the ids for all index files.
func (p IndexFiles) IDs() []int {
a := make([]int, len(p))
for i, f := range p {
a[i] = f.ID()
}
return a
}
// Retain adds a reference count to all files.
func (p IndexFiles) Retain() {
for _, f := range p {
f.Retain()
}
}
// Release removes a reference count from all files.
func (p IndexFiles) Release() {
for _, f := range p {
f.Release()
}
}
// Files returns p as a list of File objects.
func (p IndexFiles) Files() []File {
other := make([]File, len(p))
for i, f := range p {
other[i] = f
}
return other
}
// MeasurementNames returns a sorted list of all measurement names for all files.
func (p *IndexFiles) MeasurementNames() [][]byte {
itr := p.MeasurementIterator()
var names [][]byte
for e := itr.Next(); e != nil; e = itr.Next() {
names = append(names, copyBytes(e.Name()))
}
sort.Sort(byteSlices(names))
return names
}
// MeasurementIterator returns an iterator that merges measurements across all files.
func (p IndexFiles) MeasurementIterator() MeasurementIterator {
a := make([]MeasurementIterator, 0, len(p))
for i := range p {
itr := p[i].MeasurementIterator()
if itr == nil {
continue
}
a = append(a, itr)
}
return MergeMeasurementIterators(a...)
}
// TagKeyIterator returns an iterator that merges tag keys across all files.
func (p *IndexFiles) TagKeyIterator(name []byte) (TagKeyIterator, error) {
a := make([]TagKeyIterator, 0, len(*p))
for _, f := range *p {
itr := f.TagKeyIterator(name)
if itr == nil {
continue
}
a = append(a, itr)
}
return MergeTagKeyIterators(a...), nil
}
// SeriesIterator returns an iterator that merges series across all files.
func (p IndexFiles) SeriesIterator() SeriesIterator {
a := make([]SeriesIterator, 0, len(p))
for _, f := range p {
itr := f.SeriesIterator()
if itr == nil {
continue
}
a = append(a, itr)
}
return MergeSeriesIterators(a...)
}
// MeasurementSeriesIterator returns an iterator that merges series across all files.
func (p IndexFiles) MeasurementSeriesIterator(name []byte) SeriesIterator {
a := make([]SeriesIterator, 0, len(p))
for _, f := range p {
itr := f.MeasurementSeriesIterator(name)
if itr == nil {
continue
}
a = append(a, itr)
}
return MergeSeriesIterators(a...)
}
// TagValueSeriesIterator returns an iterator that merges series across all files.
func (p IndexFiles) TagValueSeriesIterator(name, key, value []byte) SeriesIterator {
a := make([]SeriesIterator, 0, len(p))
for i := range p {
itr := p[i].TagValueSeriesIterator(name, key, value)
if itr != nil {
a = append(a, itr)
}
}
return MergeSeriesIterators(a...)
}
// CompactTo merges all index files and writes them to w.
func (p IndexFiles) CompactTo(w io.Writer, m, k uint64) (n int64, err error) {
var t IndexFileTrailer
// Wrap writer in buffered I/O.
bw := bufio.NewWriter(w)
// Set up a context object to track shared data for this compaction.
var info indexCompactInfo
info.tagSets = make(map[string]indexTagSetPos)
// Write magic number.
if err := writeTo(bw, []byte(FileSignature), &n); err != nil {
return n, err
}
// Write combined series list.
t.SeriesBlock.Offset = n
if err := p.writeSeriesBlockTo(bw, m, k, &info, &n); err != nil {
return n, err
}
t.SeriesBlock.Size = n - t.SeriesBlock.Offset
// Flush buffer before re-mapping.
if err := bw.Flush(); err != nil {
return n, err
}
// Open series block as memory-mapped data.
sblk, data, err := mapIndexFileSeriesBlock(w)
if data != nil {
defer mmap.Unmap(data)
}
if err != nil {
return n, err
}
info.sblk = sblk
// Write tagset blocks in measurement order.
if err := p.writeTagsetsTo(bw, &info, &n); err != nil {
return n, err
}
// Write measurement block.
t.MeasurementBlock.Offset = n
if err := p.writeMeasurementBlockTo(bw, &info, &n); err != nil {
return n, err
}
t.MeasurementBlock.Size = n - t.MeasurementBlock.Offset
// Write trailer.
nn, err := t.WriteTo(bw)
n += nn
if err != nil {
return n, err
}
// Flush file.
if err := bw.Flush(); err != nil {
return n, err
}
return n, nil
}
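// exampleCompact is an illustrative sketch (not part of the original API):
// retain the source files for the duration of the merge and write the
// compacted index to w; m and k are the bloom-filter parameters for the
// combined series block.
func exampleCompact(files IndexFiles, w io.Writer, m, k uint64) (int64, error) {
	files.Retain()
	defer files.Release()
	return files.CompactTo(w, m, k)
}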
func (p IndexFiles) writeSeriesBlockTo(w io.Writer, m, k uint64, info *indexCompactInfo, n *int64) error {
// Estimate series cardinality.
sketch := hll.NewDefaultPlus()
for _, f := range p {
if err := f.MergeSeriesSketches(sketch, sketch); err != nil {
return err
}
}
itr := p.SeriesIterator()
enc := NewSeriesBlockEncoder(w, uint32(sketch.Count()), m, k)
// Write all series.
for e := itr.Next(); e != nil; e = itr.Next() {
if err := enc.Encode(e.Name(), e.Tags(), e.Deleted()); err != nil {
return err
}
}
// Close and flush block.
err := enc.Close()
*n += int64(enc.N())
if err != nil {
return err
}
return nil
}
func (p IndexFiles) writeTagsetsTo(w io.Writer, info *indexCompactInfo, n *int64) error {
mitr := p.MeasurementIterator()
for m := mitr.Next(); m != nil; m = mitr.Next() {
if err := p.writeTagsetTo(w, m.Name(), info, n); err != nil {
return err
}
}
return nil
}
// writeTagsetTo writes a single tagset to w and saves the tagset offset.
func (p IndexFiles) writeTagsetTo(w io.Writer, name []byte, info *indexCompactInfo, n *int64) error {
var seriesKey []byte
kitr, err := p.TagKeyIterator(name)
if err != nil {
return err
}
enc := NewTagBlockEncoder(w)
for ke := kitr.Next(); ke != nil; ke = kitr.Next() {
// Encode key.
if err := enc.EncodeKey(ke.Key(), ke.Deleted()); err != nil {
return err
}
// Iterate over tag values.
vitr := ke.TagValueIterator()
for ve := vitr.Next(); ve != nil; ve = vitr.Next() {
// Merge all series together.
sitr := p.TagValueSeriesIterator(name, ke.Key(), ve.Value())
var seriesIDs []uint32
for se := sitr.Next(); se != nil; se = sitr.Next() {
seriesID, _ := info.sblk.Offset(se.Name(), se.Tags(), seriesKey[:0])
if seriesID == 0 {
return fmt.Errorf("expected series id: %s/%s", se.Name(), se.Tags().String())
}
seriesIDs = append(seriesIDs, seriesID)
}
sort.Sort(uint32Slice(seriesIDs))
// Encode value.
if err := enc.EncodeValue(ve.Value(), ve.Deleted(), seriesIDs); err != nil {
return err
}
}
}
// Save tagset offset to measurement.
pos := info.tagSets[string(name)]
pos.offset = *n
// Flush data to writer.
err = enc.Close()
*n += enc.N()
if err != nil {
return err
}
// Save tagset size to measurement.
pos.size = *n - pos.offset
info.tagSets[string(name)] = pos
return nil
}
func (p IndexFiles) writeMeasurementBlockTo(w io.Writer, info *indexCompactInfo, n *int64) error {
var seriesKey []byte
mw := NewMeasurementBlockWriter()
// Add measurement data & compute sketches.
mitr := p.MeasurementIterator()
for m := mitr.Next(); m != nil; m = mitr.Next() {
name := m.Name()
// Look-up series ids.
itr := p.MeasurementSeriesIterator(name)
var seriesIDs []uint32
for e := itr.Next(); e != nil; e = itr.Next() {
seriesID, _ := info.sblk.Offset(e.Name(), e.Tags(), seriesKey[:0])
if seriesID == 0 {
panic(fmt.Sprintf("expected series id: %s %s", e.Name(), e.Tags().String()))
}
seriesIDs = append(seriesIDs, seriesID)
}
sort.Sort(uint32Slice(seriesIDs))
// Add measurement to writer.
pos := info.tagSets[string(name)]
mw.Add(name, m.Deleted(), pos.offset, pos.size, seriesIDs)
}
// Flush data to writer.
nn, err := mw.WriteTo(w)
*n += nn
return err
}
// Stat returns the max index file size and the total file size for all index files.
func (p IndexFiles) Stat() (*IndexFilesInfo, error) {
var info IndexFilesInfo
for _, f := range p {
fi, err := os.Stat(f.Path())
if os.IsNotExist(err) {
continue
} else if err != nil {
return nil, err
}
if fi.Size() > info.MaxSize {
info.MaxSize = fi.Size()
}
if fi.ModTime().After(info.ModTime) {
info.ModTime = fi.ModTime()
}
info.Size += fi.Size()
}
return &info, nil
}
type IndexFilesInfo struct {
MaxSize int64 // largest file size
Size int64 // total file size
ModTime time.Time // last modified
}
// indexCompactInfo is a context object used for tracking position information
// during the compaction of index files.
type indexCompactInfo struct {
// Memory-mapped series block.
// Available after the series block has been written.
sblk *SeriesBlock
// Tracks offset/size for each measurement's tagset.
tagSets map[string]indexTagSetPos
}
// indexTagSetPos stores the offset/size of tagsets.
type indexTagSetPos struct {
offset int64
size int64
}

View File

@@ -0,0 +1,53 @@
package tsi1_test
import (
"bytes"
"testing"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure multiple index files can be compacted together.
func TestIndexFiles_WriteTo(t *testing.T) {
// Write first file.
f0, err := CreateIndexFile([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
})
if err != nil {
t.Fatal(err)
}
// Write second file.
f1, err := CreateIndexFile([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "east"})},
})
if err != nil {
t.Fatal(err)
}
// Compact the two together and write out to a buffer.
var buf bytes.Buffer
a := tsi1.IndexFiles{f0, f1}
if n, err := a.CompactTo(&buf, M, K); err != nil {
t.Fatal(err)
} else if n == 0 {
t.Fatal("expected data written")
}
// Unmarshal buffer into a new index file.
var f tsi1.IndexFile
if err := f.UnmarshalBinary(buf.Bytes()); err != nil {
t.Fatal(err)
}
// Verify data in compacted file.
if e := f.TagValueElem([]byte("cpu"), []byte("region"), []byte("west")); e == nil {
t.Fatal("expected element")
} else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 {
t.Fatalf("unexpected series count: %d", n)
}
}

View File

@@ -0,0 +1,329 @@
package tsi1_test
import (
"fmt"
"os"
"reflect"
"regexp"
"testing"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Bloom filter settings used in tests.
const M, K = 4096, 6
// Ensure index can iterate over all measurement names.
func TestIndex_ForEachMeasurementName(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Add series to index.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
}); err != nil {
t.Fatal(err)
}
// Verify measurements are returned.
idx.Run(t, func(t *testing.T) {
var names []string
if err := idx.ForEachMeasurementName(func(name []byte) error {
names = append(names, string(name))
return nil
}); err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(names, []string{"cpu", "mem"}) {
t.Fatalf("unexpected names: %#v", names)
}
})
// Add more series.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("disk")},
{Name: []byte("mem")},
}); err != nil {
t.Fatal(err)
}
// Verify new measurements.
idx.Run(t, func(t *testing.T) {
var names []string
if err := idx.ForEachMeasurementName(func(name []byte) error {
names = append(names, string(name))
return nil
}); err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(names, []string{"cpu", "disk", "mem"}) {
t.Fatalf("unexpected names: %#v", names)
}
})
}
// Ensure index can return whether a measurement exists.
func TestIndex_MeasurementExists(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Add series to index.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
}); err != nil {
t.Fatal(err)
}
// Verify measurement exists.
idx.Run(t, func(t *testing.T) {
if v, err := idx.MeasurementExists([]byte("cpu")); err != nil {
t.Fatal(err)
} else if !v {
t.Fatal("expected measurement to exist")
}
})
// Delete one series.
if err := idx.DropSeries(models.MakeKey([]byte("cpu"), models.NewTags(map[string]string{"region": "east"}))); err != nil {
t.Fatal(err)
}
// Verify measurement still exists.
idx.Run(t, func(t *testing.T) {
if v, err := idx.MeasurementExists([]byte("cpu")); err != nil {
t.Fatal(err)
} else if !v {
t.Fatal("expected measurement to still exist")
}
})
// Delete second series.
if err := idx.DropSeries(models.MakeKey([]byte("cpu"), models.NewTags(map[string]string{"region": "west"}))); err != nil {
t.Fatal(err)
}
// Verify measurement is now deleted.
idx.Run(t, func(t *testing.T) {
if v, err := idx.MeasurementExists([]byte("cpu")); err != nil {
t.Fatal(err)
} else if v {
t.Fatal("expected measurement to be deleted")
}
})
}
// Ensure index can return a list of matching measurements.
func TestIndex_MeasurementNamesByExpr(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Add series to index.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "north"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "west", "country": "us"})},
}); err != nil {
t.Fatal(err)
}
// Retrieve measurements by expression
idx.Run(t, func(t *testing.T) {
t.Run("EQ", func(t *testing.T) {
names, err := idx.MeasurementNamesByExpr(influxql.MustParseExpr(`region = 'west'`))
if err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(names, [][]byte{[]byte("cpu"), []byte("mem")}) {
t.Fatalf("unexpected names: %v", names)
}
})
t.Run("NEQ", func(t *testing.T) {
names, err := idx.MeasurementNamesByExpr(influxql.MustParseExpr(`region != 'east'`))
if err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(names, [][]byte{[]byte("disk"), []byte("mem")}) {
t.Fatalf("unexpected names: %v", names)
}
})
t.Run("EQREGEX", func(t *testing.T) {
names, err := idx.MeasurementNamesByExpr(influxql.MustParseExpr(`region =~ /east|west/`))
if err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(names, [][]byte{[]byte("cpu"), []byte("mem")}) {
t.Fatalf("unexpected names: %v", names)
}
})
t.Run("NEQREGEX", func(t *testing.T) {
names, err := idx.MeasurementNamesByExpr(influxql.MustParseExpr(`country !~ /^u/`))
if err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(names, [][]byte{[]byte("cpu"), []byte("disk")}) {
t.Fatalf("unexpected names: %v", names)
}
})
})
}
// Ensure index can return a list of matching measurements.
func TestIndex_MeasurementNamesByRegex(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Add series to index.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu")},
{Name: []byte("disk")},
{Name: []byte("mem")},
}); err != nil {
t.Fatal(err)
}
// Retrieve measurements by regex.
idx.Run(t, func(t *testing.T) {
names, err := idx.MeasurementNamesByRegex(regexp.MustCompile(`cpu|mem`))
if err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(names, [][]byte{[]byte("cpu"), []byte("mem")}) {
t.Fatalf("unexpected names: %v", names)
}
})
}
// Ensure index can delete a measurement and all related keys, values, & series.
func TestIndex_DropMeasurement(t *testing.T) {
idx := MustOpenIndex()
defer idx.Close()
// Add series to index.
if err := idx.CreateSeriesSliceIfNotExists([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "north"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "west", "country": "us"})},
}); err != nil {
t.Fatal(err)
}
// Drop measurement.
if err := idx.DropMeasurement([]byte("cpu")); err != nil {
t.Fatal(err)
}
// Verify data is gone in each stage.
idx.Run(t, func(t *testing.T) {
// Verify measurement is gone.
if v, err := idx.MeasurementExists([]byte("cpu")); err != nil {
t.Fatal(err)
} else if v {
t.Fatal("expected no measurement")
}
// Obtain file set to perform lower level checks.
fs := idx.RetainFileSet()
defer fs.Release()
// Verify tags & values are gone.
if e := fs.TagKeyIterator([]byte("cpu")).Next(); e != nil && !e.Deleted() {
t.Fatal("expected deleted tag key")
}
if itr := fs.TagValueIterator([]byte("cpu"), []byte("region")); itr != nil {
t.Fatal("expected nil tag value iterator")
}
})
}
// Index is a test wrapper for tsi1.Index.
type Index struct {
*tsi1.Index
}
// NewIndex returns a new instance of Index at a temporary path.
func NewIndex() *Index {
idx := &Index{Index: tsi1.NewIndex()}
idx.Path = MustTempDir()
return idx
}
// MustOpenIndex returns a new, open index. Panic on error.
func MustOpenIndex() *Index {
idx := NewIndex()
if err := idx.Open(); err != nil {
panic(err)
}
return idx
}
// Close closes and removes the index directory.
func (idx *Index) Close() error {
defer os.RemoveAll(idx.Path)
return idx.Index.Close()
}
// Reopen closes and opens the index.
func (idx *Index) Reopen() error {
if err := idx.Index.Close(); err != nil {
return err
}
path := idx.Path
idx.Index = tsi1.NewIndex()
idx.Path = path
if err := idx.Open(); err != nil {
return err
}
return nil
}
// Run executes a subtest for each of several different states:
//
// - Immediately
// - After reopen
// - After compaction
// - After reopen again
//
// The index should always respond in the same fashion regardless of
// how data is stored. This helper allows the index to be easily tested
// in all major states.
func (idx *Index) Run(t *testing.T, fn func(t *testing.T)) {
// Invoke immediately.
t.Run("state=initial", fn)
// Reopen and invoke again.
if err := idx.Reopen(); err != nil {
t.Fatalf("reopen error: %s", err)
}
t.Run("state=reopen", fn)
// TODO: Request a compaction.
// if err := idx.Compact(); err != nil {
// t.Fatalf("compact error: %s", err)
// }
// t.Run("state=post-compaction", fn)
// Reopen and invoke again.
if err := idx.Reopen(); err != nil {
t.Fatalf("post-compaction reopen error: %s", err)
}
t.Run("state=post-compaction-reopen", fn)
}
// CreateSeriesSliceIfNotExists creates multiple series at a time.
func (idx *Index) CreateSeriesSliceIfNotExists(a []Series) error {
for i, s := range a {
if err := idx.CreateSeriesIfNotExists(nil, s.Name, s.Tags); err != nil {
return fmt.Errorf("i=%d, name=%s, tags=%v, err=%s", i, s.Name, s.Tags, err)
}
}
return nil
}

File diff suppressed because it is too large


@@ -0,0 +1,339 @@
package tsi1_test
import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"regexp"
"runtime/pprof"
"sort"
"testing"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure log file can append series.
func TestLogFile_AddSeries(t *testing.T) {
f := MustOpenLogFile()
defer f.Close()
// Add test data.
if err := f.AddSeries([]byte("mem"), models.Tags{{Key: []byte("host"), Value: []byte("serverA")}}); err != nil {
t.Fatal(err)
} else if err := f.AddSeries([]byte("cpu"), models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}); err != nil {
t.Fatal(err)
} else if err := f.AddSeries([]byte("cpu"), models.Tags{{Key: []byte("region"), Value: []byte("us-west")}}); err != nil {
t.Fatal(err)
}
// Verify data.
itr := f.MeasurementIterator()
if e := itr.Next(); e == nil || string(e.Name()) != "cpu" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e == nil || string(e.Name()) != "mem" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
// Reopen file and re-verify.
if err := f.Reopen(); err != nil {
t.Fatal(err)
}
// Verify data.
itr = f.MeasurementIterator()
if e := itr.Next(); e == nil || string(e.Name()) != "cpu" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e == nil || string(e.Name()) != "mem" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
}
func TestLogFile_SeriesStoredInOrder(t *testing.T) {
f := MustOpenLogFile()
defer f.Close()
// Generate and add test data
tvm := make(map[string]struct{})
rand.Seed(time.Now().Unix())
for i := 0; i < 100; i++ {
tv := fmt.Sprintf("server-%d", rand.Intn(50)) // Encourage adding duplicate series.
tvm[tv] = struct{}{}
if err := f.AddSeries([]byte("mem"), models.Tags{models.NewTag([]byte("host"), []byte(tv))}); err != nil {
t.Fatal(err)
}
if err := f.AddSeries([]byte("cpu"), models.Tags{models.NewTag([]byte("host"), []byte(tv))}); err != nil {
t.Fatal(err)
}
}
// Sort the tag values so we know what order to expect.
tvs := make([]string, 0, len(tvm))
for tv := range tvm {
tvs = append(tvs, tv)
}
sort.Strings(tvs)
// Double the series values since we're adding them twice (two measurements)
tvs = append(tvs, tvs...)
// When we pull the series out via an iterator they should be in order.
itr := f.SeriesIterator()
if itr == nil {
t.Fatal("nil iterator")
}
mname := []string{"cpu", "mem"}
var j int
for i := 0; i < len(tvs); i++ {
serie := itr.Next()
if serie == nil {
t.Fatal("got nil series")
}
if got, exp := string(serie.Name()), mname[j]; got != exp {
t.Fatalf("[series %d] got %s, expected %s", i, got, exp)
}
if got, exp := string(serie.Tags()[0].Value), tvs[i]; got != exp {
t.Fatalf("[series %d] got %s, expected %s", i, got, exp)
}
if i == (len(tvs)/2)-1 {
// Next measurement
j++
}
}
}
// Ensure log file can delete an existing measurement.
func TestLogFile_DeleteMeasurement(t *testing.T) {
f := MustOpenLogFile()
defer f.Close()
// Add test data.
if err := f.AddSeries([]byte("mem"), models.Tags{{Key: []byte("host"), Value: []byte("serverA")}}); err != nil {
t.Fatal(err)
} else if err := f.AddSeries([]byte("cpu"), models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}); err != nil {
t.Fatal(err)
} else if err := f.AddSeries([]byte("cpu"), models.Tags{{Key: []byte("region"), Value: []byte("us-west")}}); err != nil {
t.Fatal(err)
}
// Remove measurement.
if err := f.DeleteMeasurement([]byte("cpu")); err != nil {
t.Fatal(err)
}
// Verify data.
itr := f.MeasurementIterator()
if e := itr.Next(); string(e.Name()) != "cpu" || !e.Deleted() {
t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); string(e.Name()) != "mem" || e.Deleted() {
t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
}
// LogFile is a test wrapper for tsi1.LogFile.
type LogFile struct {
*tsi1.LogFile
}
// NewLogFile returns a new instance of LogFile with a temporary file path.
func NewLogFile() *LogFile {
file, err := ioutil.TempFile("", "tsi1-log-file-")
if err != nil {
panic(err)
}
file.Close()
return &LogFile{LogFile: tsi1.NewLogFile(file.Name())}
}
// MustOpenLogFile returns a new, open instance of LogFile. Panic on error.
func MustOpenLogFile() *LogFile {
f := NewLogFile()
if err := f.Open(); err != nil {
panic(err)
}
return f
}
// Close closes the log file and removes it from disk.
func (f *LogFile) Close() error {
defer os.Remove(f.Path())
return f.LogFile.Close()
}
// Reopen closes and reopens the file.
func (f *LogFile) Reopen() error {
if err := f.LogFile.Close(); err != nil {
return err
}
if err := f.LogFile.Open(); err != nil {
return err
}
return nil
}
// CreateLogFile creates a new temporary log file and adds a list of series.
func CreateLogFile(series []Series) (*LogFile, error) {
f := MustOpenLogFile()
for _, serie := range series {
if err := f.AddSeries(serie.Name, serie.Tags); err != nil {
return nil, err
}
}
return f, nil
}
// GenerateLogFile generates a log file from a set of series based on the count arguments.
// Total series returned will equal measurementN * valueN^tagN.
func GenerateLogFile(measurementN, tagN, valueN int) (*LogFile, error) {
tagValueN := pow(valueN, tagN)
f := MustOpenLogFile()
for i := 0; i < measurementN; i++ {
name := []byte(fmt.Sprintf("measurement%d", i))
// Generate tag sets.
for j := 0; j < tagValueN; j++ {
var tags models.Tags
for k := 0; k < tagN; k++ {
key := []byte(fmt.Sprintf("key%d", k))
value := []byte(fmt.Sprintf("value%d", (j / pow(valueN, k) % valueN)))
tags = append(tags, models.NewTag(key, value))
}
if err := f.AddSeries(name, tags); err != nil {
return nil, err
}
}
}
return f, nil
}
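// GenerateLogFile enumerates every tag combination in mixed-radix order:
// for tag key k, the value index is (j / valueN^k) % valueN. It assumes a
// small integer power helper, pow, defined elsewhere in this package; a
// minimal sketch compatible with how it is called (non-negative exponents
// only) would be:
//
//	func pow(x, y int) int {
//		r := 1
//		for i := 0; i < y; i++ {
//			r *= x
//		}
//		return r
//	}
//
// With that, GenerateLogFile(100, 3, 7) produces 100 * 7^3 = 34,300
// unique series.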
func MustGenerateLogFile(measurementN, tagN, valueN int) *LogFile {
f, err := GenerateLogFile(measurementN, tagN, valueN)
if err != nil {
panic(err)
}
return f
}
func benchmarkLogFile_AddSeries(b *testing.B, measurementN, seriesKeyN, seriesValueN int) {
b.StopTimer()
f := MustOpenLogFile()
type Datum struct {
Name []byte
Tags models.Tags
}
// Pre-generate everything.
var (
data []Datum
series int
)
tagValueN := pow(seriesValueN, seriesKeyN)
for i := 0; i < measurementN; i++ {
name := []byte(fmt.Sprintf("measurement%d", i))
for j := 0; j < tagValueN; j++ {
var tags models.Tags
for k := 0; k < seriesKeyN; k++ {
key := []byte(fmt.Sprintf("key%d", k))
value := []byte(fmt.Sprintf("value%d", (j / pow(seriesValueN, k) % seriesValueN)))
tags = append(tags, models.NewTag(key, value))
}
data = append(data, Datum{Name: name, Tags: tags})
series += len(tags)
}
}
b.StartTimer()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, d := range data {
if err := f.AddSeries(d.Name, d.Tags); err != nil {
b.Fatal(err)
}
}
}
}
func BenchmarkLogFile_AddSeries_100_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 1, 1) } // 100 series
func BenchmarkLogFile_AddSeries_1000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 1000, 1, 1) } // 1000 series
func BenchmarkLogFile_AddSeries_10000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 10000, 1, 1) } // 10000 series
func BenchmarkLogFile_AddSeries_100_2_10(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 2, 10) } // ~20K series
func BenchmarkLogFile_AddSeries_100000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100000, 1, 1) } // ~100K series
func BenchmarkLogFile_AddSeries_100_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 3, 7) } // ~100K series
func BenchmarkLogFile_AddSeries_200_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 3, 7) } // ~200K series
func BenchmarkLogFile_AddSeries_200_4_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 4, 7) } // ~1.9M series
func BenchmarkLogFile_WriteTo(b *testing.B) {
for _, seriesN := range []int{1000, 10000, 100000, 1000000} {
name := fmt.Sprintf("series=%d", seriesN)
b.Run(name, func(b *testing.B) {
f := MustOpenLogFile()
defer f.Close()
// Estimate bloom filter size.
m, k := bloom.Estimate(uint64(seriesN), 0.02)
// Initialize log file with series data.
for i := 0; i < seriesN; i++ {
if err := f.AddSeries(
[]byte("cpu"),
models.Tags{
{Key: []byte("host"), Value: []byte(fmt.Sprintf("server-%d", i))},
{Key: []byte("location"), Value: []byte("us-west")},
},
); err != nil {
b.Fatal(err)
}
}
b.ResetTimer()
// Create cpu profile for each subtest.
MustStartCPUProfile(name)
defer pprof.StopCPUProfile()
// Compact log file.
for i := 0; i < b.N; i++ {
buf := bytes.NewBuffer(make([]byte, 0, 150*seriesN))
if _, err := f.CompactTo(buf, m, k); err != nil {
b.Fatal(err)
}
b.Logf("sz=%db", buf.Len())
}
})
}
}
// MustStartCPUProfile starts a cpu profile in a temporary path based on name.
func MustStartCPUProfile(name string) {
name = regexp.MustCompile(`\W+`).ReplaceAllString(name, "-")
// Open file and start pprof.
f, err := os.Create(filepath.Join("/tmp", fmt.Sprintf("cpu-%s.pprof", name)))
if err != nil {
panic(err)
}
if err := pprof.StartCPUProfile(f); err != nil {
panic(err)
}
}


@@ -0,0 +1,600 @@
package tsi1
import (
"bytes"
"encoding/binary"
"errors"
"io"
"sort"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/pkg/rhh"
)
// MeasurementBlockVersion is the version of the measurement block.
const MeasurementBlockVersion = 1
// Measurement flag constants.
const (
MeasurementTombstoneFlag = 0x01
)
// Measurement field size constants.
const (
// 1 byte offset for the block to ensure non-zero offsets.
MeasurementFillSize = 1
// Measurement trailer fields
MeasurementTrailerSize = 0 +
2 + // version
8 + 8 + // data offset/size
8 + 8 + // hash index offset/size
8 + 8 + // measurement sketch offset/size
8 + 8 // tombstone measurement sketch offset/size
// Measurement key block fields.
MeasurementNSize = 8
MeasurementOffsetSize = 8
)
// Measurement errors.
var (
ErrUnsupportedMeasurementBlockVersion = errors.New("unsupported measurement block version")
ErrMeasurementBlockSizeMismatch = errors.New("measurement block size mismatch")
)
// MeasurementBlock represents a collection of all measurements in an index.
type MeasurementBlock struct {
data []byte
hashData []byte
// Series block sketch and tombstone sketch for cardinality estimation.
// While we have exact counts for the block, these sketches allow us to
// estimate cardinality across multiple blocks (which might contain
// duplicate series).
sketch, tSketch estimator.Sketch
version int // block version
}
// Version returns the encoding version parsed from the data.
// Only valid after UnmarshalBinary() has been successfully invoked.
func (blk *MeasurementBlock) Version() int { return blk.version }
// Elem returns an element for a measurement.
func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool) {
n := int64(binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize]))
hash := rhh.HashKey(name)
pos := hash % n
// Track current distance
var d int64
for {
// Find offset of measurement.
offset := binary.BigEndian.Uint64(blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):])
if offset == 0 {
return MeasurementBlockElem{}, false
}
// Evaluate name if offset is not empty.
if offset > 0 {
// Parse into element.
var e MeasurementBlockElem
e.UnmarshalBinary(blk.data[offset:])
// Return if name match.
if bytes.Equal(e.name, name) {
return e, true
}
// Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.name), pos, n) {
return MeasurementBlockElem{}, false
}
}
// Move position forward.
pos = (pos + 1) % n
d++
if d > n {
return MeasurementBlockElem{}, false
}
}
}
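// The probe loop above relies on the Robin Hood hashing invariant: each
// stored key sits at most rhh.Dist(rhh.HashKey(key), pos, n) slots past
// its ideal position, and that distance never shrinks along a probe
// chain. Once our own probe distance d exceeds the resident element's
// distance, the key cannot appear later in the chain, so the lookup can
// return early instead of scanning to the next empty slot.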
// UnmarshalBinary unpacks data into the block. Block is not copied so data
// should be retained and unchanged after being passed into this function.
func (blk *MeasurementBlock) UnmarshalBinary(data []byte) error {
// Read trailer.
t, err := ReadMeasurementBlockTrailer(data)
if err != nil {
return err
}
// Save data section.
blk.data = data[t.Data.Offset:]
blk.data = blk.data[:t.Data.Size]
// Save hash index block.
blk.hashData = data[t.HashIndex.Offset:]
blk.hashData = blk.hashData[:t.HashIndex.Size]
// Initialise sketches. We're currently using HLL+.
var s, ts = hll.NewDefaultPlus(), hll.NewDefaultPlus()
if err := s.UnmarshalBinary(data[t.Sketch.Offset:][:t.Sketch.Size]); err != nil {
return err
}
blk.sketch = s
if err := ts.UnmarshalBinary(data[t.TSketch.Offset:][:t.TSketch.Size]); err != nil {
return err
}
blk.tSketch = ts
return nil
}
// Iterator returns an iterator over all measurements.
func (blk *MeasurementBlock) Iterator() MeasurementIterator {
return &blockMeasurementIterator{data: blk.data[MeasurementFillSize:]}
}
// seriesIDIterator returns an iterator for all series ids in a measurement.
func (blk *MeasurementBlock) seriesIDIterator(name []byte) seriesIDIterator {
// Find measurement element.
e, ok := blk.Elem(name)
if !ok {
return &rawSeriesIDIterator{}
}
return &rawSeriesIDIterator{n: e.series.n, data: e.series.data}
}
// blockMeasurementIterator iterates over a list of measurements in a block.
type blockMeasurementIterator struct {
elem MeasurementBlockElem
data []byte
}
// Next returns the next measurement. Returns nil when iterator is complete.
func (itr *blockMeasurementIterator) Next() MeasurementElem {
// Return nil when we run out of data.
if len(itr.data) == 0 {
return nil
}
// Unmarshal the element at the current position.
itr.elem.UnmarshalBinary(itr.data)
// Move the data forward past the record.
itr.data = itr.data[itr.elem.size:]
return &itr.elem
}
// rawSeriesIDIterator iterates over a list of raw, delta-encoded series IDs.
type rawSeriesIDIterator struct {
prev uint32
n uint32
data []byte
}
// next returns the next decoded series ID, or zero when the iterator is exhausted.
func (itr *rawSeriesIDIterator) next() uint32 {
if len(itr.data) == 0 {
return 0
}
delta, n := binary.Uvarint(itr.data)
itr.data = itr.data[n:]
seriesID := itr.prev + uint32(delta)
itr.prev = seriesID
return seriesID
}
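// Series IDs in a measurement element are stored as uvarint deltas from
// the previous ID, so the sorted IDs {5, 9, 12} are encoded as the
// deltas {5, 4, 3}. A hedged decoding sketch using only encoding/binary:
//
//	var prev uint32
//	for len(data) > 0 {
//		delta, n := binary.Uvarint(data)
//		data = data[n:]
//		prev += uint32(delta) // yields 5, then 9, then 12
//	}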
// MeasurementBlockTrailer represents metadata at the end of a MeasurementBlock.
type MeasurementBlockTrailer struct {
Version int // Encoding version
// Offset & size of data section.
Data struct {
Offset int64
Size int64
}
// Offset & size of hash map section.
HashIndex struct {
Offset int64
Size int64
}
// Offset and size of cardinality sketch for measurements.
Sketch struct {
Offset int64
Size int64
}
// Offset and size of cardinality sketch for tombstoned measurements.
TSketch struct {
Offset int64
Size int64
}
}
// ReadMeasurementBlockTrailer returns the block trailer from data.
func ReadMeasurementBlockTrailer(data []byte) (MeasurementBlockTrailer, error) {
var t MeasurementBlockTrailer
// Read version (which is located in the last two bytes of the trailer).
t.Version = int(binary.BigEndian.Uint16(data[len(data)-2:]))
if t.Version != MeasurementBlockVersion {
return t, ErrUnsupportedMeasurementBlockVersion
}
// Slice trailer data.
buf := data[len(data)-MeasurementTrailerSize:]
// Read data section info.
t.Data.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.Data.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read measurement block info.
t.HashIndex.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.HashIndex.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read measurement sketch info.
t.Sketch.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.Sketch.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read tombstone measurement sketch info.
t.TSketch.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.TSketch.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
return t, nil
}
// WriteTo writes the trailer to w.
func (t *MeasurementBlockTrailer) WriteTo(w io.Writer) (n int64, err error) {
// Write data section info.
if err := writeUint64To(w, uint64(t.Data.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.Data.Size), &n); err != nil {
return n, err
}
// Write hash index section info.
if err := writeUint64To(w, uint64(t.HashIndex.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.HashIndex.Size), &n); err != nil {
return n, err
}
// Write measurement sketch info.
if err := writeUint64To(w, uint64(t.Sketch.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.Sketch.Size), &n); err != nil {
return n, err
}
// Write tombstone measurement sketch info.
if err := writeUint64To(w, uint64(t.TSketch.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.TSketch.Size), &n); err != nil {
return n, err
}
// Write measurement block version.
if err := writeUint16To(w, MeasurementBlockVersion, &n); err != nil {
return n, err
}
return n, nil
}
// MeasurementBlockElem represents an internal measurement element.
type MeasurementBlockElem struct {
flag byte // flag
name []byte // measurement name
tagBlock struct {
offset int64
size int64
}
series struct {
n uint32 // series count
data []byte // serialized series data
}
// size in bytes, set after unmarshaling.
size int
}
// Name returns the measurement name.
func (e *MeasurementBlockElem) Name() []byte { return e.name }
// Deleted returns true if the tombstone flag is set.
func (e *MeasurementBlockElem) Deleted() bool {
return (e.flag & MeasurementTombstoneFlag) != 0
}
// TagBlockOffset returns the offset of the measurement's tag block.
func (e *MeasurementBlockElem) TagBlockOffset() int64 { return e.tagBlock.offset }
// TagBlockSize returns the size of the measurement's tag block.
func (e *MeasurementBlockElem) TagBlockSize() int64 { return e.tagBlock.size }
// SeriesData returns the raw series data.
func (e *MeasurementBlockElem) SeriesData() []byte { return e.series.data }
// SeriesN returns the number of series associated with the measurement.
func (e *MeasurementBlockElem) SeriesN() uint32 { return e.series.n }
// SeriesID returns the series ID at index i by summing the uvarint
// deltas up to and including position i.
func (e *MeasurementBlockElem) SeriesID(i int) uint32 {
var id uint32
for data := e.series.data; i >= 0; i-- {
delta, n := binary.Uvarint(data)
id, data = id+uint32(delta), data[n:]
}
return id
}
// SeriesIDs returns a list of decoded series ids.
//
// NOTE: This should be used for testing and diagnostics purposes only.
// It requires loading the entire list of series in-memory.
func (e *MeasurementBlockElem) SeriesIDs() []uint32 {
a := make([]uint32, 0, e.series.n)
var prev uint32
for data := e.series.data; len(data) > 0; {
delta, n := binary.Uvarint(data)
data = data[n:]
seriesID := prev + uint32(delta)
a = append(a, seriesID)
prev = seriesID
}
return a
}
// Size returns the size of the element.
func (e *MeasurementBlockElem) Size() int { return e.size }
// UnmarshalBinary unmarshals data into e.
func (e *MeasurementBlockElem) UnmarshalBinary(data []byte) error {
start := len(data)
// Parse flag data.
e.flag, data = data[0], data[1:]
// Parse tag block offset.
e.tagBlock.offset, data = int64(binary.BigEndian.Uint64(data)), data[8:]
e.tagBlock.size, data = int64(binary.BigEndian.Uint64(data)), data[8:]
// Parse name.
sz, n := binary.Uvarint(data)
e.name, data = data[n:n+int(sz)], data[n+int(sz):]
// Parse series data.
v, n := binary.Uvarint(data)
e.series.n, data = uint32(v), data[n:]
sz, n = binary.Uvarint(data)
data = data[n:]
e.series.data, data = data[:sz], data[sz:]
// Save length of elem.
e.size = start - len(data)
return nil
}
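// For reference, the element layout decoded above is, in order:
//
//	1 byte   flag (tombstone bit)
//	8 bytes  tag block offset
//	8 bytes  tag block size
//	uvarint  name length, then the name bytes
//	uvarint  series count
//	uvarint  series data length, then the uvarint delta-encoded IDs
//
// The hash index at the end of the block stores byte offsets to these
// elements, which is why offsets must never be zero.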
// MeasurementBlockWriter writes a measurement block.
type MeasurementBlockWriter struct {
buf bytes.Buffer
mms map[string]measurement
// Measurement sketch and tombstoned measurement sketch.
sketch, tSketch estimator.Sketch
}
// NewMeasurementBlockWriter returns a new MeasurementBlockWriter.
func NewMeasurementBlockWriter() *MeasurementBlockWriter {
return &MeasurementBlockWriter{
mms: make(map[string]measurement),
sketch: hll.NewDefaultPlus(),
tSketch: hll.NewDefaultPlus(),
}
}
// Add adds a measurement with series and tag set offset/size.
func (mw *MeasurementBlockWriter) Add(name []byte, deleted bool, offset, size int64, seriesIDs []uint32) {
mm := mw.mms[string(name)]
mm.deleted = deleted
mm.tagBlock.offset = offset
mm.tagBlock.size = size
mm.seriesIDs = seriesIDs
mw.mms[string(name)] = mm
if deleted {
mw.tSketch.Add(name)
} else {
mw.sketch.Add(name)
}
}
// WriteTo encodes the measurements to w.
func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) {
var t MeasurementBlockTrailer
// The sketches must be set before calling WriteTo.
if mw.sketch == nil {
return 0, errors.New("measurement sketch not set")
} else if mw.tSketch == nil {
return 0, errors.New("measurement tombstone sketch not set")
}
// Sort names.
names := make([]string, 0, len(mw.mms))
for name := range mw.mms {
names = append(names, name)
}
sort.Strings(names)
// Begin data section.
t.Data.Offset = n
// Write padding byte so no offsets are zero.
if err := writeUint8To(w, 0, &n); err != nil {
return n, err
}
// Encode key list.
for _, name := range names {
// Retrieve measurement and save offset.
mm := mw.mms[name]
mm.offset = n
mw.mms[name] = mm
// Write measurement
if err := mw.writeMeasurementTo(w, []byte(name), &mm, &n); err != nil {
return n, err
}
}
t.Data.Size = n - t.Data.Offset
// Build key hash map
m := rhh.NewHashMap(rhh.Options{
Capacity: int64(len(names)),
LoadFactor: LoadFactor,
})
for name := range mw.mms {
mm := mw.mms[name]
m.Put([]byte(name), &mm)
}
t.HashIndex.Offset = n
// Encode hash map length.
if err := writeUint64To(w, uint64(m.Cap()), &n); err != nil {
return n, err
}
// Encode hash map offset entries.
for i := int64(0); i < m.Cap(); i++ {
_, v := m.Elem(i)
var offset int64
if mm, ok := v.(*measurement); ok {
offset = mm.offset
}
if err := writeUint64To(w, uint64(offset), &n); err != nil {
return n, err
}
}
t.HashIndex.Size = n - t.HashIndex.Offset
// Write the sketches out.
t.Sketch.Offset = n
if err := writeSketchTo(w, mw.sketch, &n); err != nil {
return n, err
}
t.Sketch.Size = n - t.Sketch.Offset
t.TSketch.Offset = n
if err := writeSketchTo(w, mw.tSketch, &n); err != nil {
return n, err
}
t.TSketch.Size = n - t.TSketch.Offset
// Write trailer.
nn, err := t.WriteTo(w)
n += nn
if err != nil {
return n, err
}
return n, nil
}
// writeMeasurementTo encodes a single measurement entry into w.
func (mw *MeasurementBlockWriter) writeMeasurementTo(w io.Writer, name []byte, mm *measurement, n *int64) error {
// Write flag & tag block offset.
if err := writeUint8To(w, mm.flag(), n); err != nil {
return err
}
if err := writeUint64To(w, uint64(mm.tagBlock.offset), n); err != nil {
return err
} else if err := writeUint64To(w, uint64(mm.tagBlock.size), n); err != nil {
return err
}
// Write measurement name.
if err := writeUvarintTo(w, uint64(len(name)), n); err != nil {
return err
}
if err := writeTo(w, name, n); err != nil {
return err
}
// Write series data to buffer.
mw.buf.Reset()
var prev uint32
for _, seriesID := range mm.seriesIDs {
delta := seriesID - prev
var buf [binary.MaxVarintLen32]byte
i := binary.PutUvarint(buf[:], uint64(delta))
if _, err := mw.buf.Write(buf[:i]); err != nil {
return err
}
prev = seriesID
}
// Write series count.
if err := writeUvarintTo(w, uint64(len(mm.seriesIDs)), n); err != nil {
return err
}
// Write data size & buffer.
if err := writeUvarintTo(w, uint64(mw.buf.Len()), n); err != nil {
return err
}
nn, err := mw.buf.WriteTo(w)
if *n += nn; err != nil {
return err
}
return nil
}
// writeSketchTo writes an estimator.Sketch into w, updating the number of bytes
// written via n.
func writeSketchTo(w io.Writer, s estimator.Sketch, n *int64) error {
// TODO(edd): implement io.WriterTo on sketches.
data, err := s.MarshalBinary()
if err != nil {
return err
}
nn, err := w.Write(data)
*n += int64(nn)
return err
}
type measurement struct {
deleted bool
tagBlock struct {
offset int64
size int64
}
seriesIDs []uint32
offset int64
}
func (mm measurement) flag() byte {
var flag byte
if mm.deleted {
flag |= MeasurementTombstoneFlag
}
return flag
}


@@ -0,0 +1,181 @@
package tsi1_test
import (
"bytes"
"encoding/binary"
"fmt"
"reflect"
"testing"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
func TestReadMeasurementBlockTrailer(t *testing.T) {
// Build a trailer
var (
data = make([]byte, tsi1.MeasurementTrailerSize)
blockversion = uint16(1)
blockOffset, blockSize = uint64(1), uint64(2500)
hashIdxOffset, hashIdxSize = uint64(2501), uint64(1000)
sketchOffset, sketchSize = uint64(3501), uint64(250)
tsketchOffset, tsketchSize = uint64(3751), uint64(250)
)
binary.BigEndian.PutUint64(data[0:], blockOffset)
binary.BigEndian.PutUint64(data[8:], blockSize)
binary.BigEndian.PutUint64(data[16:], hashIdxOffset)
binary.BigEndian.PutUint64(data[24:], hashIdxSize)
binary.BigEndian.PutUint64(data[32:], sketchOffset)
binary.BigEndian.PutUint64(data[40:], sketchSize)
binary.BigEndian.PutUint64(data[48:], tsketchOffset)
binary.BigEndian.PutUint64(data[56:], tsketchSize)
binary.BigEndian.PutUint16(data[64:], blockversion)
trailer, err := tsi1.ReadMeasurementBlockTrailer(data)
if err != nil {
t.Logf("trailer is: %#v\n", trailer)
t.Fatal(err)
}
ok := true &&
trailer.Version == int(blockversion) &&
trailer.Data.Offset == int64(blockOffset) &&
trailer.Data.Size == int64(blockSize) &&
trailer.HashIndex.Offset == int64(hashIdxOffset) &&
trailer.HashIndex.Size == int64(hashIdxSize) &&
trailer.Sketch.Offset == int64(sketchOffset) &&
trailer.Sketch.Size == int64(sketchSize) &&
trailer.TSketch.Offset == int64(tsketchOffset) &&
trailer.TSketch.Size == int64(tsketchSize)
if !ok {
t.Fatalf("got %v\nwhich does not match expected", trailer)
}
}
func TestMeasurementBlockTrailer_WriteTo(t *testing.T) {
var trailer = tsi1.MeasurementBlockTrailer{
Version: 1,
Data: struct {
Offset int64
Size int64
}{Offset: 1, Size: 2},
HashIndex: struct {
Offset int64
Size int64
}{Offset: 3, Size: 4},
Sketch: struct {
Offset int64
Size int64
}{Offset: 5, Size: 6},
TSketch: struct {
Offset int64
Size int64
}{Offset: 7, Size: 8},
}
var buf bytes.Buffer
n, err := trailer.WriteTo(&buf)
if got, exp := n, int64(tsi1.MeasurementTrailerSize); got != exp {
t.Fatalf("got %v, exp %v", got, exp)
}
if got := err; got != nil {
t.Fatalf("got %v, exp %v", got, nil)
}
// Verify trailer written correctly.
exp := "" +
"0000000000000001" + // data offset
"0000000000000002" + // data size
"0000000000000003" + // hash index offset
"0000000000000004" + // hash index size
"0000000000000005" + // sketch offset
"0000000000000006" + // sketch size
"0000000000000007" + // tsketch offset
"0000000000000008" + // tsketch size
"0001" // version
if got, exp := fmt.Sprintf("%x", buf.String()), exp; got != exp {
t.Fatalf("got %v, exp %v", got, exp)
}
}
// Ensure measurement blocks can be written and opened.
func TestMeasurementBlockWriter(t *testing.T) {
ms := Measurements{
NewMeasurement([]byte("foo"), false, 100, 10, []uint32{1, 3, 4}),
NewMeasurement([]byte("bar"), false, 200, 20, []uint32{2}),
NewMeasurement([]byte("baz"), false, 300, 30, []uint32{5, 6}),
}
// Write the measurements to writer.
mw := tsi1.NewMeasurementBlockWriter()
for _, m := range ms {
mw.Add(m.Name, m.Deleted, m.Offset, m.Size, m.ids)
}
// Encode into buffer.
var buf bytes.Buffer
if n, err := mw.WriteTo(&buf); err != nil {
t.Fatal(err)
} else if n == 0 {
t.Fatal("expected bytes written")
}
// Unmarshal into a block.
var blk tsi1.MeasurementBlock
if err := blk.UnmarshalBinary(buf.Bytes()); err != nil {
t.Fatal(err)
}
// Verify data in block.
if e, ok := blk.Elem([]byte("foo")); !ok {
t.Fatal("expected element")
} else if e.TagBlockOffset() != 100 || e.TagBlockSize() != 10 {
t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
} else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{1, 3, 4}) {
t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
}
if e, ok := blk.Elem([]byte("bar")); !ok {
t.Fatal("expected element")
} else if e.TagBlockOffset() != 200 || e.TagBlockSize() != 20 {
t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
} else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{2}) {
t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
}
if e, ok := blk.Elem([]byte("baz")); !ok {
t.Fatal("expected element")
} else if e.TagBlockOffset() != 300 || e.TagBlockSize() != 30 {
t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
} else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{5, 6}) {
t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
}
// Verify non-existent measurement doesn't exist.
if _, ok := blk.Elem([]byte("BAD_MEASUREMENT")); ok {
t.Fatal("expected no element")
}
}
type Measurements []Measurement
type Measurement struct {
Name []byte
Deleted bool
Offset int64
Size int64
ids []uint32
}
func NewMeasurement(name []byte, deleted bool, offset, size int64, ids []uint32) Measurement {
return Measurement{
Name: name,
Deleted: deleted,
Offset: offset,
Size: size,
ids: ids,
}
}


@@ -0,0 +1,989 @@
package tsi1
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"sort"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/pkg/estimator"
"github.com/influxdata/influxdb/pkg/estimator/hll"
"github.com/influxdata/influxdb/pkg/mmap"
"github.com/influxdata/influxdb/pkg/rhh"
)
// ErrSeriesOverflow is returned when too many series are added to a series writer.
var ErrSeriesOverflow = errors.New("series overflow")
// Series list field size constants.
const (
// Series list trailer field sizes.
SeriesBlockTrailerSize = 0 +
4 + 4 + // series data offset/size
4 + 4 + 4 + // series index offset/size/capacity
8 + 4 + 4 + // bloom filter false positive rate, offset/size
4 + 4 + // series sketch offset/size
4 + 4 + // tombstone series sketch offset/size
4 + 4 + // series count and tombstone count
0
// Other field sizes
SeriesCountSize = 4
SeriesIDSize = 4
)
// Series flag constants.
const (
// Marks the series as having been deleted.
SeriesTombstoneFlag = 0x01
// Marks the following bytes as a hash index.
// These bytes should be skipped by an iterator.
SeriesHashIndexFlag = 0x02
)
// MaxSeriesBlockHashSize is the maximum number of series in a single hash.
const MaxSeriesBlockHashSize = (65536 * LoadFactor) / 100
// SeriesBlock represents the section of the index that holds series data.
type SeriesBlock struct {
data []byte
// Series data & index/capacity.
seriesData []byte
seriesIndexes []seriesBlockIndex
// Exact series counts for this block.
seriesN int32
tombstoneN int32
// Bloom filter used for fast series existence check.
filter *bloom.Filter
// Series block sketch and tombstone sketch for cardinality estimation.
// While we have exact counts for the block, these sketches allow us to
// estimate cardinality across multiple blocks (which might contain
// duplicate series).
sketch, tsketch estimator.Sketch
}
// HasSeries returns flags indicating if the series exists and if it is tombstoned.
func (blk *SeriesBlock) HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) {
offset, tombstoned := blk.Offset(name, tags, buf)
return offset != 0, tombstoned
}
// Series returns a series element.
func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem {
offset, _ := blk.Offset(name, tags, nil)
if offset == 0 {
return nil
}
var e SeriesBlockElem
e.UnmarshalBinary(blk.data[offset:])
return &e
}
// Offset returns the byte offset of the series within the block.
func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offset uint32, tombstoned bool) {
// Exit if no series indexes exist.
if len(blk.seriesIndexes) == 0 {
return 0, false
}
// Compute series key.
buf = AppendSeriesKey(buf[:0], name, tags)
bufN := uint32(len(buf))
// Quickly check the bloom filter.
// If the key doesn't exist then we know for sure that it doesn't exist.
// If it does exist then we need to do a hash index check to verify. False
// positives are possible with a bloom filter.
if !blk.filter.Contains(buf) {
return 0, false
}
// Find the correct partition.
// Use the previous partition unless the key exactly matches a partition's min value.
i := sort.Search(len(blk.seriesIndexes), func(i int) bool {
return CompareSeriesKeys(blk.seriesIndexes[i].min, buf) != -1
})
if i >= len(blk.seriesIndexes) || !bytes.Equal(blk.seriesIndexes[i].min, buf) {
i--
}
seriesIndex := blk.seriesIndexes[i]
// Search within partition.
n := int64(seriesIndex.capacity)
hash := rhh.HashKey(buf)
pos := hash % n
// Track current distance
var d int64
for {
// Find offset of series.
offset := binary.BigEndian.Uint32(seriesIndex.data[pos*SeriesIDSize:])
if offset == 0 {
return 0, false
}
// Check whether the encoded series key matches the expected key.
key := ReadSeriesKey(blk.data[offset+1 : offset+1+bufN])
if bytes.Equal(buf, key) {
return offset, (blk.data[offset] & SeriesTombstoneFlag) != 0
}
// Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(key), pos, n)
if d > max {
return 0, false
}
// Move position forward.
pos = (pos + 1) % n
d++
if d > n {
return 0, false
}
}
}
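// A hedged usage sketch: the buf argument lets callers amortize series
// key encoding across many lookups by reusing one pre-sized scratch
// buffer:
//
//	buf := make([]byte, 0, 1024) // reused for each AppendSeriesKey call
//	exists, tombstoned := blk.HasSeries(name, tags, buf)
//
// The bloom filter keeps the common miss cheap: a negative answer is
// definitive, and only (possibly false-positive) hits fall through to
// the partitioned hash index probe.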
// SeriesCount returns the number of series.
func (blk *SeriesBlock) SeriesCount() uint32 {
return uint32(blk.seriesN + blk.tombstoneN)
}
// SeriesIterator returns an iterator over all the series.
func (blk *SeriesBlock) SeriesIterator() SeriesIterator {
return &seriesBlockIterator{
n: blk.SeriesCount(),
offset: 1,
sblk: blk,
}
}
// UnmarshalBinary unpacks data into the series list.
//
// If data is an mmap then it should stay open until the series list is no
// longer used because data access is performed directly from the byte slice.
func (blk *SeriesBlock) UnmarshalBinary(data []byte) error {
t := ReadSeriesBlockTrailer(data)
// Save entire block.
blk.data = data
// Slice series data.
blk.seriesData = data[t.Series.Data.Offset:]
blk.seriesData = blk.seriesData[:t.Series.Data.Size]
// Read in all index partitions.
buf := data[t.Series.Index.Offset:]
buf = buf[:t.Series.Index.Size]
blk.seriesIndexes = make([]seriesBlockIndex, t.Series.Index.N)
for i := range blk.seriesIndexes {
idx := &blk.seriesIndexes[i]
// Read data block.
var offset, size uint32
offset, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:]
size, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:]
idx.data = blk.data[offset : offset+size]
// Read block capacity.
idx.capacity, buf = int32(binary.BigEndian.Uint32(buf[:4])), buf[4:]
// Read min key.
var n uint32
n, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:]
idx.min, buf = buf[:n], buf[n:]
}
if len(buf) != 0 {
return fmt.Errorf("data remaining in index list buffer: %d", len(buf))
}
// Initialize bloom filter.
filter, err := bloom.NewFilterBuffer(data[t.Bloom.Offset:][:t.Bloom.Size], t.Bloom.K)
if err != nil {
return err
}
blk.filter = filter
// Initialise sketches. We're currently using HLL+.
var s, ts = hll.NewDefaultPlus(), hll.NewDefaultPlus()
if err := s.UnmarshalBinary(data[t.Sketch.Offset:][:t.Sketch.Size]); err != nil {
return err
}
blk.sketch = s
if err := ts.UnmarshalBinary(data[t.TSketch.Offset:][:t.TSketch.Size]); err != nil {
return err
}
blk.tsketch = ts
// Set the series and tombstone counts
blk.seriesN, blk.tombstoneN = t.SeriesN, t.TombstoneN
return nil
}
// seriesBlockIndex represents a partitioned series block index.
type seriesBlockIndex struct {
data []byte
min []byte
capacity int32
}
// seriesBlockIterator is an iterator over a series ids in a series list.
type seriesBlockIterator struct {
i, n uint32
offset uint32
sblk *SeriesBlock
e SeriesBlockElem // buffer
}
// Next returns the next series element.
func (itr *seriesBlockIterator) Next() SeriesElem {
for {
// Exit if at the end.
if itr.i == itr.n {
return nil
}
// If the current element is a hash index partition then skip it.
if flag := itr.sblk.data[itr.offset]; flag&SeriesHashIndexFlag != 0 {
// Skip flag
itr.offset++
// Read index capacity.
n := binary.BigEndian.Uint32(itr.sblk.data[itr.offset:])
itr.offset += 4
// Skip over index.
itr.offset += n * SeriesIDSize
continue
}
// Read next element.
itr.e.UnmarshalBinary(itr.sblk.data[itr.offset:])
// Move iterator and offset forward.
itr.i++
itr.offset += uint32(itr.e.size)
return &itr.e
}
}
// seriesDecodeIterator decodes a series id iterator into unmarshaled elements.
type seriesDecodeIterator struct {
itr seriesIDIterator
sblk *SeriesBlock
e SeriesBlockElem // buffer
}
// newSeriesDecodeIterator returns a new instance of seriesDecodeIterator.
func newSeriesDecodeIterator(sblk *SeriesBlock, itr seriesIDIterator) *seriesDecodeIterator {
return &seriesDecodeIterator{sblk: sblk, itr: itr}
}
// Next returns the next series element.
func (itr *seriesDecodeIterator) Next() SeriesElem {
// Read next series id.
id := itr.itr.next()
if id == 0 {
return nil
}
// Read next element.
itr.e.UnmarshalBinary(itr.sblk.data[id:])
return &itr.e
}
// SeriesBlockElem represents a series element in the series list.
type SeriesBlockElem struct {
flag byte
name []byte
tags models.Tags
size int
}
// Deleted returns true if the tombstone flag is set.
func (e *SeriesBlockElem) Deleted() bool { return (e.flag & SeriesTombstoneFlag) != 0 }
// Name returns the measurement name.
func (e *SeriesBlockElem) Name() []byte { return e.name }
// Tags returns the tag set.
func (e *SeriesBlockElem) Tags() models.Tags { return e.tags }
// Expr always returns a nil expression.
// This is only used by higher level query planning.
func (e *SeriesBlockElem) Expr() influxql.Expr { return nil }
// UnmarshalBinary unmarshals data into e.
func (e *SeriesBlockElem) UnmarshalBinary(data []byte) error {
start := len(data)
// Parse flag data.
e.flag, data = data[0], data[1:]
// Parse total size.
_, szN := binary.Uvarint(data)
data = data[szN:]
// Parse name.
n, data := binary.BigEndian.Uint16(data[:2]), data[2:]
e.name, data = data[:n], data[n:]
// Parse tags.
e.tags = e.tags[:0]
tagN, szN := binary.Uvarint(data)
data = data[szN:]
for i := uint64(0); i < tagN; i++ {
var tag models.Tag
n, data = binary.BigEndian.Uint16(data[:2]), data[2:]
tag.Key, data = data[:n], data[n:]
n, data = binary.BigEndian.Uint16(data[:2]), data[2:]
tag.Value, data = data[:n], data[n:]
e.tags = append(e.tags, tag)
}
// Save length of elem.
e.size = start - len(data)
return nil
}
// AppendSeriesElem serializes flag/name/tags to dst and returns the new buffer.
func AppendSeriesElem(dst []byte, flag byte, name []byte, tags models.Tags) []byte {
dst = append(dst, flag)
return AppendSeriesKey(dst, name, tags)
}
// AppendSeriesKey serializes name and tags to a byte slice.
// The total length is prepended as a uvarint.
func AppendSeriesKey(dst []byte, name []byte, tags models.Tags) []byte {
buf := make([]byte, binary.MaxVarintLen32)
origLen := len(dst)
// The tag count is variable encoded, so we need to know ahead of time what
// the size of the tag count value will be.
tcBuf := make([]byte, binary.MaxVarintLen32)
tcSz := binary.PutUvarint(tcBuf, uint64(len(tags)))
// Size of name/tags. Does not include total length.
size := 0 + //
2 + // size of measurement
len(name) + // measurement
tcSz + // size of number of tags
(4 * len(tags)) + // length of each tag key and value
tags.Size() // size of tag keys/values
// Variable encode length.
totalSz := binary.PutUvarint(buf, uint64(size))
// If caller doesn't provide a buffer then pre-allocate an exact one.
if dst == nil {
dst = make([]byte, 0, size+totalSz)
}
// Append total length.
dst = append(dst, buf[:totalSz]...)
// Append name.
binary.BigEndian.PutUint16(buf, uint16(len(name)))
dst = append(dst, buf[:2]...)
dst = append(dst, name...)
// Append tag count.
dst = append(dst, tcBuf[:tcSz]...)
// Append tags.
for _, tag := range tags {
binary.BigEndian.PutUint16(buf, uint16(len(tag.Key)))
dst = append(dst, buf[:2]...)
dst = append(dst, tag.Key...)
binary.BigEndian.PutUint16(buf, uint16(len(tag.Value)))
dst = append(dst, buf[:2]...)
dst = append(dst, tag.Value...)
}
// Verify that the total length equals the encoded byte count.
if got, exp := len(dst)-origLen, size+totalSz; got != exp {
panic(fmt.Sprintf("series key encoding does not match calculated total length: actual=%d, exp=%d, key=%x", got, exp, dst))
}
return dst
}
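// A worked sketch of the layout for name="cpu" and tags={region=east}:
//
//	0x14                 total size uvarint (20 bytes follow)
//	0x00 0x03 "cpu"      2-byte name length + name
//	0x01                 tag count uvarint
//	0x00 0x06 "region"   2-byte key length + key
//	0x00 0x04 "east"     2-byte value length + value
//
// Here size = 2 + 3 + 1 + 4 + 10 = 20, so the full key is 21 bytes.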
// ReadSeriesKey returns the series key from the beginning of the buffer.
func ReadSeriesKey(data []byte) []byte {
sz, n := binary.Uvarint(data)
return data[:int(sz)+n]
}
func CompareSeriesKeys(a, b []byte) int {
// Handle 'nil' keys.
if len(a) == 0 && len(b) == 0 {
return 0
} else if len(a) == 0 {
return -1
} else if len(b) == 0 {
return 1
}
// Read total size.
_, i := binary.Uvarint(a)
a = a[i:]
_, i = binary.Uvarint(b)
b = b[i:]
// Read names.
var n uint16
n, a = binary.BigEndian.Uint16(a), a[2:]
name0, a := a[:n], a[n:]
n, b = binary.BigEndian.Uint16(b), b[2:]
name1, b := b[:n], b[n:]
// Compare names, return if not equal.
if cmp := bytes.Compare(name0, name1); cmp != 0 {
return cmp
}
// Read tag counts.
tagN0, i := binary.Uvarint(a)
a = a[i:]
tagN1, i := binary.Uvarint(b)
b = b[i:]
// Compare each tag in order.
for i := uint64(0); ; i++ {
// Check for EOF.
if i == tagN0 && i == tagN1 {
return 0
} else if i == tagN0 {
return -1
} else if i == tagN1 {
return 1
}
// Read keys.
var key0, key1 []byte
n, a = binary.BigEndian.Uint16(a), a[2:]
key0, a = a[:n], a[n:]
n, b = binary.BigEndian.Uint16(b), b[2:]
key1, b = b[:n], b[n:]
// Compare keys.
if cmp := bytes.Compare(key0, key1); cmp != 0 {
return cmp
}
// Read values.
var value0, value1 []byte
n, a = binary.BigEndian.Uint16(a), a[2:]
value0, a = a[:n], a[n:]
n, b = binary.BigEndian.Uint16(b), b[2:]
value1, b = b[:n], b[n:]
// Compare values.
if cmp := bytes.Compare(value0, value1); cmp != 0 {
return cmp
}
}
}
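// Keys compare by measurement name first, then tag by tag (key bytes,
// then value bytes), with a shorter tag set that is a prefix of a longer
// one ordering first. For keys built with AppendSeriesKey:
//
//	cpu              < cpu,region=east   (fewer tags sort first)
//	cpu,region=east  < cpu,region=west   (values compared last)
//	cpu,region=west  < mem,region=east   (names compared first)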
type seriesKeys [][]byte
func (a seriesKeys) Len() int { return len(a) }
func (a seriesKeys) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a seriesKeys) Less(i, j int) bool {
return CompareSeriesKeys(a[i], a[j]) == -1
}
// SeriesBlockEncoder encodes series to a SeriesBlock in an underlying writer.
type SeriesBlockEncoder struct {
w io.Writer
// Double buffer for writing series.
// First elem is current buffer, second is previous buffer.
buf [2][]byte
// Track bytes written, sections, & offsets.
n int64
trailer SeriesBlockTrailer
offsets *rhh.HashMap
indexMin []byte
indexes []seriesBlockIndexEncodeInfo
// Bloom filter to check for series existence.
filter *bloom.Filter
// Series sketch and tombstoned series sketch. These must be
// set before calling WriteTo.
sketch, tSketch estimator.Sketch
}
// NewSeriesBlockEncoder returns a new instance of SeriesBlockEncoder.
func NewSeriesBlockEncoder(w io.Writer, n uint32, m, k uint64) *SeriesBlockEncoder {
return &SeriesBlockEncoder{
w: w,
offsets: rhh.NewHashMap(rhh.Options{
Capacity: MaxSeriesBlockHashSize,
LoadFactor: LoadFactor,
}),
filter: bloom.NewFilter(m, k),
sketch: hll.NewDefaultPlus(),
tSketch: hll.NewDefaultPlus(),
}
}
// N returns the number of bytes written.
func (enc *SeriesBlockEncoder) N() int64 { return enc.n }
// Encode writes a series to the underlying writer.
// The series must be lexicographically sorted after the previous encoded series.
func (enc *SeriesBlockEncoder) Encode(name []byte, tags models.Tags, deleted bool) error {
// An initial empty byte must be written.
if err := enc.ensureHeaderWritten(); err != nil {
return err
}
// Generate the series element.
buf := AppendSeriesElem(enc.buf[0][:0], encodeSerieFlag(deleted), name, tags)
// Verify series is after previous series.
if enc.buf[1] != nil {
// Skip the first byte since it is the flag. Remaining bytes are key.
key0, key1 := buf[1:], enc.buf[1][1:]
if cmp := CompareSeriesKeys(key0, key1); cmp == -1 {
return fmt.Errorf("series out of order: prev=%q, new=%q", enc.buf[1], buf)
} else if cmp == 0 {
return fmt.Errorf("series already encoded: %s", buf)
}
}
// Flush a hash index, if necessary.
if err := enc.checkFlushIndex(buf[1:]); err != nil {
return err
}
// Swap double buffer.
enc.buf[0], enc.buf[1] = enc.buf[1], buf
// Write encoded series to writer.
offset := enc.n
if err := writeTo(enc.w, buf, &enc.n); err != nil {
return err
}
// Save offset to generate index later.
// Key is copied by the RHH map.
enc.offsets.Put(buf[1:], uint32(offset))
// Update bloom filter.
enc.filter.Insert(buf[1:])
// Update sketches & trailer.
if deleted {
enc.trailer.TombstoneN++
enc.tSketch.Add(buf)
} else {
enc.trailer.SeriesN++
enc.sketch.Add(buf)
}
return nil
}
// Close writes the index and trailer.
// This should be called at the end once all series have been encoded.
func (enc *SeriesBlockEncoder) Close() error {
if err := enc.ensureHeaderWritten(); err != nil {
return err
}
// Flush outstanding hash index.
if err := enc.flushIndex(); err != nil {
return err
}
// Write dictionary-encoded series list.
enc.trailer.Series.Data.Offset = 1
enc.trailer.Series.Data.Size = int32(enc.n) - enc.trailer.Series.Data.Offset
// Write dictionary-encoded series hash index.
enc.trailer.Series.Index.Offset = int32(enc.n)
if err := enc.writeIndexEntries(); err != nil {
return err
}
enc.trailer.Series.Index.Size = int32(enc.n) - enc.trailer.Series.Index.Offset
// Flush bloom filter.
enc.trailer.Bloom.K = enc.filter.K()
enc.trailer.Bloom.Offset = int32(enc.n)
if err := writeTo(enc.w, enc.filter.Bytes(), &enc.n); err != nil {
return err
}
enc.trailer.Bloom.Size = int32(enc.n) - enc.trailer.Bloom.Offset
// Write the sketches out.
enc.trailer.Sketch.Offset = int32(enc.n)
if err := writeSketchTo(enc.w, enc.sketch, &enc.n); err != nil {
return err
}
enc.trailer.Sketch.Size = int32(enc.n) - enc.trailer.Sketch.Offset
enc.trailer.TSketch.Offset = int32(enc.n)
if err := writeSketchTo(enc.w, enc.tSketch, &enc.n); err != nil {
return err
}
enc.trailer.TSketch.Size = int32(enc.n) - enc.trailer.TSketch.Offset
// Write trailer.
nn, err := enc.trailer.WriteTo(enc.w)
enc.n += nn
if err != nil {
return err
}
return nil
}
// writeIndexEntries writes a list of series hash index entries.
func (enc *SeriesBlockEncoder) writeIndexEntries() error {
enc.trailer.Series.Index.N = int32(len(enc.indexes))
for _, idx := range enc.indexes {
// Write offset/size.
if err := writeUint32To(enc.w, uint32(idx.offset), &enc.n); err != nil {
return err
} else if err := writeUint32To(enc.w, uint32(idx.size), &enc.n); err != nil {
return err
}
// Write capacity.
if err := writeUint32To(enc.w, uint32(idx.capacity), &enc.n); err != nil {
return err
}
// Write min key.
if err := writeUint32To(enc.w, uint32(len(idx.min)), &enc.n); err != nil {
return err
} else if err := writeTo(enc.w, idx.min, &enc.n); err != nil {
return err
}
}
return nil
}
// ensureHeaderWritten writes a single empty byte at the front of the file
// so that series offsets will always be non-zero.
func (enc *SeriesBlockEncoder) ensureHeaderWritten() error {
if enc.n > 0 {
return nil
}
if _, err := enc.w.Write([]byte{0}); err != nil {
return err
}
enc.n++
return nil
}
// checkFlushIndex flushes a hash index segment if the index is too large.
// The min argument specifies the lowest series key in the next index, if one is created.
func (enc *SeriesBlockEncoder) checkFlushIndex(min []byte) error {
// Ignore if there is still room in the index.
if enc.offsets.Len() < MaxSeriesBlockHashSize {
return nil
}
// Flush index values.
if err := enc.flushIndex(); err != nil {
return err
}
// Reset index and save minimum series key.
enc.offsets.Reset()
enc.indexMin = make([]byte, len(min))
copy(enc.indexMin, min)
return nil
}
// flushIndex flushes the hash index segment.
func (enc *SeriesBlockEncoder) flushIndex() error {
if enc.offsets.Len() == 0 {
return nil
}
// Write index segment flag.
if err := writeUint8To(enc.w, SeriesHashIndexFlag, &enc.n); err != nil {
return err
}
// Write index capacity.
// This is used for skipping over when iterating sequentially.
if err := writeUint32To(enc.w, uint32(enc.offsets.Cap()), &enc.n); err != nil {
return err
}
// Determine size.
var sz int64 = enc.offsets.Cap() * 4
// Save current position to ensure size is correct by the end.
offset := enc.n
// Encode hash map offset entries.
for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i)
seriesOffset, _ := v.(uint32)
if err := writeUint32To(enc.w, uint32(seriesOffset), &enc.n); err != nil {
return err
}
}
// Determine total size.
size := enc.n - offset
// Verify actual size equals calculated size.
if size != sz {
return fmt.Errorf("series hash index size mismatch: %d <> %d", size, sz)
}
// Add to index entries.
enc.indexes = append(enc.indexes, seriesBlockIndexEncodeInfo{
offset: uint32(offset),
size: uint32(size),
capacity: uint32(enc.offsets.Cap()),
min: enc.indexMin,
})
// Clear next min.
enc.indexMin = nil
return nil
}
// seriesBlockIndexEncodeInfo stores offset information for seriesBlockIndex structures.
type seriesBlockIndexEncodeInfo struct {
offset uint32
size uint32
capacity uint32
min []byte
}
// ReadSeriesBlockTrailer returns the series list trailer from data.
func ReadSeriesBlockTrailer(data []byte) SeriesBlockTrailer {
var t SeriesBlockTrailer
// Slice trailer data.
buf := data[len(data)-SeriesBlockTrailerSize:]
// Read series data info.
t.Series.Data.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.Series.Data.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
// Read series hash index info.
t.Series.Index.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.Series.Index.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.Series.Index.N, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
// Read bloom filter info.
t.Bloom.K, buf = binary.BigEndian.Uint64(buf[0:8]), buf[8:]
t.Bloom.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.Bloom.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
// Read series sketch info.
t.Sketch.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.Sketch.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
// Read tombstone series sketch info.
t.TSketch.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.TSketch.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
// Read series & tombstone count.
t.SeriesN, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
t.TombstoneN, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:]
return t
}
// SeriesBlockTrailer represents metadata written to the end of the series list.
type SeriesBlockTrailer struct {
Series struct {
Data struct {
Offset int32
Size int32
}
Index struct {
Offset int32
Size int32
N int32
}
}
// Bloom filter info.
Bloom struct {
K uint64
Offset int32
Size int32
}
// Offset and size of cardinality sketch for measurements.
Sketch struct {
Offset int32
Size int32
}
// Offset and size of cardinality sketch for tombstoned measurements.
TSketch struct {
Offset int32
Size int32
}
SeriesN int32
TombstoneN int32
}
func (t SeriesBlockTrailer) WriteTo(w io.Writer) (n int64, err error) {
if err := writeUint32To(w, uint32(t.Series.Data.Offset), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Series.Data.Size), &n); err != nil {
return n, err
}
if err := writeUint32To(w, uint32(t.Series.Index.Offset), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Series.Index.Size), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Series.Index.N), &n); err != nil {
return n, err
}
// Write bloom filter info.
if err := writeUint64To(w, t.Bloom.K, &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Bloom.Offset), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Bloom.Size), &n); err != nil {
return n, err
}
// Write measurement sketch info.
if err := writeUint32To(w, uint32(t.Sketch.Offset), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.Sketch.Size), &n); err != nil {
return n, err
}
// Write tombstone measurement sketch info.
if err := writeUint32To(w, uint32(t.TSketch.Offset), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.TSketch.Size), &n); err != nil {
return n, err
}
// Write series and tombstone count.
if err := writeUint32To(w, uint32(t.SeriesN), &n); err != nil {
return n, err
} else if err := writeUint32To(w, uint32(t.TombstoneN), &n); err != nil {
return n, err
}
return n, nil
}
type serie struct {
name []byte
tags models.Tags
deleted bool
offset uint32
}
func (s *serie) flag() uint8 { return encodeSerieFlag(s.deleted) }
func encodeSerieFlag(deleted bool) byte {
var flag byte
if deleted {
flag |= SeriesTombstoneFlag
}
return flag
}
type series []serie
func (a series) Len() int { return len(a) }
func (a series) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a series) Less(i, j int) bool {
if cmp := bytes.Compare(a[i].name, a[j].name); cmp != 0 {
return cmp == -1
}
return models.CompareTags(a[i].tags, a[j].tags) == -1
}
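// The block encoder requires lexicographically sorted input, so callers
// can sort with this interface first. A hedged sketch (enc is an assumed
// *SeriesBlockEncoder already in scope):
//
//	a := series{
//		{name: []byte("mem"), tags: models.NewTags(map[string]string{"region": "east"})},
//		{name: []byte("cpu"), tags: models.NewTags(map[string]string{"region": "west"})},
//	}
//	sort.Sort(a)
//	for i := range a {
//		if err := enc.Encode(a[i].name, a[i].tags, a[i].deleted); err != nil {
//			return err
//		}
//	}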
// mapIndexFileSeriesBlock maps a writer to a series block.
// Returns the series block and the mmap byte slice (if mmap is used).
// The memory-mapped slice MUST be unmapped by the caller.
func mapIndexFileSeriesBlock(w io.Writer) (*SeriesBlock, []byte, error) {
switch w := w.(type) {
case *bytes.Buffer:
return mapIndexFileSeriesBlockBuffer(w)
case *os.File:
return mapIndexFileSeriesBlockFile(w)
default:
return nil, nil, fmt.Errorf("invalid tsi1 writer type: %T", w)
}
}
// mapIndexFileSeriesBlockBuffer maps a buffer to a series block.
func mapIndexFileSeriesBlockBuffer(buf *bytes.Buffer) (*SeriesBlock, []byte, error) {
data := buf.Bytes()
data = data[len(FileSignature):] // Skip file signature.
var sblk SeriesBlock
if err := sblk.UnmarshalBinary(data); err != nil {
return nil, nil, err
}
return &sblk, nil, nil
}
// mapIndexFileSeriesBlockFile memory-maps a file to a series block.
func mapIndexFileSeriesBlockFile(f *os.File) (*SeriesBlock, []byte, error) {
// Open a read-only memory map of the existing data.
data, err := mmap.Map(f.Name())
if err != nil {
return nil, nil, err
}
sblkData := data[len(FileSignature):] // Skip file signature.
// Unmarshal block on top of mmap.
var sblk SeriesBlock
if err := sblk.UnmarshalBinary(sblkData); err != nil {
mmap.Unmap(data)
return nil, nil, err
}
return &sblk, data, nil
}
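// A hedged usage sketch: when the writer was a file, the returned data
// slice is the live mapping backing the block, so it must outlive all
// reads and be unmapped exactly once by the caller:
//
//	sblk, data, err := mapIndexFileSeriesBlock(w)
//	if err != nil {
//		return err
//	}
//	defer func() {
//		if data != nil { // nil when w was a *bytes.Buffer
//			mmap.Unmap(data)
//		}
//	}()
//	_ = sblk // read series from the block while the mapping is held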


@@ -0,0 +1,94 @@
package tsi1_test
import (
"bytes"
"fmt"
"testing"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure series block can be unmarshaled.
func TestSeriesBlock_UnmarshalBinary(t *testing.T) {
if _, err := CreateSeriesBlock([]Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
}); err != nil {
t.Fatal(err)
}
}
// Ensure series block contains the correct set of series.
func TestSeriesBlock_Series(t *testing.T) {
series := []Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})},
}
l := MustCreateSeriesBlock(series)
// Verify total number of series is correct.
if n := l.SeriesCount(); n != 3 {
t.Fatalf("unexpected series count: %d", n)
}
// Verify all series exist.
for i, s := range series {
if e := l.Series(s.Name, s.Tags); e == nil {
t.Fatalf("series does not exist: i=%d", i)
} else if !bytes.Equal(e.Name(), s.Name) || models.CompareTags(e.Tags(), s.Tags) != 0 {
t.Fatalf("series element does not match: i=%d, %s (%s) != %s (%s)", i, e.Name(), e.Tags().String(), s.Name, s.Tags.String())
} else if e.Deleted() {
t.Fatalf("series deleted: i=%d", i)
}
}
// Verify non-existent series doesn't exist.
if e := l.Series([]byte("foo"), models.NewTags(map[string]string{"region": "north"})); e != nil {
t.Fatalf("series should not exist: %#v", e)
}
}
// CreateSeriesBlock returns an in-memory SeriesBlock with a list of series.
func CreateSeriesBlock(a []Series) (*tsi1.SeriesBlock, error) {
var buf bytes.Buffer
// Create writer and sketches. Add series.
enc := tsi1.NewSeriesBlockEncoder(&buf, uint32(len(a)), M, K)
for i, s := range a {
if err := enc.Encode(s.Name, s.Tags, s.Deleted); err != nil {
return nil, fmt.Errorf("SeriesBlockWriter.Add(): i=%d, err=%s", i, err)
}
}
// Close and flush.
if err := enc.Close(); err != nil {
return nil, fmt.Errorf("SeriesBlockWriter.WriteTo(): %s", err)
}
// Unpack bytes into series block.
var blk tsi1.SeriesBlock
if err := blk.UnmarshalBinary(buf.Bytes()); err != nil {
return nil, fmt.Errorf("SeriesBlock.UnmarshalBinary(): %s", err)
}
return &blk, nil
}
// MustCreateSeriesBlock calls CreateSeriesBlock(). It panics on error.
func MustCreateSeriesBlock(a []Series) *tsi1.SeriesBlock {
l, err := CreateSeriesBlock(a)
if err != nil {
panic(err)
}
return l
}
// Series represents name/tagset pairs that are used in testing.
type Series struct {
Name []byte
Tags models.Tags
Deleted bool
}

View File

@@ -0,0 +1,752 @@
package tsi1
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/influxdata/influxdb/pkg/rhh"
)
// TagBlockVersion is the version of the tag block.
const TagBlockVersion = 1
// Tag key flag constants.
const (
TagKeyTombstoneFlag = 0x01
)
// Tag value flag constants.
const (
TagValueTombstoneFlag = 0x01
)
// TagBlock variable size constants.
const (
// TagBlock key block fields.
TagKeyNSize = 8
TagKeyOffsetSize = 8
// TagBlock value block fields.
TagValueNSize = 8
TagValueOffsetSize = 8
)
// TagBlock errors.
var (
ErrUnsupportedTagBlockVersion = errors.New("unsupported tag block version")
ErrTagBlockSizeMismatch = errors.New("tag block size mismatch")
)
// TagBlock represents tag key/value block for a single measurement.
type TagBlock struct {
data []byte
valueData []byte
keyData []byte
hashData []byte
version int // tag block version
}
// Version returns the encoding version parsed from the data.
// Only valid after UnmarshalBinary() has been successfully invoked.
func (blk *TagBlock) Version() int { return blk.version }
// UnmarshalBinary unpacks data into the tag block. The data is not copied,
// so it must be retained and left unchanged for the life of the block.
func (blk *TagBlock) UnmarshalBinary(data []byte) error {
// Read trailer.
t, err := ReadTagBlockTrailer(data)
if err != nil {
return err
}
// Verify data size is correct.
if int64(len(data)) != t.Size {
return ErrTagBlockSizeMismatch
}
// Save value data section.
blk.valueData = data[t.ValueData.Offset:]
blk.valueData = blk.valueData[:t.ValueData.Size]
// Save key data section.
blk.keyData = data[t.KeyData.Offset:]
blk.keyData = blk.keyData[:t.KeyData.Size]
// Save hash index block.
blk.hashData = data[t.HashIndex.Offset:]
blk.hashData = blk.hashData[:t.HashIndex.Size]
// Save entire block.
blk.data = data
return nil
}
// TagKeyElem returns an element for a tag key.
// Returns nil if the key is not found.
func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem {
keyN := int64(binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize]))
hash := rhh.HashKey(key)
pos := hash % keyN
// Track current distance
var d int64
for {
// Find offset of tag key.
offset := binary.BigEndian.Uint64(blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):])
if offset == 0 {
return nil
}
// Parse into element.
var e TagBlockKeyElem
e.unmarshal(blk.data[offset:], blk.data)
// Return if keys match.
if bytes.Equal(e.key, key) {
return &e
}
// Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.key), pos, keyN) {
return nil
}
// Move position forward.
pos = (pos + 1) % keyN
d++
if d > keyN {
return nil
}
}
}
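// Probe-distance note (illustrative): Robin Hood hashing keeps resident
// entries at least as far from their ideal slot as any key still probing
// past them. Once d exceeds the resident entry's own distance from its home
// slot, the searched key cannot occur later in the chain, so the lookup
// stops early instead of scanning all keyN slots.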
// TagValueElem returns an element for a tag value.
// Returns nil if the key or value is not found.
func (blk *TagBlock) TagValueElem(key, value []byte) TagValueElem {
// Find key element, exit if not found.
kelem, _ := blk.TagKeyElem(key).(*TagBlockKeyElem)
if kelem == nil {
return nil
}
// Slice hash index data.
hashData := kelem.hashIndex.buf
valueN := int64(binary.BigEndian.Uint64(hashData[:TagValueNSize]))
hash := rhh.HashKey(value)
pos := hash % valueN
// Track current distance
var d int64
for {
// Find offset of tag value.
offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):])
if offset == 0 {
return nil
}
// Parse into element.
var e TagBlockValueElem
e.unmarshal(blk.data[offset:])
// Return if values match.
if bytes.Equal(e.value, value) {
return &e
}
// Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(e.value), pos, valueN)
if d > max {
return nil
}
// Move position forward.
pos = (pos + 1) % valueN
d++
if d > valueN {
return nil
}
}
}
// TagKeyIterator returns an iterator over all the keys in the block.
func (blk *TagBlock) TagKeyIterator() TagKeyIterator {
return &tagBlockKeyIterator{
blk: blk,
keyData: blk.keyData,
}
}
// tagBlockKeyIterator represents an iterator over all keys in a TagBlock.
type tagBlockKeyIterator struct {
blk *TagBlock
keyData []byte
e TagBlockKeyElem
}
// Next returns the next element in the iterator.
func (itr *tagBlockKeyIterator) Next() TagKeyElem {
// Exit when there is no data left.
if len(itr.keyData) == 0 {
return nil
}
// Unmarshal next element & move data forward.
itr.e.unmarshal(itr.keyData, itr.blk.data)
itr.keyData = itr.keyData[itr.e.size:]
assert(len(itr.e.Key()) > 0, "invalid zero-length tag key")
return &itr.e
}
// tagBlockValueIterator represents an iterator over all values for a tag key.
type tagBlockValueIterator struct {
data []byte
e TagBlockValueElem
}
// Next returns the next element in the iterator.
func (itr *tagBlockValueIterator) Next() TagValueElem {
// Exit when there is no data left.
if len(itr.data) == 0 {
return nil
}
// Unmarshal next element & move data forward.
itr.e.unmarshal(itr.data)
itr.data = itr.data[itr.e.size:]
assert(len(itr.e.Value()) > 0, "invalid zero-length tag value")
return &itr.e
}
// TagBlockKeyElem represents a tag key element in a TagBlock.
type TagBlockKeyElem struct {
flag byte
key []byte
// Value data
data struct {
offset uint64
size uint64
buf []byte
}
// Value hash index data
hashIndex struct {
offset uint64
size uint64
buf []byte
}
size int
// Reusable iterator.
itr tagBlockValueIterator
}
// Deleted returns true if the key has been tombstoned.
func (e *TagBlockKeyElem) Deleted() bool { return (e.flag & TagKeyTombstoneFlag) != 0 }
// Key returns the key name of the element.
func (e *TagBlockKeyElem) Key() []byte { return e.key }
// TagValueIterator returns an iterator over the key's values.
func (e *TagBlockKeyElem) TagValueIterator() TagValueIterator {
return &tagBlockValueIterator{data: e.data.buf}
}
// unmarshal unmarshals buf into e.
// The data argument represents the entire block data.
func (e *TagBlockKeyElem) unmarshal(buf, data []byte) {
start := len(buf)
// Parse flag data.
e.flag, buf = buf[0], buf[1:]
// Parse data offset/size.
e.data.offset, buf = binary.BigEndian.Uint64(buf), buf[8:]
e.data.size, buf = binary.BigEndian.Uint64(buf), buf[8:]
// Slice data.
e.data.buf = data[e.data.offset:]
e.data.buf = e.data.buf[:e.data.size]
// Parse hash index offset/size.
e.hashIndex.offset, buf = binary.BigEndian.Uint64(buf), buf[8:]
e.hashIndex.size, buf = binary.BigEndian.Uint64(buf), buf[8:]
// Slice hash index data.
e.hashIndex.buf = data[e.hashIndex.offset:]
e.hashIndex.buf = e.hashIndex.buf[:e.hashIndex.size]
// Parse key.
n, sz := binary.Uvarint(buf)
e.key, buf = buf[sz:sz+int(n)], buf[int(n)+sz:]
// Save length of elem.
e.size = start - len(buf)
}
// TagBlockValueElem represents a tag value element.
type TagBlockValueElem struct {
flag byte
value []byte
series struct {
n uint32 // Series count
data []byte // Raw series data
}
size int
}
// Deleted returns true if the element has been tombstoned.
func (e *TagBlockValueElem) Deleted() bool { return (e.flag & TagValueTombstoneFlag) != 0 }
// Value returns the value for the element.
func (e *TagBlockValueElem) Value() []byte { return e.value }
// SeriesN returns the series count.
func (e *TagBlockValueElem) SeriesN() uint32 { return e.series.n }
// SeriesData returns the raw series data.
func (e *TagBlockValueElem) SeriesData() []byte { return e.series.data }
// SeriesID returns the series ID at index i.
func (e *TagBlockValueElem) SeriesID(i int) uint32 {
return binary.BigEndian.Uint32(e.series.data[i*SeriesIDSize:])
}
// SeriesIDs returns the list of decoded series IDs.
func (e *TagBlockValueElem) SeriesIDs() []uint32 {
a := make([]uint32, 0, e.series.n)
var prev uint32
for data := e.series.data; len(data) > 0; {
delta, n := binary.Uvarint(data)
data = data[n:]
seriesID := prev + uint32(delta)
a = append(a, seriesID)
prev = seriesID
}
return a
}
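// Worked example (illustrative): the IDs {5, 9, 12} are stored as the
// uvarint deltas 5, 4, 3; decoding accumulates 0+5=5, 5+4=9, 9+3=12.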
// Size returns the size of the element.
func (e *TagBlockValueElem) Size() int { return e.size }
// unmarshal unmarshals buf into e.
func (e *TagBlockValueElem) unmarshal(buf []byte) {
start := len(buf)
// Parse flag data.
e.flag, buf = buf[0], buf[1:]
// Parse value.
sz, n := binary.Uvarint(buf)
e.value, buf = buf[n:n+int(sz)], buf[n+int(sz):]
// Parse series count.
v, n := binary.Uvarint(buf)
e.series.n = uint32(v)
buf = buf[n:]
// Parse data block size.
sz, n = binary.Uvarint(buf)
buf = buf[n:]
// Save reference to series data.
e.series.data = buf[:sz]
buf = buf[sz:]
// Save length of elem.
e.size = start - len(buf)
}
// TagBlockTrailerSize is the total size of the on-disk trailer.
const TagBlockTrailerSize = 0 +
8 + 8 + // value data offset/size
8 + 8 + // key data offset/size
8 + 8 + // hash index offset/size
8 + // size
2 // version
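// On-disk trailer layout (illustrative summary of WriteTo and
// ReadTagBlockTrailer below): three offset/size pairs (value data, key data,
// hash index) at 8 bytes each, then an 8-byte total size and a 2-byte
// version, for 58 bytes in all.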
// TagBlockTrailer represents meta data at the end of a TagBlock.
type TagBlockTrailer struct {
Version int // Encoding version
Size int64 // Total size w/ trailer
// Offset & size of value data section.
ValueData struct {
Offset int64
Size int64
}
// Offset & size of key data section.
KeyData struct {
Offset int64
Size int64
}
// Offset & size of hash map section.
HashIndex struct {
Offset int64
Size int64
}
}
// WriteTo writes the trailer to w.
func (t *TagBlockTrailer) WriteTo(w io.Writer) (n int64, err error) {
// Write data info.
if err := writeUint64To(w, uint64(t.ValueData.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.ValueData.Size), &n); err != nil {
return n, err
}
// Write key data info.
if err := writeUint64To(w, uint64(t.KeyData.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.KeyData.Size), &n); err != nil {
return n, err
}
// Write hash index info.
if err := writeUint64To(w, uint64(t.HashIndex.Offset), &n); err != nil {
return n, err
} else if err := writeUint64To(w, uint64(t.HashIndex.Size), &n); err != nil {
return n, err
}
// Write total size & encoding version.
if err := writeUint64To(w, uint64(t.Size), &n); err != nil {
return n, err
} else if err := writeUint16To(w, uint16(t.Version), &n); err != nil {
return n, err
}
return n, nil
}
// ReadTagBlockTrailer returns the tag block trailer from data.
func ReadTagBlockTrailer(data []byte) (TagBlockTrailer, error) {
var t TagBlockTrailer
// Read version.
t.Version = int(binary.BigEndian.Uint16(data[len(data)-2:]))
if t.Version != TagBlockVersion {
return t, ErrUnsupportedTagBlockVersion
}
// Slice trailer data.
buf := data[len(data)-TagBlockTrailerSize:]
// Read data section info.
t.ValueData.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.ValueData.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read key section info.
t.KeyData.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.KeyData.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read hash section info.
t.HashIndex.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
t.HashIndex.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
// Read total size.
t.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:]
return t, nil
}
// TagBlockEncoder encodes tags into a TagBlock section.
type TagBlockEncoder struct {
w io.Writer
buf bytes.Buffer
// Track value offsets.
offsets *rhh.HashMap
// Track bytes written, sections.
n int64
trailer TagBlockTrailer
// Track tag keys.
keys []tagKeyEncodeEntry
}
// NewTagBlockEncoder returns a new TagBlockEncoder.
func NewTagBlockEncoder(w io.Writer) *TagBlockEncoder {
return &TagBlockEncoder{
w: w,
offsets: rhh.NewHashMap(rhh.Options{LoadFactor: LoadFactor}),
trailer: TagBlockTrailer{
Version: TagBlockVersion,
},
}
}
// N returns the number of bytes written.
func (enc *TagBlockEncoder) N() int64 { return enc.n }
// EncodeKey writes a tag key to the underlying writer.
// Keys must be encoded in lexicographically ascending order.
func (enc *TagBlockEncoder) EncodeKey(key []byte, deleted bool) error {
// An initial empty byte must be written.
if err := enc.ensureHeaderWritten(); err != nil {
return err
}
// Verify key is lexicographically after previous key.
if len(enc.keys) > 0 {
prev := enc.keys[len(enc.keys)-1].key
if cmp := bytes.Compare(prev, key); cmp == 1 {
return fmt.Errorf("tag key out of order: prev=%s, new=%s", prev, key)
} else if cmp == 0 {
return fmt.Errorf("tag key already encoded: %s", key)
}
}
// Flush values section for key.
if err := enc.flushValueHashIndex(); err != nil {
return err
}
// Append key on to the end of the key list.
entry := tagKeyEncodeEntry{
key: key,
deleted: deleted,
}
entry.data.offset = enc.n
enc.keys = append(enc.keys, entry)
return nil
}
// EncodeValue writes a tag value, and its associated series IDs, to the
// underlying writer. A tag key must be encoded via EncodeKey before any of
// its values are encoded.
func (enc *TagBlockEncoder) EncodeValue(value []byte, deleted bool, seriesIDs []uint32) error {
if len(enc.keys) == 0 {
return fmt.Errorf("tag key must be encoded before encoding values")
} else if len(value) == 0 {
return fmt.Errorf("zero length tag value not allowed")
}
// Save offset to hash map.
enc.offsets.Put(value, enc.n)
// Write flag.
if err := writeUint8To(enc.w, encodeTagValueFlag(deleted), &enc.n); err != nil {
return err
}
// Write value.
if err := writeUvarintTo(enc.w, uint64(len(value)), &enc.n); err != nil {
return err
} else if err := writeTo(enc.w, value, &enc.n); err != nil {
return err
}
// Build series data in buffer.
enc.buf.Reset()
var prev uint32
for _, seriesID := range seriesIDs {
delta := seriesID - prev
var buf [binary.MaxVarintLen32]byte
i := binary.PutUvarint(buf[:], uint64(delta))
if _, err := enc.buf.Write(buf[:i]); err != nil {
return err
}
prev = seriesID
}
// Write series count.
if err := writeUvarintTo(enc.w, uint64(len(seriesIDs)), &enc.n); err != nil {
return err
}
// Write data size & buffer.
if err := writeUvarintTo(enc.w, uint64(enc.buf.Len()), &enc.n); err != nil {
return err
}
nn, err := enc.buf.WriteTo(enc.w)
if enc.n += nn; err != nil {
return err
}
return nil
}
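// Note (illustrative): series IDs are delta-encoded against the previous ID,
// so callers are expected to pass seriesIDs in ascending order; an unsorted
// slice would make the uint32 delta wrap around.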
// Close flushes the trailer of the encoder to the writer.
func (enc *TagBlockEncoder) Close() error {
// Flush last value set.
if err := enc.ensureHeaderWritten(); err != nil {
return err
} else if err := enc.flushValueHashIndex(); err != nil {
return err
}
// Save ending position of entire data block.
enc.trailer.ValueData.Size = enc.n - enc.trailer.ValueData.Offset
// Write key block to point to value blocks.
if err := enc.encodeTagKeyBlock(); err != nil {
return err
}
// Compute total size w/ trailer.
enc.trailer.Size = enc.n + TagBlockTrailerSize
// Write trailer.
nn, err := enc.trailer.WriteTo(enc.w)
enc.n += nn
if err != nil {
return err
}
return nil
}
// ensureHeaderWritten writes a single byte to offset the rest of the block.
func (enc *TagBlockEncoder) ensureHeaderWritten() error {
if enc.n > 0 {
return nil
} else if _, err := enc.w.Write([]byte{0}); err != nil {
return err
}
enc.n++
enc.trailer.ValueData.Offset = enc.n
return nil
}
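// Why the padding byte matters (illustrative): hash index slots store an
// offset of 0 to mean "empty" (see the lookups in TagKeyElem and
// TagValueElem above), so no real entry may begin at offset 0. Writing one
// leading byte guarantees every encoded entry starts at offset >= 1.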
// flushValueHashIndex builds and writes the hash map at the end of a value set.
func (enc *TagBlockEncoder) flushValueHashIndex() error {
// Ignore if no keys have been written.
if len(enc.keys) == 0 {
return nil
}
key := &enc.keys[len(enc.keys)-1]
// Save size of data section.
key.data.size = enc.n - key.data.offset
// Encode hash map length.
key.hashIndex.offset = enc.n
if err := writeUint64To(enc.w, uint64(enc.offsets.Cap()), &enc.n); err != nil {
return err
}
// Encode hash map offset entries.
for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i)
offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {
return err
}
}
key.hashIndex.size = enc.n - key.hashIndex.offset
// Clear offsets.
enc.offsets = rhh.NewHashMap(rhh.Options{LoadFactor: LoadFactor})
return nil
}
// encodeTagKeyBlock encodes the keys section to the writer.
func (enc *TagBlockEncoder) encodeTagKeyBlock() error {
offsets := rhh.NewHashMap(rhh.Options{Capacity: int64(len(enc.keys)), LoadFactor: LoadFactor})
// Encode key list in sorted order.
enc.trailer.KeyData.Offset = enc.n
for i := range enc.keys {
entry := &enc.keys[i]
// Save current offset so we can use it in the hash index.
offsets.Put(entry.key, enc.n)
if err := writeUint8To(enc.w, encodeTagKeyFlag(entry.deleted), &enc.n); err != nil {
return err
}
// Write value data offset & size.
if err := writeUint64To(enc.w, uint64(entry.data.offset), &enc.n); err != nil {
return err
} else if err := writeUint64To(enc.w, uint64(entry.data.size), &enc.n); err != nil {
return err
}
// Write value hash index offset & size.
if err := writeUint64To(enc.w, uint64(entry.hashIndex.offset), &enc.n); err != nil {
return err
} else if err := writeUint64To(enc.w, uint64(entry.hashIndex.size), &enc.n); err != nil {
return err
}
// Write key length and data.
if err := writeUvarintTo(enc.w, uint64(len(entry.key)), &enc.n); err != nil {
return err
} else if err := writeTo(enc.w, entry.key, &enc.n); err != nil {
return err
}
}
enc.trailer.KeyData.Size = enc.n - enc.trailer.KeyData.Offset
// Encode hash map length.
enc.trailer.HashIndex.Offset = enc.n
if err := writeUint64To(enc.w, uint64(offsets.Cap()), &enc.n); err != nil {
return err
}
// Encode hash map offset entries.
for i := int64(0); i < offsets.Cap(); i++ {
_, v := offsets.Elem(i)
offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {
return err
}
}
enc.trailer.HashIndex.Size = enc.n - enc.trailer.HashIndex.Offset
return nil
}
type tagKeyEncodeEntry struct {
key []byte
deleted bool
data struct {
offset int64
size int64
}
hashIndex struct {
offset int64
size int64
}
}
func encodeTagKeyFlag(deleted bool) byte {
var flag byte
if deleted {
flag |= TagKeyTombstoneFlag
}
return flag
}
func encodeTagValueFlag(deleted bool) byte {
var flag byte
if deleted {
flag |= TagValueTombstoneFlag
}
return flag
}

View File

@@ -0,0 +1,139 @@
package tsi1_test
import (
"bytes"
"fmt"
"reflect"
"testing"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure tag blocks can be written and opened.
func TestTagBlockWriter(t *testing.T) {
// Write 3 series to writer.
var buf bytes.Buffer
enc := tsi1.NewTagBlockEncoder(&buf)
if err := enc.EncodeKey([]byte("host"), false); err != nil {
t.Fatal(err)
} else if err := enc.EncodeValue([]byte("server0"), false, []uint32{1}); err != nil {
t.Fatal(err)
} else if err := enc.EncodeValue([]byte("server1"), false, []uint32{2}); err != nil {
t.Fatal(err)
} else if err := enc.EncodeValue([]byte("server2"), false, []uint32{3}); err != nil {
t.Fatal(err)
}
if err := enc.EncodeKey([]byte("region"), false); err != nil {
t.Fatal(err)
} else if err := enc.EncodeValue([]byte("us-east"), false, []uint32{1, 2}); err != nil {
t.Fatal(err)
} else if err := enc.EncodeValue([]byte("us-west"), false, []uint32{3}); err != nil {
t.Fatal(err)
}
// Flush encoder.
if err := enc.Close(); err != nil {
t.Fatal(err)
} else if int(enc.N()) != buf.Len() {
t.Fatalf("bytes written mismatch: %d, expected %d", enc.N(), buf.Len())
}
// Unmarshal into a block.
var blk tsi1.TagBlock
if err := blk.UnmarshalBinary(buf.Bytes()); err != nil {
t.Fatal(err)
}
// Verify data.
if e := blk.TagValueElem([]byte("region"), []byte("us-east")); e == nil {
t.Fatal("expected element")
} else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{1, 2}) {
t.Fatalf("unexpected series ids: %#v", a)
}
if e := blk.TagValueElem([]byte("region"), []byte("us-west")); e == nil {
t.Fatal("expected element")
} else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{3}) {
t.Fatalf("unexpected series ids: %#v", a)
}
if e := blk.TagValueElem([]byte("host"), []byte("server0")); e == nil {
t.Fatal("expected element")
} else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{1}) {
t.Fatalf("unexpected series ids: %#v", a)
}
if e := blk.TagValueElem([]byte("host"), []byte("server1")); e == nil {
t.Fatal("expected element")
} else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{2}) {
t.Fatalf("unexpected series ids: %#v", a)
}
if e := blk.TagValueElem([]byte("host"), []byte("server2")); e == nil {
t.Fatal("expected element")
} else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{3}) {
t.Fatalf("unexpected series ids: %#v", a)
}
}
var benchmarkTagBlock10x1000 *tsi1.TagBlock
var benchmarkTagBlock100x1000 *tsi1.TagBlock
var benchmarkTagBlock1000x1000 *tsi1.TagBlock
var benchmarkTagBlock1x1000000 *tsi1.TagBlock
func BenchmarkTagBlock_SeriesN_10_1000(b *testing.B) {
benchmarkTagBlock_SeriesN(b, 10, 1000, &benchmarkTagBlock10x1000)
}
func BenchmarkTagBlock_SeriesN_100_1000(b *testing.B) {
benchmarkTagBlock_SeriesN(b, 100, 1000, &benchmarkTagBlock100x1000)
}
func BenchmarkTagBlock_SeriesN_1000_1000(b *testing.B) {
benchmarkTagBlock_SeriesN(b, 1000, 1000, &benchmarkTagBlock1000x1000)
}
func BenchmarkTagBlock_SeriesN_1_1000000(b *testing.B) {
benchmarkTagBlock_SeriesN(b, 1, 1000000, &benchmarkTagBlock1x1000000)
}
func benchmarkTagBlock_SeriesN(b *testing.B, tagN, valueN int, blk **tsi1.TagBlock) {
if (*blk) == nil {
var buf bytes.Buffer
enc := tsi1.NewTagBlockEncoder(&buf)
// Write block.
for i := 0; i < tagN; i++ {
if err := enc.EncodeKey([]byte(fmt.Sprintf("%08d", i)), false); err != nil {
b.Fatal(err)
}
for j := 0; j < valueN; j++ {
if err := enc.EncodeValue([]byte(fmt.Sprintf("%08d", j)), false, []uint32{1}); err != nil {
b.Fatal(err)
}
}
}
// Flush encoder.
if err := enc.Close(); err != nil {
b.Fatal(err)
}
b.Log("size", buf.Len())
// Unmarshal into a block.
*blk = &tsi1.TagBlock{}
if err := (*blk).UnmarshalBinary(buf.Bytes()); err != nil {
b.Fatal(err)
}
}
// Benchmark lookups.
b.ReportAllocs()
b.ResetTimer()
key, value := []byte("0"), []byte("0")
for i := 0; i < b.N; i++ {
if e := (*blk).TagValueElem(key, value); e == nil {
b.Fatal("expected element")
} else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 {
b.Fatalf("unexpected series count: %d", n)
}
}
}

View File

@@ -0,0 +1,818 @@
package tsi1
import (
"bytes"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"os"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
)
// LoadFactor is the fill percentage used by RHH (Robin Hood hashing) indexes.
const LoadFactor = 80
// MeasurementElem represents a generic measurement element.
type MeasurementElem interface {
Name() []byte
Deleted() bool
}
// MeasurementElems represents a list of MeasurementElem.
type MeasurementElems []MeasurementElem
func (a MeasurementElems) Len() int { return len(a) }
func (a MeasurementElems) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a MeasurementElems) Less(i, j int) bool { return bytes.Compare(a[i].Name(), a[j].Name()) == -1 }
// MeasurementIterator represents an iterator over a list of measurements.
type MeasurementIterator interface {
Next() MeasurementElem
}
// MergeMeasurementIterators returns an iterator that merges a set of iterators.
// Iterators earlier in the list take precedence, and a deletion by an
// earlier iterator invalidates matching elements from later iterators.
func MergeMeasurementIterators(itrs ...MeasurementIterator) MeasurementIterator {
if len(itrs) == 0 {
return nil
}
return &measurementMergeIterator{
e: make(measurementMergeElem, 0, len(itrs)),
buf: make([]MeasurementElem, len(itrs)),
itrs: itrs,
}
}
type measurementMergeIterator struct {
e measurementMergeElem
buf []MeasurementElem
itrs []MeasurementIterator
}
// Next returns the element with the next lowest name across the iterators.
//
// If multiple iterators contain the same name then the first is returned
// and the remaining ones are skipped.
func (itr *measurementMergeIterator) Next() MeasurementElem {
// Find next lowest name amongst the buffers.
var name []byte
for i, buf := range itr.buf {
// Fill buffer if empty.
if buf == nil {
if buf = itr.itrs[i].Next(); buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest name.
if name == nil || bytes.Compare(itr.buf[i].Name(), name) == -1 {
name = itr.buf[i].Name()
}
}
// Return nil if no elements remaining.
if name == nil {
return nil
}
// Merge all elements together and clear buffers.
itr.e = itr.e[:0]
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf.Name(), name) {
continue
}
itr.e = append(itr.e, buf)
itr.buf[i] = nil
}
return itr.e
}
// measurementMergeElem represents a merged measurement element.
type measurementMergeElem []MeasurementElem
// Name returns the name of the first element.
func (p measurementMergeElem) Name() []byte {
if len(p) == 0 {
return nil
}
return p[0].Name()
}
// Deleted returns the deleted flag of the first element.
func (p measurementMergeElem) Deleted() bool {
if len(p) == 0 {
return false
}
return p[0].Deleted()
}
// filterUndeletedMeasurementIterator returns all measurements which are not deleted.
type filterUndeletedMeasurementIterator struct {
itr MeasurementIterator
}
// FilterUndeletedMeasurementIterator returns an iterator which filters all deleted measurements.
func FilterUndeletedMeasurementIterator(itr MeasurementIterator) MeasurementIterator {
if itr == nil {
return nil
}
return &filterUndeletedMeasurementIterator{itr: itr}
}
func (itr *filterUndeletedMeasurementIterator) Next() MeasurementElem {
for {
e := itr.itr.Next()
if e == nil {
return nil
} else if e.Deleted() {
continue
}
return e
}
}
// TagKeyElem represents a generic tag key element.
type TagKeyElem interface {
Key() []byte
Deleted() bool
TagValueIterator() TagValueIterator
}
// TagKeyIterator represents an iterator over a list of tag keys.
type TagKeyIterator interface {
Next() TagKeyElem
}
// MergeTagKeyIterators returns an iterator that merges a set of iterators.
// Iterators earlier in the list take precedence, and a deletion by an
// earlier iterator invalidates matching elements from later iterators.
func MergeTagKeyIterators(itrs ...TagKeyIterator) TagKeyIterator {
if len(itrs) == 0 {
return nil
}
return &tagKeyMergeIterator{
e: make(tagKeyMergeElem, 0, len(itrs)),
buf: make([]TagKeyElem, len(itrs)),
itrs: itrs,
}
}
type tagKeyMergeIterator struct {
e tagKeyMergeElem
buf []TagKeyElem
itrs []TagKeyIterator
}
// Next returns the element with the next lowest key across the iterators.
//
// If multiple iterators contain the same key then the first is returned
// and the remaining ones are skipped.
func (itr *tagKeyMergeIterator) Next() TagKeyElem {
// Find next lowest key amongst the buffers.
var key []byte
for i, buf := range itr.buf {
// Fill buffer.
if buf == nil {
if buf = itr.itrs[i].Next(); buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest key.
if key == nil || bytes.Compare(buf.Key(), key) == -1 {
key = buf.Key()
}
}
// Return nil if no elements remaining.
if key == nil {
return nil
}
// Merge elements together & clear buffer.
itr.e = itr.e[:0]
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf.Key(), key) {
continue
}
itr.e = append(itr.e, buf)
itr.buf[i] = nil
}
return itr.e
}
// tagKeyMergeElem represents a merged tag key element.
type tagKeyMergeElem []TagKeyElem
// Key returns the key of the first element.
func (p tagKeyMergeElem) Key() []byte {
if len(p) == 0 {
return nil
}
return p[0].Key()
}
// Deleted returns the deleted flag of the first element.
func (p tagKeyMergeElem) Deleted() bool {
if len(p) == 0 {
return false
}
return p[0].Deleted()
}
// TagValueIterator returns a merge iterator for all elements until a tombstone occurs.
func (p tagKeyMergeElem) TagValueIterator() TagValueIterator {
if len(p) == 0 {
return nil
}
a := make([]TagValueIterator, 0, len(p))
for _, e := range p {
itr := e.TagValueIterator()
a = append(a, itr)
if e.Deleted() {
break
}
}
return MergeTagValueIterators(a...)
}
// TagValueElem represents a generic tag value element.
type TagValueElem interface {
Value() []byte
Deleted() bool
}
// TagValueIterator represents an iterator over a list of tag values.
type TagValueIterator interface {
Next() TagValueElem
}
// MergeTagValueIterators returns an iterator that merges a set of iterators.
// Iterators earlier in the list take precedence, and a deletion by an
// earlier iterator invalidates matching elements from later iterators.
func MergeTagValueIterators(itrs ...TagValueIterator) TagValueIterator {
if len(itrs) == 0 {
return nil
}
return &tagValueMergeIterator{
e: make(tagValueMergeElem, 0, len(itrs)),
buf: make([]TagValueElem, len(itrs)),
itrs: itrs,
}
}
type tagValueMergeIterator struct {
e tagValueMergeElem
buf []TagValueElem
itrs []TagValueIterator
}
// Next returns the element with the next lowest value across the iterators.
//
// If multiple iterators contain the same value then the first is returned
// and the remaining ones are skipped.
func (itr *tagValueMergeIterator) Next() TagValueElem {
// Find next lowest value amongst the buffers.
var value []byte
for i, buf := range itr.buf {
// Fill buffer.
if buf == nil {
if buf = itr.itrs[i].Next(); buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest value.
if value == nil || bytes.Compare(buf.Value(), value) == -1 {
value = buf.Value()
}
}
// Return nil if no elements remaining.
if value == nil {
return nil
}
// Merge elements and clear buffers.
itr.e = itr.e[:0]
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf.Value(), value) {
continue
}
itr.e = append(itr.e, buf)
itr.buf[i] = nil
}
return itr.e
}
// tagValueMergeElem represents a merged tag value element.
type tagValueMergeElem []TagValueElem
// Value returns the value of the first element.
func (p tagValueMergeElem) Value() []byte {
if len(p) == 0 {
return nil
}
return p[0].Value()
}
// Deleted returns the deleted flag of the first element.
func (p tagValueMergeElem) Deleted() bool {
if len(p) == 0 {
return false
}
return p[0].Deleted()
}
// SeriesElem represents a generic series element.
type SeriesElem interface {
Name() []byte
Tags() models.Tags
Deleted() bool
// InfluxQL expression associated with series during filtering.
Expr() influxql.Expr
}
// SeriesElemKey encodes e as a series key.
func SeriesElemKey(e SeriesElem) []byte {
name, tags := e.Name(), e.Tags()
// TODO: Precompute allocation size.
// FIXME: Handle escaping.
var buf []byte
buf = append(buf, name...)
for _, t := range tags {
buf = append(buf, ',')
buf = append(buf, t.Key...)
buf = append(buf, '=')
buf = append(buf, t.Value...)
}
return buf
}
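// Example (illustrative): a series named "cpu" with tags region=east,host=a
// encodes as "cpu,region=east,host=a". As the FIXME above notes, commas and
// equals signs inside names or tags are not yet escaped.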
// CompareSeriesElem returns -1 if a < b, 1 if a > b, and 0 if equal.
func CompareSeriesElem(a, b SeriesElem) int {
if cmp := bytes.Compare(a.Name(), b.Name()); cmp != 0 {
return cmp
}
return models.CompareTags(a.Tags(), b.Tags())
}
// seriesElem represents an in-memory implementation of SeriesElem.
type seriesElem struct {
name []byte
tags models.Tags
deleted bool
}
func (e *seriesElem) Name() []byte { return e.name }
func (e *seriesElem) Tags() models.Tags { return e.tags }
func (e *seriesElem) Deleted() bool { return e.deleted }
func (e *seriesElem) Expr() influxql.Expr { return nil }
// SeriesIterator represents an iterator over a list of series.
type SeriesIterator interface {
Next() SeriesElem
}
// MergeSeriesIterators returns an iterator that merges a set of iterators.
// Iterators earlier in the list take precedence, and a deletion by an
// earlier iterator invalidates matching elements from later iterators.
func MergeSeriesIterators(itrs ...SeriesIterator) SeriesIterator {
if n := len(itrs); n == 0 {
return nil
} else if n == 1 {
return itrs[0]
}
return &seriesMergeIterator{
buf: make([]SeriesElem, len(itrs)),
itrs: itrs,
}
}
// seriesMergeIterator is an iterator that merges multiple iterators together.
type seriesMergeIterator struct {
buf []SeriesElem
itrs []SeriesIterator
}
// Next returns the element with the next lowest name/tags across the iterators.
//
// If multiple iterators contain the same name/tags then the first is returned
// and the remaining ones are skipped.
func (itr *seriesMergeIterator) Next() SeriesElem {
// Find next lowest name/tags amongst the buffers.
var name []byte
var tags models.Tags
for i, buf := range itr.buf {
// Fill buffer.
if buf == nil {
if buf = itr.itrs[i].Next(); buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// If the name is not set then pick the first non-empty name.
if name == nil {
name, tags = buf.Name(), buf.Tags()
continue
}
// Set name/tags if they are lower than what has been seen.
if cmp := bytes.Compare(buf.Name(), name); cmp == -1 || (cmp == 0 && models.CompareTags(buf.Tags(), tags) == -1) {
name, tags = buf.Name(), buf.Tags()
}
}
// Return nil if no elements remaining.
if name == nil {
return nil
}
// Take the first buffered element that matches and clear all matching buffers.
var e SeriesElem
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf.Name(), name) || models.CompareTags(buf.Tags(), tags) != 0 {
continue
}
// Copy first matching buffer to the return buffer.
if e == nil {
e = buf
}
// Clear buffer.
itr.buf[i] = nil
}
return e
}
// IntersectSeriesIterators returns an iterator that only returns series which
// occur in both iterators. If both series have associated expressions then
// they are combined together.
func IntersectSeriesIterators(itr0, itr1 SeriesIterator) SeriesIterator {
if itr0 == nil || itr1 == nil {
return nil
}
return &seriesIntersectIterator{itrs: [2]SeriesIterator{itr0, itr1}}
}
// seriesIntersectIterator is an iterator that merges two iterators together.
type seriesIntersectIterator struct {
e seriesExprElem
buf [2]SeriesElem
itrs [2]SeriesIterator
}
// Next returns the next element which occurs in both iterators.
func (itr *seriesIntersectIterator) Next() (e SeriesElem) {
for {
// Fill buffers.
if itr.buf[0] == nil {
itr.buf[0] = itr.itrs[0].Next()
}
if itr.buf[1] == nil {
itr.buf[1] = itr.itrs[1].Next()
}
// Exit if either buffer is still empty.
if itr.buf[0] == nil || itr.buf[1] == nil {
return nil
}
// Skip if both series are not equal.
if cmp := CompareSeriesElem(itr.buf[0], itr.buf[1]); cmp == -1 {
itr.buf[0] = nil
continue
} else if cmp == 1 {
itr.buf[1] = nil
continue
}
// Merge series together if equal.
itr.e.SeriesElem = itr.buf[0]
// Attach expression.
expr0 := itr.buf[0].Expr()
expr1 := itr.buf[1].Expr()
if expr0 == nil {
itr.e.expr = expr1
} else if expr1 == nil {
itr.e.expr = expr0
} else {
itr.e.expr = influxql.Reduce(&influxql.BinaryExpr{
Op: influxql.AND,
LHS: expr0,
RHS: expr1,
}, nil)
}
itr.buf[0], itr.buf[1] = nil, nil
return &itr.e
}
}
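// Expression merging (illustrative): if the left element carries the filter
// `region = 'east'` and the right carries `host = 'a'`, the intersected
// element carries the influxql.Reduce of `region = 'east' AND host = 'a'`.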
// UnionSeriesIterators returns an iterator that returns series from both
// iterators. If both series have associated expressions then they are
// combined together.
func UnionSeriesIterators(itr0, itr1 SeriesIterator) SeriesIterator {
// Return other iterator if either one is nil.
if itr0 == nil {
return itr1
} else if itr1 == nil {
return itr0
}
return &seriesUnionIterator{itrs: [2]SeriesIterator{itr0, itr1}}
}
// seriesUnionIterator is an iterator that unions two iterators together.
type seriesUnionIterator struct {
e seriesExprElem
buf [2]SeriesElem
itrs [2]SeriesIterator
}
// Next returns the next element which occurs in either iterator.
func (itr *seriesUnionIterator) Next() (e SeriesElem) {
// Fill buffers.
if itr.buf[0] == nil {
itr.buf[0] = itr.itrs[0].Next()
}
if itr.buf[1] == nil {
itr.buf[1] = itr.itrs[1].Next()
}
// Return the other iterator if either one is empty.
if itr.buf[0] == nil {
e, itr.buf[1] = itr.buf[1], nil
return e
} else if itr.buf[1] == nil {
e, itr.buf[0] = itr.buf[0], nil
return e
}
// Return lesser series.
if cmp := CompareSeriesElem(itr.buf[0], itr.buf[1]); cmp == -1 {
e, itr.buf[0] = itr.buf[0], nil
return e
} else if cmp == 1 {
e, itr.buf[1] = itr.buf[1], nil
return e
}
// Attach element.
itr.e.SeriesElem = itr.buf[0]
// Attach expression.
expr0 := itr.buf[0].Expr()
expr1 := itr.buf[1].Expr()
if expr0 != nil && expr1 != nil {
itr.e.expr = influxql.Reduce(&influxql.BinaryExpr{
Op: influxql.OR,
LHS: expr0,
RHS: expr1,
}, nil)
} else {
itr.e.expr = nil
}
itr.buf[0], itr.buf[1] = nil, nil
return &itr.e
}
// DifferenceSeriesIterators returns an iterator that only returns series which
// occur in the first iterator but not in the second iterator.
func DifferenceSeriesIterators(itr0, itr1 SeriesIterator) SeriesIterator {
if itr0 != nil && itr1 == nil {
return itr0
} else if itr0 == nil {
return nil
}
return &seriesDifferenceIterator{itrs: [2]SeriesIterator{itr0, itr1}}
}
// seriesDifferenceIterator is an iterator that merges two iterators together.
type seriesDifferenceIterator struct {
buf [2]SeriesElem
itrs [2]SeriesIterator
}
// Next returns the next element which occurs only in the first iterator.
func (itr *seriesDifferenceIterator) Next() (e SeriesElem) {
for {
// Fill buffers.
if itr.buf[0] == nil {
itr.buf[0] = itr.itrs[0].Next()
}
if itr.buf[1] == nil {
itr.buf[1] = itr.itrs[1].Next()
}
// Exit if first buffer is still empty.
if itr.buf[0] == nil {
return nil
} else if itr.buf[1] == nil {
e, itr.buf[0] = itr.buf[0], nil
return e
}
// Return first series if it's less.
// If second series is less then skip it.
// If both series are equal then skip both.
if cmp := CompareSeriesElem(itr.buf[0], itr.buf[1]); cmp == -1 {
e, itr.buf[0] = itr.buf[0], nil
return e
} else if cmp == 1 {
itr.buf[1] = nil
continue
} else {
itr.buf[0], itr.buf[1] = nil, nil
continue
}
}
}
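// Set semantics (illustrative): with itr0 yielding {a, b, c} and itr1
// yielding {b, d}, the difference iterator yields {a, c}: equal elements
// are consumed from both sides, and elements only in itr1 are skipped.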
// filterUndeletedSeriesIterator returns all series which are not deleted.
type filterUndeletedSeriesIterator struct {
itr SeriesIterator
}
// FilterUndeletedSeriesIterator returns an iterator which filters all deleted series.
func FilterUndeletedSeriesIterator(itr SeriesIterator) SeriesIterator {
if itr == nil {
return nil
}
return &filterUndeletedSeriesIterator{itr: itr}
}
func (itr *filterUndeletedSeriesIterator) Next() SeriesElem {
for {
e := itr.itr.Next()
if e == nil {
return nil
} else if e.Deleted() {
continue
}
return e
}
}
// seriesExprElem holds a series and its associated filter expression.
type seriesExprElem struct {
SeriesElem
expr influxql.Expr
}
// Expr returns the associated expression.
func (e *seriesExprElem) Expr() influxql.Expr { return e.expr }
// seriesExprIterator is an iterator that attaches an associated expression.
type seriesExprIterator struct {
itr SeriesIterator
e seriesExprElem
}
// newSeriesExprIterator returns a new instance of seriesExprIterator.
func newSeriesExprIterator(itr SeriesIterator, expr influxql.Expr) SeriesIterator {
if itr == nil {
return nil
}
return &seriesExprIterator{
itr: itr,
e: seriesExprElem{
expr: expr,
},
}
}
// Next returns the next element in the iterator.
func (itr *seriesExprIterator) Next() SeriesElem {
itr.e.SeriesElem = itr.itr.Next()
if itr.e.SeriesElem == nil {
return nil
}
return &itr.e
}
// seriesIDIterator represents an iterator over a list of series IDs.
type seriesIDIterator interface {
next() uint32
}
// writeTo writes v to w and updates n.
func writeTo(w io.Writer, v []byte, n *int64) error {
nn, err := w.Write(v)
*n += int64(nn)
return err
}
// writeUint8To writes v to w and updates n.
func writeUint8To(w io.Writer, v uint8, n *int64) error {
nn, err := w.Write([]byte{v})
*n += int64(nn)
return err
}
// writeUint16To writes v to w using big-endian encoding and updates n.
func writeUint16To(w io.Writer, v uint16, n *int64) error {
var buf [2]byte
binary.BigEndian.PutUint16(buf[:], v)
nn, err := w.Write(buf[:])
*n += int64(nn)
return err
}
// writeUint32To writes v to w using big-endian encoding and updates n.
func writeUint32To(w io.Writer, v uint32, n *int64) error {
var buf [4]byte
binary.BigEndian.PutUint32(buf[:], v)
nn, err := w.Write(buf[:])
*n += int64(nn)
return err
}
// writeUint64To writes v to w using big-endian encoding and updates n.
func writeUint64To(w io.Writer, v uint64, n *int64) error {
var buf [8]byte
binary.BigEndian.PutUint64(buf[:], v)
nn, err := w.Write(buf[:])
*n += int64(nn)
return err
}
// writeUvarintTo writes v to w using variable-length encoding and updates n.
func writeUvarintTo(w io.Writer, v uint64, n *int64) error {
var buf [binary.MaxVarintLen64]byte
i := binary.PutUvarint(buf[:], v)
nn, err := w.Write(buf[:i])
*n += int64(nn)
return err
}
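// Usage pattern (illustrative): the write helpers thread a running byte
// count through *n, e.g.
//
//	var n int64
//	if err := writeUint32To(w, 42, &n); err != nil {
//		return err
//	}
//	// n == 4 after a successful write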
type uint32Slice []uint32
func (a uint32Slice) Len() int { return len(a) }
func (a uint32Slice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a uint32Slice) Less(i, j int) bool { return a[i] < a[j] }
type uint64Slice []uint64
func (a uint64Slice) Len() int { return len(a) }
func (a uint64Slice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a uint64Slice) Less(i, j int) bool { return a[i] < a[j] }
type byteSlices [][]byte
func (a byteSlices) Len() int { return len(a) }
func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 }
// copyBytes returns a copy of b.
func copyBytes(b []byte) []byte {
if b == nil {
return nil
}
buf := make([]byte, len(b))
copy(buf, b)
return buf
}
// assert will panic with a given formatted message if the given condition is false.
func assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assert failed: "+msg, v...))
}
}
type byTagKey []*influxql.TagSet
func (t byTagKey) Len() int { return len(t) }
func (t byTagKey) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) < 0 }
func (t byTagKey) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
// hexdump is a helper for dumping binary data to stderr.
func hexdump(data []byte) { os.Stderr.Write([]byte(hex.Dump(data))) }

View File

@@ -0,0 +1,308 @@
package tsi1_test
import (
"bytes"
"io/ioutil"
"reflect"
"testing"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure iterator can operate over an in-memory list of elements.
func TestMeasurementIterator(t *testing.T) {
elems := []MeasurementElem{
MeasurementElem{name: []byte("cpu"), deleted: true},
MeasurementElem{name: []byte("mem")},
}
itr := MeasurementIterator{Elems: elems}
if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) {
t.Fatalf("unexpected elem(0): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) {
t.Fatalf("unexpected elem(1): %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can merge multiple iterators together.
func TestMergeMeasurementIterators(t *testing.T) {
itr := tsi1.MergeMeasurementIterators(
&MeasurementIterator{Elems: []MeasurementElem{
{name: []byte("aaa")},
{name: []byte("bbb"), deleted: true},
{name: []byte("ccc")},
}},
&MeasurementIterator{},
&MeasurementIterator{Elems: []MeasurementElem{
{name: []byte("bbb")},
{name: []byte("ccc"), deleted: true},
{name: []byte("ddd")},
}},
)
if e := itr.Next(); !bytes.Equal(e.Name(), []byte("aaa")) || e.Deleted() {
t.Fatalf("unexpected elem(0): %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("bbb")) || !e.Deleted() {
t.Fatalf("unexpected elem(1): %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("ccc")) || e.Deleted() {
t.Fatalf("unexpected elem(2): %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("ddd")) || e.Deleted() {
t.Fatalf("unexpected elem(3): %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can operate over an in-memory list of tag key elements.
func TestTagKeyIterator(t *testing.T) {
elems := []TagKeyElem{
{key: []byte("aaa"), deleted: true},
{key: []byte("bbb")},
}
itr := TagKeyIterator{Elems: elems}
if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) {
t.Fatalf("unexpected elem(0): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) {
t.Fatalf("unexpected elem(1): %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can merge multiple iterators together.
func TestMergeTagKeyIterators(t *testing.T) {
itr := tsi1.MergeTagKeyIterators(
&TagKeyIterator{Elems: []TagKeyElem{
{key: []byte("aaa")},
{key: []byte("bbb"), deleted: true},
{key: []byte("ccc")},
}},
&TagKeyIterator{},
&TagKeyIterator{Elems: []TagKeyElem{
{key: []byte("bbb")},
{key: []byte("ccc"), deleted: true},
{key: []byte("ddd")},
}},
)
if e := itr.Next(); !bytes.Equal(e.Key(), []byte("aaa")) || e.Deleted() {
t.Fatalf("unexpected elem(0): %s/%v", e.Key(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("bbb")) || !e.Deleted() {
t.Fatalf("unexpected elem(1): %s/%v", e.Key(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("ccc")) || e.Deleted() {
t.Fatalf("unexpected elem(2): %s/%v", e.Key(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("ddd")) || e.Deleted() {
t.Fatalf("unexpected elem(3): %s/%v", e.Key(), e.Deleted())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can operate over an in-memory list of tag value elements.
func TestTagValueIterator(t *testing.T) {
elems := []TagValueElem{
{value: []byte("aaa"), deleted: true},
{value: []byte("bbb")},
}
itr := &TagValueIterator{Elems: elems}
if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) {
t.Fatalf("unexpected elem(0): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) {
t.Fatalf("unexpected elem(1): %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can merge multiple iterators together.
func TestMergeTagValueIterators(t *testing.T) {
itr := tsi1.MergeTagValueIterators(
&TagValueIterator{Elems: []TagValueElem{
{value: []byte("aaa")},
{value: []byte("bbb"), deleted: true},
{value: []byte("ccc")},
}},
&TagValueIterator{},
&TagValueIterator{Elems: []TagValueElem{
{value: []byte("bbb")},
{value: []byte("ccc"), deleted: true},
{value: []byte("ddd")},
}},
)
if e := itr.Next(); !bytes.Equal(e.Value(), []byte("aaa")) || e.Deleted() {
t.Fatalf("unexpected elem(0): %s/%v", e.Value(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("bbb")) || !e.Deleted() {
t.Fatalf("unexpected elem(1): %s/%v", e.Value(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("ccc")) || e.Deleted() {
t.Fatalf("unexpected elem(2): %s/%v", e.Value(), e.Deleted())
} else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("ddd")) || e.Deleted() {
t.Fatalf("unexpected elem(3): %s/%v", e.Value(), e.Deleted())
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can operate over an in-memory list of series.
func TestSeriesIterator(t *testing.T) {
elems := []SeriesElem{
{name: []byte("cpu"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}, deleted: true},
{name: []byte("mem")},
}
itr := SeriesIterator{Elems: elems}
if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) {
t.Fatalf("unexpected elem(0): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) {
t.Fatalf("unexpected elem(1): %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Ensure iterator can merge multiple iterators together.
func TestMergeSeriesIterators(t *testing.T) {
itr := tsi1.MergeSeriesIterators(
&SeriesIterator{Elems: []SeriesElem{
{name: []byte("aaa"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}, deleted: true},
{name: []byte("bbb"), deleted: true},
{name: []byte("ccc")},
}},
&SeriesIterator{},
&SeriesIterator{Elems: []SeriesElem{
{name: []byte("aaa"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}},
{name: []byte("aaa"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-west")}}},
{name: []byte("bbb")},
{name: []byte("ccc"), deleted: true},
{name: []byte("ddd")},
}},
)
if e := itr.Next(); !reflect.DeepEqual(e, &SeriesElem{name: []byte("aaa"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-east")}}, deleted: true}) {
t.Fatalf("unexpected elem(0): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(e, &SeriesElem{name: []byte("aaa"), tags: models.Tags{{Key: []byte("region"), Value: []byte("us-west")}}}) {
t.Fatalf("unexpected elem(1): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(e, &SeriesElem{name: []byte("bbb"), deleted: true}) {
t.Fatalf("unexpected elem(2): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(e, &SeriesElem{name: []byte("ccc")}) {
t.Fatalf("unexpected elem(3): %#v", e)
} else if e := itr.Next(); !reflect.DeepEqual(e, &SeriesElem{name: []byte("ddd")}) {
t.Fatalf("unexpected elem(4): %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected nil elem: %#v", e)
}
}
// MeasurementElem represents a test implementation of tsi1.MeasurementElem.
type MeasurementElem struct {
name []byte
deleted bool
}
func (e *MeasurementElem) Name() []byte { return e.name }
func (e *MeasurementElem) Deleted() bool { return e.deleted }
func (e *MeasurementElem) TagKeyIterator() tsi1.TagKeyIterator { return nil }
// MeasurementIterator represents an iterator over a slice of measurements.
type MeasurementIterator struct {
Elems []MeasurementElem
}
// Next returns the next element in the iterator.
func (itr *MeasurementIterator) Next() (e tsi1.MeasurementElem) {
if len(itr.Elems) == 0 {
return nil
}
e, itr.Elems = &itr.Elems[0], itr.Elems[1:]
return e
}
// TagKeyElem represents a test implementation of tsi1.TagKeyElem.
type TagKeyElem struct {
key []byte
deleted bool
}
func (e *TagKeyElem) Key() []byte { return e.key }
func (e *TagKeyElem) Deleted() bool { return e.deleted }
func (e *TagKeyElem) TagValueIterator() tsi1.TagValueIterator { return nil }
// TagKeyIterator represents an iterator over a slice of tag keys.
type TagKeyIterator struct {
Elems []TagKeyElem
}
// Next returns the next element in the iterator.
func (itr *TagKeyIterator) Next() (e tsi1.TagKeyElem) {
if len(itr.Elems) == 0 {
return nil
}
e, itr.Elems = &itr.Elems[0], itr.Elems[1:]
return e
}
// TagValueElem represents a test implementation of tsi1.TagValueElem.
type TagValueElem struct {
value []byte
deleted bool
}
func (e *TagValueElem) Value() []byte { return e.value }
func (e *TagValueElem) Deleted() bool { return e.deleted }
func (e *TagValueElem) SeriesIterator() tsi1.SeriesIterator { return nil }
// TagValueIterator represents an iterator over a slice of tag values.
type TagValueIterator struct {
Elems []TagValueElem
}
// Next returns the next element in the iterator.
func (itr *TagValueIterator) Next() (e tsi1.TagValueElem) {
if len(itr.Elems) == 0 {
return nil
}
e, itr.Elems = &itr.Elems[0], itr.Elems[1:]
return e
}
// SeriesElem represents a test implementation of tsi1.SeriesElem.
type SeriesElem struct {
name []byte
tags models.Tags
deleted bool
expr influxql.Expr
}
func (e *SeriesElem) Name() []byte { return e.name }
func (e *SeriesElem) Tags() models.Tags { return e.tags }
func (e *SeriesElem) Deleted() bool { return e.deleted }
func (e *SeriesElem) Expr() influxql.Expr { return e.expr }
// SeriesIterator represents an iterator over a slice of series.
type SeriesIterator struct {
Elems []SeriesElem
}
// Next returns the next element in the iterator.
func (itr *SeriesIterator) Next() (e tsi1.SeriesElem) {
if len(itr.Elems) == 0 {
return nil
}
e, itr.Elems = &itr.Elems[0], itr.Elems[1:]
return e
}
// MustTempDir returns a temporary directory. It panics on error.
func MustTempDir() string {
path, err := ioutil.TempDir("", "tsi-")
if err != nil {
panic(err)
}
return path
}