vsphere-influxdb-go/vendor/github.com/influxdata/influxdb/tsdb/engine/tsm1/int.go

package tsm1

// Integer encoding uses two different strategies depending on the range of values in
// the uncompressed data.  Encoded values are first encoding used zig zag encoding.
// This interleaves positive and negative integers across a range of positive integers.
//
// For example, [-2,-1,0,1] becomes [3,1,0,2]. See
// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers
// for more information.
//
// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using
// simple8b encoding.  If any value is larger than 1 << 60 - 1, the values are stored uncompressed.
//
// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers
// or 8 byte uncompressed integers.  The 4 high bits of the first byte indicate the encoding type
// for the remaining bytes.
//
// There are currently two encoding types that can be used with room for 16 total.  These additional
// encoding slots are reserved for future use.  One improvement to be made is to use a patched
// encoding such as PFOR if only a small number of values exceed the max compressed value range.  This
// should improve compression ratios with very large integers near the ends of the int64 range.

import (
	"encoding/binary"
	"fmt"

	"github.com/jwilder/encoding/simple8b"
)

const (
	// intUncompressed is an uncompressed format using 8 bytes per point
	intUncompressed = 0
	// intCompressedSimple is a bit-packed format using simple8b encoding
	intCompressedSimple = 1
	// intCompressedRLE is a run-length encoding format
	intCompressedRLE = 2
)

// IntegerEncoder encodes int64s into byte slices.
type IntegerEncoder struct {
	prev   int64
	rle    bool
	values []uint64
}

// NewIntegerEncoder returns a new integer encoder with an initial buffer of values sized at sz.
func NewIntegerEncoder(sz int) IntegerEncoder {
	return IntegerEncoder{
		rle:    true,
		values: make([]uint64, 0, sz),
	}
}

// Flush is no-op
func (e *IntegerEncoder) Flush() {}

// Reset sets the encoder back to its initial state.
func (e *IntegerEncoder) Reset() {
	e.prev = 0
	e.rle = true
	e.values = e.values[:0]
}

// Write encodes v to the underlying buffers.
func (e *IntegerEncoder) Write(v int64) {
	// Delta-encode each value as it's written.  This happens before
	// ZigZagEncoding because the deltas could be negative.
	delta := v - e.prev
	e.prev = v
	enc := ZigZagEncode(delta)
	if len(e.values) > 1 {
		e.rle = e.rle && e.values[len(e.values)-1] == enc
	}

	e.values = append(e.values, enc)
}

// Bytes returns a copy of the underlying buffer.
func (e *IntegerEncoder) Bytes() ([]byte, error) {
	// Only run-length encode if it could reduce storage size.
	if e.rle && len(e.values) > 2 {
		return e.encodeRLE()
	}

	for _, v := range e.values {
		// Value is too large to encode using packed format
		if v > simple8b.MaxValue {
			return e.encodeUncompressed()
		}
	}

	return e.encodePacked()
}

func (e *IntegerEncoder) encodeRLE() ([]byte, error) {
	// Large varints can take up to 10 bytes.  We're storing 3 + 1
	// type byte.
	var b [31]byte

	// 4 high bits used for the encoding type
	b[0] = byte(intCompressedRLE) << 4

	i := 1
	// The first value
	binary.BigEndian.PutUint64(b[i:], e.values[0])
	i += 8
	// The first delta
	i += binary.PutUvarint(b[i:], e.values[1])
	// The number of times the delta is repeated
	i += binary.PutUvarint(b[i:], uint64(len(e.values)-1))

	return b[:i], nil
}

func (e *IntegerEncoder) encodePacked() ([]byte, error) {
	if len(e.values) == 0 {
		return nil, nil
	}

	// Encode all but the first value.  Fist value is written unencoded
	// using 8 bytes.
	encoded, err := simple8b.EncodeAll(e.values[1:])
	if err != nil {
		return nil, err
	}

	b := make([]byte, 1+(len(encoded)+1)*8)
	// 4 high bits of first byte store the encoding type for the block
	b[0] = byte(intCompressedSimple) << 4

	// Write the first value since it's not part of the encoded values
	binary.BigEndian.PutUint64(b[1:9], e.values[0])

	// Write the encoded values
	for i, v := range encoded {
		binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)
	}
	return b, nil
}

func (e *IntegerEncoder) encodeUncompressed() ([]byte, error) {
	if len(e.values) == 0 {
		return nil, nil
	}

	b := make([]byte, 1+len(e.values)*8)
	// 4 high bits of first byte store the encoding type for the block
	b[0] = byte(intUncompressed) << 4

	for i, v := range e.values {
		binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v)
	}
	return b, nil
}

// IntegerDecoder decodes a byte slice into int64s.
type IntegerDecoder struct {
	// 240 is the maximum number of values that can be encoded into a single uint64 using simple8b
	values [240]uint64
	bytes  []byte
	i      int
	n      int
	prev   int64
	first  bool

	// The first value for a run-length encoded byte slice
	rleFirst uint64

	// The delta value for a run-length encoded byte slice
	rleDelta uint64
	encoding byte
	err      error
}

// SetBytes sets the underlying byte slice of the decoder.
func (d *IntegerDecoder) SetBytes(b []byte) {
	if len(b) > 0 {
		d.encoding = b[0] >> 4
		d.bytes = b[1:]
	} else {
		d.encoding = 0
		d.bytes = nil
	}

	d.i = 0
	d.n = 0
	d.prev = 0
	d.first = true

	d.rleFirst = 0
	d.rleDelta = 0
	d.err = nil
}

// Next returns true if there are any values remaining to be decoded.
func (d *IntegerDecoder) Next() bool {
	if d.i >= d.n && len(d.bytes) == 0 {
		return false
	}

	d.i++

	if d.i >= d.n {
		switch d.encoding {
		case intUncompressed:
			d.decodeUncompressed()
		case intCompressedSimple:
			d.decodePacked()
		case intCompressedRLE:
			d.decodeRLE()
		default:
			d.err = fmt.Errorf("unknown encoding %v", d.encoding)
		}
	}
	return d.err == nil && d.i < d.n
}

// Error returns the last error encountered by the decoder.
func (d *IntegerDecoder) Error() error {
	return d.err
}

// Read returns the next value from the decoder.
func (d *IntegerDecoder) Read() int64 {
	switch d.encoding {
	case intCompressedRLE:
		return ZigZagDecode(d.rleFirst) + int64(d.i)*ZigZagDecode(d.rleDelta)
	default:
		v := ZigZagDecode(d.values[d.i])
		// v is the delta encoded value, we need to add the prior value to get the original
		v = v + d.prev
		d.prev = v
		return v
	}
}

func (d *IntegerDecoder) decodeRLE() {
	if len(d.bytes) == 0 {
		return
	}

	if len(d.bytes) < 8 {
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode RLE starting value")
		return
	}

	var i, n int

	// Next 8 bytes is the starting value
	first := binary.BigEndian.Uint64(d.bytes[i : i+8])
	i += 8

	// Next 1-10 bytes is the delta value
	value, n := binary.Uvarint(d.bytes[i:])
	if n <= 0 {
		d.err = fmt.Errorf("IntegerDecoder: invalid RLE delta value")
		return
	}
	i += n

	// Last 1-10 bytes is how many times the value repeats
	count, n := binary.Uvarint(d.bytes[i:])
	if n <= 0 {
		d.err = fmt.Errorf("IntegerDecoder: invalid RLE repeat value")
		return
	}

	// Store the first value and delta value so we do not need to allocate
	// a large values slice.  We can compute the value at position d.i on
	// demand.
	d.rleFirst = first
	d.rleDelta = value
	d.n = int(count) + 1
	d.i = 0

	// We've process all the bytes
	d.bytes = nil
}

func (d *IntegerDecoder) decodePacked() {
	if len(d.bytes) == 0 {
		return
	}

	if len(d.bytes) < 8 {
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode packed value")
		return
	}

	v := binary.BigEndian.Uint64(d.bytes[0:8])
	// The first value is always unencoded
	if d.first {
		d.first = false
		d.n = 1
		d.values[0] = v
	} else {
		n, err := simple8b.Decode(&d.values, v)
		if err != nil {
			// Should never happen, only error that could be returned is if the the value to be decoded was not
			// actually encoded by simple8b encoder.
			d.err = fmt.Errorf("failed to decode value %v: %v", v, err)
		}

		d.n = n
	}
	d.i = 0
	d.bytes = d.bytes[8:]
}

func (d *IntegerDecoder) decodeUncompressed() {
	if len(d.bytes) == 0 {
		return
	}

	if len(d.bytes) < 8 {
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode uncompressed value")
		return
	}

	d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8])
	d.i = 0
	d.n = 1
	d.bytes = d.bytes[8:]
}