vsphere-influxdb-go/vendor/github.com/influxdata/influxdb/tsdb/engine/tsm1/int.go

package tsm1

// Integer encoding uses two different strategies depending on the range of values in
// the uncompressed data.  Values are first delta encoded and the deltas are then zig zag encoded.
// This interleaves positive and negative integers across a range of positive integers.
//
// For example, [-2,-1,0,1] becomes [3,1,0,2]. See
// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers
// for more information.
//
// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using
// simple8b encoding.  If any value is larger than 1 << 60 - 1, the values are stored uncompressed.
//
// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers
// or 8 byte uncompressed integers.  The 4 high bits of the first byte indicate the encoding type
// for the remaining bytes.
//
// There are currently three encoding types (uncompressed, simple8b and run-length) that can be used with room for 16 total.  These additional
// encoding slots are reserved for future use.  One improvement to be made is to use a patched
// encoding such as PFOR if only a small number of values exceed the max compressed value range.  This
// should improve compression ratios with very large integers near the ends of the int64 range.

import (
	"encoding/binary"
	"fmt"

	"github.com/jwilder/encoding/simple8b"
)

// Encoding type identifiers.  The encoders in this file store the identifier
// in the 4 high bits of a block's first byte (byte(id) << 4) and
// IntegerDecoder.SetBytes recovers it with b[0] >> 4.
const (
	// intUncompressed is an uncompressed format using 8 bytes per point
	// (each zig zag encoded delta is written as a big-endian uint64).
	intUncompressed = 0
	// intCompressedSimple is a bit-packed format using simple8b encoding.
	// The first value is written as a plain 8 byte word, followed by the
	// simple8b-packed words.
	intCompressedSimple = 1
	// intCompressedRLE is a run-length encoding format: an 8 byte first
	// value, then a uvarint delta and a uvarint repeat count.
	intCompressedRLE = 2
)

// IntegerEncoder encodes int64s into byte slices.
//
// Values are accumulated with Write and turned into a single encoded block by
// Bytes.  Reset returns the encoder to its initial state so it can be reused.
type IntegerEncoder struct {
	// prev is the last value passed to Write; the next delta is computed
	// against it.  It starts at 0.
	prev   int64
	// rle is true while every delta written after the first one has been
	// equal to its predecessor, i.e. while the block is still eligible for
	// run-length encoding.
	rle    bool
	// values holds the zig zag encoded deltas in write order.  Because prev
	// starts at 0, values[0] is the zig zag encoding of the first value itself.
	values []uint64
}

// NewIntegerEncoder builds an IntegerEncoder whose value buffer is empty but
// pre-allocated with capacity sz.  A fresh encoder starts out eligible for
// run-length encoding.
func NewIntegerEncoder(sz int) IntegerEncoder {
	var enc IntegerEncoder
	enc.rle = true
	enc.values = make([]uint64, 0, sz)
	return enc
}

// Flush is a no-op.  Write appends straight to the in-memory values slice and
// the encoded bytes are only produced by Bytes, so there is nothing pending to
// flush.
// NOTE(review): presumably kept so the type matches the other encoders'
// method set -- confirm against callers.
func (e *IntegerEncoder) Flush() {}

// Reset returns the encoder to the state NewIntegerEncoder produces: no
// previous value, run-length encoding eligible, and no buffered values.  The
// backing array of the value buffer is kept so it can be reused.
func (e *IntegerEncoder) Reset() {
	e.values = e.values[:0]
	e.prev, e.rle = 0, true
}

// Write appends v to the block being built.
//
// The value is stored as the zig zag encoding of its difference from the
// previously written value (0 for the first call).  The difference is taken
// before zig zag encoding because it may be negative.
func (e *IntegerEncoder) Write(v int64) {
	last := e.prev
	e.prev = v
	zz := ZigZagEncode(v - last)

	// values[0] is the first value rather than a delta, so comparisons only
	// start once at least one real delta (values[1]) has been stored.  Once
	// two consecutive deltas differ the block can never be run-length encoded.
	if n := len(e.values); n >= 2 && e.rle {
		e.rle = e.values[n-1] == zz
	}

	e.values = append(e.values, zz)
}

// Bytes encodes everything written so far and returns the resulting block.
//
// The encoding is chosen as follows:
//   - run-length, when all deltas are equal and there are more than two
//     values (fewer values would not save any space);
//   - uncompressed, when any stored value is larger than simple8b.MaxValue;
//   - simple8b packed otherwise.
func (e *IntegerEncoder) Bytes() ([]byte, error) {
	if len(e.values) > 2 && e.rle {
		return e.encodeRLE()
	}

	// Scan until the first value that does not fit the packed format.
	packable := true
	for i := 0; packable && i < len(e.values); i++ {
		packable = e.values[i] <= simple8b.MaxValue
	}
	if !packable {
		return e.encodeUncompressed()
	}

	return e.encodePacked()
}

// encodeRLE writes the block in run-length form:
//
//	1 byte   header (encoding type in the 4 high bits)
//	8 bytes  first value, big-endian
//	uvarint  the repeated delta (values[1])
//	uvarint  number of times the delta repeats (len(values)-1)
//
// It indexes values[0] and values[1], so it needs at least two values; Bytes
// only calls it when there are more than two.
func (e *IntegerEncoder) encodeRLE() ([]byte, error) {
	// A uvarint of a 64-bit value takes at most 10 bytes, so 1+8+10+10 = 29
	// bytes are needed in the worst case; 31 leaves some slack.
	buf := make([]byte, 31)

	// 4 high bits carry the encoding type.
	buf[0] = byte(intCompressedRLE) << 4

	// First value.
	binary.BigEndian.PutUint64(buf[1:], e.values[0])
	n := 1 + 8

	// The delta, then how often it repeats.
	n += binary.PutUvarint(buf[n:], e.values[1])
	n += binary.PutUvarint(buf[n:], uint64(len(e.values)-1))

	return buf[:n], nil
}

// encodePacked writes the block in simple8b form:
//
//	1 byte     header (encoding type in the 4 high bits)
//	8 bytes    first value, big-endian, not packed
//	8*k bytes  the remaining values packed by simple8b into k words
//
// It returns nil, nil when nothing has been written, and passes through any
// error reported by simple8b.EncodeAll.
func (e *IntegerEncoder) encodePacked() ([]byte, error) {
	if len(e.values) == 0 {
		return nil, nil
	}

	// Everything after the first value goes through simple8b.
	words, err := simple8b.EncodeAll(e.values[1:])
	if err != nil {
		return nil, err
	}

	// Header byte + one word for the first value + the packed words.
	out := make([]byte, 1+8*(len(words)+1))
	out[0] = byte(intCompressedSimple) << 4

	// The first value is stored as a plain 8 byte word.
	binary.BigEndian.PutUint64(out[1:9], e.values[0])

	// Packed words follow back to back.
	dst := out[9:]
	for _, w := range words {
		binary.BigEndian.PutUint64(dst[:8], w)
		dst = dst[8:]
	}
	return out, nil
}

// encodeUncompressed writes the block as a header byte (encoding type in the
// 4 high bits) followed by every stored value as an 8 byte big-endian word.
// It returns nil, nil when nothing has been written.
func (e *IntegerEncoder) encodeUncompressed() ([]byte, error) {
	count := len(e.values)
	if count == 0 {
		return nil, nil
	}

	out := make([]byte, 1+8*count)
	out[0] = byte(intUncompressed) << 4

	// off tracks where the next 8 byte word starts, right after the header.
	for off, i := 1, 0; i < count; off, i = off+8, i+1 {
		binary.BigEndian.PutUint64(out[off:off+8], e.values[i])
	}
	return out, nil
}

// IntegerDecoder decodes a byte slice into int64s.
//
// As implied by the methods below: hand a block to SetBytes, loop on Next,
// call Read after each Next that returns true, and check Error once Next
// returns false.
type IntegerDecoder struct {
	// values is the scratch buffer for the current batch of zig zag encoded
	// deltas.  simple8b.Decode fills it for packed words; the unpacked first
	// value and uncompressed values are placed in values[0].
	// 240 is the maximum number of values that can be encoded into a single uint64 using simple8b
	values [240]uint64
	// bytes is the input that has not been decoded yet (the header byte is
	// already stripped by SetBytes).
	bytes  []byte
	// i is the index of the current value within the current batch; for
	// run-length blocks it is the position within the run.
	i      int
	// n is the number of values in the current batch.
	n      int
	// prev is the last value returned by Read on the non-RLE path; each
	// decoded delta is added to it.
	prev   int64
	// first is true until decodePacked has consumed the leading 8 byte value
	// that is stored unpacked.
	first  bool

	// The first value for a run-length encoded byte slice
	rleFirst uint64

	// The delta value for a run-length encoded byte slice
	rleDelta uint64
	// encoding is the encoding type taken from the 4 high bits of the
	// block's first byte.
	encoding byte
	// err is the last error set while decoding; it is reported by Error and
	// makes Next return false.
	err      error
}

// SetBytes points the decoder at a new encoded block and clears all decoding
// state left over from a previous block.
//
// The encoding type is read from the 4 high bits of b[0] and the rest of b is
// kept as the payload.  An empty b leaves the decoder with no payload and
// encoding 0.
func (d *IntegerDecoder) SetBytes(b []byte) {
	d.encoding, d.bytes = 0, nil
	if len(b) > 0 {
		d.encoding, d.bytes = b[0]>>4, b[1:]
	}

	// Batch position and delta state.
	d.i, d.n = 0, 0
	d.prev = 0
	d.first = true

	// Run-length state and any previous error.
	d.rleFirst, d.rleDelta = 0, 0
	d.err = nil
}

// Next advances to the next value and reports whether one is available for
// Read.  It returns false when the input is exhausted or an error has been
// recorded (see Error).
func (d *IntegerDecoder) Next() bool {
	// Nothing left in the current batch and no more input to decode.
	if len(d.bytes) == 0 && d.i >= d.n {
		return false
	}

	d.i++

	// Still inside the current batch: no decoding needed.
	if d.i < d.n {
		return d.err == nil
	}

	// The batch is used up; decode the next one according to the block's
	// encoding.  The decode helpers reset d.i and d.n on success.
	switch d.encoding {
	case intCompressedRLE:
		d.decodeRLE()
	case intCompressedSimple:
		d.decodePacked()
	case intUncompressed:
		d.decodeUncompressed()
	default:
		d.err = fmt.Errorf("unknown encoding %v", d.encoding)
	}

	return d.err == nil && d.i < d.n
}

// Error returns the last error encountered by the decoder, or nil if none has
// been recorded since the last SetBytes.  Next returns false once an error is
// set, so callers should check Error after their Next loop ends.
func (d *IntegerDecoder) Error() error {
	return d.err
}

// Read returns the value at the decoder's current position.  It is meant to
// be called once after each Next that returned true.
//
// For run-length blocks the value is computed from the stored first value and
// delta.  For all other encodings the current delta is added to the previously
// returned value, which also updates the decoder's running total, so calling
// Read twice without Next applies the delta twice.
func (d *IntegerDecoder) Read() int64 {
	if d.encoding == intCompressedRLE {
		start := ZigZagDecode(d.rleFirst)
		step := ZigZagDecode(d.rleDelta)
		return start + int64(d.i)*step
	}

	// Undo the delta encoding against the running value.
	d.prev += ZigZagDecode(d.values[d.i])
	return d.prev
}

// decodeRLE parses a run-length block: an 8 byte big-endian first value, a
// uvarint delta and a uvarint repeat count.  On success the whole payload is
// considered consumed and values are later computed on demand by Read.  On
// malformed input it records an error and leaves the rest of the state alone.
// An empty payload is a no-op.
func (d *IntegerDecoder) decodeRLE() {
	switch {
	case len(d.bytes) == 0:
		return
	case len(d.bytes) < 8:
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode RLE starting value")
		return
	}

	// Starting value occupies the first 8 bytes.
	start := binary.BigEndian.Uint64(d.bytes[:8])
	rest := d.bytes[8:]

	// Then the delta, 1-10 bytes.
	delta, sz := binary.Uvarint(rest)
	if sz <= 0 {
		d.err = fmt.Errorf("IntegerDecoder: invalid RLE delta value")
		return
	}
	rest = rest[sz:]

	// And finally how many times the delta repeats, 1-10 bytes.
	repeat, sz := binary.Uvarint(rest)
	if sz <= 0 {
		d.err = fmt.Errorf("IntegerDecoder: invalid RLE repeat value")
		return
	}

	// Keep only the first value and the delta instead of materialising the
	// run; Read derives the value at position d.i from them.
	d.rleFirst, d.rleDelta = start, delta
	d.n = int(repeat) + 1
	d.i = 0

	// The payload is fully processed.
	d.bytes = nil
}

// decodePacked consumes the next 8 byte word of a simple8b block and makes
// its contents the current batch.  The very first word of a block is the
// first value stored as-is; every later word is unpacked with simple8b.Decode
// into d.values.  An empty payload is a no-op; fewer than 8 remaining bytes
// is recorded as an error.
func (d *IntegerDecoder) decodePacked() {
	switch remaining := len(d.bytes); {
	case remaining == 0:
		return
	case remaining < 8:
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode packed value")
		return
	}

	word := binary.BigEndian.Uint64(d.bytes[:8])

	// The leading word holds exactly one, unpacked, value.
	count := 1
	if d.first {
		d.first = false
		d.values[0] = word
	} else {
		var err error
		if count, err = simple8b.Decode(&d.values, word); err != nil {
			// Not expected in practice: Decode only fails for a word that
			// was not produced by the simple8b encoder.
			d.err = fmt.Errorf("failed to decode value %v: %v", word, err)
		}
	}

	// Start the new batch and step past the consumed word.
	d.n = count
	d.i = 0
	d.bytes = d.bytes[8:]
}

// decodeUncompressed consumes the next 8 byte big-endian word of an
// uncompressed block and makes it the current single-value batch.  An empty
// payload is a no-op; fewer than 8 remaining bytes is recorded as an error.
func (d *IntegerDecoder) decodeUncompressed() {
	switch remaining := len(d.bytes); {
	case remaining == 0:
		return
	case remaining < 8:
		d.err = fmt.Errorf("IntegerDecoder: not enough data to decode uncompressed value")
		return
	}

	head, tail := d.bytes[:8], d.bytes[8:]
	d.values[0] = binary.BigEndian.Uint64(head)

	// One value in the batch, positioned at its start.
	d.i, d.n = 0, 1
	d.bytes = tail
}
add vendoring with go dep 2017-10-25 22:52:40 +02:00			`package tsm1`

			`// Integer encoding uses two different strategies depending on the range of values in`
			`// the uncompressed data. Encoded values are first encoding used zig zag encoding.`
			`// This interleaves positive and negative integers across a range of positive integers.`
			`//`
			`// For example, [-2,-1,0,1] becomes [3,1,0,2]. See`
			`// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers`
			`// for more information.`
			`//`
			`// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using`
			`// simple8b encoding. If any value is larger than 1 << 60 - 1, the values are stored uncompressed.`
			`//`
			`// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers`
			`// or 8 byte uncompressed integers. The 4 high bits of the first byte indicate the encoding type`
			`// for the remaining bytes.`
			`//`
			`// There are currently two encoding types that can be used with room for 16 total. These additional`
			`// encoding slots are reserved for future use. One improvement to be made is to use a patched`
			`// encoding such as PFOR if only a small number of values exceed the max compressed value range. This`
			`// should improve compression ratios with very large integers near the ends of the int64 range.`

			`import (`
			`"encoding/binary"`
			`"fmt"`

			`"github.com/jwilder/encoding/simple8b"`
			`)`

			`const (`
			`// intUncompressed is an uncompressed format using 8 bytes per point`
			`intUncompressed = 0`
			`// intCompressedSimple is a bit-packed format using simple8b encoding`
			`intCompressedSimple = 1`
			`// intCompressedRLE is a run-length encoding format`
			`intCompressedRLE = 2`
			`)`

			`// IntegerEncoder encodes int64s into byte slices.`
			`type IntegerEncoder struct {`
			`prev int64`
			`rle bool`
			`values []uint64`
			`}`

			`// NewIntegerEncoder returns a new integer encoder with an initial buffer of values sized at sz.`
			`func NewIntegerEncoder(sz int) IntegerEncoder {`
			`return IntegerEncoder{`
			`rle: true,`
			`values: make([]uint64, 0, sz),`
			`}`
			`}`

			`// Flush is no-op`
			`func (e *IntegerEncoder) Flush() {}`

			`// Reset sets the encoder back to its initial state.`
			`func (e *IntegerEncoder) Reset() {`
			`e.prev = 0`
			`e.rle = true`
			`e.values = e.values[:0]`
			`}`

			`// Write encodes v to the underlying buffers.`
			`func (e *IntegerEncoder) Write(v int64) {`
			`// Delta-encode each value as it's written. This happens before`
			`// ZigZagEncoding because the deltas could be negative.`
			`delta := v - e.prev`
			`e.prev = v`
			`enc := ZigZagEncode(delta)`
			`if len(e.values) > 1 {`
			`e.rle = e.rle && e.values[len(e.values)-1] == enc`
			`}`

			`e.values = append(e.values, enc)`
			`}`

			`// Bytes returns a copy of the underlying buffer.`
			`func (e *IntegerEncoder) Bytes() ([]byte, error) {`
			`// Only run-length encode if it could reduce storage size.`
			`if e.rle && len(e.values) > 2 {`
			`return e.encodeRLE()`
			`}`

			`for _, v := range e.values {`
			`// Value is too large to encode using packed format`
			`if v > simple8b.MaxValue {`
			`return e.encodeUncompressed()`
			`}`
			`}`

			`return e.encodePacked()`
			`}`

			`func (e *IntegerEncoder) encodeRLE() ([]byte, error) {`
			`// Large varints can take up to 10 bytes. We're storing 3 + 1`
			`// type byte.`
			`var b [31]byte`

			`// 4 high bits used for the encoding type`
			`b[0] = byte(intCompressedRLE) << 4`

			`i := 1`
			`// The first value`
			`binary.BigEndian.PutUint64(b[i:], e.values[0])`
			`i += 8`
			`// The first delta`
			`i += binary.PutUvarint(b[i:], e.values[1])`
			`// The number of times the delta is repeated`
			`i += binary.PutUvarint(b[i:], uint64(len(e.values)-1))`

			`return b[:i], nil`
			`}`

			`func (e *IntegerEncoder) encodePacked() ([]byte, error) {`
			`if len(e.values) == 0 {`
			`return nil, nil`
			`}`

			`// Encode all but the first value. Fist value is written unencoded`
			`// using 8 bytes.`
			`encoded, err := simple8b.EncodeAll(e.values[1:])`
			`if err != nil {`
			`return nil, err`
			`}`

			`b := make([]byte, 1+(len(encoded)+1)*8)`
			`// 4 high bits of first byte store the encoding type for the block`
			`b[0] = byte(intCompressedSimple) << 4`

			`// Write the first value since it's not part of the encoded values`
			`binary.BigEndian.PutUint64(b[1:9], e.values[0])`

			`// Write the encoded values`
			`for i, v := range encoded {`
			`binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)`
			`}`
			`return b, nil`
			`}`

			`func (e *IntegerEncoder) encodeUncompressed() ([]byte, error) {`
			`if len(e.values) == 0 {`
			`return nil, nil`
			`}`

			`b := make([]byte, 1+len(e.values)*8)`
			`// 4 high bits of first byte store the encoding type for the block`
			`b[0] = byte(intUncompressed) << 4`

			`for i, v := range e.values {`
			`binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v)`
			`}`
			`return b, nil`
			`}`

			`// IntegerDecoder decodes a byte slice into int64s.`
			`type IntegerDecoder struct {`
			`// 240 is the maximum number of values that can be encoded into a single uint64 using simple8b`
			`values [240]uint64`
			`bytes []byte`
			`i int`
			`n int`
			`prev int64`
			`first bool`

			`// The first value for a run-length encoded byte slice`
			`rleFirst uint64`

			`// The delta value for a run-length encoded byte slice`
			`rleDelta uint64`
			`encoding byte`
			`err error`
			`}`

			`// SetBytes sets the underlying byte slice of the decoder.`
			`func (d *IntegerDecoder) SetBytes(b []byte) {`
			`if len(b) > 0 {`
			`d.encoding = b[0] >> 4`
			`d.bytes = b[1:]`
			`} else {`
			`d.encoding = 0`
			`d.bytes = nil`
			`}`

			`d.i = 0`
			`d.n = 0`
			`d.prev = 0`
			`d.first = true`

			`d.rleFirst = 0`
			`d.rleDelta = 0`
			`d.err = nil`
			`}`

			`// Next returns true if there are any values remaining to be decoded.`
			`func (d *IntegerDecoder) Next() bool {`
			`if d.i >= d.n && len(d.bytes) == 0 {`
			`return false`
			`}`

			`d.i++`

			`if d.i >= d.n {`
			`switch d.encoding {`
			`case intUncompressed:`
			`d.decodeUncompressed()`
			`case intCompressedSimple:`
			`d.decodePacked()`
			`case intCompressedRLE:`
			`d.decodeRLE()`
			`default:`
			`d.err = fmt.Errorf("unknown encoding %v", d.encoding)`
			`}`
			`}`
			`return d.err == nil && d.i < d.n`
			`}`

			`// Error returns the last error encountered by the decoder.`
			`func (d *IntegerDecoder) Error() error {`
			`return d.err`
			`}`

			`// Read returns the next value from the decoder.`
			`func (d *IntegerDecoder) Read() int64 {`
			`switch d.encoding {`
			`case intCompressedRLE:`
			`return ZigZagDecode(d.rleFirst) + int64(d.i)*ZigZagDecode(d.rleDelta)`
			`default:`
			`v := ZigZagDecode(d.values[d.i])`
			`// v is the delta encoded value, we need to add the prior value to get the original`
			`v = v + d.prev`
			`d.prev = v`
			`return v`
			`}`
			`}`

			`func (d *IntegerDecoder) decodeRLE() {`
			`if len(d.bytes) == 0 {`
			`return`
			`}`

			`if len(d.bytes) < 8 {`
			`d.err = fmt.Errorf("IntegerDecoder: not enough data to decode RLE starting value")`
			`return`
			`}`

			`var i, n int`

			`// Next 8 bytes is the starting value`
			`first := binary.BigEndian.Uint64(d.bytes[i : i+8])`
			`i += 8`

			`// Next 1-10 bytes is the delta value`
			`value, n := binary.Uvarint(d.bytes[i:])`
			`if n <= 0 {`
			`d.err = fmt.Errorf("IntegerDecoder: invalid RLE delta value")`
			`return`
			`}`
			`i += n`

			`// Last 1-10 bytes is how many times the value repeats`
			`count, n := binary.Uvarint(d.bytes[i:])`
			`if n <= 0 {`
			`d.err = fmt.Errorf("IntegerDecoder: invalid RLE repeat value")`
			`return`
			`}`

			`// Store the first value and delta value so we do not need to allocate`
			`// a large values slice. We can compute the value at position d.i on`
			`// demand.`
			`d.rleFirst = first`
			`d.rleDelta = value`
			`d.n = int(count) + 1`
			`d.i = 0`

			`// We've process all the bytes`
			`d.bytes = nil`
			`}`

			`func (d *IntegerDecoder) decodePacked() {`
			`if len(d.bytes) == 0 {`
			`return`
			`}`

			`if len(d.bytes) < 8 {`
			`d.err = fmt.Errorf("IntegerDecoder: not enough data to decode packed value")`
			`return`
			`}`

			`v := binary.BigEndian.Uint64(d.bytes[0:8])`
			`// The first value is always unencoded`
			`if d.first {`
			`d.first = false`
			`d.n = 1`
			`d.values[0] = v`
			`} else {`
			`n, err := simple8b.Decode(&d.values, v)`
			`if err != nil {`
			`// Should never happen, only error that could be returned is if the the value to be decoded was not`
			`// actually encoded by simple8b encoder.`
			`d.err = fmt.Errorf("failed to decode value %v: %v", v, err)`
			`}`

			`d.n = n`
			`}`
			`d.i = 0`
			`d.bytes = d.bytes[8:]`
			`}`

			`func (d *IntegerDecoder) decodeUncompressed() {`
			`if len(d.bytes) == 0 {`
			`return`
			`}`

			`if len(d.bytes) < 8 {`
			`d.err = fmt.Errorf("IntegerDecoder: not enough data to decode uncompressed value")`
			`return`
			`}`

			`d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8])`
			`d.i = 0`
			`d.n = 1`
			`d.bytes = d.bytes[8:]`
			`}`