mirror of https://github.com/Oxalide/vsphere-influxdb-go.git synced 2023-10-10 11:36:51 +00:00

add vendoring with go dep

This commit is contained in:
Adrian Todorov
2017-10-25 20:52:40 +00:00
parent 704f4d20d1
commit a59409f16b
1627 changed files with 489673 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
# The collectd Input
The [collectd](https://collectd.org) input allows InfluxDB to accept data transmitted in collectd native format. This data is transmitted over UDP.
## A note on UDP/IP OS Buffer sizes
If you're running Linux or FreeBSD, please adjust your OS UDP buffer
size limit, [see here for more details.](../udp/README.md#a-note-on-udpip-os-buffer-sizes)
## Configuration
Each collectd input allows the binding address, target database, and target retention policy to be set. If the database does not exist, it will be created automatically when the input is initialized. If the retention policy is not configured, the default retention policy for the database is used. However, if a retention policy is set, it must already exist; the input will not create it automatically.
Each collectd input also performs internal batching of the points it receives, as batched writes to the database are more efficient. The default batch size is 5000 points, the default pending batch factor is 10, and the default batch timeout is 10 seconds. This means the input will write batches of at most 5000 points, but if a batch has not reached 5000 points within 10 seconds of the first point being added, it will emit that batch regardless of size. The pending batch factor controls how many batches can be in memory at once, allowing the input to transmit a batch while still building others.
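To make these semantics concrete, here is a minimal Go sketch of the batching behaviour (an illustration only; the service itself uses InfluxDB's `tsdb.PointBatcher`, and the names here are hypothetical):
```
// Package batchsketch is a minimal illustration of the batching behaviour
// described above; it is not the actual tsdb.PointBatcher implementation.
package batchsketch

import "time"

type point struct{}

// run emits a batch when it reaches size points, or when timeout has elapsed
// since the first point of the current batch. The capacity of out plays the
// role of the pending batch factor: up to cap(out) flushed batches can wait
// in memory while the next batch is being built.
func run(in <-chan point, out chan<- []point, size int, timeout time.Duration) {
	var batch []point
	var timer <-chan time.Time // nil channel: blocks until a batch starts
	for {
		select {
		case p := <-in:
			if len(batch) == 0 {
				timer = time.After(timeout) // timeout counts from the first point
			}
			batch = append(batch, p)
			if len(batch) >= size {
				out <- batch
				batch, timer = nil, nil
			}
		case <-timer:
			out <- batch // flush a partial batch after the timeout
			batch, timer = nil, nil
		}
	}
}
```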
The path to the collectd types database file may also be set.
## Large UDP packets
Please note that UDP packets larger than the standard payload size of 1452 bytes are dropped at ingestion time. Be sure to set `MaxPacketSize` to 1452 in the collectd configuration.
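For example, a collectd `network` plugin block along these lines (a sketch; substitute the address of your InfluxDB host) keeps packets within that limit:
```
<Plugin network>
  Server "127.0.0.1" "25826"
  MaxPacketSize 1452
</Plugin>
```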
## Config Example
```
[[collectd]]
enabled = true
bind-address = ":25826" # the bind address
database = "collectd" # Name of the database that will be written to
retention-policy = ""
batch-size = 5000 # will flush if this many points get buffered
batch-pending = 10 # number of batches that may be pending in memory
batch-timeout = "10s"
read-buffer = 0 # UDP read buffer size, 0 means to use OS default
typesdb = "/usr/share/collectd/types.db"
security-level = "none" # "none", "sign", or "encrypt"
auth-file = "/etc/collectd/auth_file"
```

View File

@@ -0,0 +1,209 @@
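# Each line defines a type: <name> <ds-name>:<ds-type>:<min>:<max>, where
# <ds-type> is GAUGE, COUNTER, DERIVE, or ABSOLUTE and "U" means unbounded.
# Types with multiple data sources list them comma-separated.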
absolute value:ABSOLUTE:0:U
apache_bytes value:DERIVE:0:U
apache_connections value:GAUGE:0:65535
apache_idle_workers value:GAUGE:0:65535
apache_requests value:DERIVE:0:U
apache_scoreboard value:GAUGE:0:65535
ath_nodes value:GAUGE:0:65535
ath_stat value:DERIVE:0:U
backends value:GAUGE:0:65535
bitrate value:GAUGE:0:4294967295
bytes value:GAUGE:0:U
cache_eviction value:DERIVE:0:U
cache_operation value:DERIVE:0:U
cache_ratio value:GAUGE:0:100
cache_result value:DERIVE:0:U
cache_size value:GAUGE:0:U
charge value:GAUGE:0:U
compression_ratio value:GAUGE:0:2
compression uncompressed:DERIVE:0:U, compressed:DERIVE:0:U
connections value:DERIVE:0:U
conntrack value:GAUGE:0:4294967295
contextswitch value:DERIVE:0:U
counter value:COUNTER:U:U
cpufreq value:GAUGE:0:U
cpu value:DERIVE:0:U
current_connections value:GAUGE:0:U
current_sessions value:GAUGE:0:U
current value:GAUGE:U:U
delay value:GAUGE:-1000000:1000000
derive value:DERIVE:0:U
df_complex value:GAUGE:0:U
df_inodes value:GAUGE:0:U
df used:GAUGE:0:1125899906842623, free:GAUGE:0:1125899906842623
disk_latency read:GAUGE:0:U, write:GAUGE:0:U
disk_merged read:DERIVE:0:U, write:DERIVE:0:U
disk_octets read:DERIVE:0:U, write:DERIVE:0:U
disk_ops_complex value:DERIVE:0:U
disk_ops read:DERIVE:0:U, write:DERIVE:0:U
disk_time read:DERIVE:0:U, write:DERIVE:0:U
dns_answer value:DERIVE:0:U
dns_notify value:DERIVE:0:U
dns_octets queries:DERIVE:0:U, responses:DERIVE:0:U
dns_opcode value:DERIVE:0:U
dns_qtype_cached value:GAUGE:0:4294967295
dns_qtype value:DERIVE:0:U
dns_query value:DERIVE:0:U
dns_question value:DERIVE:0:U
dns_rcode value:DERIVE:0:U
dns_reject value:DERIVE:0:U
dns_request value:DERIVE:0:U
dns_resolver value:DERIVE:0:U
dns_response value:DERIVE:0:U
dns_transfer value:DERIVE:0:U
dns_update value:DERIVE:0:U
dns_zops value:DERIVE:0:U
duration seconds:GAUGE:0:U
email_check value:GAUGE:0:U
email_count value:GAUGE:0:U
email_size value:GAUGE:0:U
entropy value:GAUGE:0:4294967295
fanspeed value:GAUGE:0:U
file_size value:GAUGE:0:U
files value:GAUGE:0:U
flow value:GAUGE:0:U
fork_rate value:DERIVE:0:U
frequency_offset value:GAUGE:-1000000:1000000
frequency value:GAUGE:0:U
fscache_stat value:DERIVE:0:U
gauge value:GAUGE:U:U
hash_collisions value:DERIVE:0:U
http_request_methods value:DERIVE:0:U
http_requests value:DERIVE:0:U
http_response_codes value:DERIVE:0:U
humidity value:GAUGE:0:100
if_collisions value:DERIVE:0:U
if_dropped rx:DERIVE:0:U, tx:DERIVE:0:U
if_errors rx:DERIVE:0:U, tx:DERIVE:0:U
if_multicast value:DERIVE:0:U
if_octets rx:DERIVE:0:U, tx:DERIVE:0:U
if_packets rx:DERIVE:0:U, tx:DERIVE:0:U
if_rx_errors value:DERIVE:0:U
if_rx_octets value:DERIVE:0:U
if_tx_errors value:DERIVE:0:U
if_tx_octets value:DERIVE:0:U
invocations value:DERIVE:0:U
io_octets rx:DERIVE:0:U, tx:DERIVE:0:U
io_packets rx:DERIVE:0:U, tx:DERIVE:0:U
ipt_bytes value:DERIVE:0:U
ipt_packets value:DERIVE:0:U
irq value:DERIVE:0:U
latency value:GAUGE:0:U
links value:GAUGE:0:U
load shortterm:GAUGE:0:5000, midterm:GAUGE:0:5000, longterm:GAUGE:0:5000
md_disks value:GAUGE:0:U
memcached_command value:DERIVE:0:U
memcached_connections value:GAUGE:0:U
memcached_items value:GAUGE:0:U
memcached_octets rx:DERIVE:0:U, tx:DERIVE:0:U
memcached_ops value:DERIVE:0:U
memory value:GAUGE:0:281474976710656
multimeter value:GAUGE:U:U
mutex_operations value:DERIVE:0:U
mysql_commands value:DERIVE:0:U
mysql_handler value:DERIVE:0:U
mysql_locks value:DERIVE:0:U
mysql_log_position value:DERIVE:0:U
mysql_octets rx:DERIVE:0:U, tx:DERIVE:0:U
nfs_procedure value:DERIVE:0:U
nginx_connections value:GAUGE:0:U
nginx_requests value:DERIVE:0:U
node_octets rx:DERIVE:0:U, tx:DERIVE:0:U
node_rssi value:GAUGE:0:255
node_stat value:DERIVE:0:U
node_tx_rate value:GAUGE:0:127
objects value:GAUGE:0:U
operations value:DERIVE:0:U
percent value:GAUGE:0:100.1
percent_bytes value:GAUGE:0:100.1
percent_inodes value:GAUGE:0:100.1
pf_counters value:DERIVE:0:U
pf_limits value:DERIVE:0:U
pf_source value:DERIVE:0:U
pf_states value:GAUGE:0:U
pf_state value:DERIVE:0:U
pg_blks value:DERIVE:0:U
pg_db_size value:GAUGE:0:U
pg_n_tup_c value:DERIVE:0:U
pg_n_tup_g value:GAUGE:0:U
pg_numbackends value:GAUGE:0:U
pg_scan value:DERIVE:0:U
pg_xact value:DERIVE:0:U
ping_droprate value:GAUGE:0:100
ping_stddev value:GAUGE:0:65535
ping value:GAUGE:0:65535
players value:GAUGE:0:1000000
power value:GAUGE:0:U
protocol_counter value:DERIVE:0:U
ps_code value:GAUGE:0:9223372036854775807
ps_count processes:GAUGE:0:1000000, threads:GAUGE:0:1000000
ps_cputime user:DERIVE:0:U, syst:DERIVE:0:U
ps_data value:GAUGE:0:9223372036854775807
ps_disk_octets read:DERIVE:0:U, write:DERIVE:0:U
ps_disk_ops read:DERIVE:0:U, write:DERIVE:0:U
ps_pagefaults minflt:DERIVE:0:U, majflt:DERIVE:0:U
ps_rss value:GAUGE:0:9223372036854775807
ps_stacksize value:GAUGE:0:9223372036854775807
ps_state value:GAUGE:0:65535
ps_vm value:GAUGE:0:9223372036854775807
queue_length value:GAUGE:0:U
records value:GAUGE:0:U
requests value:GAUGE:0:U
response_time value:GAUGE:0:U
response_code value:GAUGE:0:U
route_etx value:GAUGE:0:U
route_metric value:GAUGE:0:U
routes value:GAUGE:0:U
serial_octets rx:DERIVE:0:U, tx:DERIVE:0:U
signal_noise value:GAUGE:U:0
signal_power value:GAUGE:U:0
signal_quality value:GAUGE:0:U
snr value:GAUGE:0:U
spam_check value:GAUGE:0:U
spam_score value:GAUGE:U:U
spl value:GAUGE:U:U
swap_io value:DERIVE:0:U
swap value:GAUGE:0:1099511627776
tcp_connections value:GAUGE:0:4294967295
temperature value:GAUGE:U:U
threads value:GAUGE:0:U
time_dispersion value:GAUGE:-1000000:1000000
timeleft value:GAUGE:0:U
time_offset value:GAUGE:-1000000:1000000
total_bytes value:DERIVE:0:U
total_connections value:DERIVE:0:U
total_objects value:DERIVE:0:U
total_operations value:DERIVE:0:U
total_requests value:DERIVE:0:U
total_sessions value:DERIVE:0:U
total_threads value:DERIVE:0:U
total_time_in_ms value:DERIVE:0:U
total_values value:DERIVE:0:U
uptime value:GAUGE:0:4294967295
users value:GAUGE:0:65535
vcl value:GAUGE:0:65535
vcpu value:GAUGE:0:U
virt_cpu_total value:DERIVE:0:U
virt_vcpu value:DERIVE:0:U
vmpage_action value:DERIVE:0:U
vmpage_faults minflt:DERIVE:0:U, majflt:DERIVE:0:U
vmpage_io in:DERIVE:0:U, out:DERIVE:0:U
vmpage_number value:GAUGE:0:4294967295
volatile_changes value:GAUGE:0:U
voltage_threshold value:GAUGE:U:U, threshold:GAUGE:U:U
voltage value:GAUGE:U:U
vs_memory value:GAUGE:0:9223372036854775807
vs_processes value:GAUGE:0:65535
vs_threads value:GAUGE:0:65535
#
# Legacy types
# (required for the v5 upgrade target)
#
arc_counts demand_data:COUNTER:0:U, demand_metadata:COUNTER:0:U, prefetch_data:COUNTER:0:U, prefetch_metadata:COUNTER:0:U
arc_l2_bytes read:COUNTER:0:U, write:COUNTER:0:U
arc_l2_size value:GAUGE:0:U
arc_ratio value:GAUGE:0:U
arc_size current:GAUGE:0:U, target:GAUGE:0:U, minlimit:GAUGE:0:U, maxlimit:GAUGE:0:U
mysql_qcache hits:COUNTER:0:U, inserts:COUNTER:0:U, not_cached:COUNTER:0:U, lowmem_prunes:COUNTER:0:U, queries_in_cache:GAUGE:0:U
mysql_threads running:GAUGE:0:U, connected:GAUGE:0:U, cached:GAUGE:0:U, created:COUNTER:0:U

View File

@@ -0,0 +1,163 @@
package collectd
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultBindAddress is the default port to bind to.
DefaultBindAddress = ":25826"
// DefaultDatabase is the default DB to write to.
DefaultDatabase = "collectd"
// DefaultRetentionPolicy is the default retention policy of the writes.
DefaultRetentionPolicy = ""
// DefaultBatchSize is the default write batch size.
DefaultBatchSize = 5000
// DefaultBatchPending is the default number of pending write batches.
DefaultBatchPending = 10
// DefaultBatchDuration is the default batch timeout duration.
DefaultBatchDuration = toml.Duration(10 * time.Second)
// DefaultTypesDB is the default location of the collectd types db file.
DefaultTypesDB = "/usr/share/collectd/types.db"
// DefaultReadBuffer is the default buffer size for the UDP listener.
// Sets the size of the operating system's receive buffer associated with
// the UDP traffic. Keep in mind that the OS must be able
// to handle the number set here or the UDP listener will error and exit.
//
// DefaultReadBuffer = 0 means to use the OS default, which is usually too
// small for high UDP performance.
//
// Increasing OS buffer limits:
// Linux: sudo sysctl -w net.core.rmem_max=<read-buffer>
// BSD/Darwin: sudo sysctl -w kern.ipc.maxsockbuf=<read-buffer>
DefaultReadBuffer = 0
// DefaultSecurityLevel is the default security level.
DefaultSecurityLevel = "none"
// DefaultAuthFile is the default location of the user/password file.
DefaultAuthFile = "/etc/collectd/auth_file"
)
// Config represents a configuration for the collectd service.
type Config struct {
Enabled bool `toml:"enabled"`
BindAddress string `toml:"bind-address"`
Database string `toml:"database"`
RetentionPolicy string `toml:"retention-policy"`
BatchSize int `toml:"batch-size"`
BatchPending int `toml:"batch-pending"`
BatchDuration toml.Duration `toml:"batch-timeout"`
ReadBuffer int `toml:"read-buffer"`
TypesDB string `toml:"typesdb"`
SecurityLevel string `toml:"security-level"`
AuthFile string `toml:"auth-file"`
}
// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
return Config{
BindAddress: DefaultBindAddress,
Database: DefaultDatabase,
RetentionPolicy: DefaultRetentionPolicy,
ReadBuffer: DefaultReadBuffer,
BatchSize: DefaultBatchSize,
BatchPending: DefaultBatchPending,
BatchDuration: DefaultBatchDuration,
TypesDB: DefaultTypesDB,
SecurityLevel: DefaultSecurityLevel,
AuthFile: DefaultAuthFile,
}
}
// WithDefaults takes the given config and returns a new config with any required
// default values set.
func (c *Config) WithDefaults() *Config {
d := *c
if d.BindAddress == "" {
d.BindAddress = DefaultBindAddress
}
if d.Database == "" {
d.Database = DefaultDatabase
}
if d.RetentionPolicy == "" {
d.RetentionPolicy = DefaultRetentionPolicy
}
if d.BatchSize == 0 {
d.BatchSize = DefaultBatchSize
}
if d.BatchPending == 0 {
d.BatchPending = DefaultBatchPending
}
if d.BatchDuration == 0 {
d.BatchDuration = DefaultBatchDuration
}
if d.ReadBuffer == 0 {
d.ReadBuffer = DefaultReadBuffer
}
if d.TypesDB == "" {
d.TypesDB = DefaultTypesDB
}
if d.SecurityLevel == "" {
d.SecurityLevel = DefaultSecurityLevel
}
if d.AuthFile == "" {
d.AuthFile = DefaultAuthFile
}
return &d
}
// Validate returns an error if the Config is invalid.
func (c *Config) Validate() error {
switch c.SecurityLevel {
case "none", "sign", "encrypt":
default:
return errors.New("Invalid security level")
}
return nil
}
// Configs wraps a slice of Config to aggregate diagnostics.
type Configs []Config
// Diagnostics returns one set of diagnostics for all of the Configs.
func (c Configs) Diagnostics() (*diagnostics.Diagnostics, error) {
d := &diagnostics.Diagnostics{
Columns: []string{"enabled", "bind-address", "database", "retention-policy", "batch-size", "batch-pending", "batch-timeout"},
}
for _, cc := range c {
if !cc.Enabled {
d.AddRow([]interface{}{false})
continue
}
r := []interface{}{true, cc.BindAddress, cc.Database, cc.RetentionPolicy, cc.BatchSize, cc.BatchPending, cc.BatchDuration}
d.AddRow(r)
}
return d, nil
}
// Enabled returns true if any underlying Config is Enabled.
func (c Configs) Enabled() bool {
for _, cc := range c {
if cc.Enabled {
return true
}
}
return false
}

View File

@@ -0,0 +1,32 @@
package collectd_test
import (
"testing"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/collectd"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c collectd.Config
if _, err := toml.Decode(`
enabled = true
bind-address = ":9000"
database = "xxx"
typesdb = "yyy"
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Fatalf("unexpected enabled: %v", c.Enabled)
} else if c.BindAddress != ":9000" {
t.Fatalf("unexpected bind address: %s", c.BindAddress)
} else if c.Database != "xxx" {
t.Fatalf("unexpected database: %s", c.Database)
} else if c.TypesDB != "yyy" {
t.Fatalf("unexpected types db: %s", c.TypesDB)
}
}

View File

@@ -0,0 +1,433 @@
// Package collectd provides a service for InfluxDB to ingest data via the collectd protocol.
package collectd // import "github.com/influxdata/influxdb/services/collectd"
import (
"bytes"
"fmt"
"io/ioutil"
"net"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"collectd.org/api"
"collectd.org/network"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
// statistics gathered by the collectd service.
const (
statPointsReceived = "pointsRx"
statBytesReceived = "bytesRx"
statPointsParseFail = "pointsParseFail"
statReadFail = "readFail"
statBatchesTransmitted = "batchesTx"
statPointsTransmitted = "pointsTx"
statBatchesTransmitFail = "batchesTxFail"
statDroppedPointsInvalid = "droppedPointsInvalid"
)
// pointsWriter is an internal interface to make testing easier.
type pointsWriter interface {
WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
// metaClient is an internal interface to make testing easier.
type metaClient interface {
CreateDatabase(name string) (*meta.DatabaseInfo, error)
}
// TypesDBFile reads a collectd types db from a file.
func TypesDBFile(path string) (*api.TypesDB, error) {
reader, err := os.Open(path)
if err != nil {
return nil, err
}
// Close the file once the types DB has been parsed.
defer reader.Close()
return api.NewTypesDB(reader)
}
// Service represents a UDP server which receives metrics in collectd's binary
// protocol and stores them in InfluxDB.
type Service struct {
Config *Config
MetaClient metaClient
PointsWriter pointsWriter
Logger zap.Logger
wg sync.WaitGroup
conn *net.UDPConn
batcher *tsdb.PointBatcher
popts network.ParseOpts
addr net.Addr
mu sync.RWMutex
ready bool // Has the required database been created?
done chan struct{} // Is the service closing or closed?
// expvar-based stats.
stats *Statistics
defaultTags models.StatisticTags
}
// NewService returns a new instance of the collectd service.
func NewService(c Config) *Service {
s := Service{
// Use defaults where necessary.
Config: c.WithDefaults(),
Logger: zap.New(zap.NullEncoder()),
stats: &Statistics{},
defaultTags: models.StatisticTags{"bind": c.BindAddress},
}
return &s
}
// Open starts the service.
func (s *Service) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
if !s.closed() {
return nil // Already open.
}
s.done = make(chan struct{})
s.Logger.Info("Starting collectd service")
if s.Config.BindAddress == "" {
return fmt.Errorf("bind address is blank")
} else if s.Config.Database == "" {
return fmt.Errorf("database name is blank")
} else if s.PointsWriter == nil {
return fmt.Errorf("PointsWriter is nil")
}
if s.popts.TypesDB == nil {
// Open collectd types.
if stat, err := os.Stat(s.Config.TypesDB); err != nil {
return fmt.Errorf("Stat(): %s", err)
} else if stat.IsDir() {
alltypesdb, err := api.NewTypesDB(&bytes.Buffer{})
if err != nil {
return err
}
var readdir func(path string)
readdir = func(path string) {
files, err := ioutil.ReadDir(path)
if err != nil {
s.Logger.Info(fmt.Sprintf("Unable to read directory %s: %s\n", path, err))
return
}
for _, f := range files {
fullpath := filepath.Join(path, f.Name())
if f.IsDir() {
readdir(fullpath)
continue
}
s.Logger.Info(fmt.Sprintf("Loading %s\n", fullpath))
types, err := TypesDBFile(fullpath)
if err != nil {
s.Logger.Info(fmt.Sprintf("Unable to parse collectd types file: %s\n", f.Name()))
continue
}
alltypesdb.Merge(types)
}
}
readdir(s.Config.TypesDB)
s.popts.TypesDB = alltypesdb
} else {
s.Logger.Info(fmt.Sprintf("Loading %s\n", s.Config.TypesDB))
types, err := TypesDBFile(s.Config.TypesDB)
if err != nil {
return fmt.Errorf("Open(): %s", err)
}
s.popts.TypesDB = types
}
}
// Sets the security level according to the config.
// Default not necessary because we validate the config.
switch s.Config.SecurityLevel {
case "none":
s.popts.SecurityLevel = network.None
case "sign":
s.popts.SecurityLevel = network.Sign
case "encrypt":
s.popts.SecurityLevel = network.Encrypt
}
// Sets the auth file according to the config.
if s.popts.PasswordLookup == nil {
s.popts.PasswordLookup = network.NewAuthFile(s.Config.AuthFile)
}
// Resolve our address.
addr, err := net.ResolveUDPAddr("udp", s.Config.BindAddress)
if err != nil {
return fmt.Errorf("unable to resolve UDP address: %s", err)
}
s.addr = addr
// Start listening
conn, err := net.ListenUDP("udp", addr)
if err != nil {
return fmt.Errorf("unable to listen on UDP: %s", err)
}
if s.Config.ReadBuffer != 0 {
err = conn.SetReadBuffer(s.Config.ReadBuffer)
if err != nil {
return fmt.Errorf("unable to set UDP read buffer to %d: %s",
s.Config.ReadBuffer, err)
}
}
s.conn = conn
s.Logger.Info(fmt.Sprint("Listening on UDP: ", conn.LocalAddr().String()))
// Start the points batcher.
s.batcher = tsdb.NewPointBatcher(s.Config.BatchSize, s.Config.BatchPending, time.Duration(s.Config.BatchDuration))
s.batcher.Start()
// Add the goroutines that process collectd packets to the waitgroup so
// that Close can wait for them to exit.
s.wg.Add(2)
go func() { defer s.wg.Done(); s.serve() }()
go func() { defer s.wg.Done(); s.writePoints() }()
return nil
}
// Close stops the service.
func (s *Service) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.closed() {
return nil // Already closed.
}
close(s.done)
// Close the connection, and wait for the goroutine to exit.
if s.conn != nil {
s.conn.Close()
}
if s.batcher != nil {
s.batcher.Stop()
}
s.wg.Wait()
// Release all remaining resources.
s.conn = nil
s.batcher = nil
s.Logger.Info("collectd UDP closed")
s.done = nil
return nil
}
func (s *Service) closed() bool {
select {
case <-s.done:
// Service is closing.
return true
default:
}
return s.done == nil
}
// createInternalStorage ensures that the required database has been created.
func (s *Service) createInternalStorage() error {
s.mu.RLock()
ready := s.ready
s.mu.RUnlock()
if ready {
return nil
}
if _, err := s.MetaClient.CreateDatabase(s.Config.Database); err != nil {
return err
}
// The service is now ready.
s.mu.Lock()
s.ready = true
s.mu.Unlock()
return nil
}
// WithLogger sets the service's logger.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "collectd"))
}
// Statistics maintains statistics for the collectd service.
type Statistics struct {
PointsReceived int64
BytesReceived int64
PointsParseFail int64
ReadFail int64
BatchesTransmitted int64
PointsTransmitted int64
BatchesTransmitFail int64
InvalidDroppedPoints int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "collectd",
Tags: s.defaultTags.Merge(tags),
Values: map[string]interface{}{
statPointsReceived: atomic.LoadInt64(&s.stats.PointsReceived),
statBytesReceived: atomic.LoadInt64(&s.stats.BytesReceived),
statPointsParseFail: atomic.LoadInt64(&s.stats.PointsParseFail),
statReadFail: atomic.LoadInt64(&s.stats.ReadFail),
statBatchesTransmitted: atomic.LoadInt64(&s.stats.BatchesTransmitted),
statPointsTransmitted: atomic.LoadInt64(&s.stats.PointsTransmitted),
statBatchesTransmitFail: atomic.LoadInt64(&s.stats.BatchesTransmitFail),
statDroppedPointsInvalid: atomic.LoadInt64(&s.stats.InvalidDroppedPoints),
},
}}
}
// SetTypes sets collectd types db.
func (s *Service) SetTypes(types string) (err error) {
reader := strings.NewReader(types)
s.popts.TypesDB, err = api.NewTypesDB(reader)
return
}
// Addr returns the listener's address. It returns nil if listener is closed.
func (s *Service) Addr() net.Addr {
return s.conn.LocalAddr()
}
func (s *Service) serve() {
// From https://collectd.org/wiki/index.php/Binary_protocol
// 1024 bytes (payload only, not including UDP / IP headers)
// In versions 4.0 through 4.7, the receive buffer has a fixed size
// of 1024 bytes. When longer packets are received, the trailing data
// is simply ignored. Since version 4.8, the buffer size can be
// configured. Version 5.0 will increase the default buffer size to
// 1452 bytes (the maximum payload size when using UDP/IPv6 over
// Ethernet).
buffer := make([]byte, 1452)
for {
select {
case <-s.done:
// We closed the connection, time to go.
return
default:
// Keep processing.
}
n, _, err := s.conn.ReadFromUDP(buffer)
if err != nil {
atomic.AddInt64(&s.stats.ReadFail, 1)
s.Logger.Info(fmt.Sprintf("collectd ReadFromUDP error: %s", err))
continue
}
if n > 0 {
atomic.AddInt64(&s.stats.BytesReceived, int64(n))
s.handleMessage(buffer[:n])
}
}
}
func (s *Service) handleMessage(buffer []byte) {
valueLists, err := network.Parse(buffer, s.popts)
if err != nil {
atomic.AddInt64(&s.stats.PointsParseFail, 1)
s.Logger.Info(fmt.Sprintf("Collectd parse error: %s", err))
return
}
for _, valueList := range valueLists {
points := s.UnmarshalValueList(valueList)
for _, p := range points {
s.batcher.In() <- p
}
atomic.AddInt64(&s.stats.PointsReceived, int64(len(points)))
}
}
func (s *Service) writePoints() {
for {
select {
case <-s.done:
return
case batch := <-s.batcher.Out():
// Will attempt to create database if not yet created.
if err := s.createInternalStorage(); err != nil {
s.Logger.Info(fmt.Sprintf("Required database %s not yet created: %s", s.Config.Database, err.Error()))
continue
}
if err := s.PointsWriter.WritePointsPrivileged(s.Config.Database, s.Config.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil {
atomic.AddInt64(&s.stats.BatchesTransmitted, 1)
atomic.AddInt64(&s.stats.PointsTransmitted, int64(len(batch)))
} else {
s.Logger.Info(fmt.Sprintf("failed to write point batch to database %q: %s", s.Config.Database, err))
atomic.AddInt64(&s.stats.BatchesTransmitFail, 1)
}
}
}
}
// UnmarshalValueList translates a ValueList into InfluxDB data points.
func (s *Service) UnmarshalValueList(vl *api.ValueList) []models.Point {
timestamp := vl.Time.UTC()
var points []models.Point
for i := range vl.Values {
var name string
name = fmt.Sprintf("%s_%s", vl.Identifier.Plugin, vl.DSName(i))
tags := make(map[string]string)
fields := make(map[string]interface{})
// Convert interface back to actual type, then to float64
switch value := vl.Values[i].(type) {
case api.Gauge:
fields["value"] = float64(value)
case api.Derive:
fields["value"] = float64(value)
case api.Counter:
fields["value"] = float64(value)
}
if vl.Identifier.Host != "" {
tags["host"] = vl.Identifier.Host
}
if vl.Identifier.PluginInstance != "" {
tags["instance"] = vl.Identifier.PluginInstance
}
if vl.Identifier.Type != "" {
tags["type"] = vl.Identifier.Type
}
if vl.Identifier.TypeInstance != "" {
tags["type_instance"] = vl.Identifier.TypeInstance
}
// Drop invalid points
p, err := models.NewPoint(name, models.NewTags(tags), fields, timestamp)
if err != nil {
s.Logger.Info(fmt.Sprintf("Dropping point %v: %v", name, err))
atomic.AddInt64(&s.stats.InvalidDroppedPoints, 1)
continue
}
points = append(points, p)
}
return points
}

View File

@@ -0,0 +1,647 @@
package collectd
import (
"encoding/hex"
"errors"
"io/ioutil"
"net"
"os"
"path"
"strings"
"testing"
"time"
"github.com/influxdata/influxdb/internal"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/toml"
"github.com/uber-go/zap"
)
func TestService_OpenClose(t *testing.T) {
service := NewTestService(1, time.Second)
// Closing a closed service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
// Closing a closed service again is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Opening an already open service is fine.
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Reopening a previously opened service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Tidy up.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
}
// Test that the service can read types DB files from a directory.
func TestService_Open_TypesDBDir(t *testing.T) {
t.Parallel()
// Make a temp dir to write types.db into.
tmpDir, err := ioutil.TempDir(os.TempDir(), "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tmpDir)
// Write types.db.
if err := ioutil.WriteFile(path.Join(tmpDir, "types.db"), []byte(typesDBText), 0777); err != nil {
t.Fatal(err)
}
// Setup config to read all files in the temp dir.
c := Config{
BindAddress: "127.0.0.1:0",
Database: "collectd_test",
BatchSize: 1000,
BatchDuration: toml.Duration(time.Second),
TypesDB: tmpDir,
}
s := &TestService{
Config: c,
Service: NewService(c),
MetaClient: &internal.MetaClientMock{},
}
if testing.Verbose() {
s.Service.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
return nil, nil
}
s.Service.PointsWriter = s
s.Service.MetaClient = s.MetaClient
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
if err := s.Service.Close(); err != nil {
t.Fatal(err)
}
}
// Test that the service checks / creates the target database every time we
// try to write points.
func TestService_CreatesDatabase(t *testing.T) {
t.Parallel()
s := NewTestService(1, time.Second)
s.WritePointsFn = func(string, string, models.ConsistencyLevel, []models.Point) error {
return nil
}
called := make(chan struct{})
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
if name != s.Config.Database {
t.Errorf("\n\texp = %s\n\tgot = %s\n", s.Config.Database, name)
}
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, errors.New("an error")
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
points, err := models.ParsePointsString(`cpu value=1`)
if err != nil {
t.Fatal(err)
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should not have been switched due to meta client error.
s.Service.mu.RLock()
ready := s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// This time MC won't cause an error.
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, nil
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should now be true because the meta client call succeeded.
s.Service.mu.RLock()
ready = s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
s.Service.Close()
}
// Test that the collectd service correctly batches points by BatchSize.
func TestService_BatchSize(t *testing.T) {
t.Parallel()
totalPoints := len(expPoints)
// Batch sizes that divide evenly into totalPoints.
batchSizes := []int{1, 2, 13}
for _, batchSize := range batchSizes {
func() {
s := NewTestService(batchSize, time.Second)
pointCh := make(chan models.Point)
s.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
if len(points) != batchSize {
t.Errorf("\n\texp = %d\n\tgot = %d\n", batchSize, len(points))
}
for _, p := range points {
pointCh <- p
}
return nil
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
defer func() { t.Log("closing service"); s.Service.Close() }()
// Get the address & port the service is listening on for collectd data.
addr := s.Service.Addr()
conn, err := net.Dial("udp", addr.String())
if err != nil {
t.Fatal(err)
}
// Send the test data to the service.
if n, err := conn.Write(testData); err != nil {
t.Fatal(err)
} else if n != len(testData) {
t.Fatalf("only sent %d of %d bytes", n, len(testData))
}
points := []models.Point{}
Loop:
for {
select {
case p := <-pointCh:
points = append(points, p)
if len(points) == totalPoints {
break Loop
}
case <-time.After(time.Second):
t.Logf("exp %d points, got %d", totalPoints, len(points))
t.Fatal("timed out waiting for points from collectd service")
}
}
if len(points) != totalPoints {
t.Fatalf("exp %d points, got %d", totalPoints, len(points))
}
for i, exp := range expPoints {
got := points[i].String()
if got != exp {
t.Fatalf("\n\texp = %s\n\tgot = %s\n", exp, got)
}
}
}()
}
}
// Test that the collectd service correctly batches points using BatchDuration.
func TestService_BatchDuration(t *testing.T) {
t.Parallel()
totalPoints := len(expPoints)
s := NewTestService(5000, 250*time.Millisecond)
pointCh := make(chan models.Point, 1000)
s.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
for _, p := range points {
pointCh <- p
}
return nil
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
defer func() { t.Log("closing service"); s.Service.Close() }()
// Get the address & port the service is listening on for collectd data.
addr := s.Service.Addr()
conn, err := net.Dial("udp", addr.String())
if err != nil {
t.Fatal(err)
}
// Send the test data to the service.
if n, err := conn.Write(testData); err != nil {
t.Fatal(err)
} else if n != len(testData) {
t.Fatalf("only sent %d of %d bytes", n, len(testData))
}
points := []models.Point{}
Loop:
for {
select {
case p := <-pointCh:
points = append(points, p)
if len(points) == totalPoints {
break Loop
}
case <-time.After(time.Second):
t.Logf("exp %d points, got %d", totalPoints, len(points))
t.Fatal("timed out waiting for points from collectd service")
}
}
if len(points) != totalPoints {
t.Fatalf("exp %d points, got %d", totalPoints, len(points))
}
for i, exp := range expPoints {
got := points[i].String()
if got != exp {
t.Fatalf("\n\texp = %s\n\tgot = %s\n", exp, got)
}
}
}
type TestService struct {
Service *Service
Config Config
MetaClient *internal.MetaClientMock
WritePointsFn func(string, string, models.ConsistencyLevel, []models.Point) error
}
func NewTestService(batchSize int, batchDuration time.Duration) *TestService {
c := Config{
BindAddress: "127.0.0.1:0",
Database: "collectd_test",
BatchSize: batchSize,
BatchDuration: toml.Duration(batchDuration),
}
s := &TestService{
Config: c,
Service: NewService(c),
MetaClient: &internal.MetaClientMock{},
}
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
return nil, nil
}
s.Service.PointsWriter = s
s.Service.MetaClient = s.MetaClient
// Set the collectd types using test string.
if err := s.Service.SetTypes(typesDBText); err != nil {
panic(err)
}
if testing.Verbose() {
s.Service.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
return s
}
func (w *TestService) WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
return w.WritePointsFn(database, retentionPolicy, consistencyLevel, points)
}
func check(err error) {
if err != nil {
panic(err)
}
}
// Raw data sent by collectd, captured using Wireshark.
var testData = func() []byte {
data := []string{
"000000167066312d36322d3231302d39342d313733000001000c00000000544928ff0007000c0000000",
"0000000050002000c656e74726f7079000004000c656e74726f7079000006000f000101000000000000",
"7240000200086370750000030006310000040008637075000005000969646c65000006000f000100000",
"0000000a674620005000977616974000006000f00010000000000000000000002000764660000030005",
"00000400076466000005000d6c6976652d636f7700000600180002010100000000a090b641000000a0c",
"b6a2742000200086370750000030006310000040008637075000005000e696e74657272757074000006",
"000f00010000000000000000fe0005000c736f6674697271000006000f0001000000000000000000000",
"20007646600000300050000040007646600000500096c69766500000600180002010100000000000000",
"00000000e0ec972742000200086370750000030006310000040008637075000005000a737465616c000",
"006000f00010000000000000000000003000632000005000975736572000006000f0001000000000000",
"005f36000500096e696365000006000f0001000000000000000ad80002000e696e74657266616365000",
"0030005000004000e69665f6f6374657473000005000b64756d6d793000000600180002000000000000",
"00000000000000000000041a000200076466000004000764660000050008746d7000000600180002010",
"1000000000000f240000000a0ea97274200020008637075000003000632000004000863707500000500",
"0b73797374656d000006000f00010000000000000045d30002000e696e7465726661636500000300050",
"00004000f69665f7061636b657473000005000b64756d6d793000000600180002000000000000000000",
"00000000000000000f000200086370750000030006320000040008637075000005000969646c6500000",
"6000f0001000000000000a66480000200076466000003000500000400076466000005000d72756e2d6c",
"6f636b000006001800020101000000000000000000000000000054410002000e696e746572666163650",
"00004000e69665f6572726f7273000005000b64756d6d79300000060018000200000000000000000000",
"00000000000000000002000863707500000300063200000400086370750000050009776169740000060",
"00f00010000000000000000000005000e696e74657272757074000006000f0001000000000000000132",
}
b, err := hex.DecodeString(strings.Join(data, ""))
check(err)
return b
}()
var expPoints = []string{
"entropy_value,host=pf1-62-210-94-173,type=entropy value=288 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=1,type=cpu,type_instance=idle value=10908770 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=1,type=cpu,type_instance=wait value=0 1414080767000000000",
"df_used,host=pf1-62-210-94-173,type=df,type_instance=live-cow value=378576896 1414080767000000000",
"df_free,host=pf1-62-210-94-173,type=df,type_instance=live-cow value=50287988736 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=1,type=cpu,type_instance=interrupt value=254 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=1,type=cpu,type_instance=softirq value=0 1414080767000000000",
"df_used,host=pf1-62-210-94-173,type=df,type_instance=live value=0 1414080767000000000",
"df_free,host=pf1-62-210-94-173,type=df,type_instance=live value=50666565632 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=1,type=cpu,type_instance=steal value=0 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=user value=24374 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=nice value=2776 1414080767000000000",
"interface_rx,host=pf1-62-210-94-173,type=if_octets,type_instance=dummy0 value=0 1414080767000000000",
"interface_tx,host=pf1-62-210-94-173,type=if_octets,type_instance=dummy0 value=1050 1414080767000000000",
"df_used,host=pf1-62-210-94-173,type=df,type_instance=tmp value=73728 1414080767000000000",
"df_free,host=pf1-62-210-94-173,type=df,type_instance=tmp value=50666491904 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=system value=17875 1414080767000000000",
"interface_rx,host=pf1-62-210-94-173,type=if_packets,type_instance=dummy0 value=0 1414080767000000000",
"interface_tx,host=pf1-62-210-94-173,type=if_packets,type_instance=dummy0 value=15 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=idle value=10904704 1414080767000000000",
"df_used,host=pf1-62-210-94-173,type=df,type_instance=run-lock value=0 1414080767000000000",
"df_free,host=pf1-62-210-94-173,type=df,type_instance=run-lock value=5242880 1414080767000000000",
"interface_rx,host=pf1-62-210-94-173,type=if_errors,type_instance=dummy0 value=0 1414080767000000000",
"interface_tx,host=pf1-62-210-94-173,type=if_errors,type_instance=dummy0 value=0 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=wait value=0 1414080767000000000",
"cpu_value,host=pf1-62-210-94-173,instance=2,type=cpu,type_instance=interrupt value=306 1414080767000000000",
}
// Taken from /usr/share/collectd/types.db on an Ubuntu system
var typesDBText = `
absolute value:ABSOLUTE:0:U
apache_bytes value:DERIVE:0:U
apache_connections value:GAUGE:0:65535
apache_idle_workers value:GAUGE:0:65535
apache_requests value:DERIVE:0:U
apache_scoreboard value:GAUGE:0:65535
ath_nodes value:GAUGE:0:65535
ath_stat value:DERIVE:0:U
backends value:GAUGE:0:65535
bitrate value:GAUGE:0:4294967295
bytes value:GAUGE:0:U
cache_eviction value:DERIVE:0:U
cache_operation value:DERIVE:0:U
cache_ratio value:GAUGE:0:100
cache_result value:DERIVE:0:U
cache_size value:GAUGE:0:4294967295
charge value:GAUGE:0:U
compression_ratio value:GAUGE:0:2
compression uncompressed:DERIVE:0:U, compressed:DERIVE:0:U
connections value:DERIVE:0:U
conntrack value:GAUGE:0:4294967295
contextswitch value:DERIVE:0:U
counter value:COUNTER:U:U
cpufreq value:GAUGE:0:U
cpu value:DERIVE:0:U
current_connections value:GAUGE:0:U
current_sessions value:GAUGE:0:U
current value:GAUGE:U:U
delay value:GAUGE:-1000000:1000000
derive value:DERIVE:0:U
df_complex value:GAUGE:0:U
df_inodes value:GAUGE:0:U
df used:GAUGE:0:1125899906842623, free:GAUGE:0:1125899906842623
disk_latency read:GAUGE:0:U, write:GAUGE:0:U
disk_merged read:DERIVE:0:U, write:DERIVE:0:U
disk_octets read:DERIVE:0:U, write:DERIVE:0:U
disk_ops_complex value:DERIVE:0:U
disk_ops read:DERIVE:0:U, write:DERIVE:0:U
disk_time read:DERIVE:0:U, write:DERIVE:0:U
dns_answer value:DERIVE:0:U
dns_notify value:DERIVE:0:U
dns_octets queries:DERIVE:0:U, responses:DERIVE:0:U
dns_opcode value:DERIVE:0:U
dns_qtype_cached value:GAUGE:0:4294967295
dns_qtype value:DERIVE:0:U
dns_query value:DERIVE:0:U
dns_question value:DERIVE:0:U
dns_rcode value:DERIVE:0:U
dns_reject value:DERIVE:0:U
dns_request value:DERIVE:0:U
dns_resolver value:DERIVE:0:U
dns_response value:DERIVE:0:U
dns_transfer value:DERIVE:0:U
dns_update value:DERIVE:0:U
dns_zops value:DERIVE:0:U
duration seconds:GAUGE:0:U
email_check value:GAUGE:0:U
email_count value:GAUGE:0:U
email_size value:GAUGE:0:U
entropy value:GAUGE:0:4294967295
fanspeed value:GAUGE:0:U
file_size value:GAUGE:0:U
files value:GAUGE:0:U
fork_rate value:DERIVE:0:U
frequency_offset value:GAUGE:-1000000:1000000
frequency value:GAUGE:0:U
fscache_stat value:DERIVE:0:U
gauge value:GAUGE:U:U
hash_collisions value:DERIVE:0:U
http_request_methods value:DERIVE:0:U
http_requests value:DERIVE:0:U
http_response_codes value:DERIVE:0:U
humidity value:GAUGE:0:100
if_collisions value:DERIVE:0:U
if_dropped rx:DERIVE:0:U, tx:DERIVE:0:U
if_errors rx:DERIVE:0:U, tx:DERIVE:0:U
if_multicast value:DERIVE:0:U
if_octets rx:DERIVE:0:U, tx:DERIVE:0:U
if_packets rx:DERIVE:0:U, tx:DERIVE:0:U
if_rx_errors value:DERIVE:0:U
if_rx_octets value:DERIVE:0:U
if_tx_errors value:DERIVE:0:U
if_tx_octets value:DERIVE:0:U
invocations value:DERIVE:0:U
io_octets rx:DERIVE:0:U, tx:DERIVE:0:U
io_packets rx:DERIVE:0:U, tx:DERIVE:0:U
ipt_bytes value:DERIVE:0:U
ipt_packets value:DERIVE:0:U
irq value:DERIVE:0:U
latency value:GAUGE:0:U
links value:GAUGE:0:U
load shortterm:GAUGE:0:5000, midterm:GAUGE:0:5000, longterm:GAUGE:0:5000
md_disks value:GAUGE:0:U
memcached_command value:DERIVE:0:U
memcached_connections value:GAUGE:0:U
memcached_items value:GAUGE:0:U
memcached_octets rx:DERIVE:0:U, tx:DERIVE:0:U
memcached_ops value:DERIVE:0:U
memory value:GAUGE:0:281474976710656
multimeter value:GAUGE:U:U
mutex_operations value:DERIVE:0:U
mysql_commands value:DERIVE:0:U
mysql_handler value:DERIVE:0:U
mysql_locks value:DERIVE:0:U
mysql_log_position value:DERIVE:0:U
mysql_octets rx:DERIVE:0:U, tx:DERIVE:0:U
nfs_procedure value:DERIVE:0:U
nginx_connections value:GAUGE:0:U
nginx_requests value:DERIVE:0:U
node_octets rx:DERIVE:0:U, tx:DERIVE:0:U
node_rssi value:GAUGE:0:255
node_stat value:DERIVE:0:U
node_tx_rate value:GAUGE:0:127
objects value:GAUGE:0:U
operations value:DERIVE:0:U
percent value:GAUGE:0:100.1
percent_bytes value:GAUGE:0:100.1
percent_inodes value:GAUGE:0:100.1
pf_counters value:DERIVE:0:U
pf_limits value:DERIVE:0:U
pf_source value:DERIVE:0:U
pf_states value:GAUGE:0:U
pf_state value:DERIVE:0:U
pg_blks value:DERIVE:0:U
pg_db_size value:GAUGE:0:U
pg_n_tup_c value:DERIVE:0:U
pg_n_tup_g value:GAUGE:0:U
pg_numbackends value:GAUGE:0:U
pg_scan value:DERIVE:0:U
pg_xact value:DERIVE:0:U
ping_droprate value:GAUGE:0:100
ping_stddev value:GAUGE:0:65535
ping value:GAUGE:0:65535
players value:GAUGE:0:1000000
power value:GAUGE:0:U
protocol_counter value:DERIVE:0:U
ps_code value:GAUGE:0:9223372036854775807
ps_count processes:GAUGE:0:1000000, threads:GAUGE:0:1000000
ps_cputime user:DERIVE:0:U, syst:DERIVE:0:U
ps_data value:GAUGE:0:9223372036854775807
ps_disk_octets read:DERIVE:0:U, write:DERIVE:0:U
ps_disk_ops read:DERIVE:0:U, write:DERIVE:0:U
ps_pagefaults minflt:DERIVE:0:U, majflt:DERIVE:0:U
ps_rss value:GAUGE:0:9223372036854775807
ps_stacksize value:GAUGE:0:9223372036854775807
ps_state value:GAUGE:0:65535
ps_vm value:GAUGE:0:9223372036854775807
queue_length value:GAUGE:0:U
records value:GAUGE:0:U
requests value:GAUGE:0:U
response_time value:GAUGE:0:U
response_code value:GAUGE:0:U
route_etx value:GAUGE:0:U
route_metric value:GAUGE:0:U
routes value:GAUGE:0:U
serial_octets rx:DERIVE:0:U, tx:DERIVE:0:U
signal_noise value:GAUGE:U:0
signal_power value:GAUGE:U:0
signal_quality value:GAUGE:0:U
snr value:GAUGE:0:U
spam_check value:GAUGE:0:U
spam_score value:GAUGE:U:U
spl value:GAUGE:U:U
swap_io value:DERIVE:0:U
swap value:GAUGE:0:1099511627776
tcp_connections value:GAUGE:0:4294967295
temperature value:GAUGE:U:U
threads value:GAUGE:0:U
time_dispersion value:GAUGE:-1000000:1000000
timeleft value:GAUGE:0:U
time_offset value:GAUGE:-1000000:1000000
total_bytes value:DERIVE:0:U
total_connections value:DERIVE:0:U
total_objects value:DERIVE:0:U
total_operations value:DERIVE:0:U
total_requests value:DERIVE:0:U
total_sessions value:DERIVE:0:U
total_threads value:DERIVE:0:U
total_time_in_ms value:DERIVE:0:U
total_values value:DERIVE:0:U
uptime value:GAUGE:0:4294967295
users value:GAUGE:0:65535
vcl value:GAUGE:0:65535
vcpu value:GAUGE:0:U
virt_cpu_total value:DERIVE:0:U
virt_vcpu value:DERIVE:0:U
vmpage_action value:DERIVE:0:U
vmpage_faults minflt:DERIVE:0:U, majflt:DERIVE:0:U
vmpage_io in:DERIVE:0:U, out:DERIVE:0:U
vmpage_number value:GAUGE:0:4294967295
volatile_changes value:GAUGE:0:U
voltage_threshold value:GAUGE:U:U, threshold:GAUGE:U:U
voltage value:GAUGE:U:U
vs_memory value:GAUGE:0:9223372036854775807
vs_processes value:GAUGE:0:65535
vs_threads value:GAUGE:0:65535
#
# Legacy types
# (required for the v5 upgrade target)
#
arc_counts demand_data:COUNTER:0:U, demand_metadata:COUNTER:0:U, prefetch_data:COUNTER:0:U, prefetch_metadata:COUNTER:0:U
arc_l2_bytes read:COUNTER:0:U, write:COUNTER:0:U
arc_l2_size value:GAUGE:0:U
arc_ratio value:GAUGE:0:U
arc_size current:GAUGE:0:U, target:GAUGE:0:U, minlimit:GAUGE:0:U, maxlimit:GAUGE:0:U
mysql_qcache hits:COUNTER:0:U, inserts:COUNTER:0:U, not_cached:COUNTER:0:U, lowmem_prunes:COUNTER:0:U, queries_in_cache:GAUGE:0:U
mysql_threads running:GAUGE:0:U, connected:GAUGE:0:U, cached:GAUGE:0:U, created:COUNTER:0:U
`

View File

@@ -0,0 +1,3 @@
collectd Client
===============
This directory contains code for generating collectd load.
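A typical invocation might look like the following (a sketch; the flags are defined in `main.go` below, and the exact command depends on how you build or run it):
```
go run . -m 2 -v 10 -r 100 -t 1000 -u 127.0.0.1:25826
```
This sends 1000 gauge points at 100 points per second, spread across 2 measurements and 10 tag values.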

View File

@@ -0,0 +1,73 @@
package main
import (
"collectd.org/api"
"collectd.org/network"
"context"
"flag"
"fmt"
"math/rand"
"os"
"strconv"
"time"
)
var nMeasurments = flag.Int("m", 1, "Number of measurements")
var tagVariance = flag.Int("v", 1, "Number of values per tag (the client uses a single tag)")
var rate = flag.Int("r", 1, "Number of points per second")
var total = flag.Int("t", -1, "Total number of points to send (default is no limit)")
var host = flag.String("u", "127.0.0.1:25826", "Destination host in the form host:port")
func main() {
flag.Parse()
conn, err := network.Dial(*host, network.ClientOptions{})
if err != nil {
fmt.Println(err)
os.Exit(1)
}
defer conn.Close()
rateLimiter := make(chan int, *rate)
go func() {
ticker := time.NewTicker(time.Second)
for {
select {
case <-ticker.C:
for i := 0; i < *rate; i++ {
rateLimiter <- i
}
}
}
}()
nSent := 0
for {
if nSent >= *total && *total > 0 {
break
}
<-rateLimiter
vl := api.ValueList{
Identifier: api.Identifier{
Host: "tagvalue" + strconv.Itoa(int(rand.Int31n(int32(*tagVariance)))),
Plugin: "golang" + strconv.Itoa(int(rand.Int31n(int32(*nMeasurments)))),
Type: "gauge",
},
Time: time.Now(),
Interval: 10 * time.Second,
Values: []api.Value{api.Gauge(42.0)},
}
ctx := context.TODO()
if err := conn.Write(ctx, &vl); err != nil {
fmt.Println(err)
os.Exit(1)
}
conn.Flush()
nSent = nSent + 1
}
fmt.Println("Number of points sent:", nSent)
}

View File

@@ -0,0 +1,68 @@
package continuous_querier
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
// Default values for aspects of interval computation.
const (
// The default value of how often to check whether any CQs need to be run.
DefaultRunInterval = time.Second
)
// Config represents a configuration for the continuous query service.
type Config struct {
// Enables logging in the CQ service to display when CQs are processed and how many points are written.
LogEnabled bool `toml:"log-enabled"`
// If this flag is set to false, both the brokers and data nodes should ignore any CQ processing.
Enabled bool `toml:"enabled"`
// Run interval for checking continuous queries. This should be set to the
// least common factor of the intervals at which your continuous queries run.
// If you only aggregate continuous queries every minute, this should be set
// to 1 minute. The default of '1s' is compatible with most aggregations.
RunInterval toml.Duration `toml:"run-interval"`
}
// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
return Config{
LogEnabled: true,
Enabled: true,
RunInterval: toml.Duration(DefaultRunInterval),
}
}
// Validate returns an error if the Config is invalid.
func (c Config) Validate() error {
if !c.Enabled {
return nil
}
// TODO: Should we enforce a minimum interval?
// Polling every nanosecond, for instance, will greatly impact performance.
if c.RunInterval <= 0 {
return errors.New("run-interval must be positive")
}
return nil
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"run-interval": c.RunInterval,
}), nil
}

View File

@@ -0,0 +1,46 @@
package continuous_querier_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/continuous_querier"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c continuous_querier.Config
if _, err := toml.Decode(`
run-interval = "1m"
enabled = true
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if time.Duration(c.RunInterval) != time.Minute {
t.Fatalf("unexpected run interval: %v", c.RunInterval)
} else if c.Enabled != true {
t.Fatalf("unexpected enabled: %v", c.Enabled)
}
}
func TestConfig_Validate(t *testing.T) {
c := continuous_querier.NewConfig()
if err := c.Validate(); err != nil {
t.Fatalf("unexpected validation fail from NewConfig: %s", err)
}
c = continuous_querier.NewConfig()
c.RunInterval = 0
if err := c.Validate(); err == nil {
t.Fatal("expected error for run-interval = 0, got nil")
}
c = continuous_querier.NewConfig()
c.RunInterval *= -1
if err := c.Validate(); err == nil {
t.Fatal("expected error for negative run-interval, got nil")
}
}

View File

@@ -0,0 +1,235 @@
# Continuous Queries
This document lays out continuous queries and a proposed architecture for how they'll work within an InfluxDB cluster.
## Definition of Continuous Queries
Continuous queries serve two purposes in InfluxDB:
1. Combining many series into a single series (i.e. removing 1 or more tag dimensions to make queries more efficient)
2. Aggregating and downsampling series
The purpose of both types of continuous query is to duplicate or downsample data automatically in the background, to make querying their results fast and efficient. Think of them as another way to create indexes on data.
Generally, there are continuous queries that create copies of data into another measurement or tagset, and queries that downsample and aggregate data. The only difference between the two types is whether the query has a `GROUP BY time` clause.
Before we get to the continuous query examples, we need to define the `INTO` syntax of queries.
### INTO
`INTO` is a method for running a query and having it output into either another measurement name, retention policy, or database. The syntax looks like this:
```sql
SELECT *
INTO [<retention policy>.]<measurement> [ON <database>]
FROM <measurement>
[WHERE ...]
[GROUP BY ...]
```
The syntax states that the retention policy, database, where clause, and group by clause are all optional. If a retention policy isn't specified, the database's default retention policy will be written into. If the database isn't specified, the database the query is running from will be written into.
By selecting specific fields, `INTO` can merge many series into one that will go into either a new measurement, retention policy, or database. For example:
```sql
SELECT mean(value) as value, region
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), region
```
That will give 1h summaries of the mean value of `cpu_load` for each `region`. Specifying `region` in the `GROUP BY` clause is unnecessary, since having it in the `SELECT` clause forces the results to be grouped by that tag; we've included it in the example for clarity.
With `SELECT ... INTO`, fields will be written as fields and tags will be written as tags.
### Continuous Query Syntax
`INTO` queries run once. Continuous queries turn them into something that runs in the background in the cluster. They're kind of like triggers in SQL.
```sql
CREATE CONTINUOUS QUERY "1h_cpu_load"
ON database_name
BEGIN
SELECT mean(value) as value, region
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), region
END
```
Or chain them together:
```sql
CREATE CONTINUOUS QUERY "10m_event_count"
ON database_name
BEGIN
SELECT count(value)
INTO "10m.events"
FROM events
GROUP BY time(10m)
END
-- this selects from the output of one continuous query and outputs to another series
CREATE CONTINUOUS QUERY "1h_event_count"
ON database_name
BEGIN
SELECT sum(count) as count
INTO "1h.events"
FROM events
GROUP BY time(1h)
END
```
Or multiple aggregations from all series in a measurement. This example assumes you have a retention policy named `1h`.
```sql
CREATE CONTINUOUS QUERY "1h_cpu_load"
ON database_name
BEGIN
SELECT mean(value), percentile(80, value) as percentile_80, percentile(95, value) as percentile_95
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), *
END
```
The `GROUP BY *` indicates that we want to group by the tagset of the points written in. The same tags will be written to the output series. The multiple aggregates in the `SELECT` clause (percentile, mean) will be written in as fields to the resulting series.
Showing what continuous queries we have:
```sql
SHOW CONTINUOUS QUERIES
```
Dropping continuous queries:
```sql
DROP CONTINUOUS QUERY <name> ON <database>
```
### Security
To create or drop a continuous query, the user must be an admin.
### Limitations
In order to prevent cycles and endless copying of data, the following limitation is enforced on continuous queries at create time:
*The output of a continuous query must go to either a different measurement or to a different retention policy.*
In theory, a user could still create a cycle with multiple continuous queries. We should check for these and disallow them.
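For example, a hypothetical CQ like the following would be rejected at create time, since it reads from and writes to the same measurement and retention policy:
```sql
CREATE CONTINUOUS QUERY "bad_cycle"
ON database_name
BEGIN
SELECT mean(value) as value
INTO cpu_load
FROM cpu_load
GROUP BY time(1h)
END
```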
## Proposed Architecture
Continuous queries should be stored in the metastore cluster wide. That is, they amount to a database schema that should be stored in every server in a cluster.
Continuous queries will have to be handled in a different way for two different use cases: those that simply copy data (CQs without a group by time) and those that aggregate and downsample data (those with a group by time).
### No GROUP BY time
CQs that have no `GROUP BY time` clause should be evaluated at the data node as part of the write. The single write should create any other writes for the CQ and submit those in the same request to the brokers, ensuring that all writes succeed (both the original and the new CQ writes) or none do.
I imagine the process going something like this:
1. Convert the data point into its compact form `<series id><time><values>`
2. For each CQ on the measurement and retention policy without a `GROUP BY time`:
2.1. Run the data point through a special query engine that will output 0 or 1 data point.
2.2. GOTO 1. for each newly generated data point
2.3. Write all the data points in a single call to the brokers
2.4. Return success to the user
Note that we need to run this process against the generated data points as well, since they can feed into different retention policies, measurements, and new tag-sets. In step 2.1 I mention that the output will either be a data point or not. That's because of `WHERE` clauses on the query. However, it will never be more than a single data point.
I mention that we'll need a special query engine for these types of queries. In this case, they never have an aggregate function: any query with an aggregate function also has a group by time, and these queries by definition don't have that.
The only things we have to worry about are which fields are being selected and what the where clause looks like. We should be able to put the raw data point through a simple transform function that either outputs another raw point or doesn't.
I think this transform function should be something separate from the regular query planner and engine. It can be in `influxQL`, but it should be fairly simple, since the only purpose of these types of queries is to either filter some data out and output to a new series, or transform into a new series by dropping tags.
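As a rough illustration, here is a minimal Go sketch of such a transform (hypothetical names and types, not the actual InfluxDB engine): a compiled `WHERE` predicate plus a field projection and tag drop, mapping one raw point to zero or one output points.
```go
package cqsketch

// Point is a simplified stand-in for a raw data point.
type Point struct {
	Name   string
	Tags   map[string]string
	Fields map[string]float64
	Time   int64
}

// Transform models one continuous query without a GROUP BY time: a WHERE
// predicate, the selected fields, and the tag dimensions the CQ removes.
type Transform struct {
	Where    func(Point) bool    // compiled WHERE clause; nil matches all
	Fields   []string            // fields named in the SELECT clause
	Target   string              // output measurement (implying RP/database)
	DropTags map[string]struct{} // tag dimensions dropped by the CQ
}

// Apply returns the zero-or-one output points described in step 2.1 above.
func (t Transform) Apply(p Point) (Point, bool) {
	if t.Where != nil && !t.Where(p) {
		return Point{}, false // filtered out: no CQ write is generated
	}
	out := Point{
		Name:   t.Target,
		Time:   p.Time,
		Tags:   make(map[string]string),
		Fields: make(map[string]float64),
	}
	for k, v := range p.Tags {
		if _, drop := t.DropTags[k]; !drop {
			out.Tags[k] = v
		}
	}
	for _, f := range t.Fields {
		if v, ok := p.Fields[f]; ok {
			out.Fields[f] = v
		}
	}
	return out, true
}
```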
### Has GROUP BY time
CQs that have a `GROUP BY time` (or aggregate CQs) will need to be handled differently.
One key point about continuous queries with a `GROUP BY time` is that all their writes should always be `overwrite = true`. That is, they should only have a single data point for each timestamp. This distinction means that continuous queries for previous blocks of time can be safely run multiple times without duplicating data (i.e. they're idempotent).
There are two different ideas I have for how CQs with group by time could be handled. The first is through periodic updates handled by the Raft Leader. The second would be to expand out writes for each CQ and handle them on the data node.
#### Periodic Updates
In this approach the management of how CQs run in a cluster will be centrally located on the Raft Leader. It will be responsible for orchestrating which data nodes run CQs and when.
The naive approach would be to have the leader hand out each CQ for a block of time periodically. The leader could also rerun CQs for periods of time that have recently passed. This would be an easy way to handle the "lagging data" problem, but it's not precise.
Unfortunately, there's no easy way to tell cluster wide if there were data points written in an already passed window of time for a CQ. We might be able to add this at the data nodes and have them track it, but it would be quite a bit more work.
The easy way would just be to have CQs re-execute for periods that recently passed, with some user-configurable window of time after which they stop checking. Then we could give the user the ability to recalculate CQ results for ranges of time if they need to correct for some problem that occurred or for the loading of a bunch of historical data.
With this approach, we'd have the metadata in the database store the last time each CQ was run. Whenever the Raft leader sent out a command to a data node to handle a CQ, the data node would use this metadata to determine which windows of time it should compute.
This approach is like what exists in 0.8, with the exception that it will automatically catch data that is lagged behind in a small window of time and give the user the ability to force recalculation.
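To illustrate, here is a minimal sketch of the window selection under this approach. The function shape, parameter names, and recompute policy are assumptions for this sketch, not the shipped scheduler:
```go
package main

import (
	"fmt"
	"time"
)

// windowsToRun returns the [start, end) windows a CQ should recompute, given
// its group-by interval, the last time it ran, and a user-configurable
// recompute window for lagged data.
func windowsToRun(lastRun, now time.Time, interval, recompute time.Duration) [][2]time.Time {
	start := lastRun
	if floor := now.Add(-recompute); start.Before(floor) {
		start = floor // never look back further than the recompute window
	}
	start = start.Truncate(interval)

	var windows [][2]time.Time
	for t := start; !t.Add(interval).After(now); t = t.Add(interval) {
		windows = append(windows, [2]time.Time{t, t.Add(interval)})
	}
	return windows
}

func main() {
	now := time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)
	// Last run 25 minutes ago with a 10m interval: three windows to recompute.
	for _, w := range windowsToRun(now.Add(-25*time.Minute), now, 10*time.Minute, time.Hour) {
		fmt.Println(w[0].Format("15:04"), "->", w[1].Format("15:04"))
	}
}
```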
#### Expanding writes
When a write comes into a data node, we could have it evaluated against group by CQs in addition to the non-group by ones. It would then create writes that would go through the brokers. When the CQ writes arrive at the data nodes, they would have to handle each write differently depending on whether it was a write to a raw series or a CQ write.
Let's lay out a concrete example.
```sql
CREATE CONTINUOUS QUERY "10m_cpu_by_region"
ON foo
BEGIN
SELECT mean(value)
INTO cpu_by_region
FROM cpu
GROUP BY time(10m), region
END
```
In this example we write values into `cpu` with the tags `region` and `host`.
Here's another example CQ:
```sql
CREATE CONTINUOUS QUERY "1h_cpu"
ON foo
BEGIN
SELECT mean(value)
INTO "1h.cpu"
FROM raw.cpu
GROUP BY time(1h), *
END
```
That would output one series into the `1h` retention policy for the `cpu` measurement for every series from the `raw` retention policy and the `cpu` measurement.
Both of these examples would be handled the same way despite one being a big merge of a bunch of series into one and the other being an aggregation of series in a 1-to-1 mapping.
Say we're collecting data for two hosts in a single region. Then we'd have two distinct series like this:
```
1 - cpu host=serverA region=uswest
2 - cpu host=serverB region=uswest
```
Whenever a write came into a server, we'd look at the continuous queries and see if we needed to create new writes. If we had the two CQ examples above, we'd have to expand a single write into two more writes (one for each CQ).
The first CQ would have to create a new series:
```
3 - cpu_by_region region=uswest
```
The second CQ would use the same series id as the write, but would send it to another retention policy (and thus shard).
We'd need to keep track of which series + retention policy combinations were the result of a CQ. When the data nodes get writes replicated downward, they would have to handle them like this (see the sketch after this list):
1. If write is normal, write through
2. If write is CQ write, compute based on existing values, write to DB
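A minimal sketch of that dispatch, with hypothetical `SeriesRP`/`Write` types and injected write helpers standing in for the real storage calls:
```go
package main

import "fmt"

// SeriesRP identifies a series within a retention policy.
type SeriesRP struct {
	SeriesID        uint64
	RetentionPolicy string
}

// Write is a simplified replicated write for this sketch.
type Write struct {
	SeriesRP
	Time  int64
	Value float64
}

// handleReplicatedWrite routes a write depending on whether its
// series+retention-policy combination was produced by a CQ.
func handleReplicatedWrite(w Write, cqSeries map[SeriesRP]bool,
	writeRaw, mergeCQValue func(Write) error) error {
	if cqSeries[w.SeriesRP] {
		// CQ write: recompute the aggregate for the window from the
		// existing stored value, then overwrite it (overwrite = true).
		return mergeCQValue(w)
	}
	// Normal write: write through unchanged.
	return writeRaw(w)
}

func main() {
	cq := map[SeriesRP]bool{{3, "default"}: true}
	log := func(kind string) func(Write) error {
		return func(w Write) error { fmt.Println(kind, w); return nil }
	}
	handleReplicatedWrite(Write{SeriesRP{1, "default"}, 0, 1.0}, cq, log("raw"), log("cq"))
	handleReplicatedWrite(Write{SeriesRP{3, "default"}, 0, 2.0}, cq, log("raw"), log("cq"))
}
```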
#### Approach tradeoffs
The first approach of periodically running queries would almost certainly be the easiest to implement quickly. It also has the added advantage of not putting additional load on the brokers by ballooning up the number of writes that go through the system.
The second approach is appealing because it would be accurate regardless of when writes come in. However, it would take more work and cause the number of writes going through the brokers to be multiplied by the number of continuous queries, which might not scale to where we need it.
Also, if the data nodes write for every single update, the load on the underlying storage engine would go up significantly as well.

View File

@@ -0,0 +1,529 @@
// Package continuous_querier provides the continuous query service.
package continuous_querier // import "github.com/influxdata/influxdb/services/continuous_querier"
import (
"errors"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
const (
// NoChunkingSize specifies when not to chunk results. When planning
// a select statement, passing zero tells it not to chunk results.
// Only applies to raw queries.
NoChunkingSize = 0
// idDelimiter is used as a delimiter when creating a unique name for a
// Continuous Query.
idDelimiter = string(rune(31)) // unit separator
)
// Statistics for the CQ service.
const (
statQueryOK = "queryOk"
statQueryFail = "queryFail"
)
// ContinuousQuerier represents a service that executes continuous queries.
type ContinuousQuerier interface {
// Run executes the named query in the named database. Blank database or name matches all.
Run(database, name string, t time.Time) error
}
// metaClient is an internal interface to make testing easier.
type metaClient interface {
AcquireLease(name string) (l *meta.Lease, err error)
Databases() []meta.DatabaseInfo
Database(name string) *meta.DatabaseInfo
}
// RunRequest is a request to run one or more CQs.
type RunRequest struct {
// Now tells the CQ service what the current time is.
Now time.Time
// CQs tells the CQ service which queries to run.
// If nil, all queries will be run.
CQs []string
}
// matches returns true if the CQ matches one of the requested CQs.
func (rr *RunRequest) matches(cq *meta.ContinuousQueryInfo) bool {
if rr.CQs == nil {
return true
}
for _, q := range rr.CQs {
if q == cq.Name {
return true
}
}
return false
}
// Service manages continuous query execution.
type Service struct {
MetaClient metaClient
QueryExecutor *influxql.QueryExecutor
Config *Config
RunInterval time.Duration
// RunCh can be used by clients to signal service to run CQs.
RunCh chan *RunRequest
Logger zap.Logger
loggingEnabled bool
stats *Statistics
// lastRuns maps CQ name to last time it was run.
mu sync.RWMutex
lastRuns map[string]time.Time
stop chan struct{}
wg *sync.WaitGroup
}
// NewService returns a new instance of Service.
func NewService(c Config) *Service {
s := &Service{
Config: &c,
RunInterval: time.Duration(c.RunInterval),
RunCh: make(chan *RunRequest),
loggingEnabled: c.LogEnabled,
Logger: zap.New(zap.NullEncoder()),
stats: &Statistics{},
lastRuns: map[string]time.Time{},
}
return s
}
// Open starts the service.
func (s *Service) Open() error {
s.Logger.Info("Starting continuous query service")
if s.stop != nil {
return nil
}
assert(s.MetaClient != nil, "MetaClient is nil")
assert(s.QueryExecutor != nil, "QueryExecutor is nil")
s.stop = make(chan struct{})
s.wg = &sync.WaitGroup{}
s.wg.Add(1)
go s.backgroundLoop()
return nil
}
// Close stops the service.
func (s *Service) Close() error {
if s.stop == nil {
return nil
}
close(s.stop)
s.wg.Wait()
s.wg = nil
s.stop = nil
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "continuous_querier"))
}
// Statistics maintains the statistics for the continuous query service.
type Statistics struct {
QueryOK int64
QueryFail int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "cq",
Tags: tags,
Values: map[string]interface{}{
statQueryOK: atomic.LoadInt64(&s.stats.QueryOK),
statQueryFail: atomic.LoadInt64(&s.stats.QueryFail),
},
}}
}
// Run runs the specified continuous query, or all CQs if none is specified.
func (s *Service) Run(database, name string, t time.Time) error {
var dbs []meta.DatabaseInfo
if database != "" {
// Find the requested database.
db := s.MetaClient.Database(database)
if db == nil {
return influxql.ErrDatabaseNotFound(database)
}
dbs = append(dbs, *db)
} else {
// Get all databases.
dbs = s.MetaClient.Databases()
}
// Loop through databases.
s.mu.Lock()
defer s.mu.Unlock()
for _, db := range dbs {
// Loop through CQs in each DB executing the ones that match name.
for _, cq := range db.ContinuousQueries {
if name == "" || cq.Name == name {
// Remove the last run time for the CQ
id := fmt.Sprintf("%s%s%s", db.Name, idDelimiter, cq.Name)
if _, ok := s.lastRuns[id]; ok {
delete(s.lastRuns, id)
}
}
}
}
// Signal the background routine to run CQs.
s.RunCh <- &RunRequest{Now: t}
return nil
}
// backgroundLoop runs on a go routine and periodically executes CQs.
func (s *Service) backgroundLoop() {
leaseName := "continuous_querier"
t := time.NewTimer(s.RunInterval)
defer t.Stop()
defer s.wg.Done()
for {
select {
case <-s.stop:
s.Logger.Info("continuous query service terminating")
return
case req := <-s.RunCh:
if !s.hasContinuousQueries() {
continue
}
if _, err := s.MetaClient.AcquireLease(leaseName); err == nil {
s.Logger.Info(fmt.Sprintf("running continuous queries by request for time: %v", req.Now))
s.runContinuousQueries(req)
}
case <-t.C:
if !s.hasContinuousQueries() {
t.Reset(s.RunInterval)
continue
}
if _, err := s.MetaClient.AcquireLease(leaseName); err == nil {
s.runContinuousQueries(&RunRequest{Now: time.Now()})
}
t.Reset(s.RunInterval)
}
}
}
// hasContinuousQueries returns true if any CQs exist.
func (s *Service) hasContinuousQueries() bool {
// Get list of all databases.
dbs := s.MetaClient.Databases()
// Loop through all databases executing CQs.
for _, db := range dbs {
if len(db.ContinuousQueries) > 0 {
return true
}
}
return false
}
// runContinuousQueries gets CQs from the meta store and runs them.
func (s *Service) runContinuousQueries(req *RunRequest) {
// Get list of all databases.
dbs := s.MetaClient.Databases()
// Loop through all databases executing CQs.
for _, db := range dbs {
// TODO: distribute across nodes
for _, cq := range db.ContinuousQueries {
if !req.matches(&cq) {
continue
}
if ok, err := s.ExecuteContinuousQuery(&db, &cq, req.Now); err != nil {
s.Logger.Info(fmt.Sprintf("error executing query: %s: err = %s", cq.Query, err))
atomic.AddInt64(&s.stats.QueryFail, 1)
} else if ok {
atomic.AddInt64(&s.stats.QueryOK, 1)
}
}
}
}
// ExecuteContinuousQuery may execute a single CQ. This will return false if there were no errors and the CQ was not run.
func (s *Service) ExecuteContinuousQuery(dbi *meta.DatabaseInfo, cqi *meta.ContinuousQueryInfo, now time.Time) (bool, error) {
// TODO: re-enable stats
//s.stats.Inc("continuousQueryExecuted")
// Local wrapper / helper.
cq, err := NewContinuousQuery(dbi.Name, cqi)
if err != nil {
return false, err
}
// Set the time zone on the now time if the CQ has one. Otherwise, force UTC.
now = now.UTC()
if cq.q.Location != nil {
now = now.In(cq.q.Location)
}
// Get the last time this CQ was run from the service's cache.
s.mu.Lock()
defer s.mu.Unlock()
id := fmt.Sprintf("%s%s%s", dbi.Name, idDelimiter, cqi.Name)
cq.LastRun, cq.HasRun = s.lastRuns[id]
// Set the retention policy to default if it wasn't specified in the query.
if cq.intoRP() == "" {
cq.setIntoRP(dbi.DefaultRetentionPolicy)
}
// Get the group by interval.
interval, err := cq.q.GroupByInterval()
if err != nil {
return false, err
} else if interval == 0 {
return false, nil
}
// Get the group by offset.
offset, err := cq.q.GroupByOffset()
if err != nil {
return false, err
}
// See if this query needs to be run.
run, nextRun, err := cq.shouldRunContinuousQuery(now, interval)
if err != nil {
return false, err
} else if !run {
return false, nil
}
resampleEvery := interval
if cq.Resample.Every != 0 {
resampleEvery = cq.Resample.Every
}
// We're about to run the query so store the current time closest to the nearest interval.
// If all is going well, this time should be the same as nextRun.
cq.LastRun = truncate(now.Add(-offset), resampleEvery).Add(offset)
s.lastRuns[id] = cq.LastRun
// Retrieve the oldest interval we should calculate based on the next time
// interval. We do this instead of using the current time just in case any
// time intervals were missed. The start time of the oldest interval is what
// we use as the start time.
resampleFor := interval
if cq.Resample.For != 0 {
resampleFor = cq.Resample.For
} else if interval < resampleEvery {
resampleFor = resampleEvery
}
// If the resample interval is greater than the interval of the query, use the
// query interval instead.
if interval < resampleEvery {
resampleEvery = interval
}
// Calculate and set the time range for the query.
startTime := truncate(nextRun.Add(interval-resampleFor-offset-1), interval).Add(offset)
endTime := truncate(now.Add(interval-resampleEvery-offset), interval).Add(offset)
if !endTime.After(startTime) {
// Exit early since there is no time interval.
return false, nil
}
if err := cq.q.SetTimeRange(startTime, endTime); err != nil {
s.Logger.Info(fmt.Sprintf("error setting time range: %s\n", err))
return false, err
}
var start time.Time
if s.loggingEnabled {
s.Logger.Info(fmt.Sprintf("executing continuous query %s (%v to %v)", cq.Info.Name, startTime, endTime))
start = time.Now()
}
// Do the actual processing of the query & writing of results.
if err := s.runContinuousQueryAndWriteResult(cq); err != nil {
s.Logger.Info(fmt.Sprintf("error: %s. running: %s\n", err, cq.q.String()))
return false, err
}
if s.loggingEnabled {
s.Logger.Info(fmt.Sprintf("finished continuous query %s (%v to %v) in %s", cq.Info.Name, startTime, endTime, time.Since(start)))
}
return true, nil
}
// runContinuousQueryAndWriteResult will run the query against the cluster and write the results back in
func (s *Service) runContinuousQueryAndWriteResult(cq *ContinuousQuery) error {
// Wrap the CQ's inner SELECT statement in a Query for the QueryExecutor.
q := &influxql.Query{
Statements: influxql.Statements([]influxql.Statement{cq.q}),
}
closing := make(chan struct{})
defer close(closing)
// Execute the SELECT.
ch := s.QueryExecutor.ExecuteQuery(q, influxql.ExecutionOptions{
Database: cq.Database,
}, closing)
// There is only one statement, so we will only ever receive one result
res, ok := <-ch
if !ok {
panic("result channel was closed")
}
if res.Err != nil {
return res.Err
}
return nil
}
// ContinuousQuery is a local wrapper / helper around continuous queries.
type ContinuousQuery struct {
Database string
Info *meta.ContinuousQueryInfo
HasRun bool
LastRun time.Time
Resample ResampleOptions
q *influxql.SelectStatement
}
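// intoRP and setIntoRP read and override the retention policy named in the
// query's INTO clause.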
func (cq *ContinuousQuery) intoRP() string { return cq.q.Target.Measurement.RetentionPolicy }
func (cq *ContinuousQuery) setIntoRP(rp string) { cq.q.Target.Measurement.RetentionPolicy = rp }
// ResampleOptions controls the resampling intervals and duration of this continuous query.
type ResampleOptions struct {
// The query will be resampled at this time interval. The first query will be
// performed at this time interval. If this option is not given, the resample
// interval is set to the group by interval.
Every time.Duration
// The query will continue being resampled for this time duration. If this
// option is not given, the resample duration is the same as the group by
// interval. A bucket's time is calculated based on the bucket's start time,
// so a 40m resample duration with a group by interval of 10m will resample
// the bucket 4 times (using the default time interval).
For time.Duration
}
// NewContinuousQuery returns a ContinuousQuery object with a parsed influxql.CreateContinuousQueryStatement.
func NewContinuousQuery(database string, cqi *meta.ContinuousQueryInfo) (*ContinuousQuery, error) {
stmt, err := influxql.NewParser(strings.NewReader(cqi.Query)).ParseStatement()
if err != nil {
return nil, err
}
q, ok := stmt.(*influxql.CreateContinuousQueryStatement)
if !ok || q.Source.Target == nil || q.Source.Target.Measurement == nil {
return nil, errors.New("query isn't a valid continuous query")
}
cquery := &ContinuousQuery{
Database: database,
Info: cqi,
Resample: ResampleOptions{
Every: q.ResampleEvery,
For: q.ResampleFor,
},
q: q.Source,
}
return cquery, nil
}
// shouldRunContinuousQuery returns true if the CQ should be schedule to run. It will use the
// lastRunTime of the CQ and the rules for when to run set through the query to determine
// if this CQ should be run.
func (cq *ContinuousQuery) shouldRunContinuousQuery(now time.Time, interval time.Duration) (bool, time.Time, error) {
// If it's not aggregated, do not run the query.
if cq.q.IsRawQuery {
return false, cq.LastRun, errors.New("continuous queries must be aggregate queries")
}
// Override the query's default run interval with the resample options.
resampleEvery := interval
if cq.Resample.Every != 0 {
resampleEvery = cq.Resample.Every
}
// Determine if we should run the continuous query based on the last time it ran.
// If the query never ran, execute it using the current time.
if cq.HasRun {
// Retrieve the zone offset for the previous window.
_, startOffset := cq.LastRun.Add(-1).Zone()
nextRun := cq.LastRun.Add(resampleEvery)
// Retrieve the end zone offset for the end of the current interval.
if _, endOffset := nextRun.Add(-1).Zone(); startOffset != endOffset {
diff := int64(startOffset-endOffset) * int64(time.Second)
if abs(diff) < int64(resampleEvery) {
nextRun = nextRun.Add(time.Duration(diff))
}
}
if nextRun.UnixNano() <= now.UnixNano() {
return true, nextRun, nil
}
} else {
// Retrieve the location from the CQ.
loc := cq.q.Location
if loc == nil {
loc = time.UTC
}
return true, now.In(loc), nil
}
return false, cq.LastRun, nil
}
// assert will panic with a given formatted message if the given condition is false.
func assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assert failed: "+msg, v...))
}
}
// truncate truncates the time based on the unix timestamp instead of the
// Go time library. The Go time library has the start of the week on Monday
// while the start of the week for the unix timestamp is a Thursday.
func truncate(ts time.Time, d time.Duration) time.Time {
t := ts.UnixNano()
offset := zone(ts)
dt := (t + offset) % int64(d)
if dt < 0 {
// Negative modulo rounds up instead of down, so offset
// with the duration.
dt += int64(d)
}
ts = time.Unix(0, t-dt).In(ts.Location())
if adjustedOffset := zone(ts); adjustedOffset != offset {
diff := offset - adjustedOffset
if abs(diff) < int64(d) {
ts = ts.Add(time.Duration(diff))
}
}
return ts
}
func zone(ts time.Time) int64 {
_, offset := ts.Zone()
return int64(offset) * int64(time.Second)
}
func abs(v int64) int64 {
if v < 0 {
return -v
}
return v
}

View File

@@ -0,0 +1,768 @@
package continuous_querier
import (
"errors"
"fmt"
"os"
"sync"
"testing"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
var (
errExpected = errors.New("expected error")
errUnexpected = errors.New("unexpected error")
)
// Test closing never opened, open, open already open, close, and close already closed.
func TestOpenAndClose(t *testing.T) {
s := NewTestService(t)
if err := s.Close(); err != nil {
t.Error(err)
} else if err = s.Open(); err != nil {
t.Error(err)
} else if err = s.Open(); err != nil {
t.Error(err)
} else if err = s.Close(); err != nil {
t.Error(err)
} else if err = s.Close(); err != nil {
t.Error(err)
}
}
// Test Run method.
func TestContinuousQueryService_Run(t *testing.T) {
s := NewTestService(t)
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
expectCallCnt := 3
callCnt := 0
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
callCnt++
if callCnt >= expectCallCnt {
done <- struct{}{}
}
ctx.Results <- &influxql.Result{}
return nil
},
}
// Use a custom "now" time since the internals of last run care about
// what the actual time is. Truncate to 10 minutes so we are starting on an interval.
now := time.Now().Truncate(10 * time.Minute)
s.Open()
// Trigger service to run all CQs.
s.Run("", "", now)
// Shouldn't time out.
if err := wait(done, 100*time.Millisecond); err != nil {
t.Error(err)
}
// This time it should timeout because ExecuteQuery should not get called again.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
s.Close()
// Now test just one query.
expectCallCnt = 1
callCnt = 0
s.Open()
s.Run("db", "cq", now)
// Shouldn't time out.
if err := wait(done, 100*time.Millisecond); err != nil {
t.Error(err)
}
// This time it should timeout because ExecuteQuery should not get called again.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
s.Close()
}
func TestContinuousQueryService_ResampleOptions(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db RESAMPLE EVERY 10s FOR 2m BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(1m) END`)
s.MetaClient = mc
db := s.MetaClient.Database("db")
cq, err := NewContinuousQuery(db.Name, &db.ContinuousQueries[0])
if err != nil {
t.Fatal(err)
} else if cq.Resample.Every != 10*time.Second {
t.Errorf("expected resample every to be 10s, got %s", influxql.FormatDuration(cq.Resample.Every))
} else if cq.Resample.For != 2*time.Minute {
t.Errorf("expected resample for 2m, got %s", influxql.FormatDuration(cq.Resample.For))
}
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval. Then trigger a run.
// This should trigger two queries (one for the current time interval, one for the previous).
now := time.Now().UTC().Truncate(10 * time.Minute)
expected.min = now.Add(-2 * time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Trigger another run 10 seconds later. Another two queries should happen,
// but it will be a different two queries.
expected.min = expected.min.Add(time.Minute)
expected.max = expected.max.Add(time.Minute)
s.RunCh <- &RunRequest{Now: now.Add(10 * time.Second)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Reset the time period and send the initial request at 5 seconds after the
// 10 minute mark. There should be exactly one call since the current interval is too
// young and only one interval matches the FOR duration.
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.Run("", "", now.Add(5*time.Second))
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Send a message 10 minutes later and ensure that the system plays catchup.
expected.max = now.Add(10*time.Minute - 1)
s.RunCh <- &RunRequest{Now: now.Add(10 * time.Minute)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// No overflow should be sent.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
}
func TestContinuousQueryService_EveryHigherThanInterval(t *testing.T) {
s := NewTestService(t)
ms := NewMetaClient(t)
ms.CreateDatabase("db", "")
ms.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db RESAMPLE EVERY 1m BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(30s) END`)
s.MetaClient = ms
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteQuery.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval. Then trigger a run.
// This should trigger two queries (one for the current time interval, one for the previous)
// since the default FOR interval should be EVERY, not the GROUP BY interval.
now := time.Now().Truncate(10 * time.Minute)
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Trigger 30 seconds later. Nothing should run.
s.RunCh <- &RunRequest{Now: now.Add(30 * time.Second)}
if err := wait(done, 100*time.Millisecond); err == nil {
t.Fatal("too many queries")
}
// Run again 1 minute later. Another two queries should run.
expected.min = now
expected.max = now.Add(time.Minute - 1)
s.RunCh <- &RunRequest{Now: now.Add(time.Minute)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// No overflow should be sent.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
}
func TestContinuousQueryService_GroupByOffset(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(1m, 30s) END`)
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval with a 30 second offset.
// Then trigger a run. This should trigger two queries (one for the current time
// interval, one for the previous).
now := time.Now().UTC().Truncate(10 * time.Minute).Add(30 * time.Second)
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
}
// Test service when not the cluster leader (CQs shouldn't run).
func TestContinuousQueryService_NotLeader(t *testing.T) {
s := NewTestService(t)
// Set RunInterval high so we can test triggering with the RunCh below.
s.RunInterval = 10 * time.Second
s.MetaClient.(*MetaClient).Leader = false
done := make(chan struct{})
// Set a callback for ExecuteStatement. Shouldn't get called because we're not the leader.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
done <- struct{}{}
ctx.Results <- &influxql.Result{Err: errUnexpected}
return nil
},
}
s.Open()
// Trigger service to run CQs.
s.RunCh <- &RunRequest{Now: time.Now()}
// Expect timeout error because ExecuteQuery callback wasn't called.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("expected timeout error, but a CQ executed")
}
s.Close()
}
// Test ExecuteContinuousQuery with invalid queries.
func TestExecuteContinuousQuery_InvalidQueries(t *testing.T) {
s := NewTestService(t)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return errUnexpected
},
}
dbis := s.MetaClient.Databases()
dbi := dbis[0]
cqi := dbi.ContinuousQueries[0]
cqi.Query = `this is not a query`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
// Valid query but invalid continuous query.
cqi.Query = `SELECT * FROM cpu`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
// Group by requires aggregate.
cqi.Query = `SELECT value INTO other_value FROM cpu WHERE time > now() - 1h GROUP BY time(1s)`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
}
// Test the time range for different CQ durations.
func TestExecuteContinuousQuery_TimeRange(t *testing.T) {
// Choose a start date that is not on an interval border for anyone.
now := mustParseTime(t, "2000-01-01T00:00:00Z")
for _, tt := range []struct {
d string
start, end time.Time
}{
{
d: "10s",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:00:10Z"),
},
{
d: "1m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:01:00Z"),
},
{
d: "10m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:10:00Z"),
},
{
d: "30m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:30:00Z"),
},
{
d: "1h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T01:00:00Z"),
},
{
d: "2h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T02:00:00Z"),
},
{
d: "12h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T12:00:00Z"),
},
{
d: "1d",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-02T00:00:00Z"),
},
{
d: "1w",
start: mustParseTime(t, "1999-12-30T00:00:00Z"),
end: mustParseTime(t, "2000-01-06T00:00:00Z"),
},
} {
t.Run(tt.d, func(t *testing.T) {
d, err := influxql.ParseDuration(tt.d)
if err != nil {
t.Fatalf("unable to parse duration: %s", err)
}
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq",
fmt.Sprintf(`CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(%s) END`, tt.d))
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
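// influxql.TimeRange returns an inclusive max; add a nanosecond to recover
// the exclusive interval end used in tt.end.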
max = max.Add(time.Nanosecond)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !tt.start.Equal(min) || !tt.end.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, tt.start, tt.end)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Send an initial run request one nanosecond after the start to
// prime the last CQ map.
s.RunCh <- &RunRequest{Now: now.Add(time.Nanosecond)}
// Execute the real request after the time interval.
s.RunCh <- &RunRequest{Now: now.Add(d)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
})
}
}
// Test the time range for different CQ durations.
func TestExecuteContinuousQuery_TimeZone(t *testing.T) {
type test struct {
now time.Time
start, end time.Time
}
// Choose a start date that is not on an interval border for anyone.
for _, tt := range []struct {
name string
d string
options string
initial time.Time
tests []test
}{
{
name: "DaylightSavingsStart/1d",
d: "1d",
initial: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
tests: []test{
{
start: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
end: mustParseTime(t, "2000-04-03T00:00:00-04:00"),
},
},
},
{
name: "DaylightSavingsStart/2h",
d: "2h",
initial: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
tests: []test{
{
start: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
end: mustParseTime(t, "2000-04-02T03:00:00-04:00"),
},
{
start: mustParseTime(t, "2000-04-02T03:00:00-04:00"),
end: mustParseTime(t, "2000-04-02T04:00:00-04:00"),
},
},
},
{
name: "DaylightSavingsEnd/1d",
d: "1d",
initial: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
tests: []test{
{
start: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
end: mustParseTime(t, "2000-10-30T00:00:00-05:00"),
},
},
},
{
name: "DaylightSavingsEnd/2h",
d: "2h",
initial: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
tests: []test{
{
start: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
end: mustParseTime(t, "2000-10-29T02:00:00-05:00"),
},
{
start: mustParseTime(t, "2000-10-29T02:00:00-05:00"),
end: mustParseTime(t, "2000-10-29T04:00:00-05:00"),
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq",
fmt.Sprintf(`CREATE CONTINUOUS QUERY cq ON db %s BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(%s) TZ('America/New_York') END`, tt.options, tt.d))
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
// Set a callback for ExecuteStatement.
tests := make(chan test, 1)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
test := <-tests
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
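// As above, convert the inclusive max from influxql.TimeRange to the
// exclusive window end used in the expected values.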
max = max.Add(time.Nanosecond)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !test.start.Equal(min) || !test.end.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, test.start, test.end)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Send an initial run request one nanosecond after the start to
// prime the last CQ map.
s.RunCh <- &RunRequest{Now: tt.initial.Add(time.Nanosecond)}
// Execute each of the tests and ensure the times are correct.
for i, test := range tt.tests {
tests <- test
now := test.now
if now.IsZero() {
now = test.end
}
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(fmt.Errorf("%d. %s", i+1, err))
}
}
})
}
}
// Test ExecuteContinuousQuery when QueryExecutor returns an error.
func TestExecuteContinuousQuery_QueryExecutor_Error(t *testing.T) {
s := NewTestService(t)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return errExpected
},
}
dbis := s.MetaClient.Databases()
dbi := dbis[0]
cqi := dbi.ContinuousQueries[0]
now := time.Now().Truncate(10 * time.Minute)
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, now); err != errExpected {
t.Errorf("exp = %s, got = %v", errExpected, err)
}
}
// NewTestService returns a new *Service with default mock object members.
func NewTestService(t *testing.T) *Service {
s := NewService(NewConfig())
ms := NewMetaClient(t)
s.MetaClient = ms
s.QueryExecutor = influxql.NewQueryExecutor()
s.RunInterval = time.Millisecond
// Only log to stderr when verbose testing is enabled so test output isn't polluted.
if testing.Verbose() {
s.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
// Add a couple test databases and CQs.
ms.CreateDatabase("db", "rp")
ms.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT count(cpu) INTO cpu_count FROM cpu WHERE time > now() - 1h GROUP BY time(1s) END`)
ms.CreateDatabase("db2", "default")
ms.CreateContinuousQuery("db2", "cq2", `CREATE CONTINUOUS QUERY cq2 ON db2 BEGIN SELECT mean(value) INTO cpu_mean FROM cpu WHERE time > now() - 10m GROUP BY time(1m) END`)
ms.CreateDatabase("db3", "default")
ms.CreateContinuousQuery("db3", "cq3", `CREATE CONTINUOUS QUERY cq3 ON db3 BEGIN SELECT mean(value) INTO "1hAverages".:MEASUREMENT FROM /cpu[0-9]?/ GROUP BY time(10s) END`)
return s
}
// MetaClient is a mock meta store.
type MetaClient struct {
mu sync.RWMutex
Leader bool
AllowLease bool
DatabaseInfos []meta.DatabaseInfo
Err error
t *testing.T
nodeID uint64
}
// NewMetaClient returns a *MetaClient.
func NewMetaClient(t *testing.T) *MetaClient {
return &MetaClient{
Leader: true,
AllowLease: true,
t: t,
nodeID: 1,
}
}
// NodeID returns the client's node ID.
func (ms *MetaClient) NodeID() uint64 { return ms.nodeID }
// AcquireLease attempts to acquire the specified lease.
func (ms *MetaClient) AcquireLease(name string) (l *meta.Lease, err error) {
if ms.Leader {
if ms.AllowLease {
return &meta.Lease{Name: name}, nil
}
return nil, errors.New("another node owns the lease")
}
return nil, meta.ErrServiceUnavailable
}
// Databases returns a list of database info about each database in the coordinator.
func (ms *MetaClient) Databases() []meta.DatabaseInfo {
ms.mu.RLock()
defer ms.mu.RUnlock()
return ms.DatabaseInfos
}
// Database returns a single database by name.
func (ms *MetaClient) Database(name string) *meta.DatabaseInfo {
ms.mu.RLock()
defer ms.mu.RUnlock()
return ms.database(name)
}
func (ms *MetaClient) database(name string) *meta.DatabaseInfo {
if ms.Err != nil {
return nil
}
for i := range ms.DatabaseInfos {
if ms.DatabaseInfos[i].Name == name {
return &ms.DatabaseInfos[i]
}
}
return nil
}
// CreateDatabase adds a new database to the meta store.
func (ms *MetaClient) CreateDatabase(name, defaultRetentionPolicy string) error {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.Err != nil {
return ms.Err
}
// See if the database already exists.
for _, dbi := range ms.DatabaseInfos {
if dbi.Name == name {
return fmt.Errorf("database already exists: %s", name)
}
}
// Create database.
ms.DatabaseInfos = append(ms.DatabaseInfos, meta.DatabaseInfo{
Name: name,
DefaultRetentionPolicy: defaultRetentionPolicy,
})
return nil
}
// CreateContinuousQuery adds a CQ to the meta store.
func (ms *MetaClient) CreateContinuousQuery(database, name, query string) error {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.Err != nil {
return ms.Err
}
dbi := ms.database(database)
if dbi == nil {
return fmt.Errorf("database not found: %s", database)
}
// See if CQ already exists.
for _, cqi := range dbi.ContinuousQueries {
if cqi.Name == name {
return fmt.Errorf("continuous query already exists: %s", name)
}
}
// Create a new CQ and store it.
dbi.ContinuousQueries = append(dbi.ContinuousQueries, meta.ContinuousQueryInfo{
Name: name,
Query: query,
})
return nil
}
// StatementExecutor is a mock statement executor.
type StatementExecutor struct {
ExecuteStatementFn func(stmt influxql.Statement, ctx influxql.ExecutionContext) error
}
func (e *StatementExecutor) ExecuteStatement(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return e.ExecuteStatementFn(stmt, ctx)
}
func wait(c chan struct{}, d time.Duration) (err error) {
select {
case <-c:
case <-time.After(d):
err = errors.New("timed out")
}
return
}
func mustParseTime(t *testing.T, value string) time.Time {
ts, err := time.Parse(time.RFC3339, value)
if err != nil {
t.Fatalf("unable to parse time: %s", err)
}
return ts
}

View File

@@ -0,0 +1,192 @@
# The Graphite Input
## A Note On UDP/IP OS Buffer Sizes
If you're using UDP input and running Linux or FreeBSD, please adjust your UDP buffer
size limit, [see here for more details.](../udp/README.md#a-note-on-udpip-os-buffer-sizes)
## Configuration
Each Graphite input allows the binding address, target database, and protocol to be set. If the database does not exist, it will be created automatically when the input is initialized. The write-consistency-level can also be set. If any write operations do not meet the configured consistency guarantees, an error will occur and the data will not be indexed. The default consistency-level is `ONE`.
Each Graphite input also performs internal batching of the points it receives, as batched writes to the database are more efficient. The default _batch size_ is 5000, _pending batch_ factor is 10, with a _batch timeout_ of 1 second. This means the input will write batches of maximum size 5000, but if a batch has not reached 5000 points within 1 second of the first point being added to a batch, it will emit that batch regardless of size. The pending batch factor controls how many batches can be in memory at once, allowing the input to transmit a batch, while still building other batches.
## Parsing Metrics
The Graphite plugin allows measurements to be saved using the Graphite line protocol. By default, enabling the Graphite plugin will allow you to collect metrics and store them using the metric name as the measurement. If you send a metric named `servers.localhost.cpu.loadavg.10`, it will store the full metric name as the measurement with no extracted tags.
While this default setup works, it is not the ideal way to store measurements in InfluxDB since it does not take advantage of tags. It also will not perform optimally with large dataset sizes, since queries will be forced to use regexes, which are known not to scale well.
To extract tags from metrics, one or more templates must be configured to parse metrics into tags and measurements.
## Templates
Templates allow matching parts of a metric name to be used as tag keys in the stored metric. They have a similar format to Graphite metric names. The values in between the separators are used as the tag keys. A tag key's position within the template determines which section of the metric name becomes its value; if a template position is left empty, the corresponding Graphite portion is skipped.
The special value _measurement_ is used to define the measurement name. It can have a trailing `*` to indicate that the remainder of the metric should be used. If a _measurement_ is not specified, the full metric name is used.
### Basic Matching
`servers.localhost.cpu.loadavg.10`
* Template: `.host.resource.measurement*`
* Output: _measurement_ =`loadavg.10` _tags_ =`host=localhost resource=cpu`
### Multiple Measurement & Tags Matching
The _measurement_ can be specified multiple times in a template to provide more control over the measurement name. Tags can also be
matched multiple times. Multiple values will be joined together using the _Separator_ config variable. By default, this value is `.`.
`servers.localhost.localdomain.cpu.cpu0.user`
* Template: `.host.host.measurement.cpu.measurement`
* Output: _measurement_ = `cpu.user` _tags_ = `host=localhost.localdomain cpu=cpu0`
Since `.` requires queries on measurements to be double-quoted, you may want to set this to `_` to simplify querying parsed metrics.
`servers.localhost.cpu.cpu0.user`
* Separator: `_`
* Template: `.host.measurement.cpu.measurement`
* Output: _measurement_ = `cpu_user` _tags_ = `host=localhost cpu=cpu0`
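For illustration, here is a minimal sketch in Go of the matching rules described above. It ignores filters, extra tags, and field keywords, and `applyTemplate` is an assumption for this sketch, not the plugin's actual parser:
```go
package main

import (
	"fmt"
	"strings"
)

// applyTemplate maps a Graphite metric name onto a measurement and tags using
// a template, joining repeated sections with sep.
func applyTemplate(metric, template, sep string) (string, map[string]string) {
	parts := strings.Split(metric, ".")
	keys := strings.Split(template, ".")
	tags := map[string]string{}
	var m []string
	for i, key := range keys {
		if i >= len(parts) {
			break
		}
		switch {
		case key == "measurement":
			m = append(m, parts[i])
		case key == "measurement*":
			m = append(m, parts[i:]...) // consume the remainder of the name
		case key != "":
			tags[key] = appendValue(tags[key], parts[i], sep)
		}
	}
	if len(m) == 0 {
		return metric, tags // no measurement keyword: keep the full name
	}
	return strings.Join(m, sep), tags
}

func appendValue(existing, v, sep string) string {
	if existing == "" {
		return v
	}
	return existing + sep + v
}

func main() {
	m, tags := applyTemplate("servers.localhost.cpu.cpu0.user", ".host.measurement.cpu.measurement", "_")
	fmt.Println(m, tags) // cpu_user map[cpu:cpu0 host:localhost]
}
```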
### Adding Tags
Additional tags can be added to a metric if they don't exist on the received metric. You can add additional tags by specifying them after the pattern. Tags have the same format as the line protocol. Multiple tags are separated by commas.
`servers.localhost.cpu.loadavg.10`
* Template: `.host.resource.measurement* region=us-west,zone=1a`
* Output: _measurement_ = `loadavg.10` _tags_ = `host=localhost resource=cpu region=us-west zone=1a`
### Fields
A field key can be specified by using the keyword _field_. By default if no _field_ keyword is specified then the metric will be written to a field named _value_.
The field key can also be derived from the second "half" of the input metric name by specifying ```field*``` (e.g. ```measurement.measurement.field*```). This cannot be used in conjunction with "measurement*"!
It's possible to amend measurement metrics with additional fields, e.g.:
Input:
```
sensu.metric.net.server0.eth0.rx_packets 461295119435 1444234982
sensu.metric.net.server0.eth0.tx_bytes 1093086493388480 1444234982
sensu.metric.net.server0.eth0.rx_bytes 1015633926034834 1444234982
sensu.metric.net.server0.eth0.tx_errors 0 1444234982
sensu.metric.net.server0.eth0.rx_errors 0 1444234982
sensu.metric.net.server0.eth0.tx_dropped 0 1444234982
sensu.metric.net.server0.eth0.rx_dropped 0 1444234982
```
With template:
```
sensu.metric.* ..measurement.host.interface.field
```
Becomes database entry:
```
> select * from net
name: net
---------
time host interface rx_bytes rx_dropped rx_errors rx_packets tx_bytes tx_dropped tx_errors
1444234982000000000 server0 eth0 1.015633926034834e+15 0 0 4.61295119435e+11 1.09308649338848e+15 0 0
```
## Multiple Templates
One template may not match all metrics. For example, using multiple plugins with Diamond will produce metrics in different formats. If you need to use multiple templates, you'll need to define a prefix filter that must match before the template can be applied.
### Filters
Filters have a similar format to templates but work more like wildcard expressions. When multiple filters would match a metric, the more specific one is chosen. Filters are configured by adding them before the template.
For example,
```
servers.localhost.cpu.loadavg.10
servers.host123.elasticsearch.cache_hits 100
servers.host456.mysql.tx_count 10
servers.host789.prod.mysql.tx_count 10
```
* `servers.*` would match all values
* `servers.*.mysql` would match `servers.host456.mysql.tx_count 10`
* `servers.localhost.*` would match `servers.localhost.cpu.loadavg.10`
* `servers.*.*.mysql` would match `servers.host789.prod.mysql.tx_count 10`
## Default Templates
If no template filters are defined or you want to just have one basic template, you can define a default template. This template will apply to any metric that has not already matched a filter.
```
dev.http.requests.200
prod.myapp.errors.count
dev.db.queries.count
```
* `env.app.measurement*` would create
* _measurement_=`requests.200` _tags_=`env=dev,app=http`
* _measurement_= `errors.count` _tags_=`env=prod,app=myapp`
* _measurement_=`queries.count` _tags_=`env=dev,app=db`
## Global Tags
If you need to add the same set of tags to all metrics, you can define them globally at the plugin level and not within each template description.
## Minimal Config
```
[[graphite]]
enabled = true
# bind-address = ":2003"
# protocol = "tcp"
# consistency-level = "one"
### If matching multiple measurement files, this string will be used to join the matched values.
# separator = "."
### Default tags that will be added to all metrics. These can be overridden at the template level
### or by tags extracted from metric
# tags = ["region=us-east", "zone=1c"]
### Each template line requires a template pattern. It can have an optional
### filter before the template, separated by spaces. It can also have optional extra
### tags following the template. Multiple tags should be separated by commas with no
### spaces, similar to the line protocol format. There can be only one default template.
# templates = [
# "*.app env.service.resource.measurement",
# # Default template
# "server.*",
#]
```
## Customized Config
```
[[graphite]]
enabled = true
separator = "_"
tags = ["region=us-east", "zone=1c"]
templates = [
# filter + template
"*.app env.service.resource.measurement",
# filter + template + extra tag
"stats.* .host.measurement* region=us-west,agent=sensu",
# filter + template with field key
"stats.* .host.measurement.field",
# default template. Ignore the first Graphite component "servers"
".measurement*",
]
```
## Two Graphite Listeners, UDP & TCP, Config
```
[[graphite]]
enabled = true
bind-address = ":2003"
protocol = "tcp"
# consistency-level = "one"
[[graphite]]
enabled = true
bind-address = ":2004" # the bind address
protocol = "udp" # protocol to read via
udp-read-buffer = 8388608 # (8*1024*1024) UDP read buffer size
```

View File

@@ -0,0 +1,288 @@
package graphite
import (
"fmt"
"strings"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultBindAddress is the default binding interface if none is specified.
DefaultBindAddress = ":2003"
// DefaultDatabase is the default database if none is specified.
DefaultDatabase = "graphite"
// DefaultProtocol is the default IP protocol used by the Graphite input.
DefaultProtocol = "tcp"
// DefaultConsistencyLevel is the default write consistency for the Graphite input.
DefaultConsistencyLevel = "one"
// DefaultSeparator is the default join character to use when joining multiple
// measurement parts in a template.
DefaultSeparator = "."
// DefaultBatchSize is the default write batch size.
DefaultBatchSize = 5000
// DefaultBatchPending is the default number of pending write batches.
DefaultBatchPending = 10
// DefaultBatchTimeout is the default Graphite batch timeout.
DefaultBatchTimeout = time.Second
// DefaultUDPReadBuffer is the default buffer size for the UDP listener.
// Sets the size of the operating system's receive buffer associated with
// the UDP traffic. Keep in mind that the OS must be able
// to handle the number set here or the UDP listener will error and exit.
//
// DefaultReadBuffer = 0 means to use the OS default, which is usually too
// small for high UDP performance.
//
// Increasing OS buffer limits:
// Linux: sudo sysctl -w net.core.rmem_max=<read-buffer>
// BSD/Darwin: sudo sysctl -w kern.ipc.maxsockbuf=<read-buffer>
DefaultUDPReadBuffer = 0
)
// Config represents the configuration for Graphite endpoints.
type Config struct {
Enabled bool `toml:"enabled"`
BindAddress string `toml:"bind-address"`
Database string `toml:"database"`
RetentionPolicy string `toml:"retention-policy"`
Protocol string `toml:"protocol"`
BatchSize int `toml:"batch-size"`
BatchPending int `toml:"batch-pending"`
BatchTimeout toml.Duration `toml:"batch-timeout"`
ConsistencyLevel string `toml:"consistency-level"`
Templates []string `toml:"templates"`
Tags []string `toml:"tags"`
Separator string `toml:"separator"`
UDPReadBuffer int `toml:"udp-read-buffer"`
}
// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
return Config{
BindAddress: DefaultBindAddress,
Database: DefaultDatabase,
Protocol: DefaultProtocol,
BatchSize: DefaultBatchSize,
BatchPending: DefaultBatchPending,
BatchTimeout: toml.Duration(DefaultBatchTimeout),
ConsistencyLevel: DefaultConsistencyLevel,
Separator: DefaultSeparator,
}
}
// WithDefaults takes the given config and returns a new config with any required
// default values set.
func (c *Config) WithDefaults() *Config {
d := *c
if d.BindAddress == "" {
d.BindAddress = DefaultBindAddress
}
if d.Database == "" {
d.Database = DefaultDatabase
}
if d.Protocol == "" {
d.Protocol = DefaultProtocol
}
if d.BatchSize == 0 {
d.BatchSize = DefaultBatchSize
}
if d.BatchPending == 0 {
d.BatchPending = DefaultBatchPending
}
if d.BatchTimeout == 0 {
d.BatchTimeout = toml.Duration(DefaultBatchTimeout)
}
if d.ConsistencyLevel == "" {
d.ConsistencyLevel = DefaultConsistencyLevel
}
if d.Separator == "" {
d.Separator = DefaultSeparator
}
if d.UDPReadBuffer == 0 {
d.UDPReadBuffer = DefaultUDPReadBuffer
}
return &d
}
// DefaultTags returns the config's tags.
func (c *Config) DefaultTags() models.Tags {
m := make(map[string]string, len(c.Tags))
for _, t := range c.Tags {
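// Validate ensures each tag is a key=value pair, so the split is assumed
// to yield exactly two parts here.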
parts := strings.Split(t, "=")
m[parts[0]] = parts[1]
}
return models.NewTags(m)
}
// Validate validates the config's templates and tags.
func (c *Config) Validate() error {
if err := c.validateTemplates(); err != nil {
return err
}
if err := c.validateTags(); err != nil {
return err
}
return nil
}
func (c *Config) validateTemplates() error {
// map to keep track of filters we see
filters := map[string]struct{}{}
for i, t := range c.Templates {
parts := strings.Fields(t)
// Ensure template string is non-empty
if len(parts) == 0 {
return fmt.Errorf("missing template at position: %d", i)
}
if len(parts) == 1 && parts[0] == "" {
return fmt.Errorf("missing template at position: %d", i)
}
if len(parts) > 3 {
return fmt.Errorf("invalid template format: '%s'", t)
}
template := t
filter := ""
tags := ""
if len(parts) >= 2 {
// We could have <filter> <template> or <template> <tags>. Equals is only allowed in
// tags section.
if strings.Contains(parts[1], "=") {
template = parts[0]
tags = parts[1]
} else {
filter = parts[0]
template = parts[1]
}
}
if len(parts) == 3 {
tags = parts[2]
}
// Validate the template has one and only one measurement
if err := c.validateTemplate(template); err != nil {
return err
}
// Prevent duplicate filters in the config
if _, ok := filters[filter]; ok {
return fmt.Errorf("duplicate filter '%s' found at position: %d", filter, i)
}
filters[filter] = struct{}{}
if filter != "" {
// Validate filter expression is valid
if err := c.validateFilter(filter); err != nil {
return err
}
}
if tags != "" {
// Validate tags
for _, tagStr := range strings.Split(tags, ",") {
if err := c.validateTag(tagStr); err != nil {
return err
}
}
}
}
return nil
}
func (c *Config) validateTags() error {
for _, t := range c.Tags {
if err := c.validateTag(t); err != nil {
return err
}
}
return nil
}
func (c *Config) validateTemplate(template string) error {
hasMeasurement := false
for _, p := range strings.Split(template, ".") {
if p == "measurement" || p == "measurement*" {
hasMeasurement = true
}
}
if !hasMeasurement {
return fmt.Errorf("no measurement in template `%s`", template)
}
return nil
}
func (c *Config) validateFilter(filter string) error {
for _, p := range strings.Split(filter, ".") {
if p == "" {
return fmt.Errorf("filter contains blank section: %s", filter)
}
if strings.Contains(p, "*") && p != "*" {
return fmt.Errorf("invalid filter wildcard section: %s", filter)
}
}
return nil
}
func (c *Config) validateTag(keyValue string) error {
parts := strings.Split(keyValue, "=")
if len(parts) != 2 {
return fmt.Errorf("invalid template tags: '%s'", keyValue)
}
if parts[0] == "" || parts[1] == "" {
return fmt.Errorf("invalid template tags: %s'", keyValue)
}
return nil
}
// Configs wraps a slice of Config to aggregate diagnostics.
type Configs []Config
// Diagnostics returns one set of diagnostics for all of the Configs.
func (c Configs) Diagnostics() (*diagnostics.Diagnostics, error) {
d := &diagnostics.Diagnostics{
Columns: []string{"enabled", "bind-address", "protocol", "database", "retention-policy", "batch-size", "batch-pending", "batch-timeout"},
}
for _, cc := range c {
if !cc.Enabled {
d.AddRow([]interface{}{false})
continue
}
r := []interface{}{true, cc.BindAddress, cc.Protocol, cc.Database, cc.RetentionPolicy, cc.BatchSize, cc.BatchPending, cc.BatchTimeout}
d.AddRow(r)
}
return d, nil
}
// Enabled returns true if any underlying Config is Enabled.
func (c Configs) Enabled() bool {
for _, cc := range c {
if cc.Enabled {
return true
}
}
return false
}

View File

@@ -0,0 +1,170 @@
package graphite_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/graphite"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c graphite.Config
if _, err := toml.Decode(`
bind-address = ":8080"
database = "mydb"
retention-policy = "myrp"
enabled = true
protocol = "tcp"
batch-size=100
batch-pending=77
batch-timeout="1s"
consistency-level="one"
templates=["servers.* .host.measurement*"]
tags=["region=us-east"]
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.BindAddress != ":8080" {
t.Fatalf("unexpected bind address: %s", c.BindAddress)
} else if c.Database != "mydb" {
t.Fatalf("unexpected database selected: %s", c.Database)
} else if c.RetentionPolicy != "myrp" {
t.Fatalf("unexpected retention policy selected: %s", c.RetentionPolicy)
} else if c.Enabled != true {
t.Fatalf("unexpected graphite enabled: %v", c.Enabled)
} else if c.Protocol != "tcp" {
t.Fatalf("unexpected graphite protocol: %s", c.Protocol)
} else if c.BatchSize != 100 {
t.Fatalf("unexpected graphite batch size: %d", c.BatchSize)
} else if c.BatchPending != 77 {
t.Fatalf("unexpected graphite batch pending: %d", c.BatchPending)
} else if time.Duration(c.BatchTimeout) != time.Second {
t.Fatalf("unexpected graphite batch timeout: %v", c.BatchTimeout)
} else if c.ConsistencyLevel != "one" {
t.Fatalf("unexpected graphite consistency setting: %s", c.ConsistencyLevel)
}
if len(c.Templates) != 1 && c.Templates[0] != "servers.* .host.measurement*" {
t.Fatalf("unexpected graphite templates setting: %v", c.Templates)
}
if len(c.Tags) != 1 && c.Tags[0] != "regsion=us-east" {
t.Fatalf("unexpected graphite templates setting: %v", c.Tags)
}
}
func TestConfigValidateEmptyTemplate(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{""}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{" "}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateTooManyField(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{"a measurement b c"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateTemplatePatterns(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{"*measurement"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{".host.region"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateFilter(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{".server measurement*"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{". .server measurement*"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{"server* measurement*"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateTemplateTags(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{"*.server measurement* foo"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{"*.server measurement* foo=bar="}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{"*.server measurement* foo=bar,"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Templates = []string{"*.server measurement* ="}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateDefaultTags(t *testing.T) {
c := &graphite.Config{}
c.Tags = []string{"foo"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Tags = []string{"foo=bar="}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Tags = []string{"foo=bar", ""}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
c.Tags = []string{"="}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}
func TestConfigValidateFilterDuplicates(t *testing.T) {
c := &graphite.Config{}
c.Templates = []string{"foo measurement*", "foo .host.measurement"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
// duplicate default templates
c.Templates = []string{"measurement*", ".host.measurement"}
if err := c.Validate(); err == nil {
t.Errorf("config validate expected error. got nil")
}
}

View File

@@ -0,0 +1,14 @@
package graphite
import "fmt"
// An UnsupportedValueError is returned when a parsed value is not
// supported.
type UnsupportedValueError struct {
Field string
Value float64
}
func (err *UnsupportedValueError) Error() string {
return fmt.Sprintf(`field "%s" value: "%v" is unsupported`, err.Field, err.Value)
}

View File

@@ -0,0 +1,422 @@
package graphite
import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"
"github.com/influxdata/influxdb/models"
)
// Minimum and maximum supported dates for timestamps.
var (
// The minimum graphite timestamp allowed.
MinDate = time.Date(1901, 12, 13, 0, 0, 0, 0, time.UTC)
// The maximum graphite timestamp allowed.
MaxDate = time.Date(2038, 1, 19, 0, 0, 0, 0, time.UTC)
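// (Roughly the bounds representable by a signed 32-bit Unix timestamp.)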
)
var defaultTemplate *template
func init() {
var err error
defaultTemplate, err = NewTemplate("measurement*", nil, DefaultSeparator)
if err != nil {
panic(err)
}
}
// Parser encapsulates a Graphite Parser.
type Parser struct {
matcher *matcher
tags models.Tags
}
// Options are configurable values that can be provided to a Parser.
type Options struct {
Separator string
Templates []string
DefaultTags models.Tags
}
// NewParserWithOptions returns a graphite parser using the given options.
func NewParserWithOptions(options Options) (*Parser, error) {
matcher := newMatcher()
matcher.AddDefaultTemplate(defaultTemplate)
for _, pattern := range options.Templates {
template := pattern
filter := ""
// Format is [filter] <template> [tag1=value1,tag2=value2]
parts := strings.Fields(pattern)
if len(parts) < 1 {
continue
} else if len(parts) >= 2 {
if strings.Contains(parts[1], "=") {
template = parts[0]
} else {
filter = parts[0]
template = parts[1]
}
}
// Parse out the default tags specific to this template
var tags models.Tags
if strings.Contains(parts[len(parts)-1], "=") {
tagStrs := strings.Split(parts[len(parts)-1], ",")
for _, kv := range tagStrs {
parts := strings.Split(kv, "=")
tags.SetString(parts[0], parts[1])
}
}
tmpl, err := NewTemplate(template, tags, options.Separator)
if err != nil {
return nil, err
}
matcher.Add(filter, tmpl)
}
return &Parser{matcher: matcher, tags: options.DefaultTags}, nil
}
// NewParser returns a GraphiteParser instance.
func NewParser(templates []string, defaultTags models.Tags) (*Parser, error) {
return NewParserWithOptions(
Options{
Templates: templates,
DefaultTags: defaultTags,
Separator: DefaultSeparator,
})
}
// Parse performs Graphite parsing of a single line.
func (p *Parser) Parse(line string) (models.Point, error) {
// Break into 3 fields (name, value, timestamp).
fields := strings.Fields(line)
if len(fields) != 2 && len(fields) != 3 {
return nil, fmt.Errorf("received %q which doesn't have required fields", line)
}
// decode the name and tags
template := p.matcher.Match(fields[0])
measurement, tags, field, err := template.Apply(fields[0])
if err != nil {
return nil, err
}
// Could not extract measurement, use the raw value
if measurement == "" {
measurement = fields[0]
}
// Parse value.
v, err := strconv.ParseFloat(fields[1], 64)
if err != nil {
return nil, fmt.Errorf(`field "%s" value: %s`, fields[0], err)
}
if math.IsNaN(v) || math.IsInf(v, 0) {
return nil, &UnsupportedValueError{Field: fields[0], Value: v}
}
fieldValues := map[string]interface{}{}
if field != "" {
fieldValues[field] = v
} else {
fieldValues["value"] = v
}
// If no 3rd field, use now as timestamp
timestamp := time.Now().UTC()
if len(fields) == 3 {
// Parse timestamp.
unixTime, err := strconv.ParseFloat(fields[2], 64)
if err != nil {
return nil, fmt.Errorf(`field "%s" time: %s`, fields[0], err)
}
// -1 is a special value that gets converted to current UTC time
// See https://github.com/graphite-project/carbon/issues/54
if unixTime != float64(-1) {
// Check if we have fractional seconds
timestamp = time.Unix(int64(unixTime), int64((unixTime-math.Floor(unixTime))*float64(time.Second)))
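// e.g. "1435077219.5" becomes time.Unix(1435077219, 500000000): the
// fractional part of the value maps to nanoseconds.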
if timestamp.Before(MinDate) || timestamp.After(MaxDate) {
return nil, fmt.Errorf("timestamp out of range")
}
}
}
// Set the default tags on the point if they are not already set
for _, t := range p.tags {
if _, ok := tags[string(t.Key)]; !ok {
tags[string(t.Key)] = string(t.Value)
}
}
return models.NewPoint(measurement, models.NewTags(tags), fieldValues, timestamp)
}
// ApplyTemplate extracts the template fields from the given line and
// returns the measurement name, tags, and field.
func (p *Parser) ApplyTemplate(line string) (string, map[string]string, string, error) {
// Break line into fields (name, value, timestamp), only name is used
fields := strings.Fields(line)
if len(fields) == 0 {
return "", make(map[string]string), "", nil
}
// decode the name and tags
template := p.matcher.Match(fields[0])
name, tags, field, err := template.Apply(fields[0])
// Set the default tags on the point if they are not already set
for _, t := range p.tags {
if _, ok := tags[string(t.Key)]; !ok {
tags[string(t.Key)] = string(t.Value)
}
}
return name, tags, field, err
}
// template represents a pattern and tags to map a graphite metric string to an InfluxDB Point.
type template struct {
tags []string
defaultTags models.Tags
greedyMeasurement bool
separator string
}
// NewTemplate returns a new template ensuring it has a measurement
// specified.
func NewTemplate(pattern string, defaultTags models.Tags, separator string) (*template, error) {
tags := strings.Split(pattern, ".")
hasMeasurement := false
template := &template{tags: tags, defaultTags: defaultTags, separator: separator}
for _, tag := range tags {
if strings.HasPrefix(tag, "measurement") {
hasMeasurement = true
}
if tag == "measurement*" {
template.greedyMeasurement = true
}
}
if !hasMeasurement {
return nil, fmt.Errorf("no measurement specified for template. %q", pattern)
}
return template, nil
}
// Apply extracts the template fields from the given line and returns the
// measurement name, tags, and field.
func (t *template) Apply(line string) (string, map[string]string, string, error) {
fields := strings.Split(line, ".")
var (
measurement []string
tags = make(map[string][]string)
field string
hasFieldWildcard = false
hasMeasurementWildcard = false
)
// Set any default tags
for _, t := range t.defaultTags {
tags[string(t.Key)] = append(tags[string(t.Key)], string(t.Value))
}
// See if an invalid combination has been specified in the template:
for _, tag := range t.tags {
if tag == "measurement*" {
hasMeasurementWildcard = true
} else if tag == "field*" {
hasFieldWildcard = true
}
}
if hasFieldWildcard && hasMeasurementWildcard {
return "", nil, "", fmt.Errorf("either 'field*' or 'measurement*' can be used in each template (but not both together): %q", strings.Join(t.tags, t.separator))
}
for i, tag := range t.tags {
if i >= len(fields) {
continue
}
if tag == "measurement" {
measurement = append(measurement, fields[i])
} else if tag == "field" {
if len(field) != 0 {
return "", nil, "", fmt.Errorf("'field' can only be used once in each template: %q", line)
}
field = fields[i]
} else if tag == "field*" {
field = strings.Join(fields[i:], t.separator)
break
} else if tag == "measurement*" {
measurement = append(measurement, fields[i:]...)
break
} else if tag != "" {
tags[tag] = append(tags[tag], fields[i])
}
}
// Convert to map of strings.
outTags := make(map[string]string)
for k, values := range tags {
outTags[k] = strings.Join(values, t.separator)
}
return strings.Join(measurement, t.separator), outTags, field, nil
}
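// Worked example (a sketch, not part of the original source): with the
// template "env.zone.host.measurement*" and separator ".", applying the line
// "prod.us-west.server01.cpu.load" yields measurement "cpu.load", tags
// {env: "prod", zone: "us-west", host: "server01"}, and an empty field name.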
// matcher determines which template should be applied to a given metric
// based on a filter tree.
type matcher struct {
root *node
defaultTemplate *template
}
func newMatcher() *matcher {
return &matcher{
root: &node{},
}
}
// Add inserts the template into the filter tree based on the given filter.
func (m *matcher) Add(filter string, template *template) {
if filter == "" {
m.AddDefaultTemplate(template)
return
}
m.root.Insert(filter, template)
}
func (m *matcher) AddDefaultTemplate(template *template) {
m.defaultTemplate = template
}
// Match returns the template that matches the given graphite line.
func (m *matcher) Match(line string) *template {
tmpl := m.root.Search(line)
if tmpl != nil {
return tmpl
}
return m.defaultTemplate
}
// node is an item in a sorted k-ary tree. Each child is sorted by its value.
// The special value of "*", is always last.
type node struct {
value string
children nodes
template *template
}
func (n *node) insert(values []string, template *template) {
// At the end of the path, set the template
if len(values) == 0 {
n.template = template
return
}
// See if the current element already exists in the tree. If so, insert
// into that sub-tree
for _, v := range n.children {
if v.value == values[0] {
v.insert(values[1:], template)
return
}
}
// New element, add it to the tree and sort the children
newNode := &node{value: values[0]}
n.children = append(n.children, newNode)
sort.Sort(&n.children)
// Inherit template if value is wildcard
if values[0] == "*" {
newNode.template = n.template
}
// Now insert the rest of the tree into the new element
newNode.insert(values[1:], template)
}
// Insert inserts the given string template into the tree. The filter string is separated
// on "." and each part is used as the path in the tree.
func (n *node) Insert(filter string, template *template) {
n.insert(strings.Split(filter, "."), template)
}
func (n *node) search(lineParts []string) *template {
// Nothing to search
if len(lineParts) == 0 || len(n.children) == 0 {
return n.template
}
// If the last child is a wildcard, exclude it from this search: it is sorted
// to the end, which is not always its lexicographic position, and sort.Search
// assumes the slice is sorted.
length := len(n.children)
if n.children[length-1].value == "*" {
length--
}
// Find the index of the child with an exact match
i := sort.Search(length, func(i int) bool {
return n.children[i].value >= lineParts[0]
})
// Found an exact match, so search that child sub-tree
if i < len(n.children) && n.children[i].value == lineParts[0] {
return n.children[i].search(lineParts[1:])
}
// Not an exact match, see if we have a wildcard child to search
if n.children[len(n.children)-1].value == "*" {
return n.children[len(n.children)-1].search(lineParts[1:])
}
return n.template
}
func (n *node) Search(line string) *template {
return n.search(strings.Split(line, "."))
}
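// Matching-precedence sketch (tmplExact and tmplWild are hypothetical
// templates, not in the original source):
//
//	m := newMatcher()
//	m.Add("servers.localhost", tmplExact)
//	m.Add("servers.*", tmplWild)
//
// Match("servers.localhost.cpu") descends the exact child "localhost" before
// the "*" child, so tmplExact wins; any other "servers.<host>" line falls
// back to tmplWild.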
type nodes []*node
// Less returns a boolean indicating whether the filter at position j
// is less than the filter at position k. Filters are ordered by string
// comparison of their component parts. A wildcard value "*" is never
// less than a non-wildcard value.
//
// For example, the filters:
// "*.*"
// "servers.*"
// "servers.localhost"
// "*.localhost"
//
// Would be sorted as:
// "servers.localhost"
// "servers.*"
// "*.localhost"
// "*.*"
func (n *nodes) Less(j, k int) bool {
if (*n)[j].value == "*" && (*n)[k].value != "*" {
return false
}
if (*n)[j].value != "*" && (*n)[k].value == "*" {
return true
}
return (*n)[j].value < (*n)[k].value
}
func (n *nodes) Swap(i, j int) { (*n)[i], (*n)[j] = (*n)[j], (*n)[i] }
func (n *nodes) Len() int { return len(*n) }
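// Minimal usage sketch (not part of the original file):
//
//	p, _ := NewParser([]string{"servers.* .host.measurement*"}, nil)
//	pt, _ := p.Parse("servers.localhost.cpu_load 11 1435077219")
//	// pt carries measurement "cpu_load", tag host=localhost, and field
//	// value=11 at Unix time 1435077219.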

View File

@@ -0,0 +1,724 @@
package graphite_test
import (
"reflect"
"strconv"
"testing"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/graphite"
)
func BenchmarkParse(b *testing.B) {
p, err := graphite.NewParser([]string{
"*.* .wrong.measurement*",
"servers.* .host.measurement*",
"servers.localhost .host.measurement*",
"*.localhost .host.measurement*",
"*.*.cpu .host.measurement*",
"a.b.c .host.measurement*",
"influxd.*.foo .host.measurement*",
"prod.*.mem .host.measurement*",
}, nil)
if err != nil {
b.Fatalf("unexpected error creating parser, got %v", err)
}
for i := 0; i < b.N; i++ {
p.Parse("servers.localhost.cpu.load 11 1435077219")
}
}
func TestTemplateApply(t *testing.T) {
var tests = []struct {
test string
input string
template string
measurement string
tags map[string]string
err string
}{
{
test: "metric only",
input: "cpu",
template: "measurement",
measurement: "cpu",
},
{
test: "metric with single series",
input: "cpu.server01",
template: "measurement.hostname",
measurement: "cpu",
tags: map[string]string{"hostname": "server01"},
},
{
test: "metric with multiple series",
input: "cpu.us-west.server01",
template: "measurement.region.hostname",
measurement: "cpu",
tags: map[string]string{"hostname": "server01", "region": "us-west"},
},
{
test: "metric with multiple tags",
input: "server01.example.org.cpu.us-west",
template: "hostname.hostname.hostname.measurement.region",
measurement: "cpu",
tags: map[string]string{"hostname": "server01.example.org", "region": "us-west"},
},
{
test: "no metric",
tags: make(map[string]string),
err: `no measurement specified for template. ""`,
},
{
test: "ignore unnamed",
input: "foo.cpu",
template: "measurement",
measurement: "foo",
tags: make(map[string]string),
},
{
test: "name shorter than template",
input: "foo",
template: "measurement.A.B.C",
measurement: "foo",
tags: make(map[string]string),
},
{
test: "wildcard measurement at end",
input: "prod.us-west.server01.cpu.load",
template: "env.zone.host.measurement*",
measurement: "cpu.load",
tags: map[string]string{"env": "prod", "zone": "us-west", "host": "server01"},
},
{
test: "skip fields",
input: "ignore.us-west.ignore-this-too.cpu.load",
template: ".zone..measurement*",
measurement: "cpu.load",
tags: map[string]string{"zone": "us-west"},
},
{
test: "conjoined fields",
input: "prod.us-west.server01.cpu.util.idle.percent",
template: "env.zone.host.measurement.measurement.field*",
measurement: "cpu.util",
tags: map[string]string{"env": "prod", "zone": "us-west", "host": "server01"},
},
}
for _, test := range tests {
tmpl, err := graphite.NewTemplate(test.template, nil, graphite.DefaultSeparator)
if errstr(err) != test.err {
t.Fatalf("err does not match. expected %v, got %v", test.err, err)
}
if err != nil {
// If we errored out, it was intended and the following checks won't work
continue
}
measurement, tags, _, _ := tmpl.Apply(test.input)
if measurement != test.measurement {
t.Fatalf("name parse failer. expected %v, got %v", test.measurement, measurement)
}
if len(tags) != len(test.tags) {
t.Fatalf("unexpected number of tags. expected %v, got %v", test.tags, tags)
}
for k, v := range test.tags {
if tags[k] != v {
t.Fatalf("unexpected tag value for tags[%s]. expected %q, got %q", k, v, tags[k])
}
}
}
}
func TestParseMissingMeasurement(t *testing.T) {
_, err := graphite.NewParser([]string{"a.b.c"}, nil)
if err == nil {
t.Fatalf("expected error creating parser, got nil")
}
}
func TestParse(t *testing.T) {
testTime := time.Now().Round(time.Second)
epochTime := testTime.Unix()
strTime := strconv.FormatInt(epochTime, 10)
var tests = []struct {
test string
input string
measurement string
tags map[string]string
value float64
time time.Time
template string
err string
}{
{
test: "normal case",
input: `cpu.foo.bar 50 ` + strTime,
template: "measurement.foo.bar",
measurement: "cpu",
tags: map[string]string{
"foo": "foo",
"bar": "bar",
},
value: 50,
time: testTime,
},
{
test: "metric only with float value",
input: `cpu 50.554 ` + strTime,
measurement: "cpu",
template: "measurement",
value: 50.554,
time: testTime,
},
{
test: "missing metric",
input: `1419972457825`,
template: "measurement",
err: `received "1419972457825" which doesn't have required fields`,
},
{
test: "should error parsing invalid float",
input: `cpu 50.554z 1419972457825`,
template: "measurement",
err: `field "cpu" value: strconv.ParseFloat: parsing "50.554z": invalid syntax`,
},
{
test: "should error parsing invalid int",
input: `cpu 50z 1419972457825`,
template: "measurement",
err: `field "cpu" value: strconv.ParseFloat: parsing "50z": invalid syntax`,
},
{
test: "should error parsing invalid time",
input: `cpu 50.554 14199724z57825`,
template: "measurement",
err: `field "cpu" time: strconv.ParseFloat: parsing "14199724z57825": invalid syntax`,
},
{
test: "measurement* and field* (invalid)",
input: `prod.us-west.server01.cpu.util.idle.percent 99.99 1419972457825`,
template: "env.zone.host.measurement*.field*",
err: `either 'field*' or 'measurement*' can be used in each template (but not both together): "env.zone.host.measurement*.field*"`,
},
}
for _, test := range tests {
p, err := graphite.NewParser([]string{test.template}, nil)
if err != nil {
t.Fatalf("unexpected error creating graphite parser: %v", err)
}
point, err := p.Parse(test.input)
if errstr(err) != test.err {
t.Fatalf("err does not match. expected %v, got %v", test.err, err)
}
if err != nil {
// If we errored out, it was intended and the following checks won't work
continue
}
if string(point.Name()) != test.measurement {
t.Fatalf("name parse failer. expected %v, got %v", test.measurement, string(point.Name()))
}
if len(point.Tags()) != len(test.tags) {
t.Fatalf("tags len mismatch. expected %d, got %d", len(test.tags), len(point.Tags()))
}
fields, err := point.Fields()
if err != nil {
t.Fatal(err)
}
f := fields["value"].(float64)
if fields["value"] != f {
t.Fatalf("floatValue value mismatch. expected %v, got %v", test.value, f)
}
if point.Time().UnixNano()/1000000 != test.time.UnixNano()/1000000 {
t.Fatalf("time value mismatch. expected %v, got %v", test.time.UnixNano(), point.Time().UnixNano())
}
}
}
func TestParseNaN(t *testing.T) {
p, err := graphite.NewParser([]string{"measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
_, err = p.Parse("servers.localhost.cpu_load NaN 1435077219")
if err == nil {
t.Fatalf("expected error. got nil")
}
if _, ok := err.(*graphite.UnsupportedValueError); !ok {
t.Fatalf("expected *graphite.ErrUnsupportedValue, got %v", reflect.TypeOf(err))
}
}
func TestFilterMatchDefault(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("miss.servers.localhost.cpu_load",
models.NewTags(map[string]string{}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("miss.servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchMultipleMeasurement(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement.measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu.cpu_load.10",
models.NewTags(map[string]string{"host": "localhost"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu.cpu_load.10 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchMultipleMeasurementSeparator(t *testing.T) {
p, err := graphite.NewParserWithOptions(graphite.Options{
Templates: []string{"servers.localhost .host.measurement.measurement*"},
Separator: "_",
})
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_cpu_load_10",
models.NewTags(map[string]string{"host": "localhost"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu.cpu_load.10 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchSingle(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestParseNoMatch(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.*.cpu .host.measurement.cpu.measurement"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("servers.localhost.memory.VmallocChunk",
models.NewTags(map[string]string{}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.memory.VmallocChunk 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchWildcard(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.* .host.measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchExactBeforeWildcard(t *testing.T) {
p, err := graphite.NewParser([]string{
"servers.* .wrong.measurement*",
"servers.localhost .host.measurement*"}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchMostLongestFilter(t *testing.T) {
p, err := graphite.NewParser([]string{
"*.* .wrong.measurement*",
"servers.* .wrong.measurement*",
"servers.localhost .wrong.measurement*",
"servers.localhost.cpu .host.resource.measurement*", // should match this
"*.localhost .wrong.measurement*",
}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost", "resource": "cpu"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestFilterMatchMultipleWildcards(t *testing.T) {
p, err := graphite.NewParser([]string{
"*.* .wrong.measurement*",
"servers.* .host.measurement*", // should match this
"servers.localhost .wrong.measurement*",
"*.localhost .wrong.measurement*",
}, nil)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "server01"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.server01.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestParseDefaultTags(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement*"}, models.NewTags(map[string]string{
"region": "us-east",
"zone": "1c",
"host": "should not set",
}))
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost", "region": "us-east", "zone": "1c"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestParseDefaultTemplateTags(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement* zone=1c"}, models.NewTags(map[string]string{
"region": "us-east",
"host": "should not set",
}))
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost", "region": "us-east", "zone": "1c"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestParseDefaultTemplateTagsOverrideGlobal(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost .host.measurement* zone=1c,region=us-east"}, models.NewTags(map[string]string{
"region": "shot not be set",
"host": "should not set",
}))
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost", "region": "us-east", "zone": "1c"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
func TestParseTemplateWhitespace(t *testing.T) {
p, err := graphite.NewParser([]string{"servers.localhost    .host.measurement*   zone=1c"}, models.NewTags(map[string]string{
"region": "us-east",
"host": "should not set",
}))
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
exp := models.MustNewPoint("cpu_load",
models.NewTags(map[string]string{"host": "localhost", "region": "us-east", "zone": "1c"}),
models.Fields{"value": float64(11)},
time.Unix(1435077219, 0))
pt, err := p.Parse("servers.localhost.cpu_load 11 1435077219")
if err != nil {
t.Fatalf("parse error: %v", err)
}
if exp.String() != pt.String() {
t.Errorf("parse mismatch: got %v, exp %v", pt.String(), exp.String())
}
}
// Test basic functionality of ApplyTemplate
func TestApplyTemplate(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{"current.* measurement.measurement"},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, _, _, _ := p.ApplyTemplate("current.users")
if measurement != "current_users" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current_users")
}
}
// Test ApplyTemplate with a line that matches no template
func TestApplyTemplateNoMatch(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{"foo.bar measurement.measurement"},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, _, _, _ := p.ApplyTemplate("current.users")
if measurement != "current.users" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current.users")
}
}
// Test that most specific template is chosen
func TestApplyTemplateSpecific(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{
"current.* measurement.measurement",
"current.*.* measurement.measurement.service",
},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, tags, _, _ := p.ApplyTemplate("current.users.facebook")
if measurement != "current_users" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current_users")
}
service, ok := tags["service"]
if !ok {
t.Error("Expected for template to apply a 'service' tag, but not found")
}
if service != "facebook" {
t.Errorf("Expected service='facebook' tag, got service='%s'", service)
}
}
// Test falling back when the most specific template does not match
func TestApplyTemplateSpecificIsNA(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{
"current.* measurement.service",
"current.*.*.test measurement.measurement.service",
},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, _, _, _ := p.ApplyTemplate("current.users.facebook")
if measurement != "current" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current")
}
}
func TestApplyTemplateTags(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{"current.* measurement.measurement region=us-west"},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, tags, _, _ := p.ApplyTemplate("current.users")
if measurement != "current_users" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current_users")
}
region, ok := tags["region"]
if !ok {
t.Error("Expected for template to apply a 'region' tag, but not found")
}
if region != "us-west" {
t.Errorf("Expected region='us-west' tag, got region='%s'", region)
}
}
func TestApplyTemplateField(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{"current.* measurement.measurement.field"},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
measurement, _, field, err := p.ApplyTemplate("current.users.logged_in")
if err != nil {
t.Fatal(err)
}
if measurement != "current_users" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
measurement, "current_users")
}
if field != "logged_in" {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s",
field, "logged_in")
}
}
func TestApplyTemplateFieldError(t *testing.T) {
o := graphite.Options{
Separator: "_",
Templates: []string{"current.* measurement.field.field"},
}
p, err := graphite.NewParserWithOptions(o)
if err != nil {
t.Fatalf("unexpected error creating parser, got %v", err)
}
_, _, _, err = p.ApplyTemplate("current.users.logged_in")
if err == nil {
t.Errorf("Parser.ApplyTemplate unexpected result. got %s, exp %s", err,
"'field' can only be used once in each template: current.users.logged_in")
}
}
// Test Helpers
func errstr(err error) string {
if err != nil {
return err.Error()
}
return ""
}

View File

@@ -0,0 +1,474 @@
// Package graphite provides a service for InfluxDB to ingest data via the graphite protocol.
package graphite // import "github.com/influxdata/influxdb/services/graphite"
import (
"bufio"
"fmt"
"math"
"net"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
const udpBufferSize = 65536
// statistics gathered by the graphite package.
const (
statPointsReceived = "pointsRx"
statBytesReceived = "bytesRx"
statPointsParseFail = "pointsParseFail"
statPointsNaNFail = "pointsNaNFail"
statBatchesTransmitted = "batchesTx"
statPointsTransmitted = "pointsTx"
statBatchesTransmitFail = "batchesTxFail"
statConnectionsActive = "connsActive"
statConnectionsHandled = "connsHandled"
)
type tcpConnection struct {
conn net.Conn
connectTime time.Time
}
func (c *tcpConnection) Close() {
c.conn.Close()
}
// Service represents a Graphite service.
type Service struct {
bindAddress string
database string
retentionPolicy string
protocol string
batchSize int
batchPending int
batchTimeout time.Duration
udpReadBuffer int
batcher *tsdb.PointBatcher
parser *Parser
logger zap.Logger
stats *Statistics
defaultTags models.StatisticTags
tcpConnectionsMu sync.Mutex
tcpConnections map[string]*tcpConnection
diagsKey string
ln net.Listener
addr net.Addr
udpConn *net.UDPConn
wg sync.WaitGroup
mu sync.RWMutex
ready bool // Has the required database been created?
done chan struct{} // Is the service closing or closed?
Monitor interface {
RegisterDiagnosticsClient(name string, client diagnostics.Client)
DeregisterDiagnosticsClient(name string)
}
PointsWriter interface {
WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
MetaClient interface {
CreateDatabaseWithRetentionPolicy(name string, spec *meta.RetentionPolicySpec) (*meta.DatabaseInfo, error)
CreateRetentionPolicy(database string, spec *meta.RetentionPolicySpec, makeDefault bool) (*meta.RetentionPolicyInfo, error)
Database(name string) *meta.DatabaseInfo
RetentionPolicy(database, name string) (*meta.RetentionPolicyInfo, error)
}
}
// NewService returns an instance of the Graphite service.
func NewService(c Config) (*Service, error) {
// Use defaults where necessary.
d := c.WithDefaults()
s := Service{
bindAddress: d.BindAddress,
database: d.Database,
retentionPolicy: d.RetentionPolicy,
protocol: d.Protocol,
batchSize: d.BatchSize,
batchPending: d.BatchPending,
udpReadBuffer: d.UDPReadBuffer,
batchTimeout: time.Duration(d.BatchTimeout),
logger: zap.New(zap.NullEncoder()),
stats: &Statistics{},
defaultTags: models.StatisticTags{"proto": d.Protocol, "bind": d.BindAddress},
tcpConnections: make(map[string]*tcpConnection),
diagsKey: strings.Join([]string{"graphite", d.Protocol, d.BindAddress}, ":"),
}
parser, err := NewParserWithOptions(Options{
Templates: d.Templates,
DefaultTags: d.DefaultTags(),
Separator: d.Separator})
if err != nil {
return nil, err
}
s.parser = parser
return &s, nil
}
// Open starts the Graphite input processing data.
func (s *Service) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
if !s.closed() {
return nil // Already open.
}
s.done = make(chan struct{})
s.logger.Info(fmt.Sprintf("Starting graphite service, batch size %d, batch timeout %s", s.batchSize, s.batchTimeout))
// Register diagnostics if a Monitor service is available.
if s.Monitor != nil {
s.Monitor.RegisterDiagnosticsClient(s.diagsKey, s)
}
s.batcher = tsdb.NewPointBatcher(s.batchSize, s.batchPending, s.batchTimeout)
s.batcher.Start()
// Start processing batches.
s.wg.Add(1)
go s.processBatches(s.batcher)
var err error
if strings.ToLower(s.protocol) == "tcp" {
s.addr, err = s.openTCPServer()
} else if strings.ToLower(s.protocol) == "udp" {
s.addr, err = s.openUDPServer()
} else {
return fmt.Errorf("unrecognized Graphite input protocol %s", s.protocol)
}
if err != nil {
return err
}
s.logger.Info(fmt.Sprintf("Listening on %s: %s", strings.ToUpper(s.protocol), s.addr.String()))
return nil
}
func (s *Service) closeAllConnections() {
s.tcpConnectionsMu.Lock()
defer s.tcpConnectionsMu.Unlock()
for _, c := range s.tcpConnections {
c.Close()
}
}
// Close stops all data processing on the Graphite input.
func (s *Service) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.closed() {
return nil // Already closed.
}
close(s.done)
s.closeAllConnections()
if s.ln != nil {
s.ln.Close()
}
if s.udpConn != nil {
s.udpConn.Close()
}
if s.batcher != nil {
s.batcher.Stop()
}
if s.Monitor != nil {
s.Monitor.DeregisterDiagnosticsClient(s.diagsKey)
}
s.wg.Wait()
s.done = nil
return nil
}
// Closed returns true if the service is currently closed.
func (s *Service) Closed() bool {
s.mu.Lock()
defer s.mu.Unlock()
return s.closed()
}
func (s *Service) closed() bool {
select {
case <-s.done:
// Service is closing.
return true
default:
}
return s.done == nil
}
// createInternalStorage ensures that the required database has been created.
func (s *Service) createInternalStorage() error {
s.mu.RLock()
ready := s.ready
s.mu.RUnlock()
if ready {
return nil
}
if db := s.MetaClient.Database(s.database); db != nil {
if rp, _ := s.MetaClient.RetentionPolicy(s.database, s.retentionPolicy); rp == nil {
spec := meta.RetentionPolicySpec{Name: s.retentionPolicy}
if _, err := s.MetaClient.CreateRetentionPolicy(s.database, &spec, true); err != nil {
return err
}
}
} else {
spec := meta.RetentionPolicySpec{Name: s.retentionPolicy}
if _, err := s.MetaClient.CreateDatabaseWithRetentionPolicy(s.database, &spec); err != nil {
return err
}
}
// The service is now ready.
s.mu.Lock()
s.ready = true
s.mu.Unlock()
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.logger = log.With(
zap.String("service", "graphite"),
zap.String("addr", s.bindAddress),
)
}
// Statistics maintains statistics for the graphite service.
type Statistics struct {
PointsReceived int64
BytesReceived int64
PointsParseFail int64
PointsNaNFail int64
BatchesTransmitted int64
PointsTransmitted int64
BatchesTransmitFail int64
ActiveConnections int64
HandledConnections int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "graphite",
Tags: s.defaultTags.Merge(tags),
Values: map[string]interface{}{
statPointsReceived: atomic.LoadInt64(&s.stats.PointsReceived),
statBytesReceived: atomic.LoadInt64(&s.stats.BytesReceived),
statPointsParseFail: atomic.LoadInt64(&s.stats.PointsParseFail),
statPointsNaNFail: atomic.LoadInt64(&s.stats.PointsNaNFail),
statBatchesTransmitted: atomic.LoadInt64(&s.stats.BatchesTransmitted),
statPointsTransmitted: atomic.LoadInt64(&s.stats.PointsTransmitted),
statBatchesTransmitFail: atomic.LoadInt64(&s.stats.BatchesTransmitFail),
statConnectionsActive: atomic.LoadInt64(&s.stats.ActiveConnections),
statConnectionsHandled: atomic.LoadInt64(&s.stats.HandledConnections),
},
}}
}
// Addr returns the address the Service binds to.
func (s *Service) Addr() net.Addr {
return s.addr
}
// openTCPServer opens the Graphite input in TCP mode and starts processing data.
func (s *Service) openTCPServer() (net.Addr, error) {
ln, err := net.Listen("tcp", s.bindAddress)
if err != nil {
return nil, err
}
s.ln = ln
s.wg.Add(1)
go func() {
defer s.wg.Done()
for {
conn, err := s.ln.Accept()
if opErr, ok := err.(*net.OpError); ok && !opErr.Temporary() {
s.logger.Info("graphite TCP listener closed")
return
}
if err != nil {
s.logger.Info("error accepting TCP connection", zap.Error(err))
continue
}
s.wg.Add(1)
go s.handleTCPConnection(conn)
}
}()
return ln.Addr(), nil
}
// handleTCPConnection services an individual TCP connection for the Graphite input.
func (s *Service) handleTCPConnection(conn net.Conn) {
defer s.wg.Done()
defer conn.Close()
defer atomic.AddInt64(&s.stats.ActiveConnections, -1)
defer s.untrackConnection(conn)
atomic.AddInt64(&s.stats.ActiveConnections, 1)
atomic.AddInt64(&s.stats.HandledConnections, 1)
s.trackConnection(conn)
reader := bufio.NewReader(conn)
for {
// Read up to the next newline.
buf, err := reader.ReadBytes('\n')
if err != nil {
return
}
// Trim the buffer, even though there should be no padding
line := strings.TrimSpace(string(buf))
atomic.AddInt64(&s.stats.PointsReceived, 1)
atomic.AddInt64(&s.stats.BytesReceived, int64(len(buf)))
s.handleLine(line)
}
}
func (s *Service) trackConnection(c net.Conn) {
s.tcpConnectionsMu.Lock()
defer s.tcpConnectionsMu.Unlock()
s.tcpConnections[c.RemoteAddr().String()] = &tcpConnection{
conn: c,
connectTime: time.Now().UTC(),
}
}
func (s *Service) untrackConnection(c net.Conn) {
s.tcpConnectionsMu.Lock()
defer s.tcpConnectionsMu.Unlock()
delete(s.tcpConnections, c.RemoteAddr().String())
}
// openUDPServer opens the Graphite input in UDP mode and starts processing incoming data.
func (s *Service) openUDPServer() (net.Addr, error) {
addr, err := net.ResolveUDPAddr("udp", s.bindAddress)
if err != nil {
return nil, err
}
s.udpConn, err = net.ListenUDP("udp", addr)
if err != nil {
return nil, err
}
if s.udpReadBuffer != 0 {
err = s.udpConn.SetReadBuffer(s.udpReadBuffer)
if err != nil {
return nil, fmt.Errorf("unable to set UDP read buffer to %d: %s",
s.udpReadBuffer, err)
}
}
buf := make([]byte, udpBufferSize)
s.wg.Add(1)
go func() {
defer s.wg.Done()
for {
n, _, err := s.udpConn.ReadFromUDP(buf)
if err != nil {
s.udpConn.Close()
return
}
lines := strings.Split(string(buf[:n]), "\n")
for _, line := range lines {
s.handleLine(line)
}
atomic.AddInt64(&s.stats.PointsReceived, int64(len(lines)))
atomic.AddInt64(&s.stats.BytesReceived, int64(n))
}
}()
return s.udpConn.LocalAddr(), nil
}
func (s *Service) handleLine(line string) {
if line == "" {
return
}
// Parse it.
point, err := s.parser.Parse(line)
if err != nil {
switch err := err.(type) {
case *UnsupportedValueError:
// Graphite ignores NaN values with no error.
if math.IsNaN(err.Value) {
atomic.AddInt64(&s.stats.PointsNaNFail, 1)
return
}
}
s.logger.Info(fmt.Sprintf("unable to parse line: %s: %s", line, err))
atomic.AddInt64(&s.stats.PointsParseFail, 1)
return
}
s.batcher.In() <- point
}
// processBatches continually drains the given batcher and writes the batches to the database.
func (s *Service) processBatches(batcher *tsdb.PointBatcher) {
defer s.wg.Done()
for {
select {
case batch := <-batcher.Out():
// Will attempt to create database if not yet created.
if err := s.createInternalStorage(); err != nil {
s.logger.Info(fmt.Sprintf("Required database or retention policy do not yet exist: %s", err.Error()))
continue
}
if err := s.PointsWriter.WritePointsPrivileged(s.database, s.retentionPolicy, models.ConsistencyLevelAny, batch); err == nil {
atomic.AddInt64(&s.stats.BatchesTransmitted, 1)
atomic.AddInt64(&s.stats.PointsTransmitted, int64(len(batch)))
} else {
s.logger.Info(fmt.Sprintf("failed to write point batch to database %q: %s", s.database, err))
atomic.AddInt64(&s.stats.BatchesTransmitFail, 1)
}
case <-s.done:
return
}
}
}
// Diagnostics returns diagnostics of the graphite service.
func (s *Service) Diagnostics() (*diagnostics.Diagnostics, error) {
s.tcpConnectionsMu.Lock()
defer s.tcpConnectionsMu.Unlock()
d := &diagnostics.Diagnostics{
Columns: []string{"local", "remote", "connect time"},
Rows: make([][]interface{}, 0, len(s.tcpConnections)),
}
for _, v := range s.tcpConnections {
d.Rows = append(d.Rows, []interface{}{v.conn.LocalAddr().String(), v.conn.RemoteAddr().String(), v.connectTime})
}
return d, nil
}
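// Minimal wiring sketch (metaClient and pointsWriter are hypothetical values,
// not part of the original file): a Service needs its MetaClient and
// PointsWriter set before Open, as the mocks in the tests below illustrate.
//
//	s, err := NewService(NewConfig())
//	if err != nil {
//		// handle error
//	}
//	s.MetaClient = metaClient     // your meta client implementation
//	s.PointsWriter = pointsWriter // your points-writer implementation
//	if err := s.Open(); err != nil {
//		// handle error
//	}
//	defer s.Close()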

View File

@@ -0,0 +1,309 @@
package graphite
import (
"errors"
"fmt"
"net"
"os"
"sync"
"testing"
"time"
"github.com/influxdata/influxdb/internal"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/toml"
"github.com/uber-go/zap"
)
func Test_Service_OpenClose(t *testing.T) {
// Let the OS assign a random port since we are only opening and closing the service,
// not actually connecting to it.
c := Config{BindAddress: "127.0.0.1:0"}
service := NewTestService(&c)
// Closing a closed service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
// Closing a closed service again is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Opening an already open service is fine.
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Reopening a previously opened service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Tidy up.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
}
func TestService_CreatesDatabase(t *testing.T) {
t.Parallel()
s := NewTestService(nil)
s.WritePointsFn = func(string, string, models.ConsistencyLevel, []models.Point) error {
return nil
}
called := make(chan struct{})
s.MetaClient.CreateDatabaseWithRetentionPolicyFn = func(name string, _ *meta.RetentionPolicySpec) (*meta.DatabaseInfo, error) {
if name != s.Service.database {
t.Errorf("\n\texp = %s\n\tgot = %s\n", s.Service.database, name)
}
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, errors.New("an error")
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
points, err := models.ParsePointsString(`cpu value=1`)
if err != nil {
t.Fatal(err)
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should not have been switched due to meta client error.
s.Service.mu.RLock()
ready := s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// This time MC won't cause an error.
s.MetaClient.CreateDatabaseWithRetentionPolicyFn = func(name string, _ *meta.RetentionPolicySpec) (*meta.DatabaseInfo, error) {
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, nil
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should now be true.
s.Service.mu.RLock()
ready = s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
s.Service.Close()
}
func Test_Service_TCP(t *testing.T) {
t.Parallel()
now := time.Now().UTC().Round(time.Second)
config := Config{}
config.Database = "graphitedb"
config.BatchSize = 0 // No batching.
config.BatchTimeout = toml.Duration(time.Second)
config.BindAddress = ":0"
service := NewTestService(&config)
// Allow test to wait until points are written.
var wg sync.WaitGroup
wg.Add(1)
service.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
defer wg.Done()
pt, _ := models.NewPoint(
"cpu",
models.NewTags(map[string]string{}),
map[string]interface{}{"value": 23.456},
time.Unix(now.Unix(), 0))
if database != "graphitedb" {
t.Fatalf("unexpected database: %s", database)
} else if retentionPolicy != "" {
t.Fatalf("unexpected retention policy: %s", retentionPolicy)
} else if len(points) != 1 {
t.Fatalf("expected 1 point, got %d", len(points))
} else if points[0].String() != pt.String() {
t.Fatalf("expected point %v, got %v", pt.String(), points[0].String())
}
return nil
}
if err := service.Service.Open(); err != nil {
t.Fatalf("failed to open Graphite service: %s", err.Error())
}
// Connect to the graphite endpoint we just spun up
_, port, _ := net.SplitHostPort(service.Service.Addr().String())
conn, err := net.Dial("tcp", "127.0.0.1:"+port)
if err != nil {
t.Fatal(err)
}
data := []byte(`cpu 23.456 `)
data = append(data, []byte(fmt.Sprintf("%d", now.Unix()))...)
data = append(data, '\n')
data = append(data, []byte(`memory NaN `)...)
data = append(data, []byte(fmt.Sprintf("%d", now.Unix()))...)
data = append(data, '\n')
_, err = conn.Write(data)
conn.Close()
if err != nil {
t.Fatal(err)
}
wg.Wait()
}
func Test_Service_UDP(t *testing.T) {
t.Parallel()
now := time.Now().UTC().Round(time.Second)
config := Config{}
config.Database = "graphitedb"
config.BatchSize = 0 // No batching.
config.BatchTimeout = toml.Duration(time.Second)
config.BindAddress = ":10000"
config.Protocol = "udp"
service := NewTestService(&config)
// Allow test to wait until points are written.
var wg sync.WaitGroup
wg.Add(1)
service.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
defer wg.Done()
pt, _ := models.NewPoint(
"cpu",
models.NewTags(map[string]string{}),
map[string]interface{}{"value": 23.456},
time.Unix(now.Unix(), 0))
if database != "graphitedb" {
t.Fatalf("unexpected database: %s", database)
} else if retentionPolicy != "" {
t.Fatalf("unexpected retention policy: %s", retentionPolicy)
} else if points[0].String() != pt.String() {
t.Fatalf("unexpected points: %#v", points[0].String())
}
return nil
}
if err := service.Service.Open(); err != nil {
t.Fatalf("failed to open Graphite service: %s", err.Error())
}
// Connect to the graphite endpoint we just spun up
_, port, _ := net.SplitHostPort(service.Service.Addr().String())
conn, err := net.Dial("udp", "127.0.0.1:"+port)
if err != nil {
t.Fatal(err)
}
data := []byte(`cpu 23.456 `)
data = append(data, []byte(fmt.Sprintf("%d", now.Unix()))...)
data = append(data, '\n')
_, err = conn.Write(data)
if err != nil {
t.Fatal(err)
}
wg.Wait()
conn.Close()
}
type TestService struct {
Service *Service
MetaClient *internal.MetaClientMock
WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
func NewTestService(c *Config) *TestService {
if c == nil {
defaultC := NewConfig()
c = &defaultC
}
gservice, err := NewService(*c)
if err != nil {
panic(err)
}
service := &TestService{
Service: gservice,
MetaClient: &internal.MetaClientMock{},
}
service.MetaClient.CreateRetentionPolicyFn = func(string, *meta.RetentionPolicySpec, bool) (*meta.RetentionPolicyInfo, error) {
return nil, nil
}
service.MetaClient.CreateDatabaseWithRetentionPolicyFn = func(string, *meta.RetentionPolicySpec) (*meta.DatabaseInfo, error) {
return nil, nil
}
service.MetaClient.DatabaseFn = func(string) *meta.DatabaseInfo {
return nil
}
service.MetaClient.RetentionPolicyFn = func(string, string) (*meta.RetentionPolicyInfo, error) {
return nil, nil
}
if testing.Verbose() {
service.Service.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
// Set the Meta Client and PointsWriter.
service.Service.MetaClient = service.MetaClient
service.Service.PointsWriter = service
return service
}
func (s *TestService) WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
return s.WritePointsFn(database, retentionPolicy, consistencyLevel, points)
}

View File

@@ -0,0 +1,66 @@
package httpd
import "github.com/influxdata/influxdb/monitor/diagnostics"
const (
// DefaultBindAddress is the default address to bind to.
DefaultBindAddress = ":8086"
// DefaultRealm is the default realm sent back when issuing a basic auth challenge.
DefaultRealm = "InfluxDB"
// DefaultBindSocket is the default unix socket to bind to.
DefaultBindSocket = "/var/run/influxdb.sock"
)
// Config represents a configuration for an HTTP service.
type Config struct {
Enabled bool `toml:"enabled"`
BindAddress string `toml:"bind-address"`
AuthEnabled bool `toml:"auth-enabled"`
LogEnabled bool `toml:"log-enabled"`
WriteTracing bool `toml:"write-tracing"`
PprofEnabled bool `toml:"pprof-enabled"`
HTTPSEnabled bool `toml:"https-enabled"`
HTTPSCertificate string `toml:"https-certificate"`
HTTPSPrivateKey string `toml:"https-private-key"`
MaxRowLimit int `toml:"max-row-limit"`
MaxConnectionLimit int `toml:"max-connection-limit"`
SharedSecret string `toml:"shared-secret"`
Realm string `toml:"realm"`
UnixSocketEnabled bool `toml:"unix-socket-enabled"`
BindSocket string `toml:"bind-socket"`
}
// NewConfig returns a new Config with default settings.
func NewConfig() Config {
return Config{
Enabled: true,
BindAddress: DefaultBindAddress,
LogEnabled: true,
PprofEnabled: true,
HTTPSEnabled: false,
HTTPSCertificate: "/etc/ssl/influxdb.pem",
MaxRowLimit: 0,
Realm: DefaultRealm,
UnixSocketEnabled: false,
BindSocket: DefaultBindSocket,
}
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"bind-address": c.BindAddress,
"https-enabled": c.HTTPSEnabled,
"max-row-limit": c.MaxRowLimit,
"max-connection-limit": c.MaxConnectionLimit,
}), nil
}
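// Illustrative TOML sketch for the fields above (the section name "[http]"
// is assumed; it is not shown in this file):
//
//	[http]
//	  enabled = true
//	  bind-address = ":8086"
//	  auth-enabled = false
//	  log-enabled = true
//	  https-enabled = false
//	  unix-socket-enabled = false
//	  bind-socket = "/var/run/influxdb.sock"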

View File

@@ -0,0 +1,55 @@
package httpd_test
import (
"testing"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/httpd"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c httpd.Config
if _, err := toml.Decode(`
enabled = true
bind-address = ":8080"
auth-enabled = true
log-enabled = true
write-tracing = true
https-enabled = true
https-certificate = "/dev/null"
unix-socket-enabled = true
bind-socket = "/var/run/influxdb.sock"
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Fatalf("unexpected enabled: %v", c.Enabled)
} else if c.BindAddress != ":8080" {
t.Fatalf("unexpected bind address: %s", c.BindAddress)
} else if c.AuthEnabled != true {
t.Fatalf("unexpected auth enabled: %v", c.AuthEnabled)
} else if c.LogEnabled != true {
t.Fatalf("unexpected log enabled: %v", c.LogEnabled)
} else if c.WriteTracing != true {
t.Fatalf("unexpected write tracing: %v", c.WriteTracing)
} else if c.HTTPSEnabled != true {
t.Fatalf("unexpected https enabled: %v", c.HTTPSEnabled)
} else if c.HTTPSCertificate != "/dev/null" {
t.Fatalf("unexpected https certificate: %v", c.HTTPSCertificate)
} else if c.UnixSocketEnabled != true {
t.Fatalf("unexpected unix socket enabled: %v", c.UnixSocketEnabled)
} else if c.BindSocket != "/var/run/influxdb.sock" {
t.Fatalf("unexpected bind unix socket: %v", c.BindSocket)
}
}
func TestConfig_WriteTracing(t *testing.T) {
c := httpd.Config{WriteTracing: true}
s := httpd.NewService(c)
if !s.Handler.Config.WriteTracing {
t.Fatalf("write tracing was not set")
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,706 @@
package httpd_test
import (
"bytes"
"errors"
"fmt"
"io"
"log"
"mime/multipart"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"
"github.com/influxdata/influxdb/internal"
"github.com/dgrijalva/jwt-go"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/httpd"
"github.com/influxdata/influxdb/services/meta"
)
// Ensure the handler returns results from a query (including nil results).
func TestHandler_Query(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if stmt.String() != `SELECT * FROM bar` {
t.Fatalf("unexpected query: %s", stmt.String())
} else if ctx.Database != `foo` {
t.Fatalf("unexpected db: %s", ctx.Database)
}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 2, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler returns results from a query passed as a file.
func TestHandler_Query_File(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if stmt.String() != `SELECT * FROM bar` {
t.Fatalf("unexpected query: %s", stmt.String())
} else if ctx.Database != `foo` {
t.Fatalf("unexpected db: %s", ctx.Database)
}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 2, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
var body bytes.Buffer
writer := multipart.NewWriter(&body)
part, err := writer.CreateFormFile("q", "")
if err != nil {
t.Fatal(err)
}
io.WriteString(part, "SELECT * FROM bar")
if err := writer.Close(); err != nil {
t.Fatal(err)
}
r := MustNewJSONRequest("POST", "/query?db=foo", &body)
r.Header.Set("Content-Type", writer.FormDataContentType())
w := httptest.NewRecorder()
h.ServeHTTP(w, r)
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Test query with user authentication.
func TestHandler_Query_Auth(t *testing.T) {
// Create the handler to be tested.
h := NewHandler(true)
// Set mock meta client functions for the handler to use.
h.MetaClient.AdminUserExistsFn = func() bool { return true }
h.MetaClient.UserFn = func(username string) (meta.User, error) {
if username != "user1" {
return nil, meta.ErrUserNotFound
}
return &meta.UserInfo{
Name: "user1",
Hash: "abcd",
Admin: true,
}, nil
}
h.MetaClient.AuthenticateFn = func(u, p string) (meta.User, error) {
if u != "user1" {
return nil, fmt.Errorf("unexpected user: exp: user1, got: %s", u)
} else if p != "abcd" {
return nil, fmt.Errorf("unexpected password: exp: abcd, got: %s", p)
}
return h.MetaClient.User(u)
}
// Set mock query authorizer for handler to use.
h.QueryAuthorizer.AuthorizeQueryFn = func(u meta.User, query *influxql.Query, database string) error {
return nil
}
// Set mock statement executor for handler to use.
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if stmt.String() != `SELECT * FROM bar` {
t.Fatalf("unexpected query: %s", stmt.String())
} else if ctx.Database != `foo` {
t.Fatalf("unexpected db: %s", ctx.Database)
}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 2, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
// Test the handler with valid user and password in the URL parameters.
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?u=user1&p=abcd&db=foo&q=SELECT+*+FROM+bar", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
// Test the handler with valid user and password using basic auth.
w = httptest.NewRecorder()
r := MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil)
r.SetBasicAuth("user1", "abcd")
h.ServeHTTP(w, r)
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
// Test the handler with valid JWT bearer token.
req := MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil)
// Create a signed JWT token string and add it to the request header.
_, signedToken := MustJWTToken("user1", h.Config.SharedSecret, false)
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signedToken))
w = httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
// Test the handler with JWT token signed with invalid key.
req = MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil)
// Create a signed JWT token string and add it to the request header.
_, signedToken = MustJWTToken("user1", "invalid key", false)
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signedToken))
w = httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"error":"signature is invalid"}` {
t.Fatalf("unexpected body: %s", body)
}
// Test handler with valid JWT token carrying non-existent user.
_, signedToken = MustJWTToken("bad_user", h.Config.SharedSecret, false)
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signedToken))
w = httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"error":"user not found"}` {
t.Fatalf("unexpected body: %s", body)
}
// Test handler with expired JWT token.
_, signedToken = MustJWTToken("user1", h.Config.SharedSecret, true)
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signedToken))
w = httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if !strings.Contains(w.Body.String(), `{"error":"Token is expired`) {
t.Fatalf("unexpected body: %s", w.Body.String())
}
// Test handler with JWT token that has no expiration set.
token, _ := MustJWTToken("user1", h.Config.SharedSecret, false)
delete(token.Claims.(jwt.MapClaims), "exp")
signedToken, err := token.SignedString([]byte(h.Config.SharedSecret))
if err != nil {
t.Fatal(err)
}
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signedToken))
w = httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"error":"token expiration required"}` {
t.Fatalf("unexpected body: %s", body)
}
// Test the handler with valid user and password in the url and invalid in
// basic auth (prioritize url).
w = httptest.NewRecorder()
r = MustNewJSONRequest("GET", "/query?u=user1&p=abcd&db=foo&q=SELECT+*+FROM+bar", nil)
r.SetBasicAuth("user1", "efgh")
h.ServeHTTP(w, r)
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d: %s", w.Code, w.Body.String())
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]},{"statement_id":2,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler returns results from a query (including nil results).
func TestHandler_QueryRegex(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if stmt.String() != `SELECT * FROM test WHERE url =~ /http\:\/\/www.akamai\.com/` {
t.Fatalf("unexpected query: %s", stmt.String())
} else if ctx.Database != `test` {
t.Fatalf("unexpected db: %s", ctx.Database)
}
ctx.Results <- nil
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewRequest("GET", "/query?db=test&q=SELECT%20%2A%20FROM%20test%20WHERE%20url%20%3D~%20%2Fhttp%5C%3A%5C%2F%5C%2Fwww.akamai%5C.com%2F", nil))
}
// Ensure the handler merges results from the same statement.
func TestHandler_Query_MergeResults(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series0"},{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler merges results from the same statement, even when an empty result precedes a non-empty one.
func TestHandler_Query_MergeEmptyResults(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows{}}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":1,"series":[{"name":"series1"}]}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler can parse chunked and chunk size query parameters.
func TestHandler_Query_Chunked(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if ctx.ChunkSize != 2 {
t.Fatalf("unexpected chunk size: %d", ctx.ChunkSize)
}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series1"}})}
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar&chunked=true&chunk_size=2", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if w.Body.String() != `{"results":[{"statement_id":1,"series":[{"name":"series0"}]}]}
{"results":[{"statement_id":1,"series":[{"name":"series1"}]}]}
` {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}
// Ensure the handler can accept an async query.
func TestHandler_Query_Async(t *testing.T) {
done := make(chan struct{})
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
if stmt.String() != `SELECT * FROM bar` {
t.Fatalf("unexpected query: %s", stmt.String())
} else if ctx.Database != `foo` {
t.Fatalf("unexpected db: %s", ctx.Database)
}
ctx.Results <- &influxql.Result{StatementID: 1, Series: models.Rows([]*models.Row{{Name: "series0"}})}
ctx.Results <- &influxql.Result{StatementID: 2, Series: models.Rows([]*models.Row{{Name: "series1"}})}
close(done)
return nil
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SELECT+*+FROM+bar&async=true", nil))
if w.Code != http.StatusNoContent {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `` {
t.Fatalf("unexpected body: %s", body)
}
// Wait to make sure the async query runs and completes.
timer := time.NewTimer(100 * time.Millisecond)
defer timer.Stop()
select {
case <-timer.C:
t.Fatal("timeout while waiting for async query to complete")
case <-done:
}
}
// Ensure the handler returns a status 400 if the query is not passed in.
func TestHandler_Query_ErrQueryRequired(t *testing.T) {
h := NewHandler(false)
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query", nil))
if w.Code != http.StatusBadRequest {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"error":"missing required parameter \"q\""}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler returns a status 400 if the query cannot be parsed.
func TestHandler_Query_ErrInvalidQuery(t *testing.T) {
h := NewHandler(false)
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?q=SELECT", nil))
if w.Code != http.StatusBadRequest {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"error":"error parsing query: found EOF, expected identifier, string, number, bool at line 1, char 8"}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure the handler returns an appropriate 401 or 403 status when authentication or authorization fails.
func TestHandler_Query_ErrAuthorize(t *testing.T) {
h := NewHandler(true)
h.QueryAuthorizer.AuthorizeQueryFn = func(u meta.User, q *influxql.Query, db string) error {
return errors.New("marker")
}
h.MetaClient.AdminUserExistsFn = func() bool { return true }
h.MetaClient.AuthenticateFn = func(u, p string) (meta.User, error) {
users := []meta.UserInfo{
{
Name: "admin",
Hash: "admin",
Admin: true,
},
{
Name: "user1",
Hash: "abcd",
Privileges: map[string]influxql.Privilege{
"db0": influxql.ReadPrivilege,
},
},
}
for _, user := range users {
if u == user.Name {
if p == user.Hash {
return &user, nil
}
return nil, meta.ErrAuthenticate
}
}
return nil, meta.ErrUserNotFound
}
for i, tt := range []struct {
user string
password string
query string
code int
}{
{
query: "/query?q=SHOW+DATABASES",
code: http.StatusUnauthorized,
},
{
user: "user1",
password: "abcd",
query: "/query?q=SHOW+DATABASES",
code: http.StatusForbidden,
},
{
user: "user2",
password: "abcd",
query: "/query?q=SHOW+DATABASES",
code: http.StatusUnauthorized,
},
} {
w := httptest.NewRecorder()
r := MustNewJSONRequest("GET", tt.query, nil)
params := r.URL.Query()
if tt.user != "" {
params.Set("u", tt.user)
}
if tt.password != "" {
params.Set("p", tt.password)
}
r.URL.RawQuery = params.Encode()
h.ServeHTTP(w, r)
if w.Code != tt.code {
t.Errorf("%d. unexpected status: got=%d exp=%d\noutput: %s", i, w.Code, tt.code, w.Body.String())
}
}
}
// Ensure the handler returns a status 200 if an error is returned in the result.
func TestHandler_Query_ErrResult(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return errors.New("measurement not found")
}
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewJSONRequest("GET", "/query?db=foo&q=SHOW+SERIES+from+bin", nil))
if w.Code != http.StatusOK {
t.Fatalf("unexpected status: %d", w.Code)
} else if body := strings.TrimSpace(w.Body.String()); body != `{"results":[{"statement_id":0,"error":"measurement not found"}]}` {
t.Fatalf("unexpected body: %s", body)
}
}
// Ensure that closing the HTTP connection causes the query to be interrupted.
func TestHandler_Query_CloseNotify(t *testing.T) {
// Avoid leaking a goroutine when this fails.
done := make(chan struct{})
defer close(done)
interrupted := make(chan struct{})
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
select {
case <-ctx.InterruptCh:
case <-done:
}
close(interrupted)
return nil
}
s := httptest.NewServer(h)
defer s.Close()
// Parse the URL and generate a query request.
u, err := url.Parse(s.URL)
if err != nil {
t.Fatal(err)
}
u.Path = "/query"
values := url.Values{}
values.Set("q", "SELECT * FROM cpu")
values.Set("db", "db0")
values.Set("rp", "rp0")
values.Set("chunked", "true")
u.RawQuery = values.Encode()
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
t.Fatal(err)
}
// Perform the request and retrieve the response.
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatal(err)
}
// Validate that the interrupted channel has NOT been closed yet.
timer := time.NewTimer(100 * time.Millisecond)
select {
case <-interrupted:
timer.Stop()
t.Fatal("query interrupted unexpectedly")
case <-timer.C:
}
// Close the response body which should abort the query in the handler.
resp.Body.Close()
// The query should abort within 100 milliseconds.
timer.Reset(100 * time.Millisecond)
select {
case <-interrupted:
timer.Stop()
case <-timer.C:
t.Fatal("timeout while waiting for query to abort")
}
}
// Ensure the handler handles ping requests correctly.
// TODO: This should be expanded to verify the MetaClient check in servePing is working correctly
func TestHandler_Ping(t *testing.T) {
h := NewHandler(false)
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewRequest("GET", "/ping", nil))
if w.Code != http.StatusNoContent {
t.Fatalf("unexpected status: %d", w.Code)
}
h.ServeHTTP(w, MustNewRequest("HEAD", "/ping", nil))
if w.Code != http.StatusNoContent {
t.Fatalf("unexpected status: %d", w.Code)
}
}
// Ensure the handler returns the version correctly from the different endpoints.
func TestHandler_Version(t *testing.T) {
h := NewHandler(false)
h.StatementExecutor.ExecuteStatementFn = func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return nil
}
tests := []struct {
method string
endpoint string
body io.Reader
}{
{
method: "GET",
endpoint: "/ping",
body: nil,
},
{
method: "GET",
endpoint: "/query?db=foo&q=SELECT+*+FROM+bar",
body: nil,
},
{
method: "POST",
endpoint: "/write",
body: bytes.NewReader(make([]byte, 10)),
},
{
method: "GET",
endpoint: "/notfound",
body: nil,
},
}
for _, test := range tests {
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewRequest(test.method, test.endpoint, test.body))
if v, ok := w.HeaderMap["X-Influxdb-Version"]; ok {
if v[0] != "0.0.0" {
t.Fatalf("unexpected version: %s", v)
}
} else {
t.Fatalf("Header entry 'X-Influxdb-Version' not present")
}
}
}
// Ensure the handler handles status requests correctly.
func TestHandler_Status(t *testing.T) {
h := NewHandler(false)
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewRequest("GET", "/status", nil))
if w.Code != http.StatusNoContent {
t.Fatalf("unexpected status: %d", w.Code)
}
h.ServeHTTP(w, MustNewRequest("HEAD", "/status", nil))
if w.Code != http.StatusNoContent {
t.Fatalf("unexpected status: %d", w.Code)
}
}
// Ensure write endpoint can handle bad requests
func TestHandler_HandleBadRequestBody(t *testing.T) {
b := bytes.NewReader(make([]byte, 10))
h := NewHandler(false)
w := httptest.NewRecorder()
h.ServeHTTP(w, MustNewRequest("POST", "/write", b))
if w.Code != http.StatusBadRequest {
t.Fatalf("unexpected status: %d", w.Code)
}
}
// Ensure X-Forwarded-For header writes the correct log message.
func TestHandler_XForwardedFor(t *testing.T) {
var buf bytes.Buffer
h := NewHandler(false)
h.CLFLogger = log.New(&buf, "", 0)
req := MustNewRequest("GET", "/query", nil)
req.Header.Set("X-Forwarded-For", "192.168.0.1")
req.RemoteAddr = "127.0.0.1"
h.ServeHTTP(httptest.NewRecorder(), req)
parts := strings.Split(buf.String(), " ")
if parts[0] != "192.168.0.1,127.0.0.1" {
t.Errorf("unexpected host ip address: %s", parts[0])
}
}
// Handler represents a test wrapper for httpd.Handler.
type Handler struct {
*httpd.Handler
MetaClient *internal.MetaClientMock
StatementExecutor HandlerStatementExecutor
QueryAuthorizer HandlerQueryAuthorizer
}
// NewHandler returns a new instance of Handler.
func NewHandler(requireAuthentication bool) *Handler {
config := httpd.NewConfig()
config.AuthEnabled = requireAuthentication
config.SharedSecret = "super secret key"
h := &Handler{
Handler: httpd.NewHandler(config),
}
h.MetaClient = &internal.MetaClientMock{}
h.Handler.MetaClient = h.MetaClient
h.Handler.QueryExecutor = influxql.NewQueryExecutor()
h.Handler.QueryExecutor.StatementExecutor = &h.StatementExecutor
h.Handler.QueryAuthorizer = &h.QueryAuthorizer
h.Handler.Version = "0.0.0"
return h
}
// HandlerStatementExecutor is a mock implementation of Handler.StatementExecutor.
type HandlerStatementExecutor struct {
ExecuteStatementFn func(stmt influxql.Statement, ctx influxql.ExecutionContext) error
}
func (e *HandlerStatementExecutor) ExecuteStatement(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return e.ExecuteStatementFn(stmt, ctx)
}
// HandlerQueryAuthorizer is a mock implementation of Handler.QueryAuthorizer.
type HandlerQueryAuthorizer struct {
AuthorizeQueryFn func(u meta.User, query *influxql.Query, database string) error
}
func (a *HandlerQueryAuthorizer) AuthorizeQuery(u meta.User, query *influxql.Query, database string) error {
return a.AuthorizeQueryFn(u, query, database)
}
// MustNewRequest returns a new HTTP request. Panic on error.
func MustNewRequest(method, urlStr string, body io.Reader) *http.Request {
r, err := http.NewRequest(method, urlStr, body)
if err != nil {
panic(err.Error())
}
return r
}
// MustNewJSONRequest returns a new HTTP request with the Accept header set to JSON. Panic on error.
func MustNewJSONRequest(method, urlStr string, body io.Reader) *http.Request {
r := MustNewRequest(method, urlStr, body)
r.Header.Set("Accept", "application/json")
return r
}
// MustJWTToken returns a new JWT token and signed string or panics trying.
func MustJWTToken(username, secret string, expired bool) (*jwt.Token, string) {
token := jwt.New(jwt.GetSigningMethod("HS512"))
token.Claims.(jwt.MapClaims)["username"] = username
if expired {
token.Claims.(jwt.MapClaims)["exp"] = time.Now().Add(-time.Second).Unix()
} else {
token.Claims.(jwt.MapClaims)["exp"] = time.Now().Add(time.Minute * 10).Unix()
}
signed, err := token.SignedString([]byte(secret))
if err != nil {
panic(err)
}
return token, signed
}

View File

@@ -0,0 +1,51 @@
package httpd
import (
"net"
"sync"
)
// LimitListener returns a Listener that accepts at most n simultaneous
// connections from the provided Listener and will drop extra connections.
func LimitListener(l net.Listener, n int) net.Listener {
return &limitListener{Listener: l, sem: make(chan struct{}, n)}
}
// limitListener is a listener that limits the number of active connections
// at any given time.
type limitListener struct {
net.Listener
sem chan struct{}
}
func (l *limitListener) release() {
<-l.sem
}
func (l *limitListener) Accept() (net.Conn, error) {
for {
c, err := l.Listener.Accept()
if err != nil {
return nil, err
}
select {
case l.sem <- struct{}{}:
return &limitListenerConn{Conn: c, release: l.release}, nil
default:
c.Close()
}
}
}
type limitListenerConn struct {
net.Conn
releaseOnce sync.Once
release func()
}
func (l *limitListenerConn) Close() error {
err := l.Conn.Close()
l.releaseOnce.Do(l.release)
return err
}
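
A short usage sketch for the listener above (port and handler are illustrative): wrap a plain TCP listener so at most 100 connections are served at once; connections beyond the limit are accepted and immediately closed, exactly as `Accept` above does.

```go
package main

import (
	"log"
	"net"
	"net/http"

	"github.com/influxdata/influxdb/services/httpd"
)

func main() {
	ln, err := net.Listen("tcp", ":8086")
	if err != nil {
		log.Fatal(err)
	}
	// Cap the server at 100 simultaneous connections.
	ln = httpd.LimitListener(ln, 100)
	log.Fatal(http.Serve(ln, http.NotFoundHandler()))
}
```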

View File

@@ -0,0 +1,108 @@
package httpd_test
import (
"io"
"net"
"sync"
"testing"
"time"
"github.com/influxdata/influxdb/services/httpd"
)
type fakeListener struct {
AcceptFn func() (net.Conn, error)
}
func (l *fakeListener) Accept() (net.Conn, error) {
if l.AcceptFn != nil {
return l.AcceptFn()
}
return &fakeConn{}, nil
}
func (*fakeListener) Close() error { return nil }
func (*fakeListener) Addr() net.Addr { return nil }
type fakeConn struct {
closed bool
}
func (*fakeConn) Read([]byte) (int, error) { return 0, io.EOF }
func (*fakeConn) Write(b []byte) (int, error) { return len(b), nil }
func (c *fakeConn) Close() error {
c.closed = true
return nil
}
func (*fakeConn) LocalAddr() net.Addr { return nil }
func (*fakeConn) RemoteAddr() net.Addr { return nil }
func (*fakeConn) SetDeadline(time.Time) error { return nil }
func (*fakeConn) SetReadDeadline(time.Time) error { return nil }
func (*fakeConn) SetWriteDeadline(time.Time) error { return nil }
func TestLimitListener(t *testing.T) {
conns := make(chan net.Conn, 2)
l := httpd.LimitListener(&fakeListener{
AcceptFn: func() (net.Conn, error) {
select {
case c := <-conns:
if c != nil {
return c, nil
}
default:
}
return nil, io.EOF
},
}, 1)
c1, c2 := &fakeConn{}, &fakeConn{}
conns <- c1
conns <- c2
var c net.Conn
var err error
if c, err = l.Accept(); err != nil {
t.Fatalf("expected accept to succeed: %s", err)
}
if _, err = l.Accept(); err != io.EOF {
t.Fatalf("expected eof, got %s", err)
} else if !c2.closed {
t.Fatalf("expected connection to be automatically closed")
}
c.Close()
conns <- &fakeConn{}
if _, err = l.Accept(); err != nil {
t.Fatalf("expeced accept to succeed: %s", err)
}
}
func BenchmarkLimitListener(b *testing.B) {
var wg sync.WaitGroup
wg.Add(b.N)
l := httpd.LimitListener(&fakeListener{}, b.N)
errC := make(chan error)
for i := 0; i < b.N; i++ {
go func() {
defer wg.Done()
c, err := l.Accept()
if err != nil {
errC <- err
return
}
c.Close()
}()
}
go func() {
wg.Wait()
close(errC)
}()
for err := range errC {
if err != nil {
b.Error(err)
}
}
}

View File

@@ -0,0 +1,336 @@
package httpd
import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"io"
"net/http"
httppprof "net/http/pprof"
"runtime/pprof"
"sort"
"strconv"
"text/tabwriter"
"time"
"github.com/influxdata/influxdb/models"
)
// handleProfiles determines which profile to return to the requester.
func (h *Handler) handleProfiles(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/debug/pprof/cmdline":
httppprof.Cmdline(w, r)
case "/debug/pprof/profile":
httppprof.Profile(w, r)
case "/debug/pprof/symbol":
httppprof.Symbol(w, r)
case "/debug/pprof/all":
h.archiveProfilesAndQueries(w, r)
default:
httppprof.Index(w, r)
}
}
// prof describes a profile name and a debug value, or in the case of a CPU
// profile, the number of seconds to collect the profile for.
type prof struct {
Name string
Debug int64
}
// archiveProfilesAndQueries collects the following profiles:
// - goroutine profile
// - heap profile
// - blocking profile
// - (optionally) CPU profile
//
// It also collects the following query results:
//
// - SHOW SHARDS
// - SHOW STATS
// - SHOW DIAGNOSTICS
//
// All information is added to a tar archive and then compressed, before being
// returned to the requester as an archive file. Where profiles support debug
// parameters, the profile is collected with debug=1. To optionally include a
// CPU profile, the requester should provide a `cpu` query parameter, and can
// also provide a `seconds` parameter to specify a non-default profile
// collection time. The default CPU profile collection time is 30 seconds.
//
// Example request including CPU profile:
//
// http://localhost:8086/debug/pprof/all?cpu=true&seconds=45
//
// The value after the `cpu` query parameter is not actually important, as long
// as there is something there.
//
func (h *Handler) archiveProfilesAndQueries(w http.ResponseWriter, r *http.Request) {
var allProfs = []*prof{
{Name: "goroutine", Debug: 1},
{Name: "block", Debug: 1},
{Name: "heap", Debug: 1},
}
// Capture a CPU profile?
if r.FormValue("cpu") != "" {
profile := &prof{Name: "cpu"}
// For a CPU profile we'll use the Debug field to indicate the number of
// seconds to capture the profile for.
profile.Debug, _ = strconv.ParseInt(r.FormValue("seconds"), 10, 64)
if profile.Debug <= 0 {
profile.Debug = 30
}
allProfs = append([]*prof{profile}, allProfs...) // CPU profile first.
}
var (
resp bytes.Buffer // Temporary buffer for entire archive.
buf bytes.Buffer // Temporary buffer for each profile/query result.
)
gz := gzip.NewWriter(&resp)
tw := tar.NewWriter(gz)
// Collect and write out profiles.
for _, profile := range allProfs {
if profile.Name == "cpu" {
if err := pprof.StartCPUProfile(&buf); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
sleep(w, time.Duration(profile.Debug)*time.Second)
pprof.StopCPUProfile()
} else {
prof := pprof.Lookup(profile.Name)
if prof == nil {
http.Error(w, "unable to find profile "+profile.Name, http.StatusInternalServerError)
return
}
if err := prof.WriteTo(&buf, int(profile.Debug)); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
// Write the profile file's header.
err := tw.WriteHeader(&tar.Header{
Name: profile.Name + ".txt",
Mode: 0600,
Size: int64(buf.Len()),
})
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Write the profile file's data.
if _, err := tw.Write(buf.Bytes()); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Reset the buffer for the next profile.
buf.Reset()
}
// Collect and write out the queries.
var allQueries = []struct {
name string
fn func() ([]*models.Row, error)
}{
{"shards", h.showShards},
{"stats", h.showStats},
{"diagnostics", h.showDiagnostics},
}
tabW := tabwriter.NewWriter(&buf, 8, 8, 1, '\t', 0)
for _, query := range allQueries {
rows, err := query.fn()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
for i, row := range rows {
var out []byte
// Write the columns
for _, col := range row.Columns {
out = append(out, []byte(col+"\t")...)
}
out = append(out, '\n')
if _, err := tabW.Write(out); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Write all the values
for _, val := range row.Values {
out = out[:0]
for _, v := range val {
out = append(out, []byte(fmt.Sprintf("%v\t", v))...)
}
out = append(out, '\n')
if _, err := tabW.Write(out); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
// Write a final newline
if i < len(rows)-1 {
if _, err := tabW.Write([]byte("\n")); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
}
if err := tabW.Flush(); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
err = tw.WriteHeader(&tar.Header{
Name: query.name + ".txt",
Mode: 0600,
Size: int64(buf.Len()),
})
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Write the query file's data.
if _, err := tw.Write(buf.Bytes()); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Reset the buffer for the next query.
buf.Reset()
}
// Close the tar writer.
if err := tw.Close(); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Close the gzip writer.
if err := gz.Close(); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
// Return the gzipped archive.
w.Header().Set("Content-Disposition", "attachment; filename=profiles.tar.gz")
w.Header().Set("Content-Type", "application/gzip")
io.Copy(w, &resp) // Nothing we can really do about an error at this point.
}
// showShards generates the same values that a StatementExecutor would if a
// SHOW SHARDS query was executed.
func (h *Handler) showShards() ([]*models.Row, error) {
dis := h.MetaClient.Databases()
rows := []*models.Row{}
for _, di := range dis {
row := &models.Row{Columns: []string{"id", "database", "retention_policy", "shard_group", "start_time", "end_time", "expiry_time", "owners"}, Name: di.Name}
for _, rpi := range di.RetentionPolicies {
for _, sgi := range rpi.ShardGroups {
// Shards associated with deleted shard groups are effectively deleted.
// Don't list them.
if sgi.Deleted() {
continue
}
for _, si := range sgi.Shards {
ownerIDs := make([]uint64, len(si.Owners))
for i, owner := range si.Owners {
ownerIDs[i] = owner.NodeID
}
row.Values = append(row.Values, []interface{}{
si.ID,
di.Name,
rpi.Name,
sgi.ID,
sgi.StartTime.UTC().Format(time.RFC3339),
sgi.EndTime.UTC().Format(time.RFC3339),
sgi.EndTime.Add(rpi.Duration).UTC().Format(time.RFC3339),
joinUint64(ownerIDs),
})
}
}
}
rows = append(rows, row)
}
return rows, nil
}
// showDiagnostics generates the same values that a StatementExecutor would if a
// SHOW DIAGNOSTICS query was executed.
func (h *Handler) showDiagnostics() ([]*models.Row, error) {
diags, err := h.Monitor.Diagnostics()
if err != nil {
return nil, err
}
// Get a sorted list of diagnostics keys.
sortedKeys := make([]string, 0, len(diags))
for k := range diags {
sortedKeys = append(sortedKeys, k)
}
sort.Strings(sortedKeys)
rows := make([]*models.Row, 0, len(diags))
for _, k := range sortedKeys {
row := &models.Row{Name: k}
row.Columns = diags[k].Columns
row.Values = diags[k].Rows
rows = append(rows, row)
}
return rows, nil
}
// showStats generates the same values that a StatementExecutor would if a
// SHOW STATS query was executed.
func (h *Handler) showStats() ([]*models.Row, error) {
stats, err := h.Monitor.Statistics(nil)
if err != nil {
return nil, err
}
var rows []*models.Row
for _, stat := range stats {
row := &models.Row{Name: stat.Name, Tags: stat.Tags}
values := make([]interface{}, 0, len(stat.Values))
for _, k := range stat.ValueNames() {
row.Columns = append(row.Columns, k)
values = append(values, stat.Values[k])
}
row.Values = [][]interface{}{values}
rows = append(rows, row)
}
return rows, nil
}
// joinUint64 returns a comma-delimited string of uint64 numbers.
func joinUint64(a []uint64) string {
var buf []byte // Could take a guess at initial size here.
for i, x := range a {
if i != 0 {
buf = append(buf, ',')
}
buf = strconv.AppendUint(buf, x, 10)
}
return string(buf)
}
// Taken from net/http/pprof/pprof.go
func sleep(w http.ResponseWriter, d time.Duration) {
var clientGone <-chan bool
if cn, ok := w.(http.CloseNotifier); ok {
clientGone = cn.CloseNotify()
}
select {
case <-time.After(d):
case <-clientGone:
}
}
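
For completeness, a client-side sketch of fetching the archive produced by archiveProfilesAndQueries above (assuming a server on the default :8086 bind address): the URL matches the example in the doc comment, and the output filename mirrors the Content-Disposition header set by the handler.

```go
package main

import (
	"io"
	"log"
	"net/http"
	"os"
)

func main() {
	// Request a 10-second CPU profile along with the default profiles and queries.
	resp, err := http.Get("http://localhost:8086/debug/pprof/all?cpu=true&seconds=10")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	f, err := os.Create("profiles.tar.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if _, err := io.Copy(f, resp.Body); err != nil {
		log.Fatal(err)
	}
}
```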

View File

@@ -0,0 +1,140 @@
package httpd
import (
"container/list"
"fmt"
"net"
"net/http"
"sync"
"sync/atomic"
"github.com/influxdata/influxdb/services/meta"
)
type RequestInfo struct {
IPAddr string
Username string
}
type RequestStats struct {
Writes int64 `json:"writes"`
Queries int64 `json:"queries"`
}
func (r *RequestInfo) String() string {
if r.Username != "" {
return fmt.Sprintf("%s:%s", r.Username, r.IPAddr)
}
return r.IPAddr
}
type RequestProfile struct {
tracker *RequestTracker
elem *list.Element
mu sync.RWMutex
Requests map[RequestInfo]*RequestStats
}
func (p *RequestProfile) AddWrite(info RequestInfo) {
p.add(info, p.addWrite)
}
func (p *RequestProfile) AddQuery(info RequestInfo) {
p.add(info, p.addQuery)
}
func (p *RequestProfile) add(info RequestInfo, fn func(*RequestStats)) {
// Look for a request entry for this request.
p.mu.RLock()
st, ok := p.Requests[info]
p.mu.RUnlock()
if ok {
fn(st)
return
}
// There is no entry in the request tracker. Create one.
p.mu.Lock()
if st, ok := p.Requests[info]; ok {
// Something else created this entry while we were waiting for the lock.
p.mu.Unlock()
fn(st)
return
}
st = &RequestStats{}
p.Requests[info] = st
p.mu.Unlock()
fn(st)
}
func (p *RequestProfile) addWrite(st *RequestStats) {
atomic.AddInt64(&st.Writes, 1)
}
func (p *RequestProfile) addQuery(st *RequestStats) {
atomic.AddInt64(&st.Queries, 1)
}
// Stop informs the RequestTracker to stop collecting statistics for this
// profile.
func (p *RequestProfile) Stop() {
p.tracker.mu.Lock()
p.tracker.profiles.Remove(p.elem)
p.tracker.mu.Unlock()
}
type RequestTracker struct {
mu sync.RWMutex
profiles *list.List
}
func NewRequestTracker() *RequestTracker {
return &RequestTracker{
profiles: list.New(),
}
}
func (rt *RequestTracker) TrackRequests() *RequestProfile {
// Perform the memory allocation outside of the lock.
profile := &RequestProfile{
Requests: make(map[RequestInfo]*RequestStats),
tracker: rt,
}
rt.mu.Lock()
profile.elem = rt.profiles.PushBack(profile)
rt.mu.Unlock()
return profile
}
func (rt *RequestTracker) Add(req *http.Request, user meta.User) {
rt.mu.RLock()
if rt.profiles.Len() == 0 {
rt.mu.RUnlock()
return
}
defer rt.mu.RUnlock()
var info RequestInfo
host, _, err := net.SplitHostPort(req.RemoteAddr)
if err != nil {
return
}
info.IPAddr = host
if user != nil {
info.Username = user.ID()
}
// Add the request info to the profiles.
for p := rt.profiles.Front(); p != nil; p = p.Next() {
profile := p.Value.(*RequestProfile)
if req.URL.Path == "/query" {
profile.AddQuery(info)
} else if req.URL.Path == "/write" {
profile.AddWrite(info)
}
}
}
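
A sketch of the intended lifecycle, with the request and address made up for illustration: start a profile, let requests flow through `Add` (the HTTP handler calls this for every /query and /write request), then `Stop` and read the per-client counters.

```go
package main

import (
	"fmt"
	"net/http"

	"github.com/influxdata/influxdb/services/httpd"
)

func main() {
	rt := httpd.NewRequestTracker()
	profile := rt.TrackRequests()

	// Simulate one write request from a client; in the server this call
	// happens inside the handler for each /query and /write request.
	req, _ := http.NewRequest("POST", "/write", nil)
	req.RemoteAddr = "192.168.0.1:12345"
	rt.Add(req, nil) // nil user: only the IP address is recorded

	profile.Stop() // detach from the tracker before reading
	for info, stats := range profile.Requests {
		fmt.Printf("%s writes=%d queries=%d\n", info.String(), stats.Writes, stats.Queries)
	}
}
```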

View File

@@ -0,0 +1,166 @@
package httpd
import (
"fmt"
"net"
"net/http"
"strconv"
"strings"
"time"
"github.com/influxdata/influxdb/influxql"
)
// responseLogger is a wrapper of http.ResponseWriter that keeps track of its HTTP
// status code and body size.
type responseLogger struct {
w http.ResponseWriter
status int
size int
}
func (l *responseLogger) CloseNotify() <-chan bool {
if notifier, ok := l.w.(http.CloseNotifier); ok {
return notifier.CloseNotify()
}
// needed for the response recorder used in tests
return make(<-chan bool)
}
func (l *responseLogger) Header() http.Header {
return l.w.Header()
}
func (l *responseLogger) Flush() {
l.w.(http.Flusher).Flush()
}
func (l *responseLogger) Write(b []byte) (int, error) {
if l.status == 0 {
// Set status if WriteHeader has not been called
l.status = http.StatusOK
}
size, err := l.w.Write(b)
l.size += size
return size, err
}
func (l *responseLogger) WriteHeader(s int) {
l.w.WriteHeader(s)
l.status = s
}
func (l *responseLogger) Status() int {
if l.status == 0 {
// This can happen if we never actually write data, but only set response headers.
l.status = http.StatusOK
}
return l.status
}
func (l *responseLogger) Size() int {
return l.size
}
// redactPassword redacts any occurrence of the password parameter, 'p'.
func redactPassword(r *http.Request) {
q := r.URL.Query()
if p := q.Get("p"); p != "" {
q.Set("p", "[REDACTED]")
r.URL.RawQuery = q.Encode()
}
}
// Common Log Format: http://en.wikipedia.org/wiki/Common_Log_Format
// buildLogLine creates a log line in Common Log Format.
// In addition to the common fields, it also appends the referrer, user agent,
// request ID, and response time (microseconds);
// i.e., in Apache mod_log_config terms:
// %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" %L %D
func buildLogLine(l *responseLogger, r *http.Request, start time.Time) string {
redactPassword(r)
username := parseUsername(r)
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
host = r.RemoteAddr
}
if xff := r.Header["X-Forwarded-For"]; xff != nil {
addrs := append(xff, host)
host = strings.Join(addrs, ",")
}
uri := r.URL.RequestURI()
referer := r.Referer()
userAgent := r.UserAgent()
return fmt.Sprintf(`%s - %s [%s] "%s %s %s" %s %s "%s" "%s" %s %d`,
host,
detect(username, "-"),
start.Format("02/Jan/2006:15:04:05 -0700"),
r.Method,
uri,
r.Proto,
detect(strconv.Itoa(l.Status()), "-"),
strconv.Itoa(l.Size()),
detect(referer, "-"),
detect(userAgent, "-"),
r.Header.Get("Request-Id"),
// response time, report in microseconds because this is consistent
// with apache's %D parameter in mod_log_config
int64(time.Since(start)/time.Microsecond))
}
// detect returns the first non-blank string in values.
func detect(values ...string) string {
for _, v := range values {
if v != "" {
return v
}
}
return ""
}
// parseUsername parses the username from the URL query parameters or the Authorization header.
func parseUsername(r *http.Request) string {
var (
username = ""
url = r.URL
)
// get username from the url if passed there
if url.User != nil {
if name := url.User.Username(); name != "" {
username = name
}
}
// Try to get the username from the query param 'u'
q := url.Query()
if u := q.Get("u"); u != "" {
username = u
}
// Try to get it from the authorization header if set there
if username == "" {
if u, _, ok := r.BasicAuth(); ok {
username = u
}
}
return username
}
// sanitize redacts passwords from query string for logging.
func sanitize(r *http.Request) {
values := r.URL.Query()
for i, q := range values["q"] {
values["q"][i] = influxql.Sanitize(q)
}
r.URL.RawQuery = values.Encode()
}
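
Putting the format string together, a line emitted by buildLogLine looks like the following (all values are illustrative, in the order of the Sprintf above: host, user, timestamp, request line, status, size, referrer, user agent, request ID, response time in microseconds):

```
192.168.0.1 - user1 [25/Oct/2017:20:52:40 +0000] "GET /query?db=foo&p=%5BREDACTED%5D HTTP/1.1" 200 117 "-" "curl/7.51.0" 5e1bdd22-1c39-11e7-93ae-92361f002671 1420
```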

View File

@@ -0,0 +1,181 @@
package httpd
import (
"encoding/csv"
"encoding/json"
"io"
"net/http"
"strconv"
"time"
"github.com/influxdata/influxdb/models"
)
// ResponseWriter is an interface for writing a response.
type ResponseWriter interface {
// WriteResponse writes a response.
WriteResponse(resp Response) (int, error)
http.ResponseWriter
}
// NewResponseWriter creates a new ResponseWriter that wraps w, selecting a
// formatter based on the Accept header of the request.
func NewResponseWriter(w http.ResponseWriter, r *http.Request) ResponseWriter {
pretty := r.URL.Query().Get("pretty") == "true"
rw := &responseWriter{ResponseWriter: w}
switch r.Header.Get("Accept") {
case "application/csv", "text/csv":
w.Header().Add("Content-Type", "text/csv")
rw.formatter = &csvFormatter{statementID: -1, Writer: w}
case "application/json":
fallthrough
default:
w.Header().Add("Content-Type", "application/json")
rw.formatter = &jsonFormatter{Pretty: pretty, Writer: w}
}
return rw
}
// WriteError is a convenience function for writing an error response to the ResponseWriter.
func WriteError(w ResponseWriter, err error) (int, error) {
return w.WriteResponse(Response{Err: err})
}
// responseWriter is an implementation of ResponseWriter.
type responseWriter struct {
formatter interface {
WriteResponse(resp Response) (int, error)
}
http.ResponseWriter
}
// WriteResponse writes the response using the formatter.
func (w *responseWriter) WriteResponse(resp Response) (int, error) {
return w.formatter.WriteResponse(resp)
}
// Flush flushes the ResponseWriter if it has a Flush() method.
func (w *responseWriter) Flush() {
if w, ok := w.ResponseWriter.(http.Flusher); ok {
w.Flush()
}
}
// CloseNotify calls CloseNotify on the underlying http.ResponseWriter if it
// exists. Otherwise, it returns a nil channel that will never notify.
func (w *responseWriter) CloseNotify() <-chan bool {
if notifier, ok := w.ResponseWriter.(http.CloseNotifier); ok {
return notifier.CloseNotify()
}
return nil
}
type jsonFormatter struct {
io.Writer
Pretty bool
}
func (w *jsonFormatter) WriteResponse(resp Response) (n int, err error) {
var b []byte
if w.Pretty {
b, err = json.MarshalIndent(resp, "", " ")
} else {
b, err = json.Marshal(resp)
}
if err != nil {
n, err = io.WriteString(w, err.Error())
} else {
n, err = w.Write(b)
}
w.Write([]byte("\n"))
n++
return n, err
}
type csvFormatter struct {
io.Writer
statementID int
columns []string
}
func (w *csvFormatter) WriteResponse(resp Response) (n int, err error) {
csv := csv.NewWriter(w)
for _, result := range resp.Results {
if result.StatementID != w.statementID {
// If there are no series in the result, skip past this result.
if len(result.Series) == 0 {
continue
}
// Set the statement id and print out a newline if this is not the first statement.
if w.statementID >= 0 {
// Flush the csv writer and write a newline.
csv.Flush()
if err := csv.Error(); err != nil {
return n, err
}
out, err := io.WriteString(w, "\n")
if err != nil {
return n, err
}
n += out
}
w.statementID = result.StatementID
// Print out the column headers from the first series.
w.columns = make([]string, 2+len(result.Series[0].Columns))
w.columns[0] = "name"
w.columns[1] = "tags"
copy(w.columns[2:], result.Series[0].Columns)
if err := csv.Write(w.columns); err != nil {
return n, err
}
}
for _, row := range result.Series {
w.columns[0] = row.Name
if len(row.Tags) > 0 {
w.columns[1] = string(models.NewTags(row.Tags).HashKey()[1:])
} else {
w.columns[1] = ""
}
for _, values := range row.Values {
for i, value := range values {
if value == nil {
w.columns[i+2] = ""
continue
}
switch v := value.(type) {
case float64:
w.columns[i+2] = strconv.FormatFloat(v, 'f', -1, 64)
case int64:
w.columns[i+2] = strconv.FormatInt(v, 10)
case string:
w.columns[i+2] = v
case bool:
if v {
w.columns[i+2] = "true"
} else {
w.columns[i+2] = "false"
}
case time.Time:
w.columns[i+2] = strconv.FormatInt(v.UnixNano(), 10)
case *float64, *int64, *string, *bool:
w.columns[i+2] = ""
}
}
csv.Write(w.columns)
}
}
}
csv.Flush()
if err := csv.Error(); err != nil {
return n, err
}
return n, nil
}

View File

@@ -0,0 +1,61 @@
package httpd_test
import (
"net/http"
"net/http/httptest"
"net/url"
"testing"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/httpd"
)
func TestResponseWriter_CSV(t *testing.T) {
header := make(http.Header)
header.Set("Accept", "text/csv")
r := &http.Request{
Header: header,
URL: &url.URL{},
}
w := httptest.NewRecorder()
writer := httpd.NewResponseWriter(w, r)
writer.WriteResponse(httpd.Response{
Results: []*influxql.Result{
{
StatementID: 0,
Series: []*models.Row{
{
Name: "cpu",
Tags: map[string]string{
"host": "server01",
"region": "uswest",
},
Columns: []string{"time", "value"},
Values: [][]interface{}{
{time.Unix(0, 10), float64(2.5)},
{time.Unix(0, 20), int64(5)},
{time.Unix(0, 30), nil},
{time.Unix(0, 40), "foobar"},
{time.Unix(0, 50), true},
{time.Unix(0, 60), false},
},
},
},
},
},
})
if got, want := w.Body.String(), `name,tags,time,value
cpu,"host=server01,region=uswest",10,2.5
cpu,"host=server01,region=uswest",20,5
cpu,"host=server01,region=uswest",30,
cpu,"host=server01,region=uswest",40,foobar
cpu,"host=server01,region=uswest",50,true
cpu,"host=server01,region=uswest",60,false
`; got != want {
t.Errorf("unexpected output:\n\ngot=%v\nwant=%s", got, want)
}
}

View File

@@ -0,0 +1,214 @@
// Package httpd implements the HTTP service and REST API for InfluxDB.
package httpd // import "github.com/influxdata/influxdb/services/httpd"
import (
"crypto/tls"
"fmt"
"net"
"net/http"
"os"
"path"
"runtime"
"strings"
"syscall"
"time"
"github.com/influxdata/influxdb/models"
"github.com/uber-go/zap"
)
// statistics gathered by the httpd package.
const (
statRequest = "req" // Number of HTTP requests served
statQueryRequest = "queryReq" // Number of query requests served
statWriteRequest = "writeReq" // Number of write requests served
statPingRequest = "pingReq" // Number of ping requests served
statStatusRequest = "statusReq" // Number of status requests served
statWriteRequestBytesReceived = "writeReqBytes" // Sum of all bytes in write requests
statQueryRequestBytesTransmitted = "queryRespBytes" // Sum of all bytes returned in query responses
statPointsWrittenOK = "pointsWrittenOK" // Number of points written OK
statPointsWrittenDropped = "pointsWrittenDropped" // Number of points dropped by the storage engine
statPointsWrittenFail = "pointsWrittenFail" // Number of points that failed to be written
statAuthFail = "authFail" // Number of authentication failures
statRequestDuration = "reqDurationNs" // Number of (wall-time) nanoseconds spent inside requests
statQueryRequestDuration = "queryReqDurationNs" // Number of (wall-time) nanoseconds spent inside query requests
statWriteRequestDuration = "writeReqDurationNs" // Number of (wall-time) nanoseconds spent inside write requests
statRequestsActive = "reqActive" // Number of currently active requests
statWriteRequestsActive = "writeReqActive" // Number of currently active write requests
statClientError = "clientError" // Number of HTTP responses due to client error
statServerError = "serverError" // Number of HTTP responses due to server error
)
// Service manages the listener and handler for an HTTP endpoint.
type Service struct {
ln net.Listener
addr string
https bool
cert string
key string
limit int
err chan error
unixSocket bool
bindSocket string
unixSocketListener net.Listener
Handler *Handler
Logger zap.Logger
}
// NewService returns a new instance of Service.
func NewService(c Config) *Service {
s := &Service{
addr: c.BindAddress,
https: c.HTTPSEnabled,
cert: c.HTTPSCertificate,
key: c.HTTPSPrivateKey,
limit: c.MaxConnectionLimit,
err: make(chan error),
unixSocket: c.UnixSocketEnabled,
bindSocket: c.BindSocket,
Handler: NewHandler(c),
Logger: zap.New(zap.NullEncoder()),
}
if s.key == "" {
s.key = s.cert
}
s.Handler.Logger = s.Logger
return s
}
// Open starts the service.
func (s *Service) Open() error {
s.Logger.Info("Starting HTTP service")
s.Logger.Info(fmt.Sprint("Authentication enabled:", s.Handler.Config.AuthEnabled))
// Open listener.
if s.https {
cert, err := tls.LoadX509KeyPair(s.cert, s.key)
if err != nil {
return err
}
listener, err := tls.Listen("tcp", s.addr, &tls.Config{
Certificates: []tls.Certificate{cert},
})
if err != nil {
return err
}
s.Logger.Info(fmt.Sprint("Listening on HTTPS:", listener.Addr().String()))
s.ln = listener
} else {
listener, err := net.Listen("tcp", s.addr)
if err != nil {
return err
}
s.Logger.Info(fmt.Sprint("Listening on HTTP:", listener.Addr().String()))
s.ln = listener
}
// Open unix socket listener.
if s.unixSocket {
if runtime.GOOS == "windows" {
return fmt.Errorf("unable to use unix socket on windows")
}
if err := os.MkdirAll(path.Dir(s.bindSocket), 0777); err != nil {
return err
}
if err := syscall.Unlink(s.bindSocket); err != nil && !os.IsNotExist(err) {
return err
}
listener, err := net.Listen("unix", s.bindSocket)
if err != nil {
return err
}
s.Logger.Info(fmt.Sprint("Listening on unix socket:", listener.Addr().String()))
s.unixSocketListener = listener
go s.serveUnixSocket()
}
// Enforce a connection limit if one has been given.
if s.limit > 0 {
s.ln = LimitListener(s.ln, s.limit)
}
// wait for the listeners to start
timeout := time.Now().Add(time.Second)
for {
if s.ln.Addr() != nil {
break
}
if time.Now().After(timeout) {
return fmt.Errorf("unable to open without http listener running")
}
time.Sleep(10 * time.Millisecond)
}
// Begin listening for requests in a separate goroutine.
go s.serveTCP()
return nil
}
// Close closes the underlying listener.
func (s *Service) Close() error {
if s.ln != nil {
if err := s.ln.Close(); err != nil {
return err
}
}
if s.unixSocketListener != nil {
if err := s.unixSocketListener.Close(); err != nil {
return err
}
}
return nil
}
// WithLogger sets the logger for the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "httpd"))
s.Handler.Logger = s.Logger
}
// Err returns a channel for fatal errors that occur on the listener.
func (s *Service) Err() <-chan error { return s.err }
// Addr returns the listener's address. Returns nil if listener is closed.
func (s *Service) Addr() net.Addr {
if s.ln != nil {
return s.ln.Addr()
}
return nil
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return s.Handler.Statistics(models.NewTags(map[string]string{"bind": s.addr}).Merge(tags).Map())
}
// serveTCP serves the handler from the TCP listener.
func (s *Service) serveTCP() {
s.serve(s.ln)
}
// serveUnixSocket serves the handler from the unix socket listener.
func (s *Service) serveUnixSocket() {
s.serve(s.unixSocketListener)
}
// serve serves the handler from the listener.
func (s *Service) serve(listener net.Listener) {
// The listener was closed so exit
// See https://github.com/golang/go/issues/4373
err := http.Serve(listener, s.Handler)
if err != nil && !strings.Contains(err.Error(), "closed") {
s.err <- fmt.Errorf("listener failed: addr=%s, err=%s", s.Addr(), err)
}
}
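
A minimal sketch of standing the service up in isolation, assuming no TLS and no unix socket; in influxd proper the handler's dependencies (MetaClient, QueryExecutor, and so on) are wired in before Open is called, so this instance would only be able to answer requests that need none of them.

```go
package main

import (
	"log"

	"github.com/influxdata/influxdb/services/httpd"
)

func main() {
	c := httpd.NewConfig()
	c.BindAddress = ":8087" // avoid clashing with a running influxd

	s := httpd.NewService(c)
	if err := s.Open(); err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	log.Println("httpd listening on", s.Addr())
	log.Fatal(<-s.Err()) // block until the listener reports a fatal error
}
```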

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,47 @@
package meta
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
)
const (
// DefaultLeaseDuration is the default duration for leases.
DefaultLeaseDuration = 60 * time.Second
// DefaultLoggingEnabled determines if log messages are printed for the meta service.
DefaultLoggingEnabled = true
)
// Config represents the meta configuration.
type Config struct {
Dir string `toml:"dir"`
RetentionAutoCreate bool `toml:"retention-autocreate"`
LoggingEnabled bool `toml:"logging-enabled"`
}
// NewConfig builds a new configuration with default values.
func NewConfig() *Config {
return &Config{
RetentionAutoCreate: true,
LoggingEnabled: DefaultLoggingEnabled,
}
}
// Validate returns an error if the config is invalid.
func (c *Config) Validate() error {
if c.Dir == "" {
return errors.New("Meta.Dir must be specified")
}
return nil
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c *Config) Diagnostics() (*diagnostics.Diagnostics, error) {
return diagnostics.RowFromMap(map[string]interface{}{
"dir": c.Dir,
}), nil
}

View File

@@ -0,0 +1,26 @@
package meta_test
import (
"testing"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/meta"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c meta.Config
if _, err := toml.Decode(`
dir = "/tmp/foo"
logging-enabled = false
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Dir != "/tmp/foo" {
t.Fatalf("unexpected dir: %s", c.Dir)
} else if c.LoggingEnabled {
t.Fatalf("unexpected logging enabled: %v", c.LoggingEnabled)
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,64 @@
package meta
import (
"sort"
"time"
"testing"
)
func TestShardGroupSort(t *testing.T) {
sg1 := ShardGroupInfo{
ID: 1,
StartTime: time.Unix(1000, 0),
EndTime: time.Unix(1100, 0),
TruncatedAt: time.Unix(1050, 0),
}
sg2 := ShardGroupInfo{
ID: 2,
StartTime: time.Unix(1000, 0),
EndTime: time.Unix(1100, 0),
}
sgs := ShardGroupInfos{sg2, sg1}
sort.Sort(sgs)
if sgs[len(sgs)-1].ID != 2 {
t.Fatal("unstable sort for ShardGroupInfos")
}
}
func Test_Data_RetentionPolicy_MarshalBinary(t *testing.T) {
zeroTime := time.Time{}
epoch := time.Unix(0, 0).UTC()
startTime := zeroTime
sgi := &ShardGroupInfo{
StartTime: startTime,
}
isgi := sgi.marshal()
sgi.unmarshal(isgi)
if got, exp := sgi.StartTime.UTC(), epoch.UTC(); got != exp {
t.Errorf("unexpected start time. got: %s, exp: %s", got, exp)
}
startTime = time.Unix(0, 0)
endTime := startTime.Add(time.Hour * 24)
sgi = &ShardGroupInfo{
StartTime: startTime,
EndTime: endTime,
}
isgi = sgi.marshal()
sgi.unmarshal(isgi)
if got, exp := sgi.StartTime.UTC(), startTime.UTC(); got != exp {
t.Errorf("unexpected start time. got: %s, exp: %s", got, exp)
}
if got, exp := sgi.EndTime.UTC(), endTime.UTC(); got != exp {
t.Errorf("unexpected end time. got: %s, exp: %s", got, exp)
}
if got, exp := sgi.DeletedAt.UTC(), zeroTime.UTC(); got != exp {
t.Errorf("unexpected DeletedAt time. got: %s, exp: %s", got, exp)
}
}

View File

@@ -0,0 +1,204 @@
package meta_test
import (
"reflect"
"testing"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/services/meta"
)
func Test_Data_DropDatabase(t *testing.T) {
data := &meta.Data{
Databases: []meta.DatabaseInfo{
{Name: "db0"},
{Name: "db1"},
{Name: "db2"},
{Name: "db4"},
{Name: "db5"},
},
Users: []meta.UserInfo{
{Name: "user1", Privileges: map[string]influxql.Privilege{"db1": influxql.ReadPrivilege, "db2": influxql.ReadPrivilege}},
{Name: "user2", Privileges: map[string]influxql.Privilege{"db2": influxql.ReadPrivilege}},
},
}
// Dropping the first database removes it from the Data object.
expDbs := make([]meta.DatabaseInfo, 4)
copy(expDbs, data.Databases[1:])
if err := data.DropDatabase("db0"); err != nil {
t.Fatal(err)
} else if got, exp := data.Databases, expDbs; !reflect.DeepEqual(got, exp) {
t.Fatalf("got %v, expected %v", got, exp)
}
// Dropping a middle database removes it from the data object.
expDbs = []meta.DatabaseInfo{{Name: "db1"}, {Name: "db2"}, {Name: "db5"}}
if err := data.DropDatabase("db4"); err != nil {
t.Fatal(err)
} else if got, exp := data.Databases, expDbs; !reflect.DeepEqual(got, exp) {
t.Fatalf("got %v, expected %v", got, exp)
}
// Dropping the last database removes it from the data object.
expDbs = []meta.DatabaseInfo{{Name: "db1"}, {Name: "db2"}}
if err := data.DropDatabase("db5"); err != nil {
t.Fatal(err)
} else if got, exp := data.Databases, expDbs; !reflect.DeepEqual(got, exp) {
t.Fatalf("got %v, expected %v", got, exp)
}
// Dropping a database also drops all the user privileges associated with
// it.
expUsers := []meta.UserInfo{
{Name: "user1", Privileges: map[string]influxql.Privilege{"db1": influxql.ReadPrivilege}},
{Name: "user2", Privileges: map[string]influxql.Privilege{}},
}
if err := data.DropDatabase("db2"); err != nil {
t.Fatal(err)
} else if got, exp := data.Users, expUsers; !reflect.DeepEqual(got, exp) {
t.Fatalf("got %v, expected %v", got, exp)
}
}
func Test_Data_CreateRetentionPolicy(t *testing.T) {
data := meta.Data{}
err := data.CreateDatabase("foo")
if err != nil {
t.Fatal(err)
}
err = data.CreateRetentionPolicy("foo", &meta.RetentionPolicyInfo{
Name: "bar",
ReplicaN: 1,
Duration: 24 * time.Hour,
}, false)
if err != nil {
t.Fatal(err)
}
rp, err := data.RetentionPolicy("foo", "bar")
if err != nil {
t.Fatal(err)
}
if rp == nil {
t.Fatal("creation of retention policy failed")
}
// Try to recreate the same RP with default set to true, should fail
err = data.CreateRetentionPolicy("foo", &meta.RetentionPolicyInfo{
Name: "bar",
ReplicaN: 1,
Duration: 24 * time.Hour,
}, true)
if err == nil || err != meta.ErrRetentionPolicyConflict {
t.Fatalf("unexpected error. got: %v, exp: %s", err, meta.ErrRetentionPolicyConflict)
}
// Creating the same RP with the same specifications should succeed
err = data.CreateRetentionPolicy("foo", &meta.RetentionPolicyInfo{
Name: "bar",
ReplicaN: 1,
Duration: 24 * time.Hour,
}, false)
if err != nil {
t.Fatal(err)
}
}
func TestData_AdminUserExists(t *testing.T) {
data := meta.Data{}
// No users means no admin.
if data.AdminUserExists() {
t.Fatal("no admin user should exist")
}
// Add a non-admin user.
if err := data.CreateUser("user1", "a", false); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Add an admin user.
if err := data.CreateUser("admin1", "a", true); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Remove the original user
if err := data.DropUser("user1"); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Add another admin
if err := data.CreateUser("admin2", "a", true); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Revoke privileges of the first admin
if err := data.SetAdminPrivilege("admin1", false); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Add user1 back.
if err := data.CreateUser("user1", "a", false); err != nil {
t.Fatal(err)
}
// Revoke remaining admin.
if err := data.SetAdminPrivilege("admin2", false); err != nil {
t.Fatal(err)
}
// No longer any admins
if got, exp := data.AdminUserExists(), false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Make user1 an admin
if err := data.SetAdminPrivilege("user1", true); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// Drop user1...
if err := data.DropUser("user1"); err != nil {
t.Fatal(err)
}
if got, exp := data.AdminUserExists(), false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
}
func TestUserInfo_AuthorizeDatabase(t *testing.T) {
emptyUser := &meta.UserInfo{}
if !emptyUser.AuthorizeDatabase(influxql.NoPrivileges, "anydb") {
t.Fatal("expected NoPrivileges to be authorized but it wasn't")
}
if emptyUser.AuthorizeDatabase(influxql.ReadPrivilege, "anydb") {
t.Fatal("expected ReadPrivilege to prevent authorization, but it was authorized")
}
adminUser := &meta.UserInfo{Admin: true}
if !adminUser.AuthorizeDatabase(influxql.AllPrivileges, "anydb") {
t.Fatalf("expected admin to be authorized but it wasn't")
}
}

View File

@@ -0,0 +1,115 @@
package meta
import (
"errors"
"fmt"
)
var (
// ErrStoreOpen is returned when opening an already open store.
ErrStoreOpen = errors.New("store already open")
// ErrStoreClosed is returned when closing an already closed store.
ErrStoreClosed = errors.New("raft store already closed")
)
var (
// ErrDatabaseExists is returned when creating an already existing database.
ErrDatabaseExists = errors.New("database already exists")
// ErrDatabaseNotExists is returned when operating on a not existing database.
ErrDatabaseNotExists = errors.New("database does not exist")
// ErrDatabaseNameRequired is returned when creating a database without a name.
ErrDatabaseNameRequired = errors.New("database name required")
// ErrInvalidName is returned when attempting to create a database or retention policy with an invalid name
ErrInvalidName = errors.New("invalid name")
)
var (
// ErrRetentionPolicyExists is returned when creating an already existing policy.
ErrRetentionPolicyExists = errors.New("retention policy already exists")
// ErrRetentionPolicyNotFound is returned when an expected policy wasn't found.
ErrRetentionPolicyNotFound = errors.New("retention policy not found")
// ErrRetentionPolicyDefault is returned when attempting a prohibited operation
// on a default retention policy.
ErrRetentionPolicyDefault = errors.New("retention policy is default")
// ErrRetentionPolicyRequired is returned when a retention policy is required
// by an operation, but a nil policy was passed.
ErrRetentionPolicyRequired = errors.New("retention policy required")
// ErrRetentionPolicyNameRequired is returned when creating a policy without a name.
ErrRetentionPolicyNameRequired = errors.New("retention policy name required")
// ErrRetentionPolicyNameExists is returned when renaming a policy to
// the same name as another existing policy.
ErrRetentionPolicyNameExists = errors.New("retention policy name already exists")
// ErrRetentionPolicyDurationTooLow is returned when updating a retention
// policy that has a duration lower than the allowed minimum.
ErrRetentionPolicyDurationTooLow = fmt.Errorf("retention policy duration must be at least %s", MinRetentionPolicyDuration)
// ErrRetentionPolicyConflict is returned when creating a retention policy conflicts
// with an existing policy.
ErrRetentionPolicyConflict = errors.New("retention policy conflicts with an existing policy")
// ErrIncompatibleDurations is returned when creating or updating a
// retention policy that has a duration lower than the current shard
// duration.
ErrIncompatibleDurations = errors.New("retention policy duration must be greater than the shard duration")
// ErrReplicationFactorTooLow is returned when the replication factor is not in an
// acceptable range.
ErrReplicationFactorTooLow = errors.New("replication factor must be greater than 0")
)
var (
// ErrShardGroupExists is returned when creating an already existing shard group.
ErrShardGroupExists = errors.New("shard group already exists")
// ErrShardGroupNotFound is returned when mutating a shard group that doesn't exist.
ErrShardGroupNotFound = errors.New("shard group not found")
// ErrShardNotReplicated is returned if the node requested to be dropped has
// the last copy of a shard present and the force keyword was not used
ErrShardNotReplicated = errors.New("shard not replicated")
)
var (
// ErrContinuousQueryExists is returned when creating an already existing continuous query.
ErrContinuousQueryExists = errors.New("continuous query already exists")
// ErrContinuousQueryNotFound is returned when removing a continuous query that doesn't exist.
ErrContinuousQueryNotFound = errors.New("continuous query not found")
)
var (
// ErrSubscriptionExists is returned when creating an already existing subscription.
ErrSubscriptionExists = errors.New("subscription already exists")
// ErrSubscriptionNotFound is returned when removing a subscription that doesn't exist.
ErrSubscriptionNotFound = errors.New("subscription not found")
)
// ErrInvalidSubscriptionURL is returned when the subscription's destination URL is invalid.
func ErrInvalidSubscriptionURL(url string) error {
return fmt.Errorf("invalid subscription URL: %s", url)
}
var (
// ErrUserExists is returned when creating an already existing user.
ErrUserExists = errors.New("user already exists")
// ErrUserNotFound is returned when mutating a user that doesn't exist.
ErrUserNotFound = errors.New("user not found")
// ErrUsernameRequired is returned when creating a user without a username.
ErrUsernameRequired = errors.New("username required")
// ErrAuthenticate is returned when authentication fails.
ErrAuthenticate = errors.New("authentication failed")
)

View File

@@ -0,0 +1,10 @@
// +build !windows
package meta
import "os"
// renameFile will rename the source to target using os function.
func renameFile(oldpath, newpath string) error {
return os.Rename(oldpath, newpath)
}

View File

@@ -0,0 +1,14 @@
package meta
import "os"
// renameFile will rename the source to target using os function. If target exists it will be removed before renaming.
func renameFile(oldpath, newpath string) error {
if _, err := os.Stat(newpath); err == nil {
if err = os.Remove(newpath); err != nil {
return err
}
}
return os.Rename(oldpath, newpath)
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,393 @@
package meta;
//========================================================================
//
// Metadata
//
//========================================================================
message Data {
required uint64 Term = 1;
required uint64 Index = 2;
required uint64 ClusterID = 3;
repeated NodeInfo Nodes = 4;
repeated DatabaseInfo Databases = 5;
repeated UserInfo Users = 6;
required uint64 MaxNodeID = 7;
required uint64 MaxShardGroupID = 8;
required uint64 MaxShardID = 9;
// added for 0.10.0
repeated NodeInfo DataNodes = 10;
repeated NodeInfo MetaNodes = 11;
}
message NodeInfo {
required uint64 ID = 1;
required string Host = 2;
optional string TCPHost = 3;
}
message DatabaseInfo {
required string Name = 1;
required string DefaultRetentionPolicy = 2;
repeated RetentionPolicyInfo RetentionPolicies = 3;
repeated ContinuousQueryInfo ContinuousQueries = 4;
}
message RetentionPolicySpec {
optional string Name = 1;
optional int64 Duration = 2;
optional int64 ShardGroupDuration = 3;
optional uint32 ReplicaN = 4;
}
message RetentionPolicyInfo {
required string Name = 1;
required int64 Duration = 2;
required int64 ShardGroupDuration = 3;
required uint32 ReplicaN = 4;
repeated ShardGroupInfo ShardGroups = 5;
repeated SubscriptionInfo Subscriptions = 6;
}
message ShardGroupInfo {
required uint64 ID = 1;
required int64 StartTime = 2;
required int64 EndTime = 3;
required int64 DeletedAt = 4;
repeated ShardInfo Shards = 5;
optional int64 TruncatedAt = 6;
}
message ShardInfo {
required uint64 ID = 1;
repeated uint64 OwnerIDs = 2 [deprecated=true];
repeated ShardOwner Owners = 3;
}
message SubscriptionInfo{
required string Name = 1;
required string Mode = 2;
repeated string Destinations = 3;
}
message ShardOwner {
required uint64 NodeID = 1;
}
message ContinuousQueryInfo {
required string Name = 1;
required string Query = 2;
}
message UserInfo {
required string Name = 1;
required string Hash = 2;
required bool Admin = 3;
repeated UserPrivilege Privileges = 4;
}
message UserPrivilege {
required string Database = 1;
required int32 Privilege = 2;
}
//========================================================================
//
// COMMANDS
//
//========================================================================
message Command {
extensions 100 to max;
enum Type {
CreateNodeCommand = 1;
DeleteNodeCommand = 2;
CreateDatabaseCommand = 3;
DropDatabaseCommand = 4;
CreateRetentionPolicyCommand = 5;
DropRetentionPolicyCommand = 6;
SetDefaultRetentionPolicyCommand = 7;
UpdateRetentionPolicyCommand = 8;
CreateShardGroupCommand = 9;
DeleteShardGroupCommand = 10;
CreateContinuousQueryCommand = 11;
DropContinuousQueryCommand = 12;
CreateUserCommand = 13;
DropUserCommand = 14;
UpdateUserCommand = 15;
SetPrivilegeCommand = 16;
SetDataCommand = 17;
SetAdminPrivilegeCommand = 18;
UpdateNodeCommand = 19;
CreateSubscriptionCommand = 21;
DropSubscriptionCommand = 22;
RemovePeerCommand = 23;
CreateMetaNodeCommand = 24;
CreateDataNodeCommand = 25;
UpdateDataNodeCommand = 26;
DeleteMetaNodeCommand = 27;
DeleteDataNodeCommand = 28;
SetMetaNodeCommand = 29;
DropShardCommand = 30;
}
required Type type = 1;
}
// This isn't used in >= 0.10.0. It is kept around for upgrade purposes.
// See CreateDataNodeCommand and CreateMetaNodeCommand instead.
message CreateNodeCommand {
extend Command {
optional CreateNodeCommand command = 101;
}
required string Host = 1;
required uint64 Rand = 2;
}
message DeleteNodeCommand {
extend Command {
optional DeleteNodeCommand command = 102;
}
required uint64 ID = 1;
required bool Force = 2;
}
message CreateDatabaseCommand {
extend Command {
optional CreateDatabaseCommand command = 103;
}
required string Name = 1;
optional RetentionPolicyInfo RetentionPolicy = 2;
}
message DropDatabaseCommand {
extend Command {
optional DropDatabaseCommand command = 104;
}
required string Name = 1;
}
message CreateRetentionPolicyCommand {
extend Command {
optional CreateRetentionPolicyCommand command = 105;
}
required string Database = 1;
required RetentionPolicyInfo RetentionPolicy = 2;
}
message DropRetentionPolicyCommand {
extend Command {
optional DropRetentionPolicyCommand command = 106;
}
required string Database = 1;
required string Name = 2;
}
message SetDefaultRetentionPolicyCommand {
extend Command {
optional SetDefaultRetentionPolicyCommand command = 107;
}
required string Database = 1;
required string Name = 2;
}
message UpdateRetentionPolicyCommand {
extend Command {
optional UpdateRetentionPolicyCommand command = 108;
}
required string Database = 1;
required string Name = 2;
optional string NewName = 3;
optional int64 Duration = 4;
optional uint32 ReplicaN = 5;
}
message CreateShardGroupCommand {
extend Command {
optional CreateShardGroupCommand command = 109;
}
required string Database = 1;
required string Policy = 2;
required int64 Timestamp = 3;
}
message DeleteShardGroupCommand {
extend Command {
optional DeleteShardGroupCommand command = 110;
}
required string Database = 1;
required string Policy = 2;
required uint64 ShardGroupID = 3;
}
message CreateContinuousQueryCommand {
extend Command {
optional CreateContinuousQueryCommand command = 111;
}
required string Database = 1;
required string Name = 2;
required string Query = 3;
}
message DropContinuousQueryCommand {
extend Command {
optional DropContinuousQueryCommand command = 112;
}
required string Database = 1;
required string Name = 2;
}
message CreateUserCommand {
extend Command {
optional CreateUserCommand command = 113;
}
required string Name = 1;
required string Hash = 2;
required bool Admin = 3;
}
message DropUserCommand {
extend Command {
optional DropUserCommand command = 114;
}
required string Name = 1;
}
message UpdateUserCommand {
extend Command {
optional UpdateUserCommand command = 115;
}
required string Name = 1;
required string Hash = 2;
}
message SetPrivilegeCommand {
extend Command {
optional SetPrivilegeCommand command = 116;
}
required string Username = 1;
required string Database = 2;
required int32 Privilege = 3;
}
message SetDataCommand {
extend Command {
optional SetDataCommand command = 117;
}
required Data Data = 1;
}
message SetAdminPrivilegeCommand {
extend Command {
optional SetAdminPrivilegeCommand command = 118;
}
required string Username = 1;
required bool Admin = 2;
}
message UpdateNodeCommand {
extend Command {
optional UpdateNodeCommand command = 119;
}
required uint64 ID = 1;
required string Host = 2;
}
message CreateSubscriptionCommand {
extend Command {
optional CreateSubscriptionCommand command = 121;
}
required string Name = 1;
required string Database = 2;
required string RetentionPolicy = 3;
required string Mode = 4;
repeated string Destinations = 5;
}
message DropSubscriptionCommand {
extend Command {
optional DropSubscriptionCommand command = 122;
}
required string Name = 1;
required string Database = 2;
required string RetentionPolicy = 3;
}
message RemovePeerCommand {
extend Command {
optional RemovePeerCommand command = 123;
}
optional uint64 ID = 1;
required string Addr = 2;
}
message CreateMetaNodeCommand {
extend Command {
optional CreateMetaNodeCommand command = 124;
}
required string HTTPAddr = 1;
required string TCPAddr = 2;
required uint64 Rand = 3;
}
message CreateDataNodeCommand {
extend Command {
optional CreateDataNodeCommand command = 125;
}
required string HTTPAddr = 1;
required string TCPAddr = 2;
}
message UpdateDataNodeCommand {
extend Command {
optional UpdateDataNodeCommand command = 126;
}
required uint64 ID = 1;
required string Host = 2;
required string TCPHost = 3;
}
message DeleteMetaNodeCommand {
extend Command {
optional DeleteMetaNodeCommand command = 127;
}
required uint64 ID = 1;
}
message DeleteDataNodeCommand {
extend Command {
optional DeleteDataNodeCommand command = 128;
}
required uint64 ID = 1;
}
message Response {
required bool OK = 1;
optional string Error = 2;
optional uint64 Index = 3;
}
// SetMetaNodeCommand sets the initial meta node in a cluster. It also updates
// the meta node's addresses if a single-node host restarts and its hostname changes.
message SetMetaNodeCommand {
extend Command {
optional SetMetaNodeCommand command = 129;
}
required string HTTPAddr = 1;
required string TCPAddr = 2;
required uint64 Rand = 3;
}
message DropShardCommand {
extend Command {
optional DropShardCommand command = 130;
}
required uint64 ID = 1;
}

View File

@@ -0,0 +1,7 @@
package meta
import "golang.org/x/crypto/bcrypt"
func init() {
bcryptCost = bcrypt.MinCost
}

View File

@@ -0,0 +1,117 @@
package meta
import (
"fmt"
"github.com/influxdata/influxdb/influxql"
)
// QueryAuthorizer determines whether a user is authorized to execute a given query.
type QueryAuthorizer struct {
Client *Client
}
// NewQueryAuthorizer returns a new instance of QueryAuthorizer.
func NewQueryAuthorizer(c *Client) *QueryAuthorizer {
return &QueryAuthorizer{
Client: c,
}
}
// AuthorizeQuery authorizes u to execute q on database.
// Database can be "" for queries that do not require a database.
// If no user is provided it will return an error unless the query's first statement is to create
// a root user.
func (a *QueryAuthorizer) AuthorizeQuery(u User, query *influxql.Query, database string) error {
// Special case if no users exist.
if n := a.Client.UserCount(); n == 0 {
// Ensure there is at least one statement.
if len(query.Statements) > 0 {
// First statement in the query must create a user with admin privilege.
cu, ok := query.Statements[0].(*influxql.CreateUserStatement)
if ok && cu.Admin {
return nil
}
}
return &ErrAuthorize{
Query: query,
Database: database,
Message: "create admin user first or disable authentication",
}
}
if u == nil {
return &ErrAuthorize{
Query: query,
Database: database,
Message: "no user provided",
}
}
return u.AuthorizeQuery(database, query)
}
func (u *UserInfo) AuthorizeQuery(database string, query *influxql.Query) error {
// Admin privilege allows the user to execute all statements.
if u.Admin {
return nil
}
// Check each statement in the query.
for _, stmt := range query.Statements {
// Get the privileges required to execute the statement.
privs, err := stmt.RequiredPrivileges()
if err != nil {
return err
}
// Make sure the user has the privileges required to execute
// each statement.
for _, p := range privs {
if p.Admin {
// Admin privilege already checked so statement requiring admin
// privilege cannot be run.
return &ErrAuthorize{
Query: query,
User: u.Name,
Database: database,
Message: fmt.Sprintf("statement '%s', requires admin privilege", stmt),
}
}
// Use the db name specified by the statement or the db
// name passed by the caller if one wasn't specified by
// the statement.
db := p.Name
if db == "" {
db = database
}
if !u.AuthorizeDatabase(p.Privilege, db) {
return &ErrAuthorize{
Query: query,
User: u.Name,
Database: database,
Message: fmt.Sprintf("statement '%s', requires %s on %s", stmt, p.Privilege.String(), db),
}
}
}
}
return nil
}
// ErrAuthorize represents an authorization error.
type ErrAuthorize struct {
Query *influxql.Query
User string
Database string
Message string
}
// Error returns the text of the error.
func (e ErrAuthorize) Error() string {
if e.User == "" {
return fmt.Sprint(e.Message)
}
return fmt.Sprintf("%s not authorized to execute %s", e.User, e.Message)
}
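// Example usage (an illustrative sketch, not part of the original file; it
// assumes a configured *Client named client and an existing user "alice"):
//
//	authorizer := NewQueryAuthorizer(client)
//	q, err := influxql.ParseQuery(`SELECT value FROM cpu`)
//	if err != nil {
//		// handle the parse error
//	}
//	u, err := client.User("alice")
//	if err != nil {
//		// handle the lookup error
//	}
//	if err := authorizer.AuthorizeQuery(u, q, "db0"); err != nil {
//		// reject the query
//	}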

View File

@@ -0,0 +1,29 @@
package meta
import (
"fmt"
"github.com/influxdata/influxdb/influxql"
)
// WriteAuthorizer determines whether a user is authorized to write to a given database.
type WriteAuthorizer struct {
Client *Client
}
// NewWriteAuthorizer returns a new instance of WriteAuthorizer.
func NewWriteAuthorizer(c *Client) *WriteAuthorizer {
return &WriteAuthorizer{Client: c}
}
// AuthorizeWrite returns nil if the user has permission to write to the database.
func (a WriteAuthorizer) AuthorizeWrite(username, database string) error {
u, err := a.Client.User(username)
if err != nil || u == nil || !u.AuthorizeDatabase(influxql.WritePrivilege, database) {
return &ErrAuthorize{
Database: database,
Message: fmt.Sprintf("%s not authorized to write to %s", username, database),
}
}
return nil
}
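// Example usage (an illustrative sketch; assumes a configured *Client named client):
//
//	if err := NewWriteAuthorizer(client).AuthorizeWrite("bob", "db0"); err != nil {
//		// reject the write
//	}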

View File

@@ -0,0 +1,10 @@
OpenTSDB Input
============
InfluxDB supports both the telnet and HTTP OpenTSDB protocols. This means that InfluxDB can act as a drop-in replacement for your OpenTSDB system.
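For example, the same data point can be sent over a raw TCP connection using the telnet-style `put` command:

```
put sys.cpu.user 1356998400 42.5 host=webserver01 cpu=0
```

or POSTed as JSON to the `/api/put` HTTP endpoint:

```
{"metric":"sys.cpu.user","timestamp":1356998400,"value":42.5,"tags":{"host":"webserver01","cpu":"0"}}
```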
## Configuration
Each OpenTSDB input allows the binding address, target database, and target retention policy within that database to be set. If the database does not exist, it will be created automatically when the input is initialized. If the retention policy is not configured, the input uses the database's auto-created default retention policy. However, if a retention policy is explicitly set, it must already exist; the input will not create it.
The write consistency level can also be set. If any write operations do not meet the configured consistency guarantees, an error will occur and the data will not be indexed. The default consistency level is `ONE`.
The OpenTSDB input also performs internal batching of the points it receives, as batched writes to the database are more efficient. The default _batch size_ is 1000, the _pending batch_ factor is 5, and the _batch timeout_ is 1 second. This means the input will write batches of maximum size 1000, but if a batch has not reached 1000 points within 1 second of the first point being added to a batch, it will emit that batch regardless of size. The pending batch factor controls how many batches can be in memory at once, allowing the input to transmit a batch while still building other batches.
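A sample configuration might look like the following. The values shown are the service defaults; the key names mirror the service's `Config` struct, and the `[[opentsdb]]` table name is an assumption based on the conventions of the other input plugins:

```
[[opentsdb]]
enabled = true
bind-address = ":4242"
database = "opentsdb"
retention-policy = ""
consistency-level = "one"
tls-enabled = false
certificate = "/etc/ssl/influxdb.pem"
batch-size = 1000
batch-pending = 5
batch-timeout = "1s"
log-point-errors = true
```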

View File

@@ -0,0 +1,129 @@
package opentsdb
import (
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultBindAddress is the default address that the service binds to.
DefaultBindAddress = ":4242"
// DefaultDatabase is the default database used for writes.
DefaultDatabase = "opentsdb"
// DefaultRetentionPolicy is the default retention policy used for writes.
DefaultRetentionPolicy = ""
// DefaultConsistencyLevel is the default write consistency level.
DefaultConsistencyLevel = "one"
// DefaultBatchSize is the default OpenTSDB batch size.
DefaultBatchSize = 1000
// DefaultBatchTimeout is the default OpenTSDB batch timeout.
DefaultBatchTimeout = time.Second
// DefaultBatchPending is the default number of batches that can be in the queue.
DefaultBatchPending = 5
// DefaultCertificate is the default location of the certificate used when TLS is enabled.
DefaultCertificate = "/etc/ssl/influxdb.pem"
)
// Config represents the configuration of the OpenTSDB service.
type Config struct {
Enabled bool `toml:"enabled"`
BindAddress string `toml:"bind-address"`
Database string `toml:"database"`
RetentionPolicy string `toml:"retention-policy"`
ConsistencyLevel string `toml:"consistency-level"`
TLSEnabled bool `toml:"tls-enabled"`
Certificate string `toml:"certificate"`
BatchSize int `toml:"batch-size"`
BatchPending int `toml:"batch-pending"`
BatchTimeout toml.Duration `toml:"batch-timeout"`
LogPointErrors bool `toml:"log-point-errors"`
}
// NewConfig returns a new config for the service.
func NewConfig() Config {
return Config{
BindAddress: DefaultBindAddress,
Database: DefaultDatabase,
RetentionPolicy: DefaultRetentionPolicy,
ConsistencyLevel: DefaultConsistencyLevel,
TLSEnabled: false,
Certificate: DefaultCertificate,
BatchSize: DefaultBatchSize,
BatchPending: DefaultBatchPending,
BatchTimeout: toml.Duration(DefaultBatchTimeout),
LogPointErrors: true,
}
}
// WithDefaults takes the given config and returns a new config with any required
// default values set.
func (c *Config) WithDefaults() *Config {
d := *c
if d.BindAddress == "" {
d.BindAddress = DefaultBindAddress
}
if d.Database == "" {
d.Database = DefaultDatabase
}
if d.RetentionPolicy == "" {
d.RetentionPolicy = DefaultRetentionPolicy
}
if d.ConsistencyLevel == "" {
d.ConsistencyLevel = DefaultConsistencyLevel
}
if d.Certificate == "" {
d.Certificate = DefaultCertificate
}
if d.BatchSize == 0 {
d.BatchSize = DefaultBatchSize
}
if d.BatchPending == 0 {
d.BatchPending = DefaultBatchPending
}
if d.BatchTimeout == 0 {
d.BatchTimeout = toml.Duration(DefaultBatchTimeout)
}
return &d
}
// Configs wraps a slice of Config to aggregate diagnostics.
type Configs []Config
// Diagnostics returns one set of diagnostics for all of the Configs.
func (c Configs) Diagnostics() (*diagnostics.Diagnostics, error) {
d := &diagnostics.Diagnostics{
Columns: []string{"enabled", "bind-address", "database", "retention-policy", "batch-size", "batch-pending", "batch-timeout"},
}
for _, cc := range c {
if !cc.Enabled {
d.AddRow([]interface{}{false})
continue
}
r := []interface{}{true, cc.BindAddress, cc.Database, cc.RetentionPolicy, cc.BatchSize, cc.BatchPending, cc.BatchTimeout}
d.AddRow(r)
}
return d, nil
}
// Enabled returns true if any underlying Config is Enabled.
func (c Configs) Enabled() bool {
for _, cc := range c {
if cc.Enabled {
return true
}
}
return false
}

View File

@@ -0,0 +1,41 @@
package opentsdb_test
import (
"testing"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/opentsdb"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c opentsdb.Config
if _, err := toml.Decode(`
enabled = true
bind-address = ":9000"
database = "xxx"
consistency-level ="all"
tls-enabled = true
certificate = "/etc/ssl/cert.pem"
log-point-errors = true
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if !c.Enabled {
t.Fatalf("unexpected enabled: %v", c.Enabled)
} else if c.BindAddress != ":9000" {
t.Fatalf("unexpected bind address: %s", c.BindAddress)
} else if c.Database != "xxx" {
t.Fatalf("unexpected database: %s", c.Database)
} else if c.ConsistencyLevel != "all" {
t.Fatalf("unexpected consistency-level: %s", c.ConsistencyLevel)
} else if !c.TLSEnabled {
t.Fatalf("unexpected tls-enabled: %v", c.TLSEnabled)
} else if c.Certificate != "/etc/ssl/cert.pem" {
t.Fatalf("unexpected certificate: %s", c.Certificate)
} else if !c.LogPointErrors {
t.Fatalf("unexpected log-point-errors: %v", c.LogPointErrors)
}
}

View File

@@ -0,0 +1,198 @@
package opentsdb
import (
"bufio"
"compress/gzip"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb"
"github.com/influxdata/influxdb/models"
"github.com/uber-go/zap"
)
// Handler is an http.Handler for the OpenTSDB service.
type Handler struct {
Database string
RetentionPolicy string
PointsWriter interface {
WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
Logger zap.Logger
stats *Statistics
}
// ServeHTTP handles an HTTP request of the OpenTSDB REST API.
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/metadata/put":
w.WriteHeader(http.StatusNoContent)
case "/api/put":
h.servePut(w, r)
default:
http.NotFound(w, r)
}
}
// servePut implements OpenTSDB's HTTP /api/put endpoint.
func (h *Handler) servePut(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()
// Require POST method.
if r.Method != "POST" {
http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed)
return
}
// Wrap reader if it's gzip encoded.
var br *bufio.Reader
if r.Header.Get("Content-Encoding") == "gzip" {
zr, err := gzip.NewReader(r.Body)
if err != nil {
http.Error(w, "could not read gzip, "+err.Error(), http.StatusBadRequest)
return
}
br = bufio.NewReader(zr)
} else {
br = bufio.NewReader(r.Body)
}
// Lookahead at the first byte.
f, err := br.Peek(1)
if err != nil || len(f) != 1 {
http.Error(w, "peek error: "+err.Error(), http.StatusBadRequest)
return
}
// Peek to see if this is a JSON array.
var multi bool
switch f[0] {
case '{':
case '[':
multi = true
default:
http.Error(w, "expected JSON array or hash", http.StatusBadRequest)
return
}
// Decode JSON data into slice of points.
dps := make([]point, 1)
if dec := json.NewDecoder(br); multi {
if err = dec.Decode(&dps); err != nil {
http.Error(w, "json array decode error", http.StatusBadRequest)
return
}
} else {
if err = dec.Decode(&dps[0]); err != nil {
http.Error(w, "json object decode error", http.StatusBadRequest)
return
}
}
// Convert points into TSDB points.
points := make([]models.Point, 0, len(dps))
for i := range dps {
p := dps[i]
// Convert timestamp to Go time.
// Timestamps below ten billion are in seconds; larger values are in milliseconds.
var ts time.Time
if p.Time < 10000000000 {
ts = time.Unix(p.Time, 0)
} else {
ts = time.Unix(p.Time/1000, (p.Time%1000)*int64(time.Millisecond)) // remaining milliseconds to nanoseconds
}
pt, err := models.NewPoint(p.Metric, models.NewTags(p.Tags), map[string]interface{}{"value": p.Value}, ts)
if err != nil {
h.Logger.Info(fmt.Sprintf("Dropping point %v: %v", p.Metric, err))
if h.stats != nil {
atomic.AddInt64(&h.stats.InvalidDroppedPoints, 1)
}
continue
}
points = append(points, pt)
}
// Write points.
if err := h.PointsWriter.WritePointsPrivileged(h.Database, h.RetentionPolicy, models.ConsistencyLevelAny, points); influxdb.IsClientError(err) {
h.Logger.Info(fmt.Sprint("write series error: ", err))
http.Error(w, "write series error: "+err.Error(), http.StatusBadRequest)
return
} else if err != nil {
h.Logger.Info(fmt.Sprint("write series error: ", err))
http.Error(w, "write series error: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
// chanListener represents a listener that receives connections through a channel.
type chanListener struct {
addr net.Addr
ch chan net.Conn
done chan struct{}
closer sync.Once // closer ensures that Close is idempotent.
}
// newChanListener returns a new instance of chanListener.
func newChanListener(addr net.Addr) *chanListener {
return &chanListener{
addr: addr,
ch: make(chan net.Conn),
done: make(chan struct{}),
}
}
func (ln *chanListener) Accept() (net.Conn, error) {
errClosed := errors.New("network connection closed")
select {
case <-ln.done:
return nil, errClosed
case conn, ok := <-ln.ch:
if !ok {
return nil, errClosed
}
return conn, nil
}
}
// Close closes the connection channel.
func (ln *chanListener) Close() error {
ln.closer.Do(func() {
close(ln.done)
})
return nil
}
// Addr returns the network address of the listener.
func (ln *chanListener) Addr() net.Addr { return ln.addr }
// readerConn represents a net.Conn with an assignable reader.
type readerConn struct {
net.Conn
r io.Reader
}
// Read implements the io.Reader interface.
func (conn *readerConn) Read(b []byte) (n int, err error) { return conn.r.Read(b) }
// point represents an incoming JSON data point.
type point struct {
Metric string `json:"metric"`
Time int64 `json:"timestamp"`
Value float64 `json:"value"`
Tags map[string]string `json:"tags,omitempty"`
}
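// An example of the JSON accepted at /api/put (illustrative):
//
//	{"metric":"sys.cpu.nice","timestamp":1346846400,"value":18,"tags":{"host":"web01","dc":"lga"}}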

View File

@@ -0,0 +1,471 @@
// Package opentsdb provides a service for InfluxDB to ingest data via the opentsdb protocol.
package opentsdb // import "github.com/influxdata/influxdb/services/opentsdb"
import (
"bufio"
"bytes"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"net/textproto"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
// statistics gathered by the openTSDB package.
const (
statHTTPConnectionsHandled = "httpConnsHandled"
statTelnetConnectionsActive = "tlConnsActive"
statTelnetConnectionsHandled = "tlConnsHandled"
statTelnetPointsReceived = "tlPointsRx"
statTelnetBytesReceived = "tlBytesRx"
statTelnetReadError = "tlReadErr"
statTelnetBadLine = "tlBadLine"
statTelnetBadTime = "tlBadTime"
statTelnetBadTag = "tlBadTag"
statTelnetBadFloat = "tlBadFloat"
statBatchesTransmitted = "batchesTx"
statPointsTransmitted = "pointsTx"
statBatchesTransmitFail = "batchesTxFail"
statConnectionsActive = "connsActive"
statConnectionsHandled = "connsHandled"
statDroppedPointsInvalid = "droppedPointsInvalid"
)
// Service manages the listeners and handlers for the OpenTSDB telnet and HTTP endpoints.
type Service struct {
ln net.Listener // main listener
httpln *chanListener // http channel-based listener
wg sync.WaitGroup
tls bool
cert string
mu sync.RWMutex
ready bool // Has the required database been created?
done chan struct{} // Is the service closing or closed?
BindAddress string
Database string
RetentionPolicy string
PointsWriter interface {
WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
MetaClient interface {
CreateDatabase(name string) (*meta.DatabaseInfo, error)
}
// Points received over the telnet protocol are batched.
batchSize int
batchPending int
batchTimeout time.Duration
batcher *tsdb.PointBatcher
LogPointErrors bool
Logger zap.Logger
stats *Statistics
defaultTags models.StatisticTags
}
// NewService returns a new instance of Service.
func NewService(c Config) (*Service, error) {
// Use defaults where necessary.
d := c.WithDefaults()
s := &Service{
tls: d.TLSEnabled,
cert: d.Certificate,
BindAddress: d.BindAddress,
Database: d.Database,
RetentionPolicy: d.RetentionPolicy,
batchSize: d.BatchSize,
batchPending: d.BatchPending,
batchTimeout: time.Duration(d.BatchTimeout),
Logger: zap.New(zap.NullEncoder()),
LogPointErrors: d.LogPointErrors,
stats: &Statistics{},
defaultTags: models.StatisticTags{"bind": d.BindAddress},
}
return s, nil
}
// Open starts the service.
func (s *Service) Open() error {
s.mu.Lock()
defer s.mu.Unlock()
if !s.closed() {
return nil // Already open.
}
s.done = make(chan struct{})
s.Logger.Info("Starting OpenTSDB service")
s.batcher = tsdb.NewPointBatcher(s.batchSize, s.batchPending, s.batchTimeout)
s.batcher.Start()
// Start processing batches.
s.wg.Add(1)
go func() { defer s.wg.Done(); s.processBatches(s.batcher) }()
// Open listener.
if s.tls {
cert, err := tls.LoadX509KeyPair(s.cert, s.cert)
if err != nil {
return err
}
listener, err := tls.Listen("tcp", s.BindAddress, &tls.Config{
Certificates: []tls.Certificate{cert},
})
if err != nil {
return err
}
s.Logger.Info(fmt.Sprint("Listening on TLS: ", listener.Addr().String()))
s.ln = listener
} else {
listener, err := net.Listen("tcp", s.BindAddress)
if err != nil {
return err
}
s.Logger.Info(fmt.Sprint("Listening on: ", listener.Addr().String()))
s.ln = listener
}
s.httpln = newChanListener(s.ln.Addr())
// Begin listening for connections.
s.wg.Add(2)
go func() { defer s.wg.Done(); s.serve() }()
go func() { defer s.wg.Done(); s.serveHTTP() }()
return nil
}
// Close closes the openTSDB service.
func (s *Service) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.closed() {
return nil // Already closed.
}
close(s.done)
// Close the listeners.
if err := s.ln.Close(); err != nil {
return err
}
if err := s.httpln.Close(); err != nil {
return err
}
s.wg.Wait()
s.done = nil
if s.batcher != nil {
s.batcher.Stop()
}
return nil
}
// Closed returns true if the service is currently closed.
func (s *Service) Closed() bool {
s.mu.Lock()
defer s.mu.Unlock()
return s.closed()
}
func (s *Service) closed() bool {
select {
case <-s.done:
// Service is closing.
return true
default:
return s.done == nil
}
}
// createInternalStorage ensures that the required database has been created.
func (s *Service) createInternalStorage() error {
s.mu.RLock()
ready := s.ready
s.mu.RUnlock()
if ready {
return nil
}
if _, err := s.MetaClient.CreateDatabase(s.Database); err != nil {
return err
}
// The service is now ready.
s.mu.Lock()
s.ready = true
s.mu.Unlock()
return nil
}
// WithLogger sets the logger for the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "opentsdb"))
}
// Statistics maintains statistics for the openTSDB service.
type Statistics struct {
HTTPConnectionsHandled int64
ActiveTelnetConnections int64
HandledTelnetConnections int64
TelnetPointsReceived int64
TelnetBytesReceived int64
TelnetReadError int64
TelnetBadLine int64
TelnetBadTime int64
TelnetBadTag int64
TelnetBadFloat int64
BatchesTransmitted int64
PointsTransmitted int64
BatchesTransmitFail int64
ActiveConnections int64
HandledConnections int64
InvalidDroppedPoints int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "opentsdb",
Tags: s.defaultTags.Merge(tags),
Values: map[string]interface{}{
statHTTPConnectionsHandled: atomic.LoadInt64(&s.stats.HTTPConnectionsHandled),
statTelnetConnectionsActive: atomic.LoadInt64(&s.stats.ActiveTelnetConnections),
statTelnetConnectionsHandled: atomic.LoadInt64(&s.stats.HandledTelnetConnections),
statTelnetPointsReceived: atomic.LoadInt64(&s.stats.TelnetPointsReceived),
statTelnetBytesReceived: atomic.LoadInt64(&s.stats.TelnetBytesReceived),
statTelnetReadError: atomic.LoadInt64(&s.stats.TelnetReadError),
statTelnetBadLine: atomic.LoadInt64(&s.stats.TelnetBadLine),
statTelnetBadTime: atomic.LoadInt64(&s.stats.TelnetBadTime),
statTelnetBadTag: atomic.LoadInt64(&s.stats.TelnetBadTag),
statTelnetBadFloat: atomic.LoadInt64(&s.stats.TelnetBadFloat),
statBatchesTransmitted: atomic.LoadInt64(&s.stats.BatchesTransmitted),
statPointsTransmitted: atomic.LoadInt64(&s.stats.PointsTransmitted),
statBatchesTransmitFail: atomic.LoadInt64(&s.stats.BatchesTransmitFail),
statConnectionsActive: atomic.LoadInt64(&s.stats.ActiveConnections),
statConnectionsHandled: atomic.LoadInt64(&s.stats.HandledConnections),
statDroppedPointsInvalid: atomic.LoadInt64(&s.stats.InvalidDroppedPoints),
},
}}
}
// Addr returns the listener's address. Returns nil if listener is closed.
func (s *Service) Addr() net.Addr {
if s.ln == nil {
return nil
}
return s.ln.Addr()
}
// serve serves the handler from the listener.
func (s *Service) serve() {
for {
// Wait for next connection.
conn, err := s.ln.Accept()
if opErr, ok := err.(*net.OpError); ok && !opErr.Temporary() {
s.Logger.Info("openTSDB TCP listener closed")
return
} else if err != nil {
s.Logger.Info(fmt.Sprint("error accepting openTSDB: ", err.Error()))
continue
}
// Handle connection in separate goroutine.
go s.handleConn(conn)
}
}
// handleConn processes conn. This is run in a separate goroutine.
func (s *Service) handleConn(conn net.Conn) {
defer atomic.AddInt64(&s.stats.ActiveConnections, -1)
atomic.AddInt64(&s.stats.ActiveConnections, 1)
atomic.AddInt64(&s.stats.HandledConnections, 1)
// Read header into buffer to check if it's HTTP.
var buf bytes.Buffer
r := bufio.NewReader(io.TeeReader(conn, &buf))
// Attempt to parse connection as HTTP.
_, err := http.ReadRequest(r)
// Rebuild connection from buffer and remaining connection data.
bufr := bufio.NewReader(io.MultiReader(&buf, conn))
conn = &readerConn{Conn: conn, r: bufr}
// If no HTTP parsing error occurred then process as HTTP.
if err == nil {
atomic.AddInt64(&s.stats.HTTPConnectionsHandled, 1)
s.httpln.ch <- conn
return
}
// Otherwise handle in telnet format.
s.wg.Add(1)
s.handleTelnetConn(conn)
s.wg.Done()
}
// handleTelnetConn accepts OpenTSDB's telnet protocol.
// Each telnet command consists of a line of the form:
// put sys.cpu.user 1356998400 42.5 host=webserver01 cpu=0
func (s *Service) handleTelnetConn(conn net.Conn) {
defer conn.Close()
defer atomic.AddInt64(&s.stats.ActiveTelnetConnections, -1)
atomic.AddInt64(&s.stats.ActiveTelnetConnections, 1)
atomic.AddInt64(&s.stats.HandledTelnetConnections, 1)
// Get connection details.
remoteAddr := conn.RemoteAddr().String()
// Wrap connection in a text protocol reader.
r := textproto.NewReader(bufio.NewReader(conn))
for {
line, err := r.ReadLine()
if err != nil {
if err != io.EOF {
atomic.AddInt64(&s.stats.TelnetReadError, 1)
s.Logger.Info(fmt.Sprint("error reading from openTSDB connection ", err.Error()))
}
return
}
atomic.AddInt64(&s.stats.TelnetPointsReceived, 1)
atomic.AddInt64(&s.stats.TelnetBytesReceived, int64(len(line)))
inputStrs := strings.Fields(line)
if len(inputStrs) == 1 && inputStrs[0] == "version" {
conn.Write([]byte("InfluxDB TSDB proxy"))
continue
}
if len(inputStrs) < 4 || inputStrs[0] != "put" {
atomic.AddInt64(&s.stats.TelnetBadLine, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("malformed line '%s' from %s", line, remoteAddr))
}
continue
}
measurement := inputStrs[1]
tsStr := inputStrs[2]
valueStr := inputStrs[3]
tagStrs := inputStrs[4:]
var t time.Time
ts, err := strconv.ParseInt(tsStr, 10, 64)
if err != nil {
atomic.AddInt64(&s.stats.TelnetBadTime, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("malformed time '%s' from %s", tsStr, remoteAddr))
}
continue // Drop the point rather than letting ts default to zero.
}
switch len(tsStr) {
case 10:
t = time.Unix(ts, 0)
case 13:
t = time.Unix(ts/1000, (ts%1000)*int64(time.Millisecond)) // remaining milliseconds to nanoseconds
default:
atomic.AddInt64(&s.stats.TelnetBadTime, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("bad time '%s' must be 10 or 13 chars, from %s ", tsStr, remoteAddr))
}
continue
}
tags := make(map[string]string)
for t := range tagStrs {
parts := strings.SplitN(tagStrs[t], "=", 2)
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
atomic.AddInt64(&s.stats.TelnetBadTag, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("malformed tag data '%v' from %s", tagStrs[t], remoteAddr))
}
continue
}
k := parts[0]
tags[k] = parts[1]
}
fields := make(map[string]interface{})
fv, err := strconv.ParseFloat(valueStr, 64)
if err != nil {
atomic.AddInt64(&s.stats.TelnetBadFloat, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("bad float '%s' from %s", valueStr, remoteAddr))
}
continue
}
fields["value"] = fv
pt, err := models.NewPoint(measurement, models.NewTags(tags), fields, t)
if err != nil {
atomic.AddInt64(&s.stats.TelnetBadFloat, 1)
if s.LogPointErrors {
s.Logger.Info(fmt.Sprintf("bad float '%s' from %s", valueStr, remoteAddr))
}
continue
}
s.batcher.In() <- pt
}
}
// serveHTTP handles connections in HTTP format.
func (s *Service) serveHTTP() {
handler := &Handler{
Database: s.Database,
RetentionPolicy: s.RetentionPolicy,
PointsWriter: s.PointsWriter,
Logger: s.Logger,
stats: s.stats,
}
srv := &http.Server{Handler: handler}
srv.Serve(s.httpln)
}
// processBatches continually drains the given batcher and writes the batches to the database.
func (s *Service) processBatches(batcher *tsdb.PointBatcher) {
for {
select {
case <-s.done:
return
case batch := <-batcher.Out():
// Will attempt to create database if not yet created.
if err := s.createInternalStorage(); err != nil {
s.Logger.Info(fmt.Sprintf("Required database %s does not yet exist: %s", s.Database, err.Error()))
continue
}
if err := s.PointsWriter.WritePointsPrivileged(s.Database, s.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil {
atomic.AddInt64(&s.stats.BatchesTransmitted, 1)
atomic.AddInt64(&s.stats.PointsTransmitted, int64(len(batch)))
} else {
s.Logger.Info(fmt.Sprintf("failed to write point batch to database %q: %s", s.Database, err))
atomic.AddInt64(&s.stats.BatchesTransmitFail, 1)
}
}
}
}

View File

@@ -0,0 +1,295 @@
package opentsdb
import (
"errors"
"fmt"
"net"
"net/http"
"os"
"reflect"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/davecgh/go-spew/spew"
"github.com/influxdata/influxdb/internal"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
func Test_Service_OpenClose(t *testing.T) {
// Let the OS assign a random port since we are only opening and closing the service,
// not actually connecting to it.
service := NewTestService("db0", "127.0.0.1:0")
// Closing a closed service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
// Closing a closed service again is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Opening an already open service is fine.
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Reopening a previously opened service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Tidy up.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
}
// Ensure the service attempts to create its target database when needed.
func TestService_CreatesDatabase(t *testing.T) {
t.Parallel()
database := "db0"
s := NewTestService(database, "127.0.0.1:0")
s.WritePointsFn = func(string, string, models.ConsistencyLevel, []models.Point) error {
return nil
}
called := make(chan struct{})
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
if name != database {
t.Errorf("\n\texp = %s\n\tgot = %s\n", database, name)
}
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, errors.New("an error")
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
points, err := models.ParsePointsString(`cpu value=1`)
if err != nil {
t.Fatal(err)
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should not have been switched due to meta client error.
s.Service.mu.RLock()
ready := s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// This time MC won't cause an error.
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, nil
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should now have been switched since the meta client succeeded.
s.Service.mu.RLock()
ready = s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
s.Service.Close()
}
// Ensure a point can be written via the telnet protocol.
func TestService_Telnet(t *testing.T) {
t.Parallel()
s := NewTestService("db0", "127.0.0.1:0")
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
defer s.Service.Close()
// Mock points writer.
var called int32
s.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
atomic.StoreInt32(&called, 1)
if database != "db0" {
t.Fatalf("unexpected database: %s", database)
} else if retentionPolicy != "" {
t.Fatalf("unexpected retention policy: %s", retentionPolicy)
} else if !reflect.DeepEqual(points, []models.Point{
models.MustNewPoint(
"sys.cpu.user",
models.NewTags(map[string]string{"host": "webserver01", "cpu": "0"}),
map[string]interface{}{"value": 42.5},
time.Unix(1356998400, 0),
),
}) {
t.Fatalf("unexpected points: %#v", points)
}
return nil
}
// Open connection to the service.
conn, err := net.Dial("tcp", s.Service.Addr().String())
if err != nil {
t.Fatal(err)
}
defer conn.Close()
// Write telnet data and close.
if _, err := conn.Write([]byte("put sys.cpu.user 1356998400 42.5 host=webserver01 cpu=0")); err != nil {
t.Fatal(err)
}
if err := conn.Close(); err != nil {
t.Fatal(err)
}
tick := time.Tick(10 * time.Millisecond)
timeout := time.After(10 * time.Second)
for {
select {
case <-tick:
// Verify that the writer was called.
if atomic.LoadInt32(&called) > 0 {
return
}
case <-timeout:
t.Fatal("points writer not called")
}
}
}
// Ensure a point can be written via the HTTP protocol.
func TestService_HTTP(t *testing.T) {
t.Parallel()
s := NewTestService("db0", "127.0.0.1:0")
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
defer s.Service.Close()
// Mock points writer.
var called bool
s.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
called = true
if database != "db0" {
t.Fatalf("unexpected database: %s", database)
} else if retentionPolicy != "" {
t.Fatalf("unexpected retention policy: %s", retentionPolicy)
} else if !reflect.DeepEqual(points, []models.Point{
models.MustNewPoint(
"sys.cpu.nice",
models.NewTags(map[string]string{"dc": "lga", "host": "web01"}),
map[string]interface{}{"value": 18.0},
time.Unix(1346846400, 0),
),
}) {
spew.Dump(points)
t.Fatalf("unexpected points: %#v", points)
}
return nil
}
// Write HTTP request to server.
resp, err := http.Post("http://"+s.Service.Addr().String()+"/api/put", "application/json", strings.NewReader(`{"metric":"sys.cpu.nice", "timestamp":1346846400, "value":18, "tags":{"host":"web01", "dc":"lga"}}`))
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
// Verify status and body.
if resp.StatusCode != http.StatusNoContent {
t.Fatalf("unexpected status code: %d", resp.StatusCode)
}
// Verify that the writer was called.
if !called {
t.Fatal("points writer not called")
}
}
type TestService struct {
Service *Service
MetaClient *internal.MetaClientMock
WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
// NewTestService returns a new instance of TestService.
func NewTestService(database string, bind string) *TestService {
s, err := NewService(Config{
BindAddress: bind,
Database: database,
ConsistencyLevel: "one",
})
if err != nil {
panic(err)
}
service := &TestService{
Service: s,
MetaClient: &internal.MetaClientMock{},
}
service.MetaClient.CreateDatabaseFn = func(db string) (*meta.DatabaseInfo, error) {
if got, exp := db, database; got != exp {
return nil, fmt.Errorf("got %v, expected %v", got, exp)
}
return nil, nil
}
if testing.Verbose() {
service.Service.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
service.Service.MetaClient = service.MetaClient
service.Service.PointsWriter = service
return service
}
func (s *TestService) WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
return s.WritePointsFn(database, retentionPolicy, consistencyLevel, points)
}

View File

@@ -0,0 +1,13 @@
Shard Precreation
============
During normal operation when InfluxDB receives time-series data, it writes the data to files known as _shards_. Each shard only contains data for a specific range of time. Therefore, before data can be accepted by the system, the shards must exist and InfluxDB always checks that the required shards exist for every incoming data point. If the required shards do not exist, InfluxDB will create those shards. Because this requires a cluster to reach consensus, the process is not instantaneous and can temporarily impact write-throughput.
Since almost all time-series data is written sequentially in time, the system has an excellent idea of the timestamps of future data. Shard precreation takes advantage of this fact by creating required shards ahead of time, thereby ensuring the required shards exist by the time new time-series data actually arrives. Write-throughput is therefore not affected when data is first received for a range of time that would normally trigger shard creation.
Note that the shard-existence check must remain in place in the code, even with shard precreation. This is because while most data is written sequentially in time, this is not always the case. Data may be written with timestamps in the past, or farther in the future than shard precreation handles.
## Configuration
Shard precreation can be disabled if necessary, though this is not recommended. If it is disabled, then shards will only be created when explicitly needed.
The interval between runs of the shard precreation service, as well as the time-in-advance the shards are created, are also configurable. The defaults should work for most deployments.
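A sample configuration might look like the following. The values shown are the service defaults; the `[shard-precreation]` table name is an assumption:

```
[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"
```

With these defaults, a check that runs at 10:00 ensures that shard groups covering timestamps up to 10:30 already exist.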

View File

@@ -0,0 +1,67 @@
package precreator
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultCheckInterval is the shard precreation check time if none is specified.
DefaultCheckInterval = 10 * time.Minute
// DefaultAdvancePeriod is the default period ahead of the endtime of a shard group
// that its successor group is created.
DefaultAdvancePeriod = 30 * time.Minute
)
// Config represents the configuration for shard precreation.
type Config struct {
Enabled bool `toml:"enabled"`
CheckInterval toml.Duration `toml:"check-interval"`
AdvancePeriod toml.Duration `toml:"advance-period"`
}
// NewConfig returns a new Config with defaults.
func NewConfig() Config {
return Config{
Enabled: true,
CheckInterval: toml.Duration(DefaultCheckInterval),
AdvancePeriod: toml.Duration(DefaultAdvancePeriod),
}
}
// Validate returns an error if the Config is invalid.
func (c Config) Validate() error {
if !c.Enabled {
return nil
}
// TODO: Should we enforce a minimum interval?
// Polling every nanosecond, for instance, will greatly impact performance.
if c.CheckInterval <= 0 {
return errors.New("check-interval must be positive")
}
if c.AdvancePeriod <= 0 {
return errors.New("advance-period must be positive")
}
return nil
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"check-interval": c.CheckInterval,
"advance-period": c.AdvancePeriod,
}), nil
}

View File

@@ -0,0 +1,62 @@
package precreator_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/precreator"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c precreator.Config
if _, err := toml.Decode(`
enabled = true
check-interval = "2m"
advance-period = "10m"
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if !c.Enabled {
t.Fatalf("unexpected enabled state: %v", c.Enabled)
} else if time.Duration(c.CheckInterval) != 2*time.Minute {
t.Fatalf("unexpected check interval: %s", c.CheckInterval)
} else if time.Duration(c.AdvancePeriod) != 10*time.Minute {
t.Fatalf("unexpected advance period: %s", c.AdvancePeriod)
}
}
func TestConfig_Validate(t *testing.T) {
c := precreator.NewConfig()
if err := c.Validate(); err != nil {
t.Fatalf("unexpected validation fail from NewConfig: %s", err)
}
c = precreator.NewConfig()
c.CheckInterval = 0
if err := c.Validate(); err == nil {
t.Fatal("expected error for check-interval = 0, got nil")
}
c = precreator.NewConfig()
c.CheckInterval *= -1
if err := c.Validate(); err == nil {
t.Fatal("expected error for negative check-interval, got nil")
}
c = precreator.NewConfig()
c.AdvancePeriod = 0
if err := c.Validate(); err == nil {
t.Fatal("expected error for advance-period = 0, got nil")
}
c = precreator.NewConfig()
c.AdvancePeriod *= -1
if err := c.Validate(); err == nil {
t.Fatal("expected error for negative advance-period, got nil")
}
}

View File

@@ -0,0 +1,96 @@
// Package precreator provides the shard precreation service.
package precreator // import "github.com/influxdata/influxdb/services/precreator"
import (
"fmt"
"sync"
"time"
"github.com/uber-go/zap"
)
// Service manages the shard precreation service.
type Service struct {
checkInterval time.Duration
advancePeriod time.Duration
Logger zap.Logger
done chan struct{}
wg sync.WaitGroup
MetaClient interface {
PrecreateShardGroups(now, cutoff time.Time) error
}
}
// NewService returns an instance of the precreation service.
func NewService(c Config) (*Service, error) {
s := Service{
checkInterval: time.Duration(c.CheckInterval),
advancePeriod: time.Duration(c.AdvancePeriod),
Logger: zap.New(zap.NullEncoder()),
}
return &s, nil
}
// WithLogger sets the logger for the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "shard-precreation"))
}
// Open starts the precreation service.
func (s *Service) Open() error {
if s.done != nil {
return nil
}
s.Logger.Info(fmt.Sprintf("Starting precreation service with check interval of %s, advance period of %s",
s.checkInterval, s.advancePeriod))
s.done = make(chan struct{})
s.wg.Add(1)
go s.runPrecreation()
return nil
}
// Close stops the precreation service.
func (s *Service) Close() error {
if s.done == nil {
return nil
}
close(s.done)
s.wg.Wait()
s.done = nil
return nil
}
// runPrecreation continually checks if resources need precreation.
func (s *Service) runPrecreation() {
defer s.wg.Done()
for {
select {
case <-time.After(s.checkInterval):
if err := s.precreate(time.Now().UTC()); err != nil {
s.Logger.Info(fmt.Sprintf("failed to precreate shards: %s", err.Error()))
}
case <-s.done:
s.Logger.Info("Precreation service terminating")
return
}
}
}
// precreate performs actual resource precreation.
func (s *Service) precreate(now time.Time) error {
cutoff := now.Add(s.advancePeriod).UTC()
if err := s.MetaClient.PrecreateShardGroups(now, cutoff); err != nil {
return err
}
return nil
}

View File

@@ -0,0 +1,55 @@
package precreator
import (
"sync"
"testing"
"time"
"github.com/influxdata/influxdb/toml"
)
func Test_ShardPrecreation(t *testing.T) {
t.Parallel()
now := time.Now().UTC()
advancePeriod := 5 * time.Minute
// A test meta client that records the precreation call and verifies its cutoff argument.
var wg sync.WaitGroup
wg.Add(1)
ms := metaClient{
PrecreateShardGroupsFn: func(v, u time.Time) error {
wg.Done()
if u != now.Add(advancePeriod) {
t.Fatalf("precreation called with wrong time, got %s, exp %s", u, now)
}
return nil
},
}
srv, err := NewService(Config{
CheckInterval: toml.Duration(time.Minute),
AdvancePeriod: toml.Duration(advancePeriod),
})
if err != nil {
t.Fatalf("failed to create shard precreation service: %s", err.Error())
}
srv.MetaClient = ms
err = srv.precreate(now)
if err != nil {
t.Fatalf("failed to precreate shards: %s", err.Error())
}
wg.Wait() // Ensure metaClient test function is called.
}
// metaClient is a mock implementation of the precreation service's meta client.
type metaClient struct {
PrecreateShardGroupsFn func(now, cutoff time.Time) error
}
func (m metaClient) PrecreateShardGroups(now, cutoff time.Time) error {
return m.PrecreateShardGroupsFn(now, cutoff)
}

View File

@@ -0,0 +1,49 @@
package retention
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
// Config represents the configuration for the retention service.
type Config struct {
Enabled bool `toml:"enabled"`
CheckInterval toml.Duration `toml:"check-interval"`
}
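// In influxdb.conf these settings are typically supplied via the [retention]
// section, for example:
//
//	[retention]
//	  enabled = true
//	  check-interval = "30m"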
// NewConfig returns an instance of Config with defaults.
func NewConfig() Config {
return Config{Enabled: true, CheckInterval: toml.Duration(30 * time.Minute)}
}
// Validate returns an error if the Config is invalid.
func (c Config) Validate() error {
if !c.Enabled {
return nil
}
// TODO: Should we enforce a minimum interval?
// Polling every nanosecond, for instance, will greatly impact performance.
if c.CheckInterval <= 0 {
return errors.New("check-interval must be positive")
}
return nil
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"check-interval": c.CheckInterval,
}), nil
}

View File

@@ -0,0 +1,46 @@
package retention_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/retention"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c retention.Config
if _, err := toml.Decode(`
enabled = true
check-interval = "1s"
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Fatalf("unexpected enabled state: %v", c.Enabled)
} else if time.Duration(c.CheckInterval) != time.Second {
t.Fatalf("unexpected check interval: %v", c.CheckInterval)
}
}
func TestConfig_Validate(t *testing.T) {
c := retention.NewConfig()
if err := c.Validate(); err != nil {
t.Fatalf("unexpected validation fail from NewConfig: %s", err)
}
c = retention.NewConfig()
c.CheckInterval = 0
if err := c.Validate(); err == nil {
t.Fatal("expected error for check-interval = 0, got nil")
}
c = retention.NewConfig()
c.CheckInterval *= -1
if err := c.Validate(); err == nil {
t.Fatal("expected error for negative check-interval, got nil")
}
}

View File

@@ -0,0 +1,137 @@
// Package retention provides the retention policy enforcement service.
package retention // import "github.com/influxdata/influxdb/services/retention"
import (
"fmt"
"sync"
"time"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
// Service represents the retention policy enforcement service.
type Service struct {
MetaClient interface {
Databases() []meta.DatabaseInfo
DeleteShardGroup(database, policy string, id uint64) error
PruneShardGroups() error
}
TSDBStore interface {
ShardIDs() []uint64
DeleteShard(shardID uint64) error
}
checkInterval time.Duration
wg sync.WaitGroup
done chan struct{}
logger zap.Logger
}
// NewService returns a configured retention policy enforcement service.
func NewService(c Config) *Service {
return &Service{
checkInterval: time.Duration(c.CheckInterval),
done: make(chan struct{}),
logger: zap.New(zap.NullEncoder()),
}
}
// Open starts retention policy enforcement.
func (s *Service) Open() error {
s.logger.Info(fmt.Sprint("Starting retention policy enforcement service with check interval of ", s.checkInterval))
s.wg.Add(2)
go s.deleteShardGroups()
go s.deleteShards()
return nil
}
// Close stops retention policy enforcement.
func (s *Service) Close() error {
s.logger.Info("retention policy enforcement terminating")
close(s.done)
s.wg.Wait()
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.logger = log.With(zap.String("service", "retention"))
}
func (s *Service) deleteShardGroups() {
defer s.wg.Done()
ticker := time.NewTicker(s.checkInterval)
defer ticker.Stop()
for {
select {
case <-s.done:
return
case <-ticker.C:
dbs := s.MetaClient.Databases()
for _, d := range dbs {
for _, r := range d.RetentionPolicies {
for _, g := range r.ExpiredShardGroups(time.Now().UTC()) {
if err := s.MetaClient.DeleteShardGroup(d.Name, r.Name, g.ID); err != nil {
s.logger.Info(fmt.Sprintf("failed to delete shard group %d from database %s, retention policy %s: %s",
g.ID, d.Name, r.Name, err.Error()))
} else {
s.logger.Info(fmt.Sprintf("deleted shard group %d from database %s, retention policy %s",
g.ID, d.Name, r.Name))
}
}
}
}
}
}
}
func (s *Service) deleteShards() {
defer s.wg.Done()
ticker := time.NewTicker(s.checkInterval)
defer ticker.Stop()
for {
select {
case <-s.done:
return
case <-ticker.C:
s.logger.Info("retention policy shard deletion check commencing")
type deletionInfo struct {
db string
rp string
}
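// Collect every shard belonging to a deleted shard group, so any local
// copies can be removed from the TSDB store below.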
deletedShardIDs := make(map[uint64]deletionInfo)
dbs := s.MetaClient.Databases()
for _, d := range dbs {
for _, r := range d.RetentionPolicies {
for _, g := range r.DeletedShardGroups() {
for _, sh := range g.Shards {
deletedShardIDs[sh.ID] = deletionInfo{db: d.Name, rp: r.Name}
}
}
}
}
for _, id := range s.TSDBStore.ShardIDs() {
if info, ok := deletedShardIDs[id]; ok {
if err := s.TSDBStore.DeleteShard(id); err != nil {
s.logger.Error(fmt.Sprintf("failed to delete shard ID %d from database %s, retention policy %s: %s",
id, info.db, info.rp, err.Error()))
continue
}
s.logger.Info(fmt.Sprintf("shard ID %d from database %s, retention policy %s, deleted",
id, info.db, info.rp))
}
}
if err := s.MetaClient.PruneShardGroups(); err != nil {
s.logger.Info(fmt.Sprintf("error pruning shard groups: %s", err))
}
}
}
}

View File

@@ -0,0 +1,76 @@
package snapshotter
import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tcp"
)
// Client provides an API for the snapshotter service.
type Client struct {
host string
}
// NewClient returns a new *Client.
func NewClient(host string) *Client {
return &Client{host: host}
}
// MetastoreBackup returns a snapshot of the meta store.
func (c *Client) MetastoreBackup() (*meta.Data, error) {
req := &Request{
Type: RequestMetastoreBackup,
}
b, err := c.doRequest(req)
if err != nil {
return nil, err
}
// Ensure the response is large enough to hold the magic and the size.
if len(b) < 16 {
return nil, errors.New("invalid metadata received")
}
// Check the magic.
magic := binary.BigEndian.Uint64(b[:8])
if magic != BackupMagicHeader {
return nil, errors.New("invalid metadata received")
}
i := 8
// Size of the meta store bytes.
length := int(binary.BigEndian.Uint64(b[i : i+8]))
i += 8
if len(b) < i+length {
return nil, errors.New("truncated metadata received")
}
metaBytes := b[i : i+length]
// Unpack meta data.
var data meta.Data
if err := data.UnmarshalBinary(metaBytes); err != nil {
return nil, fmt.Errorf("unmarshal: %s", err)
}
return &data, nil
}
// doRequest sends a request to the snapshotter service and returns the result.
func (c *Client) doRequest(req *Request) ([]byte, error) {
// Connect to snapshotter service.
conn, err := tcp.Dial("tcp", c.host, MuxHeader)
if err != nil {
return nil, err
}
defer conn.Close()
// Write the request
if err := json.NewEncoder(conn).Encode(req); err != nil {
return nil, fmt.Errorf("encode snapshot request: %s", err)
}
// Read snapshot from the connection
var buf bytes.Buffer
_, err = io.Copy(&buf, conn)
return buf.Bytes(), err
}

View File

@@ -0,0 +1,292 @@
// Package snapshotter provides the meta snapshot service.
package snapshotter // import "github.com/influxdata/influxdb/services/snapshotter"
import (
"bytes"
"encoding"
"encoding/binary"
"encoding/json"
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/influxdata/influxdb"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
const (
// MuxHeader is the header byte used for the TCP muxer.
MuxHeader = 3
// BackupMagicHeader is the first 8 bytes used to identify and validate
// a metastore backup file
BackupMagicHeader = 0x59590101
)
// Service manages the listener for the snapshot endpoint.
type Service struct {
wg sync.WaitGroup
err chan error
Node *influxdb.Node
MetaClient interface {
encoding.BinaryMarshaler
Database(name string) *meta.DatabaseInfo
}
TSDBStore *tsdb.Store
Listener net.Listener
Logger zap.Logger
}
// NewService returns a new instance of Service.
func NewService() *Service {
return &Service{
err: make(chan error),
Logger: zap.New(zap.NullEncoder()),
}
}
// Open starts the service.
func (s *Service) Open() error {
s.Logger.Info("Starting snapshot service")
s.wg.Add(1)
go s.serve()
return nil
}
// Close implements the Service interface.
func (s *Service) Close() error {
if s.Listener != nil {
s.Listener.Close()
}
s.wg.Wait()
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "snapshot"))
}
// Err returns a channel for fatal out-of-band errors.
func (s *Service) Err() <-chan error { return s.err }
// serve serves snapshot requests from the listener.
func (s *Service) serve() {
defer s.wg.Done()
for {
// Wait for next connection.
conn, err := s.Listener.Accept()
if err != nil && strings.Contains(err.Error(), "connection closed") {
s.Logger.Info("snapshot listener closed")
return
} else if err != nil {
s.Logger.Info(fmt.Sprint("error accepting snapshot request: ", err.Error()))
continue
}
// Handle connection in separate goroutine.
s.wg.Add(1)
go func(conn net.Conn) {
defer s.wg.Done()
defer conn.Close()
if err := s.handleConn(conn); err != nil {
s.Logger.Info(err.Error())
}
}(conn)
}
}
// handleConn processes conn. This is run in a separate goroutine.
func (s *Service) handleConn(conn net.Conn) error {
r, err := s.readRequest(conn)
if err != nil {
return fmt.Errorf("read request: %s", err)
}
switch r.Type {
case RequestShardBackup:
if err := s.TSDBStore.BackupShard(r.ShardID, r.Since, conn); err != nil {
return err
}
case RequestMetastoreBackup:
if err := s.writeMetaStore(conn); err != nil {
return err
}
case RequestDatabaseInfo:
return s.writeDatabaseInfo(conn, r.Database)
case RequestRetentionPolicyInfo:
return s.writeRetentionPolicyInfo(conn, r.Database, r.RetentionPolicy)
default:
return fmt.Errorf("request type unknown: %v", r.Type)
}
return nil
}
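// writeMetaStore writes a metastore backup to conn. The stream is framed as an
// 8-byte BackupMagicHeader, an 8-byte meta blob length, the meta blob itself,
// an 8-byte node JSON length, and finally the JSON-encoded node. The client's
// MetastoreBackup reads the magic, length, and meta blob from this stream.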
func (s *Service) writeMetaStore(conn net.Conn) error {
// Retrieve and serialize the current meta data.
metaBlob, err := s.MetaClient.MarshalBinary()
if err != nil {
return fmt.Errorf("marshal meta: %s", err)
}
var nodeBytes bytes.Buffer
if err := json.NewEncoder(&nodeBytes).Encode(s.Node); err != nil {
return err
}
var numBytes [24]byte
binary.BigEndian.PutUint64(numBytes[:8], BackupMagicHeader)
binary.BigEndian.PutUint64(numBytes[8:16], uint64(len(metaBlob)))
binary.BigEndian.PutUint64(numBytes[16:24], uint64(nodeBytes.Len()))
// backup header followed by meta blob length
if _, err := conn.Write(numBytes[:16]); err != nil {
return err
}
if _, err := conn.Write(metaBlob); err != nil {
return err
}
if _, err := conn.Write(numBytes[16:24]); err != nil {
return err
}
if _, err := nodeBytes.WriteTo(conn); err != nil {
return err
}
return nil
}
// writeDatabaseInfo will write the relative paths of all shards in the database on
// this server into the connection.
func (s *Service) writeDatabaseInfo(conn net.Conn, database string) error {
res := Response{}
db := s.MetaClient.Database(database)
if db == nil {
return influxdb.ErrDatabaseNotFound(database)
}
for _, rp := range db.RetentionPolicies {
for _, sg := range rp.ShardGroups {
for _, sh := range sg.Shards {
// ignore if the shard isn't on the server
if s.TSDBStore.Shard(sh.ID) == nil {
continue
}
path, err := s.TSDBStore.ShardRelativePath(sh.ID)
if err != nil {
return err
}
res.Paths = append(res.Paths, path)
}
}
}
if err := json.NewEncoder(conn).Encode(res); err != nil {
return fmt.Errorf("encode resonse: %s", err.Error())
}
return nil
}
// writeRetentionPolicyInfo will write the relative paths of all shards in the retention policy on
// this server into the connection.
func (s *Service) writeRetentionPolicyInfo(conn net.Conn, database, retentionPolicy string) error {
res := Response{}
db := s.MetaClient.Database(database)
if db == nil {
return influxdb.ErrDatabaseNotFound(database)
}
var ret *meta.RetentionPolicyInfo
for _, rp := range db.RetentionPolicies {
if rp.Name == retentionPolicy {
ret = &rp
break
}
}
if ret == nil {
return influxdb.ErrRetentionPolicyNotFound(retentionPolicy)
}
for _, sg := range ret.ShardGroups {
for _, sh := range sg.Shards {
// ignore if the shard isn't on the server
if s.TSDBStore.Shard(sh.ID) == nil {
continue
}
path, err := s.TSDBStore.ShardRelativePath(sh.ID)
if err != nil {
return err
}
res.Paths = append(res.Paths, path)
}
}
if err := json.NewEncoder(conn).Encode(res); err != nil {
return fmt.Errorf("encode resonse: %s", err.Error())
}
return nil
}
// readRequest unmarshals a request object from the conn.
func (s *Service) readRequest(conn net.Conn) (Request, error) {
var r Request
if err := json.NewDecoder(conn).Decode(&r); err != nil {
return r, err
}
return r, nil
}
// RequestType indicates the type of snapshot request.
type RequestType uint8
const (
// RequestShardBackup represents a request for a shard backup.
RequestShardBackup RequestType = iota
// RequestMetastoreBackup represents a request to back up the metastore.
RequestMetastoreBackup
// RequestDatabaseInfo represents a request for database info.
RequestDatabaseInfo
// RequestRetentionPolicyInfo represents a request for retention policy info.
RequestRetentionPolicyInfo
)
// Request represents a request for a specific backup or for information
// about the shards on this server for a database or retention policy.
type Request struct {
Type RequestType
Database string
RetentionPolicy string
ShardID uint64
Since time.Time
}
// Response contains the relative paths for all the shards on this server
// that are in the requested database or retention policy.
type Response struct {
Paths []string
}

View File

@@ -0,0 +1 @@
package snapshotter_test

View File

@@ -0,0 +1,103 @@
package subscriber
import (
"errors"
"fmt"
"os"
"path/filepath"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultHTTPTimeout is the default HTTP timeout for a Config.
DefaultHTTPTimeout = 30 * time.Second
// DefaultWriteConcurrency is the default write concurrency for a Config.
DefaultWriteConcurrency = 40
// DefaultWriteBufferSize is the default write buffer size for a Config.
DefaultWriteBufferSize = 1000
)
// Config represents a configuration of the subscriber service.
type Config struct {
// Whether to enable the Subscriber service.
Enabled bool `toml:"enabled"`
HTTPTimeout toml.Duration `toml:"http-timeout"`
// InsecureSkipVerify gets passed to the http client, if true, it will
// skip https certificate verification. Defaults to false
InsecureSkipVerify bool `toml:"insecure-skip-verify"`
// CaCerts is the path to the PEM-encoded CA certs file. If this is the
// empty string, the default system certs will be used.
CaCerts string `toml:"ca-certs"`
// The number of writer goroutines processing the write channel.
WriteConcurrency int `toml:"write-concurrency"`
// The number of in-flight writes buffered in the write channel.
WriteBufferSize int `toml:"write-buffer-size"`
}
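// In influxdb.conf these settings are typically supplied via the [subscriber]
// section, for example:
//
//	[subscriber]
//	  enabled = true
//	  http-timeout = "30s"
//	  write-concurrency = 40
//	  write-buffer-size = 1000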
// NewConfig returns a new instance of a subscriber config.
func NewConfig() Config {
return Config{
Enabled: true,
HTTPTimeout: toml.Duration(DefaultHTTPTimeout),
InsecureSkipVerify: false,
CaCerts: "",
WriteConcurrency: DefaultWriteConcurrency,
WriteBufferSize: DefaultWriteBufferSize,
}
}
// Validate returns an error if the config is invalid.
func (c Config) Validate() error {
if c.HTTPTimeout <= 0 {
return errors.New("http-timeout must be greater than 0")
}
if c.CaCerts != "" && !fileExists(c.CaCerts) {
abspath, err := filepath.Abs(c.CaCerts)
if err != nil {
return fmt.Errorf("ca-certs file %s does not exist. Wrapped Error: %v", c.CaCerts, err)
}
return fmt.Errorf("ca-certs file %s does not exist", abspath)
}
if c.WriteBufferSize <= 0 {
return errors.New("write-buffer-size must be greater than 0")
}
if c.WriteConcurrency <= 0 {
return errors.New("write-concurrency must be greater than 0")
}
return nil
}
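// fileExists returns true if fileName exists and is not a directory.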
func fileExists(fileName string) bool {
info, err := os.Stat(fileName)
return err == nil && !info.IsDir()
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"http-timeout": c.HTTPTimeout,
"write-concurrency": c.WriteConcurrency,
"write-buffer-size": c.WriteBufferSize,
}), nil
}

View File

@@ -0,0 +1,111 @@
package subscriber_test
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/subscriber"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c subscriber.Config
if _, err := toml.Decode(`
enabled = false
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != false {
t.Errorf("unexpected enabled state: %v", c.Enabled)
}
if c.InsecureSkipVerify == true {
t.Errorf("InsecureSkipVerify: expected %v. got %v", false, c.InsecureSkipVerify)
}
}
func TestConfig_ParseTLSConfig(t *testing.T) {
abspath, err := filepath.Abs("/path/to/ca-certs.pem")
if err != nil {
t.Fatalf("Could not construct absolute path. %v", err)
}
// Parse configuration.
var c subscriber.Config
if _, err := toml.Decode(fmt.Sprintf(`
http-timeout = "60s"
enabled = true
ca-certs = '%s'
insecure-skip-verify = true
write-buffer-size = 1000
write-concurrency = 10
`, abspath), &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Errorf("unexpected enabled state: %v", c.Enabled)
}
if c.CaCerts != abspath {
t.Errorf("CaCerts: expected %s. got %s", abspath, c.CaCerts)
}
if c.InsecureSkipVerify != true {
t.Errorf("InsecureSkipVerify: expected %v. got %v", true, c.InsecureSkipVerify)
}
err = c.Validate()
if err == nil {
t.Errorf("Expected Validation to fail (%s doesn't exist)", abspath)
}
if err.Error() != fmt.Sprintf("ca-certs file %s does not exist", abspath) {
t.Errorf("Expected descriptive validation error. Instead got %v", err)
}
}
func TestConfig_ParseTLSConfigValidCerts(t *testing.T) {
tmpfile, err := ioutil.TempFile("", "ca-certs.crt")
if err != nil {
t.Fatalf("could not create temp file. error was: %v", err)
}
defer os.Remove(tmpfile.Name())
if _, err := tmpfile.Write([]byte("=== BEGIN CERTIFICATE ===\n=== END CERTIFICATE ===")); err != nil {
t.Fatalf("could not write temp file. error was: %v", err)
}
if err := tmpfile.Close(); err != nil {
t.Fatalf("could not close temp file. error was %v", err)
}
// Parse configuration.
var c subscriber.Config
if _, err := toml.Decode(fmt.Sprintf(`
http-timeout = "60s"
enabled = true
ca-certs = '%s'
insecure-skip-verify = false
write-buffer-size = 1000
write-concurrency = 10
`, tmpfile.Name()), &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Errorf("unexpected enabled state: %v", c.Enabled)
}
if c.CaCerts != tmpfile.Name() {
t.Errorf("CaCerts: expected %v. got %v", tmpfile.Name(), c.CaCerts)
}
if c.InsecureSkipVerify != false {
t.Errorf("InsecureSkipVerify: expected %v. got %v", false, c.InsecureSkipVerify)
}
if err := c.Validate(); err != nil {
t.Errorf("Expected Validation to succeed. Instead was: %v", err)
}
}

View File

@@ -0,0 +1,75 @@
package subscriber
import (
"crypto/tls"
"crypto/x509"
"io/ioutil"
"time"
"github.com/influxdata/influxdb/client/v2"
"github.com/influxdata/influxdb/coordinator"
)
// HTTP supports writing points over HTTP using the line protocol.
type HTTP struct {
c client.Client
}
// NewHTTP returns a new HTTP points writer with default options.
func NewHTTP(addr string, timeout time.Duration) (*HTTP, error) {
return NewHTTPS(addr, timeout, false, "")
}
// NewHTTPS returns a new HTTPS points writer with default options and HTTPS configured.
func NewHTTPS(addr string, timeout time.Duration, unsafeSsl bool, caCerts string) (*HTTP, error) {
tlsConfig, err := createTLSConfig(caCerts)
if err != nil {
return nil, err
}
conf := client.HTTPConfig{
Addr: addr,
Timeout: timeout,
InsecureSkipVerify: unsafeSsl,
TLSConfig: tlsConfig,
}
c, err := client.NewHTTPClient(conf)
if err != nil {
return nil, err
}
return &HTTP{c: c}, nil
}
// WritePoints writes points over HTTP transport.
func (h *HTTP) WritePoints(p *coordinator.WritePointsRequest) (err error) {
bp, _ := client.NewBatchPoints(client.BatchPointsConfig{
Database: p.Database,
RetentionPolicy: p.RetentionPolicy,
})
for _, pt := range p.Points {
bp.AddPoint(client.NewPointFrom(pt))
}
err = h.c.Write(bp)
return
}
func createTLSConfig(caCerts string) (*tls.Config, error) {
if caCerts == "" {
return nil, nil
}
return loadCaCerts(caCerts)
}
func loadCaCerts(caCerts string) (*tls.Config, error) {
caCert, err := ioutil.ReadFile(caCerts)
if err != nil {
return nil, err
}
caCertPool := x509.NewCertPool()
caCertPool.AppendCertsFromPEM(caCert)
return &tls.Config{
RootCAs: caCertPool,
}, nil
}

View File

@@ -0,0 +1,451 @@
// Package subscriber implements the subscriber service
// to forward incoming data to remote services.
package subscriber // import "github.com/influxdata/influxdb/services/subscriber"
import (
"errors"
"fmt"
"net/url"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/coordinator"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/monitor"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
// Statistics for the Subscriber service.
const (
statCreateFailures = "createFailures"
statPointsWritten = "pointsWritten"
statWriteFailures = "writeFailures"
)
// PointsWriter is an interface for writing points to a subscription destination.
// Only WritePoints() needs to be satisfied. PointsWriter implementations
// must be goroutine safe.
type PointsWriter interface {
WritePoints(p *coordinator.WritePointsRequest) error
}
// subEntry is a unique set that identifies a given subscription.
type subEntry struct {
db string
rp string
name string
}
// Service manages forking the incoming data from InfluxDB
// to defined third party destinations.
// Subscriptions are defined per database and retention policy.
type Service struct {
MetaClient interface {
Databases() []meta.DatabaseInfo
WaitForDataChanged() chan struct{}
}
NewPointsWriter func(u url.URL) (PointsWriter, error)
Logger zap.Logger
update chan struct{}
stats *Statistics
points chan *coordinator.WritePointsRequest
wg sync.WaitGroup
closed bool
closing chan struct{}
mu sync.Mutex
conf Config
subs map[subEntry]chanWriter
subMu sync.RWMutex
}
// NewService returns a subscriber service with given settings
func NewService(c Config) *Service {
s := &Service{
Logger: zap.New(zap.NullEncoder()),
closed: true,
stats: &Statistics{},
conf: c,
}
s.NewPointsWriter = s.newPointsWriter
return s
}
// Open starts the subscription service.
func (s *Service) Open() error {
if !s.conf.Enabled {
return nil // Service disabled.
}
s.mu.Lock()
defer s.mu.Unlock()
if s.MetaClient == nil {
return errors.New("no meta store")
}
s.closed = false
s.closing = make(chan struct{})
s.update = make(chan struct{})
s.points = make(chan *coordinator.WritePointsRequest, 100)
s.wg.Add(2)
go func() {
defer s.wg.Done()
s.run()
}()
go func() {
defer s.wg.Done()
s.waitForMetaUpdates()
}()
s.Logger.Info("opened service")
return nil
}
// Close terminates the subscription service.
// Closing a service that is already closed, or was never opened, is a no-op.
func (s *Service) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.closed {
return nil // Already closed.
}
s.closed = true
close(s.points)
close(s.closing)
s.wg.Wait()
s.Logger.Info("closed service")
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "subscriber"))
}
// Statistics maintains the statistics for the subscriber service.
type Statistics struct {
CreateFailures int64
PointsWritten int64
WriteFailures int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
statistics := []models.Statistic{{
Name: "subscriber",
Tags: tags,
Values: map[string]interface{}{
statCreateFailures: atomic.LoadInt64(&s.stats.CreateFailures),
statPointsWritten: atomic.LoadInt64(&s.stats.PointsWritten),
statWriteFailures: atomic.LoadInt64(&s.stats.WriteFailures),
},
}}
s.subMu.RLock()
defer s.subMu.RUnlock()
for _, sub := range s.subs {
statistics = append(statistics, sub.Statistics(tags)...)
}
return statistics
}
func (s *Service) waitForMetaUpdates() {
for {
ch := s.MetaClient.WaitForDataChanged()
select {
case <-ch:
err := s.Update()
if err != nil {
s.Logger.Info(fmt.Sprint("error updating subscriptions: ", err))
}
case <-s.closing:
return
}
}
}
// Update will start new and stop deleted subscriptions.
func (s *Service) Update() error {
// signal update
select {
case s.update <- struct{}{}:
return nil
case <-s.closing:
return errors.New("service closed cannot update")
}
}
func (s *Service) createSubscription(se subEntry, mode string, destinations []string) (PointsWriter, error) {
var bm BalanceMode
switch mode {
case "ALL":
bm = ALL
case "ANY":
bm = ANY
default:
return nil, fmt.Errorf("unknown balance mode %q", mode)
}
writers := make([]PointsWriter, 0, len(destinations))
stats := make([]writerStats, 0, len(destinations))
// add only valid destinations
for _, dest := range destinations {
u, err := url.Parse(dest)
if err != nil {
return nil, fmt.Errorf("failed to parse destination: %s", dest)
}
w, err := s.NewPointsWriter(*u)
if err != nil {
return nil, fmt.Errorf("failed to create writer for destination: %s", dest)
}
writers = append(writers, w)
stats = append(stats, writerStats{dest: dest})
}
return &balancewriter{
bm: bm,
writers: writers,
stats: stats,
defaultTags: models.StatisticTags{
"database": se.db,
"retention_policy": se.rp,
"name": se.name,
"mode": mode,
},
}, nil
}
// Points returns a channel into which write point requests can be sent.
func (s *Service) Points() chan<- *coordinator.WritePointsRequest {
return s.points
}
// run reads points from the points channel and writes them to the subscriptions.
func (s *Service) run() {
var wg sync.WaitGroup
s.subs = make(map[subEntry]chanWriter)
// Perform initial update
s.updateSubs(&wg)
for {
select {
case <-s.update:
s.updateSubs(&wg)
case p, ok := <-s.points:
if !ok {
// Close out all chanWriters
s.close(&wg)
return
}
for se, cw := range s.subs {
if p.Database == se.db && p.RetentionPolicy == se.rp {
select {
case cw.writeRequests <- p:
default:
atomic.AddInt64(&s.stats.WriteFailures, 1)
}
}
}
}
}
}
// close closes the existing channel writers.
func (s *Service) close(wg *sync.WaitGroup) {
s.subMu.Lock()
defer s.subMu.Unlock()
for _, cw := range s.subs {
cw.Close()
}
// Wait for them to finish
wg.Wait()
s.subs = nil
}
func (s *Service) updateSubs(wg *sync.WaitGroup) {
s.subMu.Lock()
defer s.subMu.Unlock()
if s.subs == nil {
s.subs = make(map[subEntry]chanWriter)
}
dbis := s.MetaClient.Databases()
allEntries := make(map[subEntry]bool)
// Add in new subscriptions
for _, dbi := range dbis {
for _, rpi := range dbi.RetentionPolicies {
for _, si := range rpi.Subscriptions {
se := subEntry{
db: dbi.Name,
rp: rpi.Name,
name: si.Name,
}
allEntries[se] = true
if _, ok := s.subs[se]; ok {
continue
}
sub, err := s.createSubscription(se, si.Mode, si.Destinations)
if err != nil {
atomic.AddInt64(&s.stats.CreateFailures, 1)
s.Logger.Info(fmt.Sprintf("Subscription creation failed for '%s' with error: %s", si.Name, err))
continue
}
cw := chanWriter{
writeRequests: make(chan *coordinator.WritePointsRequest, s.conf.WriteBufferSize),
pw: sub,
pointsWritten: &s.stats.PointsWritten,
failures: &s.stats.WriteFailures,
logger: s.Logger,
}
for i := 0; i < s.conf.WriteConcurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
cw.Run()
}()
}
s.subs[se] = cw
s.Logger.Info(fmt.Sprintf("added new subscription for %s %s", se.db, se.rp))
}
}
}
// Remove deleted subs
for se := range s.subs {
if !allEntries[se] {
// Close the chanWriter
s.subs[se].Close()
// Remove it from the set
delete(s.subs, se)
s.Logger.Info(fmt.Sprintf("deleted old subscription for %s %s", se.db, se.rp))
}
}
}
// newPointsWriter returns a new PointsWriter from the given URL.
func (s *Service) newPointsWriter(u url.URL) (PointsWriter, error) {
switch u.Scheme {
case "udp":
return NewUDP(u.Host), nil
case "http":
return NewHTTP(u.String(), time.Duration(s.conf.HTTPTimeout))
case "https":
if s.conf.InsecureSkipVerify {
s.Logger.Info("WARNING: 'insecure-skip-verify' is true. This will skip all certificate verifications.")
}
return NewHTTPS(u.String(), time.Duration(s.conf.HTTPTimeout), s.conf.InsecureSkipVerify, s.conf.CaCerts)
default:
return nil, fmt.Errorf("unknown destination scheme %s", u.Scheme)
}
}
// chanWriter sends WritePointsRequest to a PointsWriter received over a channel.
type chanWriter struct {
writeRequests chan *coordinator.WritePointsRequest
pw PointsWriter
pointsWritten *int64
failures *int64
logger zap.Logger
}
// Close closes the chanWriter.
func (c chanWriter) Close() {
close(c.writeRequests)
}
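// Run forwards write requests from the channel to the underlying PointsWriter,
// counting successfully written points and write failures.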
func (c chanWriter) Run() {
for wr := range c.writeRequests {
err := c.pw.WritePoints(wr)
if err != nil {
c.logger.Info(err.Error())
atomic.AddInt64(c.failures, 1)
} else {
atomic.AddInt64(c.pointsWritten, int64(len(wr.Points)))
}
}
}
// Statistics returns statistics for periodic monitoring.
func (c chanWriter) Statistics(tags map[string]string) []models.Statistic {
if m, ok := c.pw.(monitor.Reporter); ok {
return m.Statistics(tags)
}
return []models.Statistic{}
}
// BalanceMode specifies what balance mode to use on a subscription.
type BalanceMode int
const (
// ALL indicates to send writes to all subscriber destinations.
ALL BalanceMode = iota
// ANY indicates to send writes to a single subscriber destination, round robin.
ANY
)
type writerStats struct {
dest string
failures int64
pointsWritten int64
}
// balancewriter balances writes across PointsWriters according to its BalanceMode.
type balancewriter struct {
bm BalanceMode
writers []PointsWriter
stats []writerStats
defaultTags models.StatisticTags
i int
}
func (b *balancewriter) WritePoints(p *coordinator.WritePointsRequest) error {
var lastErr error
for range b.writers {
// round robin through destinations.
i := b.i
w := b.writers[i]
b.i = (b.i + 1) % len(b.writers)
// write points to destination.
err := w.WritePoints(p)
if err != nil {
lastErr = err
atomic.AddInt64(&b.stats[i].failures, 1)
} else {
atomic.AddInt64(&b.stats[i].pointsWritten, int64(len(p.Points)))
if b.bm == ANY {
break
}
}
}
return lastErr
}
// Statistics returns statistics for periodic monitoring.
func (b *balancewriter) Statistics(tags map[string]string) []models.Statistic {
statistics := make([]models.Statistic, len(b.stats))
for i := range b.stats {
subTags := b.defaultTags.Merge(tags)
subTags["destination"] = b.stats[i].dest
statistics[i] = models.Statistic{
Name: "subscriber",
Tags: subTags,
Values: map[string]interface{}{
statPointsWritten: atomic.LoadInt64(&b.stats[i].pointsWritten),
statWriteFailures: atomic.LoadInt64(&b.stats[i].failures),
},
}
}
return statistics
}

View File

@@ -0,0 +1,443 @@
package subscriber_test
import (
"net/url"
"testing"
"time"
"github.com/influxdata/influxdb/coordinator"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/services/subscriber"
)
type MetaClient struct {
DatabasesFn func() []meta.DatabaseInfo
WaitForDataChangedFn func() chan struct{}
}
func (m MetaClient) Databases() []meta.DatabaseInfo {
return m.DatabasesFn()
}
func (m MetaClient) WaitForDataChanged() chan struct{} {
return m.WaitForDataChangedFn()
}
type Subscription struct {
WritePointsFn func(*coordinator.WritePointsRequest) error
}
func (s Subscription) WritePoints(p *coordinator.WritePointsRequest) error {
return s.WritePointsFn(p)
}
func TestService_IgnoreNonMatch(t *testing.T) {
dataChanged := make(chan struct{})
ms := MetaClient{}
ms.WaitForDataChangedFn = func() chan struct{} {
return dataChanged
}
ms.DatabasesFn = func() []meta.DatabaseInfo {
return []meta.DatabaseInfo{
{
Name: "db0",
RetentionPolicies: []meta.RetentionPolicyInfo{
{
Name: "rp0",
Subscriptions: []meta.SubscriptionInfo{
{Name: "s0", Mode: "ANY", Destinations: []string{"udp://h0:9093", "udp://h1:9093"}},
},
},
},
},
}
}
prs := make(chan *coordinator.WritePointsRequest, 2)
urls := make(chan url.URL, 2)
newPointsWriter := func(u url.URL) (subscriber.PointsWriter, error) {
sub := Subscription{}
sub.WritePointsFn = func(p *coordinator.WritePointsRequest) error {
prs <- p
return nil
}
urls <- u
return sub, nil
}
s := subscriber.NewService(subscriber.NewConfig())
s.MetaClient = ms
s.NewPointsWriter = newPointsWriter
s.Open()
defer s.Close()
// Signal that data has changed
dataChanged <- struct{}{}
for _, expURLStr := range []string{"udp://h0:9093", "udp://h1:9093"} {
var u url.URL
expURL, _ := url.Parse(expURLStr)
select {
case u = <-urls:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected urls")
}
if expURL.String() != u.String() {
t.Fatalf("unexpected url: got %s exp %s", u.String(), expURL.String())
}
}
// Write points that don't match any subscription.
s.Points() <- &coordinator.WritePointsRequest{
Database: "db1",
RetentionPolicy: "rp0",
}
s.Points() <- &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp2",
}
// Shouldn't get any prs back
select {
case pr := <-prs:
t.Fatalf("unexpected points request %v", pr)
default:
}
close(dataChanged)
}
func TestService_ModeALL(t *testing.T) {
dataChanged := make(chan struct{})
ms := MetaClient{}
ms.WaitForDataChangedFn = func() chan struct{} {
return dataChanged
}
ms.DatabasesFn = func() []meta.DatabaseInfo {
return []meta.DatabaseInfo{
{
Name: "db0",
RetentionPolicies: []meta.RetentionPolicyInfo{
{
Name: "rp0",
Subscriptions: []meta.SubscriptionInfo{
{Name: "s0", Mode: "ALL", Destinations: []string{"udp://h0:9093", "udp://h1:9093"}},
},
},
},
},
}
}
prs := make(chan *coordinator.WritePointsRequest, 2)
urls := make(chan url.URL, 2)
newPointsWriter := func(u url.URL) (subscriber.PointsWriter, error) {
sub := Subscription{}
sub.WritePointsFn = func(p *coordinator.WritePointsRequest) error {
prs <- p
return nil
}
urls <- u
return sub, nil
}
s := subscriber.NewService(subscriber.NewConfig())
s.MetaClient = ms
s.NewPointsWriter = newPointsWriter
s.Open()
defer s.Close()
// Signal that data has changed
dataChanged <- struct{}{}
for _, expURLStr := range []string{"udp://h0:9093", "udp://h1:9093"} {
var u url.URL
expURL, _ := url.Parse(expURLStr)
select {
case u = <-urls:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected urls")
}
if expURL.String() != u.String() {
t.Fatalf("unexpected url: got %s exp %s", u.String(), expURL.String())
}
}
// Write points that match subscription with mode ALL
expPR := &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp0",
}
s.Points() <- expPR
// Should get pr back twice
for i := 0; i < 2; i++ {
var pr *coordinator.WritePointsRequest
select {
case pr = <-prs:
case <-time.After(10 * time.Millisecond):
t.Fatalf("expected points request: got %d exp 2", i)
}
if pr != expPR {
t.Errorf("unexpected points request: got %v, exp %v", pr, expPR)
}
}
close(dataChanged)
}
func TestService_ModeANY(t *testing.T) {
dataChanged := make(chan struct{})
ms := MetaClient{}
ms.WaitForDataChangedFn = func() chan struct{} {
return dataChanged
}
ms.DatabasesFn = func() []meta.DatabaseInfo {
return []meta.DatabaseInfo{
{
Name: "db0",
RetentionPolicies: []meta.RetentionPolicyInfo{
{
Name: "rp0",
Subscriptions: []meta.SubscriptionInfo{
{Name: "s0", Mode: "ANY", Destinations: []string{"udp://h0:9093", "udp://h1:9093"}},
},
},
},
},
}
}
prs := make(chan *coordinator.WritePointsRequest, 2)
urls := make(chan url.URL, 2)
newPointsWriter := func(u url.URL) (subscriber.PointsWriter, error) {
sub := Subscription{}
sub.WritePointsFn = func(p *coordinator.WritePointsRequest) error {
prs <- p
return nil
}
urls <- u
return sub, nil
}
s := subscriber.NewService(subscriber.NewConfig())
s.MetaClient = ms
s.NewPointsWriter = newPointsWriter
s.Open()
defer s.Close()
// Signal that data has changed
dataChanged <- struct{}{}
for _, expURLStr := range []string{"udp://h0:9093", "udp://h1:9093"} {
var u url.URL
expURL, _ := url.Parse(expURLStr)
select {
case u = <-urls:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected urls")
}
if expURL.String() != u.String() {
t.Fatalf("unexpected url: got %s exp %s", u.String(), expURL.String())
}
}
// Write points that match subscription with mode ANY
expPR := &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp0",
}
s.Points() <- expPR
// Validate we get the pr back just once
var pr *coordinator.WritePointsRequest
select {
case pr = <-prs:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected points request")
}
if pr != expPR {
t.Errorf("unexpected points request: got %v, exp %v", pr, expPR)
}
// shouldn't get it a second time
select {
case pr = <-prs:
t.Fatalf("unexpected points request %v", pr)
default:
}
close(dataChanged)
}
func TestService_Multiple(t *testing.T) {
dataChanged := make(chan struct{})
ms := MetaClient{}
ms.WaitForDataChangedFn = func() chan struct{} {
return dataChanged
}
ms.DatabasesFn = func() []meta.DatabaseInfo {
return []meta.DatabaseInfo{
{
Name: "db0",
RetentionPolicies: []meta.RetentionPolicyInfo{
{
Name: "rp0",
Subscriptions: []meta.SubscriptionInfo{
{Name: "s0", Mode: "ANY", Destinations: []string{"udp://h0:9093", "udp://h1:9093"}},
},
},
{
Name: "rp1",
Subscriptions: []meta.SubscriptionInfo{
{Name: "s1", Mode: "ALL", Destinations: []string{"udp://h2:9093", "udp://h3:9093"}},
},
},
},
},
}
}
prs := make(chan *coordinator.WritePointsRequest, 4)
urls := make(chan url.URL, 4)
newPointsWriter := func(u url.URL) (subscriber.PointsWriter, error) {
sub := Subscription{}
sub.WritePointsFn = func(p *coordinator.WritePointsRequest) error {
prs <- p
return nil
}
urls <- u
return sub, nil
}
s := subscriber.NewService(subscriber.NewConfig())
s.MetaClient = ms
s.NewPointsWriter = newPointsWriter
s.Open()
defer s.Close()
// Signal that data has changed
dataChanged <- struct{}{}
for _, expURLStr := range []string{"udp://h0:9093", "udp://h1:9093", "udp://h2:9093", "udp://h3:9093"} {
var u url.URL
expURL, _ := url.Parse(expURLStr)
select {
case u = <-urls:
case <-time.After(100 * time.Millisecond):
t.Fatal("expected urls")
}
if expURL.String() != u.String() {
t.Fatalf("unexpected url: got %s exp %s", u.String(), expURL.String())
}
}
// Write points that don't match any subscription.
s.Points() <- &coordinator.WritePointsRequest{
Database: "db1",
RetentionPolicy: "rp0",
}
s.Points() <- &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp2",
}
// Write points that match subscription with mode ANY
expPR := &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp0",
}
s.Points() <- expPR
// Validate we get the pr back just once
var pr *coordinator.WritePointsRequest
select {
case pr = <-prs:
case <-time.After(100 * time.Millisecond):
t.Fatal("expected points request")
}
if pr != expPR {
t.Errorf("unexpected points request: got %v, exp %v", pr, expPR)
}
// shouldn't get it a second time
select {
case pr = <-prs:
t.Fatalf("unexpected points request %v", pr)
default:
}
// Write points that match subscription with mode ALL
expPR = &coordinator.WritePointsRequest{
Database: "db0",
RetentionPolicy: "rp1",
}
s.Points() <- expPR
// Should get pr back twice
for i := 0; i < 2; i++ {
select {
case pr = <-prs:
case <-time.After(100 * time.Millisecond):
t.Fatalf("expected points request: got %d exp 2", i)
}
if pr != expPR {
t.Errorf("unexpected points request: got %v, exp %v", pr, expPR)
}
}
close(dataChanged)
}
func TestService_WaitForDataChanged(t *testing.T) {
dataChanged := make(chan struct{}, 1)
ms := MetaClient{}
ms.WaitForDataChangedFn = func() chan struct{} {
return dataChanged
}
calls := make(chan bool, 2)
ms.DatabasesFn = func() []meta.DatabaseInfo {
calls <- true
return nil
}
s := subscriber.NewService(subscriber.NewConfig())
s.MetaClient = ms
// Explicitly closed below for testing
s.Open()
// Should be called once during open
select {
case <-calls:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected call")
}
select {
case <-calls:
t.Fatal("unexpected call")
case <-time.After(time.Millisecond):
}
// Signal that data has changed
dataChanged <- struct{}{}
// Should be called once more after data changed
select {
case <-calls:
case <-time.After(10 * time.Millisecond):
t.Fatal("expected call")
}
select {
case <-calls:
t.Fatal("unexpected call")
case <-time.After(time.Millisecond):
}
// Close the service and ensure the meta client is not called again.
s.Close()
dataChanged <- struct{}{}
select {
case <-calls:
t.Fatal("unexpected call")
case <-time.After(time.Millisecond):
}
close(dataChanged)
}

View File

@@ -0,0 +1,42 @@
package subscriber
import (
"net"
"github.com/influxdata/influxdb/coordinator"
)
// UDP supports writing points over UDP using the line protocol.
type UDP struct {
addr string
}
// NewUDP returns a new UDP listener with default options.
func NewUDP(addr string) *UDP {
return &UDP{addr: addr}
}
// WritePoints writes points over UDP transport.
func (u *UDP) WritePoints(p *coordinator.WritePointsRequest) (err error) {
var addr *net.UDPAddr
var con *net.UDPConn
addr, err = net.ResolveUDPAddr("udp", u.addr)
if err != nil {
return
}
con, err = net.DialUDP("udp", nil, addr)
if err != nil {
return
}
defer con.Close()
for _, p := range p.Points {
_, err = con.Write([]byte(p.String()))
if err != nil {
return
}
}
return
}

View File

@@ -0,0 +1,128 @@
# The UDP Input
## A note on UDP/IP OS Buffer sizes
Some OSes (most notably, Linux) place very restrictive limits on the performance
of UDP protocols. It is _highly_ recommended that you increase these OS limits to
at least 25MB before trying to run UDP traffic to your instance.
25MB is just a recommendation, and should be adjusted to be in line with your
`read-buffer` plugin setting.
### Linux
Check the current UDP/IP receive buffer default and limit by typing the following commands:
```
sysctl net.core.rmem_max
sysctl net.core.rmem_default
```
If the values are less than 26214400 bytes (25MB) you should add the following lines to the /etc/sysctl.conf file:
```
net.core.rmem_max=26214400
net.core.rmem_default=26214400
```
Changes to /etc/sysctl.conf do not take effect until reboot. To update the values immediately, type the following commands as root:
```
sysctl -w net.core.rmem_max=26214400
sysctl -w net.core.rmem_default=26214400
```
### BSD/Darwin
On BSD/Darwin systems you need to add about 15% padding to the kernel limit
socket buffer. This means that if you want a 25MB buffer (26214400 bytes) you need to set
the kernel limit to `26214400*1.15 = 30146560`. This is not documented anywhere but
happens
[in the kernel here.](https://github.com/freebsd/freebsd/blob/master/sys/kern/uipc_sockbuf.c#L63-L64)
Check the current UDP/IP buffer limit by typing the following command:
```
sysctl kern.ipc.maxsockbuf
```
If the value is less than 30146560 bytes you should add the following lines to the /etc/sysctl.conf file (create it if necessary):
```
kern.ipc.maxsockbuf=30146560
```
Changes to /etc/sysctl.conf do not take effect until reboot. To update the values immediately, type the following commands as root:
```
sysctl -w kern.ipc.maxsockbuf=30146560
```
### Using the read-buffer option for the UDP listener
The `read-buffer` option allows users to set the buffer size for the UDP listener.
It sets the size of the operating system's receive buffer associated with
the UDP traffic. Keep in mind that the OS must be able
to handle the number set here or the UDP listener will error and exit.
`read-buffer = 0` means to use the OS default, which is usually too
small for high UDP performance.
## Configuration
Each UDP input allows the binding address, target database, and target retention policy to be set. If the database does not exist, it will be created automatically when the input is initialized. If the retention policy is not configured, then the default retention policy for the database is used. However if the retention policy is set, the retention policy must be explicitly created. The input will not automatically create it.
Each UDP input also performs internal batching of the points it receives, as batched writes to the database are more efficient. The default _batch size_ is 1000, _pending batch_ factor is 5, with a _batch timeout_ of 1 second. This means the input will write batches of maximum size 1000, but if a batch has not reached 1000 points within 1 second of the first point being added to a batch, it will emit that batch regardless of size. The pending batch factor controls how many batches can be in memory at once, allowing the input to transmit a batch, while still building other batches.
## Processing
The UDP input can receive up to 64KB per read, and splits the received data by newline. Each part is then interpreted as line-protocol encoded points, and parsed accordingly.
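As a minimal illustration (not part of the official tooling), the following Go sketch sends two line-protocol points to a UDP input in a single datagram. The address, measurement names, and field values are placeholders.
```
package main

import (
	"log"
	"net"
)

func main() {
	// Placeholder address; point this at your UDP input's bind-address.
	conn, err := net.Dial("udp", "localhost:8089")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	// Several points can share one datagram, separated by newlines,
	// matching how the listener splits each read before parsing.
	payload := "cpu,host=server01 value=0.64\nmem,host=server01 used=1024i\n"
	if _, err := conn.Write([]byte(payload)); err != nil {
		log.Fatal(err)
	}
}
```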
## UDP is connectionless
Since UDP is a connectionless protocol there is no way to signal to the data source if any error occurs, and if data has even been successfully indexed. This should be kept in mind when deciding if and when to use the UDP input. The built-in UDP statistics are useful for monitoring the UDP inputs.
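Assuming a running instance with a UDP input enabled, these statistics can be inspected from the `influx` CLI with:
```
SHOW STATS FOR 'udp'
```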
## Config Examples
One UDP listener
```
# influxd.conf
...
[[udp]]
enabled = true
bind-address = ":8089" # the bind address
database = "telegraf" # Name of the database that will be written to
batch-size = 5000 # will flush if this many points get buffered
batch-timeout = "1s" # will flush at least this often even if the batch-size is not reached
batch-pending = 10 # number of batches that may be pending in memory
read-buffer = 0 # UDP read buffer, 0 means to use OS default
...
```
Multiple UDP listeners
```
# influxd.conf
...
[[udp]]
# Default UDP for Telegraf
enabled = true
bind-address = ":8089" # the bind address
database = "telegraf" # Name of the database that will be written to
batch-size = 5000 # will flush if this many points get buffered
batch-timeout = "1s" # will flush at least this often even if the batch-size is not reached
batch-pending = 10 # number of batches that may be pending in memory
read-buffer = 0 # UDP read buffer size, 0 means to use OS default
[[udp]]
# High-traffic UDP
enabled = true
bind-address = ":80891" # the bind address
database = "mymetrics" # Name of the database that will be written to
batch-size = 5000 # will flush if this many points get buffered
batch-timeout = "1s" # will flush at least this often even if the batch-size is not reached
batch-pending = 100 # number of batches that may be pending in memory
read-buffer = 8388608 # (8*1024*1024) UDP read buffer size
...
```

View File

@@ -0,0 +1,127 @@
package udp
import (
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
const (
// DefaultBindAddress is the default binding interface if none is specified.
DefaultBindAddress = ":8089"
// DefaultDatabase is the default database for UDP traffic.
DefaultDatabase = "udp"
// DefaultRetentionPolicy is the default retention policy used for writes.
DefaultRetentionPolicy = ""
// DefaultBatchSize is the default UDP batch size.
DefaultBatchSize = 5000
// DefaultBatchPending is the default number of pending UDP batches.
DefaultBatchPending = 10
// DefaultBatchTimeout is the default UDP batch timeout.
DefaultBatchTimeout = time.Second
// DefaultPrecision is the default time precision used for UDP services.
DefaultPrecision = "n"
// DefaultReadBuffer is the default buffer size for the UDP listener.
// Sets the size of the operating system's receive buffer associated with
// the UDP traffic. Keep in mind that the OS must be able
// to handle the number set here or the UDP listener will error and exit.
//
// DefaultReadBuffer = 0 means to use the OS default, which is usually too
// small for high UDP performance.
//
// Increasing OS buffer limits:
// Linux: sudo sysctl -w net.core.rmem_max=<read-buffer>
// BSD/Darwin: sudo sysctl -w kern.ipc.maxsockbuf=<read-buffer>
DefaultReadBuffer = 0
)
// Config holds various configuration settings for the UDP listener.
type Config struct {
Enabled bool `toml:"enabled"`
BindAddress string `toml:"bind-address"`
Database string `toml:"database"`
RetentionPolicy string `toml:"retention-policy"`
BatchSize int `toml:"batch-size"`
BatchPending int `toml:"batch-pending"`
ReadBuffer int `toml:"read-buffer"`
BatchTimeout toml.Duration `toml:"batch-timeout"`
Precision string `toml:"precision"`
}
// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
return Config{
BindAddress: DefaultBindAddress,
Database: DefaultDatabase,
RetentionPolicy: DefaultRetentionPolicy,
BatchSize: DefaultBatchSize,
BatchPending: DefaultBatchPending,
BatchTimeout: toml.Duration(DefaultBatchTimeout),
}
}
// WithDefaults takes the given config and returns a new config with any required
// default values set.
func (c *Config) WithDefaults() *Config {
d := *c
if d.Database == "" {
d.Database = DefaultDatabase
}
if d.BatchSize == 0 {
d.BatchSize = DefaultBatchSize
}
if d.BatchPending == 0 {
d.BatchPending = DefaultBatchPending
}
if d.BatchTimeout == 0 {
d.BatchTimeout = toml.Duration(DefaultBatchTimeout)
}
if d.Precision == "" {
d.Precision = DefaultPrecision
}
if d.ReadBuffer == 0 {
d.ReadBuffer = DefaultReadBuffer
}
return &d
}
// Configs wraps a slice of Config to aggregate diagnostics.
type Configs []Config
// Diagnostics returns one set of diagnostics for all of the Configs.
func (c Configs) Diagnostics() (*diagnostics.Diagnostics, error) {
d := &diagnostics.Diagnostics{
Columns: []string{"enabled", "bind-address", "database", "retention-policy", "batch-size", "batch-pending", "batch-timeout"},
}
for _, cc := range c {
if !cc.Enabled {
d.AddRow([]interface{}{false})
continue
}
r := []interface{}{true, cc.BindAddress, cc.Database, cc.RetentionPolicy, cc.BatchSize, cc.BatchPending, cc.BatchTimeout}
d.AddRow(r)
}
return d, nil
}
// Enabled returns true if any underlying Config is Enabled.
func (c Configs) Enabled() bool {
for _, cc := range c {
if cc.Enabled {
return true
}
}
return false
}

View File

@@ -0,0 +1,43 @@
package udp_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/udp"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c udp.Config
if _, err := toml.Decode(`
enabled = true
bind-address = ":4444"
database = "awesomedb"
retention-policy = "awesomerp"
batch-size = 100
batch-pending = 9
batch-timeout = "10ms"
udp-payload-size = 1500
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if c.Enabled != true {
t.Fatalf("unexpected enabled: %v", c.Enabled)
} else if c.BindAddress != ":4444" {
t.Fatalf("unexpected bind address: %s", c.BindAddress)
} else if c.Database != "awesomedb" {
t.Fatalf("unexpected database: %s", c.Database)
} else if c.RetentionPolicy != "awesomerp" {
t.Fatalf("unexpected retention policy: %s", c.RetentionPolicy)
} else if c.BatchSize != 100 {
t.Fatalf("unexpected batch size: %d", c.BatchSize)
} else if c.BatchPending != 9 {
t.Fatalf("unexpected batch pending: %d", c.BatchPending)
} else if time.Duration(c.BatchTimeout) != (10 * time.Millisecond) {
t.Fatalf("unexpected batch timeout: %v", c.BatchTimeout)
}
}

View File

@@ -0,0 +1,300 @@
// Package udp provides the UDP input service for InfluxDB.
package udp // import "github.com/influxdata/influxdb/services/udp"
import (
"errors"
"fmt"
"net"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/influxdata/influxdb/tsdb"
"github.com/uber-go/zap"
)
const (
// Arbitrary, testing indicated that this doesn't typically get over 10
parserChanLen = 1000
// MaxUDPPayload is largest payload size the UDP service will accept.
MaxUDPPayload = 64 * 1024
)
// statistics gathered by the UDP package.
const (
statPointsReceived = "pointsRx"
statBytesReceived = "bytesRx"
statPointsParseFail = "pointsParseFail"
statReadFail = "readFail"
statBatchesTransmitted = "batchesTx"
statPointsTransmitted = "pointsTx"
statBatchesTransmitFail = "batchesTxFail"
)
// Service is a UDP service that will listen for incoming packets of line protocol.
type Service struct {
conn *net.UDPConn
addr *net.UDPAddr
wg sync.WaitGroup
mu sync.RWMutex
ready bool // Has the required database been created?
done chan struct{} // Is the service closing or closed?
parserChan chan []byte
batcher *tsdb.PointBatcher
config Config
PointsWriter interface {
WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
MetaClient interface {
CreateDatabase(name string) (*meta.DatabaseInfo, error)
}
Logger zap.Logger
stats *Statistics
defaultTags models.StatisticTags
}
// NewService returns a new instance of Service.
func NewService(c Config) *Service {
d := *c.WithDefaults()
return &Service{
config: d,
parserChan: make(chan []byte, parserChanLen),
batcher: tsdb.NewPointBatcher(d.BatchSize, d.BatchPending, time.Duration(d.BatchTimeout)),
Logger: zap.New(zap.NullEncoder()),
stats: &Statistics{},
defaultTags: models.StatisticTags{"bind": d.BindAddress},
}
}
// Open starts the service.
func (s *Service) Open() (err error) {
s.mu.Lock()
defer s.mu.Unlock()
if !s.closed() {
return nil // Already open.
}
s.done = make(chan struct{})
if s.config.BindAddress == "" {
return errors.New("bind address has to be specified in config")
}
if s.config.Database == "" {
return errors.New("database has to be specified in config")
}
s.addr, err = net.ResolveUDPAddr("udp", s.config.BindAddress)
if err != nil {
s.Logger.Info(fmt.Sprintf("Failed to resolve UDP address %s: %s", s.config.BindAddress, err))
return err
}
s.conn, err = net.ListenUDP("udp", s.addr)
if err != nil {
s.Logger.Info(fmt.Sprintf("Failed to set up UDP listener at address %s: %s", s.addr, err))
return err
}
if s.config.ReadBuffer != 0 {
err = s.conn.SetReadBuffer(s.config.ReadBuffer)
if err != nil {
s.Logger.Info(fmt.Sprintf("Failed to set UDP read buffer to %d: %s",
s.config.ReadBuffer, err))
return err
}
}
s.Logger.Info(fmt.Sprintf("Started listening on UDP: %s", s.config.BindAddress))
s.wg.Add(3)
go s.serve()
go s.parser()
go s.writer()
return nil
}
// Statistics maintains statistics for the UDP service.
type Statistics struct {
PointsReceived int64
BytesReceived int64
PointsParseFail int64
ReadFail int64
BatchesTransmitted int64
PointsTransmitted int64
BatchesTransmitFail int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "udp",
Tags: s.defaultTags.Merge(tags),
Values: map[string]interface{}{
statPointsReceived: atomic.LoadInt64(&s.stats.PointsReceived),
statBytesReceived: atomic.LoadInt64(&s.stats.BytesReceived),
statPointsParseFail: atomic.LoadInt64(&s.stats.PointsParseFail),
statReadFail: atomic.LoadInt64(&s.stats.ReadFail),
statBatchesTransmitted: atomic.LoadInt64(&s.stats.BatchesTransmitted),
statPointsTransmitted: atomic.LoadInt64(&s.stats.PointsTransmitted),
statBatchesTransmitFail: atomic.LoadInt64(&s.stats.BatchesTransmitFail),
},
}}
}
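
// writer drains the point batcher, making sure the target database exists
// before handing each batch to the points writer.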
func (s *Service) writer() {
defer s.wg.Done()
for {
select {
case batch := <-s.batcher.Out():
// Will attempt to create database if not yet created.
if err := s.createInternalStorage(); err != nil {
s.Logger.Info(fmt.Sprintf("Required database %s does not yet exist: %s", s.config.Database, err.Error()))
continue
}
if err := s.PointsWriter.WritePointsPrivileged(s.config.Database, s.config.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil {
atomic.AddInt64(&s.stats.BatchesTransmitted, 1)
atomic.AddInt64(&s.stats.PointsTransmitted, int64(len(batch)))
} else {
s.Logger.Info(fmt.Sprintf("failed to write point batch to database %q: %s", s.config.Database, err))
atomic.AddInt64(&s.stats.BatchesTransmitFail, 1)
}
case <-s.done:
return
}
}
}
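
// serve reads UDP packets off the connection and forwards each payload to
// the parser goroutine.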
func (s *Service) serve() {
defer s.wg.Done()
buf := make([]byte, MaxUDPPayload)
s.batcher.Start()
for {
select {
case <-s.done:
// We closed the connection, time to go.
return
default:
// Keep processing.
n, _, err := s.conn.ReadFromUDP(buf)
if err != nil {
atomic.AddInt64(&s.stats.ReadFail, 1)
s.Logger.Info(fmt.Sprintf("Failed to read UDP message: %s", err))
continue
}
atomic.AddInt64(&s.stats.BytesReceived, int64(n))
bufCopy := make([]byte, n)
copy(bufCopy, buf[:n])
s.parserChan <- bufCopy
}
}
}
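
// parser parses each payload as line protocol and pushes the resulting
// points into the batcher.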
func (s *Service) parser() {
defer s.wg.Done()
for {
select {
case <-s.done:
return
case buf := <-s.parserChan:
points, err := models.ParsePointsWithPrecision(buf, time.Now().UTC(), s.config.Precision)
if err != nil {
atomic.AddInt64(&s.stats.PointsParseFail, 1)
s.Logger.Info(fmt.Sprintf("Failed to parse points: %s", err))
continue
}
for _, point := range points {
s.batcher.In() <- point
}
atomic.AddInt64(&s.stats.PointsReceived, int64(len(points)))
}
}
}

// Close closes the service and the underlying listener.
func (s *Service) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.closed() {
return nil // Already closed.
}
close(s.done)
if s.conn != nil {
s.conn.Close()
}
s.batcher.Flush()
s.wg.Wait()
// Release all remaining resources.
s.done = nil
s.conn = nil
s.Logger.Info("Service closed")
return nil
}

// Closed returns true if the service is currently closed.
func (s *Service) Closed() bool {
s.mu.Lock()
defer s.mu.Unlock()
return s.closed()
}
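
// closed reports whether the service is closing or has been closed.
// The caller must hold s.mu.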
func (s *Service) closed() bool {
select {
case <-s.done:
// Service is closing.
return true
default:
}
return s.done == nil
}

// createInternalStorage ensures that the required database has been created.
func (s *Service) createInternalStorage() error {
s.mu.RLock()
ready := s.ready
s.mu.RUnlock()
if ready {
return nil
}
if _, err := s.MetaClient.CreateDatabase(s.config.Database); err != nil {
return err
}
// The service is now ready.
s.mu.Lock()
s.ready = true
s.mu.Unlock()
return nil
}

// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "udp"))
}

// Addr returns the listener's address.
func (s *Service) Addr() net.Addr {
return s.addr
}

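For context, a minimal client sketch (not part of this commit) that exercises the service above: each UDP datagram carries line protocol, which `serve` reads and `parser` turns into points for the batcher. The address and the sample point are assumptions; the address must match the service's configured bind-address.

```
package main

import (
	"log"
	"net"
)

func main() {
	// Hypothetical address; must match the service's bind-address config.
	conn, err := net.Dial("udp", "127.0.0.1:8089")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// One line-protocol point per datagram; the service parses each packet
	// with models.ParsePointsWithPrecision and feeds the points to its batcher.
	if _, err := conn.Write([]byte("cpu,host=server01 value=0.64\n")); err != nil {
		log.Fatal(err)
	}
}
```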
View File

@@ -0,0 +1,160 @@
package udp

import (
"errors"
"os"
"testing"
"time"
"github.com/influxdata/influxdb/internal"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)

func TestService_OpenClose(t *testing.T) {
service := NewTestService(nil)
// Closing a service that was never opened is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
// Closing a closed service again is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Opening an already open service is fine.
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Reopening a previously opened service is fine.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
if err := service.Service.Open(); err != nil {
t.Fatal(err)
}
// Tidy up.
if err := service.Service.Close(); err != nil {
t.Fatal(err)
}
}

func TestService_CreatesDatabase(t *testing.T) {
t.Parallel()
s := NewTestService(nil)
s.WritePointsFn = func(string, string, models.ConsistencyLevel, []models.Point) error {
return nil
}
called := make(chan struct{})
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
if name != s.Config.Database {
t.Errorf("\n\texp = %s\n\tgot = %s\n", s.Config.Database, name)
}
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, errors.New("an error")
}
if err := s.Service.Open(); err != nil {
t.Fatal(err)
}
points, err := models.ParsePointsString(`cpu value=1`)
if err != nil {
t.Fatal(err)
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should not have been switched due to meta client error.
s.Service.mu.RLock()
ready := s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, false; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
// This time the meta client won't return an error.
s.MetaClient.CreateDatabaseFn = func(name string) (*meta.DatabaseInfo, error) {
// Allow some time for the caller to return and the ready status to
// be set.
time.AfterFunc(10*time.Millisecond, func() { called <- struct{}{} })
return nil, nil
}
s.Service.batcher.In() <- points[0] // Send a point.
s.Service.batcher.Flush()
select {
case <-called:
// OK
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("Service should have attempted to create database")
}
// ready status should now be true.
s.Service.mu.RLock()
ready = s.Service.ready
s.Service.mu.RUnlock()
if got, exp := ready, true; got != exp {
t.Fatalf("got %v, expected %v", got, exp)
}
s.Service.Close()
}
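
// TestService wraps a Service together with its mocked dependencies for
// the tests above.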
type TestService struct {
Service *Service
Config Config
MetaClient *internal.MetaClientMock
WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error
}
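
// NewTestService builds a TestService wired to its mocks; passing a nil
// config uses the defaults.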
func NewTestService(c *Config) *TestService {
if c == nil {
defaultC := NewConfig()
c = &defaultC
}
service := &TestService{
Service: NewService(*c),
Config: *c,
MetaClient: &internal.MetaClientMock{},
}
if testing.Verbose() {
service.Service.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
service.Service.MetaClient = service.MetaClient
service.Service.PointsWriter = service
return service
}
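
// WritePointsPrivileged delegates to WritePointsFn so individual tests can
// intercept writes.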
func (s *TestService) WritePointsPrivileged(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error {
return s.WritePointsFn(database, retentionPolicy, consistencyLevel, points)
}
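
To make the batching behavior these tests lean on concrete, here is a rough standalone sketch of `tsdb.PointBatcher` as used by the service (assuming the vendored influxdb packages are on the import path). A batch is emitted once it reaches the configured size or, as here, when the batch timeout fires after the first point arrives; the tests above sidestep the timer by calling `Flush` directly.

```
package main

import (
	"fmt"
	"time"

	"github.com/influxdata/influxdb/models"
	"github.com/influxdata/influxdb/tsdb"
)

func main() {
	// Batch size 5, up to 5 pending batches, 100ms timeout.
	batcher := tsdb.NewPointBatcher(5, 5, 100*time.Millisecond)
	batcher.Start()

	points, err := models.ParsePointsString(`cpu value=1`)
	if err != nil {
		panic(err)
	}
	batcher.In() <- points[0] // One point: stays below the size threshold.

	// The timeout, not the size threshold, triggers this emit.
	batch := <-batcher.Out()
	fmt.Printf("got a batch of %d point(s)\n", len(batch))
}
```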