// Package monitor provides a service and associated functionality
// for InfluxDB to self-monitor internal statistics and diagnostics.
package monitor // import "github.com/influxdata/influxdb/monitor"

import (
	"errors"
	"expvar"
	"fmt"
	"os"
	"runtime"
	"sort"
	"strconv"
	"sync"
	"time"

	"github.com/influxdata/influxdb/models"
	"github.com/influxdata/influxdb/monitor/diagnostics"
	"github.com/influxdata/influxdb/services/meta"
	"github.com/uber-go/zap"
)

// Policy constants.
const (
	// MonitorRetentionPolicy is the name of the retention policy used by the monitor service.
	MonitorRetentionPolicy = "monitor"

	// MonitorRetentionPolicyDuration is the duration of the monitor retention policy.
	MonitorRetentionPolicyDuration = 7 * 24 * time.Hour

	// MonitorRetentionPolicyReplicaN is the default replication factor for the monitor retention policy.
	MonitorRetentionPolicyReplicaN = 1
)

// Monitor represents an instance of the monitor system.
type Monitor struct {
	// Build information for diagnostics.
	Version   string
	Commit    string
	Branch    string
	BuildTime string

	wg sync.WaitGroup

	mu                sync.RWMutex
	globalTags        map[string]string
	diagRegistrations map[string]diagnostics.Client
	reporter          Reporter
	done              chan struct{}
	storeCreated      bool
	storeEnabled      bool

	storeDatabase        string
	storeRetentionPolicy string
	storeInterval        time.Duration

	// MetaClient is used to create the internal storage database
	// and its retention policy.
	MetaClient interface {
		CreateDatabaseWithRetentionPolicy(name string, spec *meta.RetentionPolicySpec) (*meta.DatabaseInfo, error)
		Database(name string) *meta.DatabaseInfo
	}

	// Writer for pushing stats back into the database.
	PointsWriter PointsWriter

	Logger zap.Logger
}

// PointsWriter is a simplified interface for writing the points the monitor gathers.
type PointsWriter interface {
	WritePoints(database, retentionPolicy string, points models.Points) error
}
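
// Any type with a matching WritePoints method satisfies PointsWriter. As a
// minimal sketch (discardWriter is illustrative, not part of this package),
// a writer that silently drops its batch, e.g. for tests, could look like:
//
//	type discardWriter struct{}
//
//	func (discardWriter) WritePoints(database, retentionPolicy string, points models.Points) error {
//		return nil // accept and discard every batch
//	}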

// New returns a new instance of the monitor system.
func New(r Reporter, c Config) *Monitor {
	return &Monitor{
		globalTags:           make(map[string]string),
		diagRegistrations:    make(map[string]diagnostics.Client),
		reporter:             r,
		storeEnabled:         c.StoreEnabled,
		storeDatabase:        c.StoreDatabase,
		storeInterval:        time.Duration(c.StoreInterval),
		storeRetentionPolicy: MonitorRetentionPolicy,
		Logger:               zap.New(zap.NullEncoder()),
	}
}
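
// A typical lifecycle, sketched under the assumption that cfg is a valid
// monitor Config and that reporter, metaClient, and writer satisfy the
// interfaces above (all four names are illustrative, not defined here):
//
//	m := monitor.New(reporter, cfg)
//	m.MetaClient = metaClient // required when storing is enabled
//	if err := m.SetPointsWriter(writer); err != nil { // also opens the monitor
//		// handle error
//	}
//	defer m.Close()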

// open returns whether the monitor service is open.
func (m *Monitor) open() bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.done != nil
}

// Open opens the monitoring system. It registers the build, runtime,
// network, and system diagnostics clients and, if storing is enabled,
// starts the periodic recording of statistics.
func (m *Monitor) Open() error {
	if m.open() {
		m.Logger.Info("Monitor is already open")
		return nil
	}

	m.Logger.Info("Starting monitor system")

	// Self-register various stats and diagnostics.
	m.RegisterDiagnosticsClient("build", &build{
		Version: m.Version,
		Commit:  m.Commit,
		Branch:  m.Branch,
		Time:    m.BuildTime,
	})
	m.RegisterDiagnosticsClient("runtime", &goRuntime{})
	m.RegisterDiagnosticsClient("network", &network{})
	m.RegisterDiagnosticsClient("system", &system{})

	m.mu.Lock()
	m.done = make(chan struct{})
	m.mu.Unlock()

	// If enabled, record stats in an InfluxDB system.
	if m.storeEnabled {
		// Start periodic writes to the system.
		m.wg.Add(1)
		go m.storeStatistics()
	}

	return nil
}

// writePoints writes the given points to the internal storage. Write errors
// are logged but not returned, so a failed store never aborts the caller.
func (m *Monitor) writePoints(p models.Points) error {
	m.mu.RLock()
	defer m.mu.RUnlock()

	if err := m.PointsWriter.WritePoints(m.storeDatabase, m.storeRetentionPolicy, p); err != nil {
		m.Logger.Info(fmt.Sprintf("failed to store statistics: %s", err))
	}
	return nil
}

// Close closes the monitor system.
func (m *Monitor) Close() error {
	if !m.open() {
		m.Logger.Info("Monitor is already closed.")
		return nil
	}

	m.Logger.Info("shutting down monitor system")
	m.mu.Lock()
	close(m.done)
	m.mu.Unlock()

	m.wg.Wait()

	m.mu.Lock()
	m.done = nil
	m.mu.Unlock()

	m.DeregisterDiagnosticsClient("build")
	m.DeregisterDiagnosticsClient("runtime")
	m.DeregisterDiagnosticsClient("network")
	m.DeregisterDiagnosticsClient("system")
	return nil
}

// SetGlobalTag can be used to set tags that will appear on all points
// written by the Monitor.
func (m *Monitor) SetGlobalTag(key string, value interface{}) {
	m.mu.Lock()
	m.globalTags[key] = fmt.Sprintf("%v", value)
	m.mu.Unlock()
}
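
// Values are rendered with fmt.Sprintf("%v", ...), so non-string values are
// converted to their default string form, for example:
//
//	m.SetGlobalTag("nodeID", 42) // stored as the tag value "42"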

// RemoteWriterConfig represents the configuration of a remote writer.
type RemoteWriterConfig struct {
	RemoteAddr string
	NodeID     string
	Username   string
	Password   string
	ClusterID  uint64
}

// SetPointsWriter can be used to set a writer for the monitoring points.
func (m *Monitor) SetPointsWriter(pw PointsWriter) error {
	if !m.storeEnabled {
		// Not enabled, nothing to do.
		return nil
	}
	m.mu.Lock()
	m.PointsWriter = pw
	m.mu.Unlock()

	// Subsequent calls to an already open Monitor are just a no-op.
	return m.Open()
}

// WithLogger sets the logger for the Monitor.
func (m *Monitor) WithLogger(log zap.Logger) {
	m.Logger = log.With(zap.String("service", "monitor"))
}

// RegisterDiagnosticsClient registers a diagnostics client with the given
// name. A second registration under the same name replaces the first.
func (m *Monitor) RegisterDiagnosticsClient(name string, client diagnostics.Client) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.diagRegistrations[name] = client
	m.Logger.Info(fmt.Sprintf(`'%s' registered for diagnostics monitoring`, name))
}
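
// A diagnostics.Client needs only a Diagnostics method. A hedged sketch
// (uptime is illustrative, and RowFromMap is assumed from the diagnostics
// package imported above):
//
//	type uptime struct{ start time.Time }
//
//	func (u *uptime) Diagnostics() (*diagnostics.Diagnostics, error) {
//		return diagnostics.RowFromMap(map[string]interface{}{
//			"uptime": time.Since(u.start).String(),
//		}), nil
//	}
//
//	// m.RegisterDiagnosticsClient("uptime", &uptime{start: time.Now()})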

// DeregisterDiagnosticsClient deregisters a diagnostics client by name.
func (m *Monitor) DeregisterDiagnosticsClient(name string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	delete(m.diagRegistrations, name)
}

// Statistics returns the combined statistics for all expvar data. The given
// tags are added to each of the returned statistics.
func (m *Monitor) Statistics(tags map[string]string) ([]*Statistic, error) {
	var statistics []*Statistic

	expvar.Do(func(kv expvar.KeyValue) {
		// Skip built-in expvar stats.
		if kv.Key == "memstats" || kv.Key == "cmdline" {
			return
		}

		statistic := &Statistic{
			Statistic: models.NewStatistic(""),
		}

		// Add any supplied tags.
		for k, v := range tags {
			statistic.Tags[k] = v
		}

		// Every other top-level expvar value is a map.
		val := kv.Value.(*expvar.Map)

		val.Do(func(subKV expvar.KeyValue) {
			switch subKV.Key {
			case "name":
				// Unquote the statistic name.
				u, err := strconv.Unquote(subKV.Value.String())
				if err != nil {
					return
				}
				statistic.Name = u
			case "tags":
				// string-string tags map.
				n := subKV.Value.(*expvar.Map)
				n.Do(func(t expvar.KeyValue) {
					u, err := strconv.Unquote(t.Value.String())
					if err != nil {
						return
					}
					statistic.Tags[t.Key] = u
				})
			case "values":
				// string-interface map.
				n := subKV.Value.(*expvar.Map)
				n.Do(func(valueKV expvar.KeyValue) {
					var f interface{}
					var err error
					switch v := valueKV.Value.(type) {
					case *expvar.Float:
						f, err = strconv.ParseFloat(v.String(), 64)
						if err != nil {
							return
						}
					case *expvar.Int:
						f, err = strconv.ParseInt(v.String(), 10, 64)
						if err != nil {
							return
						}
					default:
						return
					}
					statistic.Values[valueKV.Key] = f
				})
			}
		})

		// If a registered client has no field data, don't include it in the results.
		if len(statistic.Values) == 0 {
			return
		}

		statistics = append(statistics, statistic)
	})

	// Add Go memstats.
	statistic := &Statistic{
		Statistic: models.NewStatistic("runtime"),
	}

	// Add any supplied tags to the Go memstats statistic.
	for k, v := range tags {
		statistic.Tags[k] = v
	}

	var rt runtime.MemStats
	runtime.ReadMemStats(&rt)
	statistic.Values = map[string]interface{}{
		"Alloc":        int64(rt.Alloc),
		"TotalAlloc":   int64(rt.TotalAlloc),
		"Sys":          int64(rt.Sys),
		"Lookups":      int64(rt.Lookups),
		"Mallocs":      int64(rt.Mallocs),
		"Frees":        int64(rt.Frees),
		"HeapAlloc":    int64(rt.HeapAlloc),
		"HeapSys":      int64(rt.HeapSys),
		"HeapIdle":     int64(rt.HeapIdle),
		"HeapInUse":    int64(rt.HeapInuse),
		"HeapReleased": int64(rt.HeapReleased),
		"HeapObjects":  int64(rt.HeapObjects),
		"PauseTotalNs": int64(rt.PauseTotalNs),
		"NumGC":        int64(rt.NumGC),
		"NumGoroutine": int64(runtime.NumGoroutine()),
	}
	statistics = append(statistics, statistic)

	statistics = m.gatherStatistics(statistics, tags)
	return statistics, nil
}
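
// Each top-level expvar map consumed above is expected to carry three keys:
// "name" (a quoted string, hence the Unquote), "tags" (a string-to-string
// map), and "values" (a map of numeric fields). Conceptually, in JSON form
// (for illustration only):
//
//	{"name": "shard", "tags": {"id": "1"}, "values": {"writeReq": 12}}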

// gatherStatistics appends the statistics produced by the registered reporter.
func (m *Monitor) gatherStatistics(statistics []*Statistic, tags map[string]string) []*Statistic {
	m.mu.RLock()
	defer m.mu.RUnlock()

	for _, s := range m.reporter.Statistics(tags) {
		statistics = append(statistics, &Statistic{Statistic: s})
	}
	return statistics
}

// Diagnostics fetches diagnostic information for each registered
// diagnostic client. It skips any clients that return an error when
// retrieving their diagnostics.
func (m *Monitor) Diagnostics() (map[string]*diagnostics.Diagnostics, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	diags := make(map[string]*diagnostics.Diagnostics, len(m.diagRegistrations))
	for k, v := range m.diagRegistrations {
		d, err := v.Diagnostics()
		if err != nil {
			continue
		}
		diags[k] = d
	}
	return diags, nil
}
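
// The returned map is keyed by the names passed to RegisterDiagnosticsClient,
// e.g. "build" or "runtime" for the clients registered in Open:
//
//	diags, _ := m.Diagnostics()
//	for name := range diags {
//		fmt.Println(name)
//	}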

// createInternalStorage ensures the internal storage has been created.
// The caller must hold m.mu.
func (m *Monitor) createInternalStorage() {
	if m.storeCreated {
		return
	}

	if di := m.MetaClient.Database(m.storeDatabase); di == nil {
		duration := MonitorRetentionPolicyDuration
		replicaN := MonitorRetentionPolicyReplicaN
		spec := meta.RetentionPolicySpec{
			Name:     MonitorRetentionPolicy,
			Duration: &duration,
			ReplicaN: &replicaN,
		}

		if _, err := m.MetaClient.CreateDatabaseWithRetentionPolicy(m.storeDatabase, &spec); err != nil {
			m.Logger.Info(fmt.Sprintf("failed to create database '%s', failed to create storage: %s",
				m.storeDatabase, err.Error()))
			return
		}
	}

	// Mark storage creation complete.
	m.storeCreated = true
}

// waitUntilInterval waits until the current time is an even multiple of the
// given interval, or until the monitor is closed.
func (m *Monitor) waitUntilInterval(d time.Duration) error {
	now := time.Now()
	until := now.Truncate(d).Add(d)
	timer := time.NewTimer(until.Sub(now))
	defer timer.Stop()

	select {
	case <-timer.C:
		return nil
	case <-m.done:
		return errors.New("interrupted")
	}
}
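
// As a worked example of the arithmetic above: with d = 10s and a current
// time of 12:00:07, now.Truncate(d) is 12:00:00, until is 12:00:10, and the
// timer fires after 3s, aligning subsequent writes on :00, :10, :20, ...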

// storeStatistics writes the statistics to an InfluxDB system.
func (m *Monitor) storeStatistics() {
	defer m.wg.Done()
	m.Logger.Info(fmt.Sprintf("Storing statistics in database '%s' retention policy '%s', at interval %s",
		m.storeDatabase, m.storeRetentionPolicy, m.storeInterval))

	hostname, _ := os.Hostname()
	m.SetGlobalTag("hostname", hostname)

	// Wait until an even interval to start recording monitor statistics.
	// If we are interrupted before the interval for some reason, exit early.
	if err := m.waitUntilInterval(m.storeInterval); err != nil {
		return
	}

	tick := time.NewTicker(m.storeInterval)
	defer tick.Stop()

	for {
		select {
		case now := <-tick.C:
			now = now.Truncate(m.storeInterval)
			func() {
				m.mu.Lock()
				defer m.mu.Unlock()
				m.createInternalStorage()
			}()

			stats, err := m.Statistics(m.globalTags)
			if err != nil {
				m.Logger.Info(fmt.Sprintf("failed to retrieve registered statistics: %s", err))
				return
			}

			// Write all stats in batches.
			batch := make(models.Points, 0, 5000)
			for _, s := range stats {
				pt, err := models.NewPoint(s.Name, models.NewTags(s.Tags), s.Values, now)
				if err != nil {
					m.Logger.Info(fmt.Sprintf("Dropping point %v: %v", s.Name, err))
					return
				}
				batch = append(batch, pt)
				if len(batch) == cap(batch) {
					m.writePoints(batch)
					batch = batch[:0]
				}
			}

			// Write the last batch.
			if len(batch) > 0 {
				m.writePoints(batch)
			}
		case <-m.done:
			m.Logger.Info("terminating storage of statistics")
			return
		}
	}
}

// Statistic represents the information returned by a single monitor client.
type Statistic struct {
	models.Statistic
}

// ValueNames returns a sorted list of the value names, if any.
func (s *Statistic) ValueNames() []string {
	a := make([]string, 0, len(s.Values))
	for k := range s.Values {
		a = append(a, k)
	}
	sort.Strings(a)
	return a
}

// Statistics is a slice of sortable statistics.
type Statistics []*Statistic

// Len implements sort.Interface.
func (a Statistics) Len() int { return len(a) }

// Less implements sort.Interface.
func (a Statistics) Less(i, j int) bool {
	return a[i].Name < a[j].Name
}

// Swap implements sort.Interface.
func (a Statistics) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
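
// Statistics exists so callers can order results by name, for example:
//
//	stats, _ := m.Statistics(nil)
//	sort.Sort(Statistics(stats))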