From 3a29b66132f561c910d827e8c7ae82997f7c1f30 Mon Sep 17 00:00:00 2001 From: "Austin S. Hemmelgarn" Date: Tue, 13 Feb 2024 06:56:20 -0500 Subject: Include Go plugin sources in main repository. (#16997) * Include Go plugin sources in main repository. * Fix CI issues. * Rename source tree. --- .../collectors/go.d.plugin/modules/ntpd/README.md | 1 + .../collectors/go.d.plugin/modules/ntpd/charts.go | 346 ++++++++++++++++++++ .../collectors/go.d.plugin/modules/ntpd/client.go | 89 ++++++ .../collectors/go.d.plugin/modules/ntpd/collect.go | 154 +++++++++ .../go.d.plugin/modules/ntpd/config_schema.json | 26 ++ .../go.d.plugin/modules/ntpd/integrations/ntpd.md | 228 +++++++++++++ .../go.d.plugin/modules/ntpd/metadata.yaml | 260 +++++++++++++++ src/go/collectors/go.d.plugin/modules/ntpd/ntpd.go | 111 +++++++ .../go.d.plugin/modules/ntpd/ntpd_test.go | 351 +++++++++++++++++++++ 9 files changed, 1566 insertions(+) create mode 120000 src/go/collectors/go.d.plugin/modules/ntpd/README.md create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/charts.go create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/client.go create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/collect.go create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/config_schema.json create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/integrations/ntpd.md create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/metadata.yaml create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/ntpd.go create mode 100644 src/go/collectors/go.d.plugin/modules/ntpd/ntpd_test.go (limited to 'src/go/collectors/go.d.plugin/modules/ntpd') diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/README.md b/src/go/collectors/go.d.plugin/modules/ntpd/README.md new file mode 120000 index 0000000000..bad92b03a2 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/README.md @@ -0,0 +1 @@ +integrations/ntpd.md \ No newline at end of file diff --git 
a/src/go/collectors/go.d.plugin/modules/ntpd/charts.go b/src/go/collectors/go.d.plugin/modules/ntpd/charts.go new file mode 100644 index 0000000000..dc9d183d04 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/charts.go @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package ntpd + +import ( + "fmt" + "strings" + + "github.com/netdata/go.d.plugin/agent/module" +) + +const ( + prioSystemOffset = module.Priority + iota + prioSystemJitter + prioSystemFrequency + prioSystemWander + prioSystemRootDelay + prioSystemRootDispersion + prioSystemStratum + prioSystemTimeConstant + prioSystemPrecision + + prioPeerOffset + prioPeerDelay + prioPeerDispersion + prioPeerJitter + prioPeerXleave + prioPeerRootDelay + prioPeerRootDispersion + prioPeerStratum + prioPeerHostMode + prioPeerPeerMode + prioPeerHostPoll + prioPeerPeerPoll + prioPeerPrecision +) + +var ( + systemCharts = module.Charts{ + systemOffsetChart.Copy(), + systemJitterChart.Copy(), + systemFrequencyChart.Copy(), + systemWanderChart.Copy(), + systemRootDelayChart.Copy(), + systemRootDispersionChart.Copy(), + systemStratumChart.Copy(), + systemTimeConstantChart.Copy(), + systemPrecisionChart.Copy(), + } + systemOffsetChart = module.Chart{ + ID: "sys_offset", + Title: "Combined offset of server relative to this host", + Units: "milliseconds", + Fam: "system", + Ctx: "ntpd.sys_offset", + Type: module.Area, + Priority: prioSystemOffset, + Dims: module.Dims{ + {ID: "offset", Name: "offset", Div: precision}, + }, + } + systemJitterChart = module.Chart{ + ID: "sys_jitter", + Title: "Combined system jitter and clock jitter", + Units: "milliseconds", + Fam: "system", + Ctx: "ntpd.sys_jitter", + Priority: prioSystemJitter, + Dims: module.Dims{ + {ID: "sys_jitter", Name: "system", Div: precision}, + {ID: "clk_jitter", Name: "clock", Div: precision}, + }, + } + systemFrequencyChart = module.Chart{ + ID: "sys_frequency", + Title: "Frequency offset relative to hardware clock", + Units: "ppm", + 
Fam: "system", + Ctx: "ntpd.sys_frequency", + Type: module.Area, + Priority: prioSystemFrequency, + Dims: module.Dims{ + {ID: "frequency", Name: "frequency", Div: precision}, + }, + } + systemWanderChart = module.Chart{ + ID: "sys_wander", + Title: "Clock frequency wander", + Units: "ppm", + Fam: "system", + Ctx: "ntpd.sys_wander", + Type: module.Area, + Priority: prioSystemWander, + Dims: module.Dims{ + {ID: "clk_wander", Name: "clock", Div: precision}, + }, + } + systemRootDelayChart = module.Chart{ + ID: "sys_rootdelay", + Title: "Total roundtrip delay to the primary reference clock", + Units: "milliseconds", + Fam: "system", + Ctx: "ntpd.sys_rootdelay", + Type: module.Area, + Priority: prioSystemRootDelay, + Dims: module.Dims{ + {ID: "rootdelay", Name: "delay", Div: precision}, + }, + } + systemRootDispersionChart = module.Chart{ + ID: "sys_rootdisp", + Title: "Total root dispersion to the primary reference clock", + Units: "milliseconds", + Fam: "system", + Ctx: "ntpd.sys_rootdisp", + Type: module.Area, + Priority: prioSystemRootDispersion, + Dims: module.Dims{ + {ID: "rootdisp", Name: "dispersion", Div: precision}, + }, + } + systemStratumChart = module.Chart{ + ID: "sys_stratum", + Title: "Stratum", + Units: "stratum", + Fam: "system", + Ctx: "ntpd.sys_stratum", + Priority: prioSystemStratum, + Dims: module.Dims{ + {ID: "stratum", Name: "stratum", Div: precision}, + }, + } + systemTimeConstantChart = module.Chart{ + ID: "sys_tc", + Title: "Time constant and poll exponent", + Units: "log2", + Fam: "system", + Ctx: "ntpd.sys_tc", + Priority: prioSystemTimeConstant, + Dims: module.Dims{ + {ID: "tc", Name: "current", Div: precision}, + {ID: "mintc", Name: "minimum", Div: precision}, + }, + } + systemPrecisionChart = module.Chart{ + ID: "sys_precision", + Title: "Precision", + Units: "log2", + Fam: "system", + Ctx: "ntpd.sys_precision", + Priority: prioSystemPrecision, + Dims: module.Dims{ + {ID: "precision", Name: "precision", Div: precision}, + }, + } +) + +var 
( + peerChartsTmpl = module.Charts{ + peerOffsetChartTmpl.Copy(), + peerDelayChartTmpl.Copy(), + peerDispersionChartTmpl.Copy(), + peerJitterChartTmpl.Copy(), + peerXleaveChartTmpl.Copy(), + peerRootDelayChartTmpl.Copy(), + peerRootDispersionChartTmpl.Copy(), + peerStratumChartTmpl.Copy(), + peerHostModeChartTmpl.Copy(), + peerPeerModeChartTmpl.Copy(), + peerHostPollChartTmpl.Copy(), + peerPeerPollChartTmpl.Copy(), + peerPrecisionChartTmpl.Copy(), + } + peerOffsetChartTmpl = module.Chart{ + ID: "peer_%s_offset", + Title: "Peer offset", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_offset", + Priority: prioPeerOffset, + Dims: module.Dims{ + {ID: "peer_%s_offset", Name: "offset", Div: precision}, + }, + } + peerDelayChartTmpl = module.Chart{ + ID: "peer_%s_delay", + Title: "Peer delay", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_delay", + Priority: prioPeerDelay, + Dims: module.Dims{ + {ID: "peer_%s_delay", Name: "delay", Div: precision}, + }, + } + peerDispersionChartTmpl = module.Chart{ + ID: "peer_%s_dispersion", + Title: "Peer dispersion", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_dispersion", + Priority: prioPeerDispersion, + Dims: module.Dims{ + {ID: "peer_%s_dispersion", Name: "dispersion", Div: precision}, + }, + } + peerJitterChartTmpl = module.Chart{ + ID: "peer_%s_jitter", + Title: "Peer jitter", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_jitter", + Priority: prioPeerJitter, + Dims: module.Dims{ + {ID: "peer_%s_jitter", Name: "jitter", Div: precision}, + }, + } + peerXleaveChartTmpl = module.Chart{ + ID: "peer_%s_xleave", + Title: "Peer interleave delay", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_xleave", + Priority: prioPeerXleave, + Dims: module.Dims{ + {ID: "peer_%s_xleave", Name: "xleave", Div: precision}, + }, + } + peerRootDelayChartTmpl = module.Chart{ + ID: "peer_%s_rootdelay", + Title: "Peer roundtrip delay to the primary reference clock", + Units: "milliseconds", + 
Fam: "peers", + Ctx: "ntpd.peer_rootdelay", + Priority: prioPeerRootDelay, + Dims: module.Dims{ + {ID: "peer_%s_rootdelay", Name: "rootdelay", Div: precision}, + }, + } + peerRootDispersionChartTmpl = module.Chart{ + ID: "peer_%s_rootdisp", + Title: "Peer root dispersion to the primary reference clock", + Units: "milliseconds", + Fam: "peers", + Ctx: "ntpd.peer_rootdisp", + Priority: prioPeerRootDispersion, + Dims: module.Dims{ + {ID: "peer_%s_rootdisp", Name: "dispersion", Div: precision}, + }, + } + peerStratumChartTmpl = module.Chart{ + ID: "peer_%s_stratum", + Title: "Peer stratum", + Units: "stratum", + Fam: "peers", + Ctx: "ntpd.peer_stratum", + Priority: prioPeerStratum, + Dims: module.Dims{ + {ID: "peer_%s_stratum", Name: "stratum", Div: precision}, + }, + } + peerHostModeChartTmpl = module.Chart{ + ID: "peer_%s_hmode", + Title: "Peer host mode", + Units: "hmode", + Fam: "peers", + Ctx: "ntpd.peer_hmode", + Priority: prioPeerHostMode, + Dims: module.Dims{ + {ID: "peer_%s_hmode", Name: "hmode", Div: precision}, + }, + } + peerPeerModeChartTmpl = module.Chart{ + ID: "peer_%s_pmode", + Title: "Peer mode", + Units: "pmode", + Fam: "peers", + Ctx: "ntpd.peer_pmode", + Priority: prioPeerPeerMode, + Dims: module.Dims{ + {ID: "peer_%s_pmode", Name: "pmode", Div: precision}, + }, + } + peerHostPollChartTmpl = module.Chart{ + ID: "peer_%s_hpoll", + Title: "Peer host poll exponent", + Units: "log2", + Fam: "peers", + Ctx: "ntpd.peer_hpoll", + Priority: prioPeerHostPoll, + Dims: module.Dims{ + {ID: "peer_%s_hpoll", Name: "hpoll", Div: precision}, + }, + } + peerPeerPollChartTmpl = module.Chart{ + ID: "peer_%s_ppoll", + Title: "Peer poll exponent", + Units: "log2", + Fam: "peers", + Ctx: "ntpd.peer_ppoll", + Priority: prioPeerPeerPoll, + Dims: module.Dims{ + {ID: "peer_%s_ppoll", Name: "ppoll", Div: precision}, + }, + } + peerPrecisionChartTmpl = module.Chart{ + ID: "peer_%s_precision", + Title: "Peer precision", + Units: "log2", + Fam: "peers", + Ctx:
"ntpd.peer_precision", + Priority: prioPeerPrecision, + Dims: module.Dims{ + {ID: "peer_%s_precision", Name: "precision", Div: precision}, + }, + } +) + +func (n *NTPd) addPeerCharts(addr string) { + charts := peerChartsTmpl.Copy() + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, strings.ReplaceAll(addr, ".", "_")) + chart.Labels = []module.Label{ + {Key: "peer_address", Value: addr}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, addr) + } + } + + if err := n.Charts().Add(*charts...); err != nil { + n.Warning(err) + } +} + +func (n *NTPd) removePeerCharts(addr string) { + px := fmt.Sprintf("peer_%s_", strings.ReplaceAll(addr, ".", "_")) + + for _, chart := range *n.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/client.go b/src/go/collectors/go.d.plugin/modules/ntpd/client.go new file mode 100644 index 0000000000..5164c80e8b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/client.go @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package ntpd + +import ( + "net" + "time" + + "github.com/facebook/time/ntp/control" +) + +func newNTPClient(c Config) (ntpConn, error) { + conn, err := net.DialTimeout("udp", c.Address, c.Timeout.Duration) + if err != nil { + return nil, err + } + + client := &ntpClient{ + conn: conn, + timeout: c.Timeout.Duration, + client: &control.NTPClient{Connection: conn}, + } + + return client, nil +} + +type ntpClient struct { + conn net.Conn + timeout time.Duration + client *control.NTPClient +} + +func (c *ntpClient) systemInfo() (map[string]string, error) { + return c.peerInfo(0) +} + +func (c *ntpClient) peerInfo(id uint16) (map[string]string, error) { + msg := &control.NTPControlMsgHead{ + VnMode: control.MakeVnMode(2, control.Mode), + REMOp: control.OpReadVariables, + AssociationID: id, + } + + if err := c.conn.SetDeadline(time.Now().Add(c.timeout));
err != nil { + return nil, err + } + + resp, err := c.client.Communicate(msg) + if err != nil { + return nil, err + } + + return resp.GetAssociationInfo() +} + +func (c *ntpClient) peerIDs() ([]uint16, error) { + msg := &control.NTPControlMsgHead{ + VnMode: control.MakeVnMode(2, control.Mode), + REMOp: control.OpReadStatus, + } + + if err := c.conn.SetDeadline(time.Now().Add(c.timeout)); err != nil { + return nil, err + } + + resp, err := c.client.Communicate(msg) + if err != nil { + return nil, err + } + + peers, err := resp.GetAssociations() + if err != nil { + return nil, err + } + + var ids []uint16 + for id := range peers { + ids = append(ids, id) + } + + return ids, nil +} + +func (c *ntpClient) close() { + if c.conn != nil { + _ = c.conn.Close() + c.conn = nil + } +} diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/collect.go b/src/go/collectors/go.d.plugin/modules/ntpd/collect.go new file mode 100644 index 0000000000..09553a65cf --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/collect.go @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package ntpd + +import ( + "fmt" + "net" + "strconv" + "time" +) + +const ( + precision = 1000000 +) + +func (n *NTPd) collect() (map[string]int64, error) { + if n.client == nil { + client, err := n.newClient(n.Config) + if err != nil { + return nil, fmt.Errorf("creating NTP client: %v", err) + } + n.client = client + } + + mx := make(map[string]int64) + + if err := n.collectInfo(mx); err != nil { + return nil, err + } + + if n.CollectPeers { + if now := time.Now(); now.Sub(n.findPeersTime) > n.findPeersEvery { + n.findPeersTime = now + if err := n.findPeers(); err != nil { + n.Warning(err) + } + } + n.collectPeersInfo(mx) + } + + return mx, nil +} + +func (n *NTPd) collectInfo(mx map[string]int64) error { + info, err := n.client.systemInfo() + if err != nil { + return fmt.Errorf("error on querying system info: %v", err) + } + + for k, v := range info { + switch k { + case + "offset", + 
"sys_jitter", + "clk_jitter", + "frequency", + "clk_wander", + "rootdelay", + "rootdisp", + "stratum", + "tc", + "mintc", + "precision": + if val, err := strconv.ParseFloat(v, 64); err == nil { + mx[k] = int64(val * precision) + } + } + } + return nil +} + +func (n *NTPd) collectPeersInfo(mx map[string]int64) { + for _, id := range n.peerIDs { + info, err := n.client.peerInfo(id) + if err != nil { + n.Warningf("error on querying NTP peer info id='%d': %v", id, err) + continue + } + + addr, ok := info["srcadr"] + if !ok { + continue + } + + for k, v := range info { + switch k { + case + "offset", + "delay", + "dispersion", + "jitter", + "xleave", + "rootdelay", + "rootdisp", + "stratum", + "hmode", + "pmode", + "hpoll", + "ppoll", + "precision": + if val, err := strconv.ParseFloat(v, 64); err == nil { + mx["peer_"+addr+"_"+k] = int64(val * precision) + } + } + } + } +} + +func (n *NTPd) findPeers() error { + n.Debug("querying NTP peers") + peers, err := n.client.peerIDs() + if err != nil { + return fmt.Errorf("querying NTP peers: %v", err) + } + + n.Debugf("found %d NTP peers (ids: %v)", len(peers), peers) + seen := make(map[string]bool) + + // Reset only after a successful query so a failed refresh keeps the known peers. + n.peerIDs = n.peerIDs[:0] + for _, id := range peers { + info, err := n.client.peerInfo(id) + if err != nil { + n.Debugf("error on querying NTP peer info id='%d': %v", id, err) + continue + } + + addr, ok := info["srcadr"] + if ip := net.ParseIP(addr); !ok || ip == nil || n.peerIPAddrFilter.Contains(ip) { + n.Debugf("skipping NTP peer id='%d', srcadr='%s'", id, addr) + continue + } + + seen[addr] = true + + if !n.peerAddr[addr] { + n.peerAddr[addr] = true + n.Debugf("new NTP peer id='%d', srcadr='%s': creating charts", id, addr) + n.addPeerCharts(addr) + } + + n.peerIDs = append(n.peerIDs, id) + } + + for addr := range n.peerAddr { + if !seen[addr] { + delete(n.peerAddr, addr) + n.Debugf("stale NTP peer srcadr='%s': removing charts", addr) + n.removePeerCharts(addr) + } + } + + return nil +} diff --git
a/src/go/collectors/go.d.plugin/modules/ntpd/config_schema.json b/src/go/collectors/go.d.plugin/modules/ntpd/config_schema.json new file mode 100644 index 0000000000..ef360a7f95 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/config_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "go.d/ntpd job configuration schema.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "address": { + "type": "string" + }, + "timeout": { + "type": [ + "string", + "integer" + ] + }, + "collect_peers": { + "type": "boolean" + } + }, + "required": [ + "name", + "address" + ] +} diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/integrations/ntpd.md b/src/go/collectors/go.d.plugin/modules/ntpd/integrations/ntpd.md new file mode 100644 index 0000000000..be765ae189 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/integrations/ntpd.md @@ -0,0 +1,228 @@ + + +# NTPd + + + + + +Plugin: go.d.plugin +Module: ntpd + + + +## Overview + +This collector monitors the system variables of the local `ntpd` daemon (optional incl. variables of the polled peers) using the NTP Control Message Protocol via UDP socket, similar to `ntpq`, the [standard NTP query program](https://doc.ntp.org/current-stable/ntpq.html). + + + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +This integration doesn't support auto-detection. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels. + + + +### Per NTPd instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| ntpd.sys_offset | offset | milliseconds | +| ntpd.sys_jitter | system, clock | milliseconds | +| ntpd.sys_frequency | frequency | ppm | +| ntpd.sys_wander | clock | ppm | +| ntpd.sys_rootdelay | delay | milliseconds | +| ntpd.sys_rootdisp | dispersion | milliseconds | +| ntpd.sys_stratum | stratum | stratum | +| ntpd.sys_tc | current, minimum | log2 | +| ntpd.sys_precision | precision | log2 | + +### Per peer + +These metrics refer to the NTPd peer. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| peer_address | peer's source IP address | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| ntpd.peer_offset | offset | milliseconds | +| ntpd.peer_delay | delay | milliseconds | +| ntpd.peer_dispersion | dispersion | milliseconds | +| ntpd.peer_jitter | jitter | milliseconds | +| ntpd.peer_xleave | xleave | milliseconds | +| ntpd.peer_rootdelay | rootdelay | milliseconds | +| ntpd.peer_rootdisp | dispersion | milliseconds | +| ntpd.peer_stratum | stratum | stratum | +| ntpd.peer_hmode | hmode | hmode | +| ntpd.peer_pmode | pmode | pmode | +| ntpd.peer_hpoll | hpoll | log2 | +| ntpd.peer_ppoll | ppoll | log2 | +| ntpd.peer_precision | precision | log2 | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/ntpd.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory). 
+ +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/ntpd.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| address | Server address in IP:PORT format. | 127.0.0.1:123 | yes | +| timeout | Connection/read/write timeout. | 3 | no | +| collect_peers | Determines whether peer metrics will be collected. | no | no | + +
+ +#### Examples + +##### Basic + +A basic example configuration. + +
Config + +```yaml +jobs: + - name: local + address: 127.0.0.1:123 + +``` +
+ +##### With peers metrics + +Collect peers metrics. + +
Config + +```yaml +jobs: + - name: local + address: 127.0.0.1:123 + collect_peers: yes + +``` +
+ +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +
Config + +```yaml +jobs: + - name: local + address: 127.0.0.1:123 + + - name: remote + address: 203.0.113.0:123 + +``` +
+ + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `ntpd` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m ntpd + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/metadata.yaml b/src/go/collectors/go.d.plugin/modules/ntpd/metadata.yaml new file mode 100644 index 0000000000..3b968f20c8 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/metadata.yaml @@ -0,0 +1,260 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-ntpd + plugin_name: go.d.plugin + module_name: ntpd + monitored_instance: + name: NTPd + link: https://www.ntp.org/documentation/4.2.8-series/ntpd + icon_filename: ntp.png + categories: + - data-collection.system-clock-and-ntp + keywords: + - ntpd + - ntp + - time + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + most_popular: false + overview: + data_collection: + metrics_description: > + This collector monitors the system variables of the local `ntpd` daemon (optional incl. variables of the polled peers) + using the NTP Control Message Protocol via UDP socket, similar to `ntpq`, + the [standard NTP query program](https://doc.ntp.org/current-stable/ntpq.html). 
+ method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/ntpd.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: address + description: Server address in IP:PORT format. + default_value: 127.0.0.1:123 + required: true + - name: timeout + description: Connection/read/write timeout. + default_value: 3 + required: false + - name: collect_peers + description: Determines whether peer metrics will be collected. + default_value: false + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Basic + description: A basic example configuration. + config: | + jobs: + - name: local + address: 127.0.0.1:123 + - name: With peers metrics + description: Collect peers metrics. + config: | + jobs: + - name: local + address: 127.0.0.1:123 + collect_peers: yes + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + jobs: + - name: local + address: 127.0.0.1:123 + + - name: remote + address: 203.0.113.0:123 + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. 
+ labels: [] + metrics: + - name: ntpd.sys_offset + description: Combined offset of server relative to this host + unit: milliseconds + chart_type: area + dimensions: + - name: offset + - name: ntpd.sys_jitter + description: Combined system jitter and clock jitter + unit: milliseconds + chart_type: line + dimensions: + - name: system + - name: clock + - name: ntpd.sys_frequency + description: Frequency offset relative to hardware clock + unit: ppm + chart_type: area + dimensions: + - name: frequency + - name: ntpd.sys_wander + description: Clock frequency wander + unit: ppm + chart_type: area + dimensions: + - name: clock + - name: ntpd.sys_rootdelay + description: Total roundtrip delay to the primary reference clock + unit: milliseconds + chart_type: area + dimensions: + - name: delay + - name: ntpd.sys_rootdisp + description: Total root dispersion to the primary reference clock + unit: milliseconds + chart_type: area + dimensions: + - name: dispersion + - name: ntpd.sys_stratum + description: Stratum + unit: stratum + chart_type: line + dimensions: + - name: stratum + - name: ntpd.sys_tc + description: Time constant and poll exponent + unit: log2 + chart_type: line + dimensions: + - name: current + - name: minimum + - name: ntpd.sys_precision + description: Precision + unit: log2 + chart_type: line + dimensions: + - name: precision + - name: peer + description: These metrics refer to the NTPd peer. 
+ labels: + - name: peer_address + description: peer's source IP address + metrics: + - name: ntpd.peer_offset + description: Peer offset + unit: milliseconds + chart_type: line + dimensions: + - name: offset + - name: ntpd.peer_delay + description: Peer delay + unit: milliseconds + chart_type: line + dimensions: + - name: delay + - name: ntpd.peer_dispersion + description: Peer dispersion + unit: milliseconds + chart_type: line + dimensions: + - name: dispersion + - name: ntpd.peer_jitter + description: Peer jitter + unit: milliseconds + chart_type: line + dimensions: + - name: jitter + - name: ntpd.peer_xleave + description: Peer interleave delay + unit: milliseconds + chart_type: line + dimensions: + - name: xleave + - name: ntpd.peer_rootdelay + description: Peer roundtrip delay to the primary reference clock + unit: milliseconds + chart_type: line + dimensions: + - name: rootdelay + - name: ntpd.peer_rootdisp + description: Peer root dispersion to the primary reference clock + unit: milliseconds + chart_type: line + dimensions: + - name: dispersion + - name: ntpd.peer_stratum + description: Peer stratum + unit: stratum + chart_type: line + dimensions: + - name: stratum + - name: ntpd.peer_hmode + description: Peer host mode + unit: hmode + chart_type: line + dimensions: + - name: hmode + - name: ntpd.peer_pmode + description: Peer mode + unit: pmode + chart_type: line + dimensions: + - name: pmode + - name: ntpd.peer_hpoll + description: Peer host poll exponent + unit: log2 + chart_type: line + dimensions: + - name: hpoll + - name: ntpd.peer_ppoll + description: Peer poll exponent + unit: log2 + chart_type: line + dimensions: + - name: ppoll + - name: ntpd.peer_precision + description: Peer precision + unit: log2 + chart_type: line + dimensions: + - name: precision diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/ntpd.go b/src/go/collectors/go.d.plugin/modules/ntpd/ntpd.go new file mode 100644 index 0000000000..8bbc0ba4f3 --- /dev/null +++ 
b/src/go/collectors/go.d.plugin/modules/ntpd/ntpd.go @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package ntpd + +import ( + _ "embed" + "time" + + "github.com/netdata/go.d.plugin/agent/module" + "github.com/netdata/go.d.plugin/pkg/iprange" + "github.com/netdata/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("ntpd", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + }) +} + +func New() *NTPd { + return &NTPd{ + Config: Config{ + Address: "127.0.0.1:123", + Timeout: web.Duration{Duration: time.Second * 3}, + CollectPeers: false, + }, + charts: systemCharts.Copy(), + newClient: newNTPClient, + findPeersEvery: time.Minute * 3, + peerAddr: make(map[string]bool), + } +} + +type Config struct { + Address string `yaml:"address"` + Timeout web.Duration `yaml:"timeout"` + CollectPeers bool `yaml:"collect_peers"` +} + +type ( + NTPd struct { + module.Base + Config `yaml:",inline"` + + charts *module.Charts + + newClient func(c Config) (ntpConn, error) + client ntpConn + + findPeersTime time.Time + findPeersEvery time.Duration + peerAddr map[string]bool + peerIDs []uint16 + peerIPAddrFilter iprange.Pool + } + ntpConn interface { + systemInfo() (map[string]string, error) + peerInfo(id uint16) (map[string]string, error) + peerIDs() ([]uint16, error) + close() + } +) + +func (n *NTPd) Init() bool { + if n.Address == "" { + n.Error("config validation: 'address' can not be empty") + return false + } + + txt := "0.0.0.0 127.0.0.0/8" + r, err := iprange.ParseRanges(txt) + if err != nil { + n.Errorf("error on parse ip range '%s': %v", txt, err) + return false + } + + n.peerIPAddrFilter = r + + return true +} + +func (n *NTPd) Check() bool { + return len(n.Collect()) > 0 +} + +func (n *NTPd) Charts() *module.Charts { + return n.charts +} + +func (n *NTPd) Collect() map[string]int64 { + mx, err := n.collect() + if err != nil { + n.Error(err) + } 
+ + if len(mx) == 0 { + return nil + } + return mx +} + +func (n *NTPd) Cleanup() { + if n.client != nil { + n.client.close() + n.client = nil + } +} diff --git a/src/go/collectors/go.d.plugin/modules/ntpd/ntpd_test.go b/src/go/collectors/go.d.plugin/modules/ntpd/ntpd_test.go new file mode 100644 index 0000000000..481d2d7e95 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/ntpd/ntpd_test.go @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package ntpd + +import ( + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNTPd_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "default config": { + config: New().Config, + }, + "unset 'address'": { + wantFail: true, + config: Config{ + Address: "", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + n := New() + n.Config = test.config + + if test.wantFail { + assert.False(t, n.Init()) + } else { + assert.True(t, n.Init()) + } + }) + } +} + +func TestNTPd_Charts(t *testing.T) { + assert.Equal(t, len(systemCharts), len(*New().Charts())) +} + +func TestNTPd_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func(*NTPd) + wantClose bool + }{ + "after New": { + wantClose: false, + prepare: func(*NTPd) {}, + }, + "after Init": { + wantClose: false, + prepare: func(n *NTPd) { n.Init() }, + }, + "after Check": { + wantClose: true, + prepare: func(n *NTPd) { n.Init(); n.Check() }, + }, + "after Collect": { + wantClose: true, + prepare: func(n *NTPd) { n.Init(); n.Collect() }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + m := &mockClient{} + n := prepareNTPdWithMock(m, true) + test.prepare(n) + + require.NotPanics(t, n.Cleanup) + + if test.wantClose { + assert.True(t, m.closeCalled) + } else { + assert.False(t, m.closeCalled) + } + }) + } +} + +func TestNTPd_Check(t *testing.T) { + tests := map[string]struct { 
+ prepare func() *NTPd + wantFail bool + }{ + "system: success, peers: success": { + wantFail: false, + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{}, true) }, + }, + "system: success, list peers: fails": { + wantFail: false, + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnPeerIDs: true}, true) }, + }, + "system: success, peers info: fails": { + wantFail: false, + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnPeerInfo: true}, true) }, + }, + "system: fails": { + wantFail: true, + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnSystemInfo: true}, true) }, + }, + "fail on creating client": { + wantFail: true, + prepare: func() *NTPd { return prepareNTPdWithMock(nil, true) }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + n := test.prepare() + + require.True(t, n.Init()) + + if test.wantFail { + assert.False(t, n.Check()) + } else { + assert.True(t, n.Check()) + } + }) + } + +} + +func TestNTPd_Collect(t *testing.T) { + tests := map[string]struct { + prepare func() *NTPd + expected map[string]int64 + expectedCharts int + }{ + "system: success, peers: success": { + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{}, true) }, + expected: map[string]int64{ + "clk_jitter": 626000, + "clk_wander": 81000, + "mintc": 3000000, + "offset": -149638, + "peer_203.0.113.1_delay": 10464000, + "peer_203.0.113.1_dispersion": 5376000, + "peer_203.0.113.1_hmode": 3000000, + "peer_203.0.113.1_hpoll": 7000000, + "peer_203.0.113.1_jitter": 5204000, + "peer_203.0.113.1_offset": 312000, + "peer_203.0.113.1_pmode": 4000000, + "peer_203.0.113.1_ppoll": 7000000, + "peer_203.0.113.1_precision": -21000000, + "peer_203.0.113.1_rootdelay": 198000, + "peer_203.0.113.1_rootdisp": 14465000, + "peer_203.0.113.1_stratum": 2000000, + "peer_203.0.113.1_xleave": 95000, + "peer_203.0.113.2_delay": 10464000, + "peer_203.0.113.2_dispersion": 5376000, + "peer_203.0.113.2_hmode": 
3000000, + "peer_203.0.113.2_hpoll": 7000000, + "peer_203.0.113.2_jitter": 5204000, + "peer_203.0.113.2_offset": 312000, + "peer_203.0.113.2_pmode": 4000000, + "peer_203.0.113.2_ppoll": 7000000, + "peer_203.0.113.2_precision": -21000000, + "peer_203.0.113.2_rootdelay": 198000, + "peer_203.0.113.2_rootdisp": 14465000, + "peer_203.0.113.2_stratum": 2000000, + "peer_203.0.113.2_xleave": 95000, + "peer_203.0.113.3_delay": 10464000, + "peer_203.0.113.3_dispersion": 5376000, + "peer_203.0.113.3_hmode": 3000000, + "peer_203.0.113.3_hpoll": 7000000, + "peer_203.0.113.3_jitter": 5204000, + "peer_203.0.113.3_offset": 312000, + "peer_203.0.113.3_pmode": 4000000, + "peer_203.0.113.3_ppoll": 7000000, + "peer_203.0.113.3_precision": -21000000, + "peer_203.0.113.3_rootdelay": 198000, + "peer_203.0.113.3_rootdisp": 14465000, + "peer_203.0.113.3_stratum": 2000000, + "peer_203.0.113.3_xleave": 95000, + "precision": -24000000, + "rootdelay": 10385000, + "rootdisp": 23404000, + "stratum": 2000000, + "sys_jitter": 1648010, + "tc": 7000000, + }, + expectedCharts: len(systemCharts) + len(peerChartsTmpl)*3, + }, + "system: success, list peers: fails": { + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnPeerIDs: true}, true) }, + expected: map[string]int64{ + "clk_jitter": 626000, + "clk_wander": 81000, + "mintc": 3000000, + "offset": -149638, + "precision": -24000000, + "rootdelay": 10385000, + "rootdisp": 23404000, + "stratum": 2000000, + "sys_jitter": 1648010, + "tc": 7000000, + }, + expectedCharts: len(systemCharts), + }, + "system: success, peers info: fails": { + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnPeerInfo: true}, true) }, + expected: map[string]int64{ + "clk_jitter": 626000, + "clk_wander": 81000, + "mintc": 3000000, + "offset": -149638, + "precision": -24000000, + "rootdelay": 10385000, + "rootdisp": 23404000, + "stratum": 2000000, + "sys_jitter": 1648010, + "tc": 7000000, + }, + expectedCharts: len(systemCharts), + }, + "system: 
fails": { + prepare: func() *NTPd { return prepareNTPdWithMock(&mockClient{errOnSystemInfo: true}, true) }, + expected: nil, + expectedCharts: len(systemCharts), + }, + "fail on creating client": { + prepare: func() *NTPd { return prepareNTPdWithMock(nil, true) }, + expected: nil, + expectedCharts: len(systemCharts), + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + n := test.prepare() + + require.True(t, n.Init()) + _ = n.Check() + + mx := n.Collect() + + assert.Equal(t, test.expected, mx) + assert.Equal(t, test.expectedCharts, len(*n.Charts())) + }) + } +} + +func prepareNTPdWithMock(m *mockClient, collectPeers bool) *NTPd { + n := New() + n.CollectPeers = collectPeers + if m == nil { + n.newClient = func(_ Config) (ntpConn, error) { return nil, errors.New("mock.newClient error") } + } else { + n.newClient = func(_ Config) (ntpConn, error) { return m, nil } + } + return n +} + +type mockClient struct { + errOnSystemInfo bool + errOnPeerInfo bool + errOnPeerIDs bool + closeCalled bool +} + +func (m *mockClient) systemInfo() (map[string]string, error) { + if m.errOnSystemInfo { + return nil, errors.New("mockClient.info() error") + } + + info := map[string]string{ + "rootdelay": "10.385", + "tc": "7", + "mintc": "3", + "processor": "x86_64", + "refid": "194.177.210.54", + "reftime": "0xe7504a10.74414244", + "clock": "0xe7504e80.8c46aa3f", + "peer": "14835", + "sys_jitter": "1.648010", + "leapsec": "201701010000", + "expire": "202306280000", + "leap": "0", + "stratum": "2", + "precision": "-24", + "offset": "-0.149638", + "frequency": "- 7.734", + "clk_wander": "0.081", + "tai": "37", + "version": "ntpd 4.2.8p15@1.3728-o Wed Sep 23 11:46:38 UTC 2020 (1)", + "rootdisp": "23.404", + "clk_jitter": "0.626", + "system": "Linux/5.10.0-19-amd64", + } + + return info, nil +} + +func (m *mockClient) peerInfo(id uint16) (map[string]string, error) { + if m.errOnPeerInfo { + return nil, errors.New("mockClient.peerInfo() error") + } + + info := 
map[string]string{ + "delay": "10.464", + "dispersion": "5.376", + "dstadr": "10.10.10.20", + "dstport": "123", + "filtdelay": "11.34 10.53 10.49 10.46 10.92 10.56 10.69 37.99", + "filtdisp": "0.00 2.01 4.01 5.93 7.89 9.84 11.81 13.73", + "filtoffset": "0.66 0.32 0.18 0.31 0.33 0.10 0.34 14.07", + "flash": "0x0", + "headway": "0", + "hmode": "3", + "hpoll": "7", + "jitter": "5.204", + "keyid": "0", + "leap": "0", + "offset": "0.312", + "pmode": "4", + "ppoll": "7", + "precision": "-21", + "reach": "0xff", + "rec": "0xe7504df8.74802284", + "refid": "193.93.164.193", + "reftime": "0xe7504b8b.0c98a518", + "rootdelay": "0.198", + "rootdisp": "14.465", + "srcadr": fmt.Sprintf("203.0.113.%d", id), + "srcport": "123", + "stratum": "2", + "unreach": "0", + "xleave": "0.095", + } + + return info, nil +} + +func (m *mockClient) peerIDs() ([]uint16, error) { + if m.errOnPeerIDs { + return nil, errors.New("mockClient.peerIDs() error") + } + return []uint16{1, 2, 3}, nil +} + +func (m *mockClient) close() { + m.closeCalled = true +} -- cgit v1.2.3