summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Mashchenko <ilya@netdata.cloud>2024-04-19 17:38:37 +0300
committerGitHub <noreply@github.com>2024-04-19 17:38:37 +0300
commitaf8404cebb4821e0fa682e27aeb9714278af6096 (patch)
tree93bc7e67c8525892ae198bd6244ba22a4a830dac
parent079f1e312fded06956f8aa05385ba7b20d1a610d (diff)
go.d add storcli collector (#17454)
-rw-r--r--src/go/collectors/go.d.plugin/README.md3
-rw-r--r--src/go/collectors/go.d.plugin/config/go.d.conf1
-rw-r--r--src/go/collectors/go.d.plugin/config/go.d/storcli.conf5
-rw-r--r--src/go/collectors/go.d.plugin/modules/init.go1
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/charts.go171
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/collect.go32
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go101
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go231
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/config_schema.json35
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/exec.go50
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/init.go23
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml153
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/storcli.go109
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go289
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/testdata/config.json4
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/testdata/config.yaml2
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-controllers-info.json687
-rw-r--r--src/go/collectors/go.d.plugin/modules/storcli/testdata/megaraid-drives-info.json495
18 files changed, 2390 insertions, 2 deletions
diff --git a/src/go/collectors/go.d.plugin/README.md b/src/go/collectors/go.d.plugin/README.md
index 6dc519bee1..fc688ada01 100644
--- a/src/go/collectors/go.d.plugin/README.md
+++ b/src/go/collectors/go.d.plugin/README.md
@@ -114,9 +114,8 @@ see the appropriate collector readme.
| [redis](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/redis) | Redis |
| [scaleio](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/scaleio) | Dell EMC ScaleIO |
| [SNMP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/snmp) | SNMP |
-| [solr](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/solr) | Solr |
| [squidlog](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/squidlog) | Squid |
-| [springboot2](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/springboot2) | Spring Boot2 |
+| [storcli](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/storcli) | Broadcom Hardware RAID |
| [supervisord](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/supervisord) | Supervisor |
| [systemdunits](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/systemdunits) | Systemd unit state |
| [tengine](https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/tengine) | Tengine |
diff --git a/src/go/collectors/go.d.plugin/config/go.d.conf b/src/go/collectors/go.d.plugin/config/go.d.conf
index 86fa940650..ab3a5aca74 100644
--- a/src/go/collectors/go.d.plugin/config/go.d.conf
+++ b/src/go/collectors/go.d.plugin/config/go.d.conf
@@ -77,6 +77,7 @@ modules:
# scaleio: yes
# snmp: yes
# squidlog: yes
+# storcli: yes
# supervisord: yes
# systemdunits: yes
# tengine: yes
diff --git a/src/go/collectors/go.d.plugin/config/go.d/storcli.conf b/src/go/collectors/go.d.plugin/config/go.d/storcli.conf
new file mode 100644
index 0000000000..a4a9e3e0ac
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/config/go.d/storcli.conf
@@ -0,0 +1,5 @@
+## All available configuration options, their descriptions and default values:
+## https://github.com/netdata/netdata/tree/master/src/go/collectors/go.d.plugin/modules/storcli#readme
+
+jobs:
+ - name: storcli
diff --git a/src/go/collectors/go.d.plugin/modules/init.go b/src/go/collectors/go.d.plugin/modules/init.go
index 2790d30c24..69e11617f7 100644
--- a/src/go/collectors/go.d.plugin/modules/init.go
+++ b/src/go/collectors/go.d.plugin/modules/init.go
@@ -69,6 +69,7 @@ import (
_ "github.com/netdata/netdata/go/go.d.plugin/modules/scaleio"
_ "github.com/netdata/netdata/go/go.d.plugin/modules/snmp"
_ "github.com/netdata/netdata/go/go.d.plugin/modules/squidlog"
+ _ "github.com/netdata/netdata/go/go.d.plugin/modules/storcli"
_ "github.com/netdata/netdata/go/go.d.plugin/modules/supervisord"
_ "github.com/netdata/netdata/go/go.d.plugin/modules/systemdunits"
_ "github.com/netdata/netdata/go/go.d.plugin/modules/tengine"
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/charts.go b/src/go/collectors/go.d.plugin/modules/storcli/charts.go
new file mode 100644
index 0000000000..65cd75a331
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/charts.go
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+)
+
+// Chart priorities. Values are assigned sequentially with iota, starting at
+// module.Priority, in the order listed here (controller charts first, then
+// physical drive charts).
+const (
+ prioControllerStatus = module.Priority + iota
+ prioControllerBBUStatus
+
+ prioPhysDriveErrors
+ prioPhysDrivePredictiveFailures
+ prioPhysDriveSmartAlertStatus
+ prioPhysDriveTemperature
+)
+
+// controllerChartsTmpl is the set of chart templates copied for every controller.
+// The %s verb in chart and dimension IDs is replaced with the controller number
+// by addControllerCharts.
+var controllerChartsTmpl = module.Charts{
+ controllerStatusChartTmpl.Copy(),
+ controllerBBUStatusChartTmpl.Copy(),
+}
+
+var (
+ // controllerStatusChartTmpl has one dimension per controller status value;
+ // dimension IDs match the "cntrl_<n>_status_<state>" keys written by
+ // collectControllersInfo.
+ controllerStatusChartTmpl = module.Chart{
+ ID: "controller_%s_status",
+ Title: "Controller status",
+ Units: "status",
+ Fam: "cntrl status",
+ Ctx: "storcli.controller_status",
+ Type: module.Line,
+ Priority: prioControllerStatus,
+ Dims: module.Dims{
+ {ID: "cntrl_%s_status_optimal", Name: "optimal"},
+ {ID: "cntrl_%s_status_degraded", Name: "degraded"},
+ {ID: "cntrl_%s_status_partially_degraded", Name: "partially_degraded"},
+ {ID: "cntrl_%s_status_failed", Name: "failed"},
+ },
+ }
+ // controllerBBUStatusChartTmpl has one dimension per BBU state;
+ // dimension IDs match the "cntrl_<n>_bbu_status_<state>" keys written by
+ // collectControllersInfo.
+ controllerBBUStatusChartTmpl = module.Chart{
+ ID: "controller_%s_bbu_status",
+ Title: "Controller BBU status",
+ Units: "status",
+ Fam: "cntrl status",
+ Ctx: "storcli.controller_bbu_status",
+ Type: module.Line,
+ Priority: prioControllerBBUStatus,
+ Dims: module.Dims{
+ {ID: "cntrl_%s_bbu_status_healthy", Name: "healthy"},
+ {ID: "cntrl_%s_bbu_status_unhealthy", Name: "unhealthy"},
+ {ID: "cntrl_%s_bbu_status_na", Name: "na"},
+ },
+ }
+)
+
+// physDriveChartsTmpl is the set of chart templates copied for every physical drive.
+// The two %s verbs in chart and dimension IDs are replaced with the drive WWN and
+// the controller number (in that order) by addPhysDriveCharts.
+var physDriveChartsTmpl = module.Charts{
+	physDriveMediaErrorsRateChartTmpl.Copy(),
+	physDrivePredictiveFailuresRateChartTmpl.Copy(),
+	physDriveSmartAlertStatusChartTmpl.Copy(),
+	physDriveTemperatureChartTmpl.Copy(),
+}
+
+var (
+	// NOTE(review): title and units describe a rate, while the dimensions are fed
+	// with the error counters reported by storcli and no algorithm is set here;
+	// confirm whether an incremental algorithm is intended.
+	physDriveMediaErrorsRateChartTmpl = module.Chart{
+		ID:       "phys_drive_%s_cntrl_%s_media_errors_rate",
+		Title:    "Physical Drive media errors rate",
+		Units:    "errors/s",
+		Fam:      "pd errors",
+		Ctx:      "storcli.phys_drive_errors",
+		Type:     module.Line,
+		Priority: prioPhysDriveErrors,
+		Dims: module.Dims{
+			{ID: "phys_drive_%s_cntrl_%s_media_error_count", Name: "media"},
+			{ID: "phys_drive_%s_cntrl_%s_other_error_count", Name: "other"},
+		},
+	}
+	physDrivePredictiveFailuresRateChartTmpl = module.Chart{
+		ID:       "phys_drive_%s_cntrl_%s_predictive_failures_rate",
+		Title:    "Physical Drive predictive failures rate",
+		Units:    "failures/s",
+		Fam:      "pd errors",
+		Ctx:      "storcli.phys_drive_predictive_failures",
+		Type:     module.Line,
+		Priority: prioPhysDrivePredictiveFailures,
+		Dims: module.Dims{
+			{ID: "phys_drive_%s_cntrl_%s_predictive_failure_count", Name: "predictive_failures"},
+		},
+	}
+	physDriveSmartAlertStatusChartTmpl = module.Chart{
+		ID:       "phys_drive_%s_cntrl_%s_smart_alert_status",
+		Title:    "Physical Drive SMART alert status",
+		Units:    "status",
+		Fam:      "pd smart",
+		Ctx:      "storcli.phys_drive_smart_alert_status",
+		Type:     module.Line,
+		Priority: prioPhysDriveSmartAlertStatus,
+		Dims: module.Dims{
+			{ID: "phys_drive_%s_cntrl_%s_smart_alert_status_active", Name: "active"},
+			{ID: "phys_drive_%s_cntrl_%s_smart_alert_status_inactive", Name: "inactive"},
+		},
+	}
+	// The temperature dimension carries the integer parsed from the Celsius part
+	// of storcli's "Drive Temperature" string, so the unit is Celsius, not "status".
+	physDriveTemperatureChartTmpl = module.Chart{
+		ID:       "phys_drive_%s_cntrl_%s_temperature",
+		Title:    "Physical Drive temperature",
+		Units:    "Celsius",
+		Fam:      "pd temperature",
+		Ctx:      "storcli.phys_drive_temperature",
+		Type:     module.Line,
+		Priority: prioPhysDriveTemperature,
+		Dims: module.Dims{
+			{ID: "phys_drive_%s_cntrl_%s_temperature", Name: "temperature"},
+		},
+	}
+)
+
+// addControllerCharts instantiates the controller chart templates for cntrl:
+// the controller number is substituted into every chart and dimension ID and
+// attached, together with the model, as chart labels. A failure to register
+// the charts is logged as a warning.
+func (s *StorCli) addControllerCharts(cntrl controllerInfo) {
+	cntrlNum := strconv.Itoa(cntrl.Basics.Controller)
+	newCharts := controllerChartsTmpl.Copy()
+
+	for _, ch := range *newCharts {
+		ch.ID = fmt.Sprintf(ch.ID, cntrlNum)
+		for _, d := range ch.Dims {
+			d.ID = fmt.Sprintf(d.ID, cntrlNum)
+		}
+		ch.Labels = []module.Label{
+			{Key: "controller_number", Value: cntrlNum},
+			{Key: "model", Value: cntrl.Basics.Model},
+		}
+	}
+
+	err := s.Charts().Add(*newCharts...)
+	if err == nil {
+		return
+	}
+	s.Warning(err)
+}
+
+// addPhysDriveCharts instantiates the physical drive chart templates for one drive.
+// The temperature chart is dropped when the drive's temperature string does not
+// yield an integer. Chart and dimension IDs are filled with the drive WWN and the
+// controller number; enclosure and slot labels come from the "EID:Slt" field and
+// stay empty unless it splits into exactly two parts. A failure to register the
+// charts is logged as a warning.
+func (s *StorCli) addPhysDriveCharts(cntrlNum int, di *driveInfo, ds *driveState, da *driveAttrs) {
+	newCharts := physDriveChartsTmpl.Copy()
+
+	_, hasTemp := parseInt(getDriveTemperature(ds.DriveTemperature))
+	if !hasTemp {
+		// Removal is by the template ID: IDs have not been formatted yet at this point.
+		_ = newCharts.Remove(physDriveTemperatureChartTmpl.ID)
+	}
+
+	cntrl := strconv.Itoa(cntrlNum)
+
+	enclosure, slot := "", ""
+	if fields := strings.Split(di.EIDSlt, ":"); len(fields) == 2 { // EID:Slt
+		enclosure = fields[0]
+		slot = fields[1]
+	}
+
+	for _, ch := range *newCharts {
+		ch.ID = fmt.Sprintf(ch.ID, da.WWN, cntrl)
+		for _, d := range ch.Dims {
+			d.ID = fmt.Sprintf(d.ID, da.WWN, cntrl)
+		}
+		ch.Labels = []module.Label{
+			{Key: "controller_number", Value: cntrl},
+			{Key: "enclosure_number", Value: enclosure},
+			{Key: "slot_number", Value: slot},
+			{Key: "media_type", Value: di.Med},
+		}
+	}
+
+	err := s.Charts().Add(*newCharts...)
+	if err == nil {
+		return
+	}
+	s.Warning(err)
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect.go b/src/go/collectors/go.d.plugin/modules/storcli/collect.go
new file mode 100644
index 0000000000..d9b1c9af2f
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/collect.go
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import "fmt"
+
+// collect runs one collection cycle and returns the gathered metrics.
+//
+// Controller info is always queried. Drive info is queried only when the first
+// controller in the response reports the "megaraid_sas" driver and a non-empty
+// "PD LIST". Any failure aborts the cycle; underlying errors are wrapped with %w
+// so callers can still inspect them with errors.Is / errors.As.
+func (s *StorCli) collect() (map[string]int64, error) {
+	cntrlResp, err := s.queryControllersInfo()
+	if err != nil {
+		return nil, err
+	}
+
+	mx := make(map[string]int64)
+
+	if err := s.collectControllersInfo(mx, cntrlResp); err != nil {
+		return nil, fmt.Errorf("error collecting controller info: %w", err)
+	}
+
+	// queryControllersInfo guarantees at least one controller in the response.
+	drives := cntrlResp.Controllers[0].ResponseData.PDList
+	driver := cntrlResp.Controllers[0].ResponseData.Version.DriverName
+	if driver == "megaraid_sas" && len(drives) > 0 {
+		drivesResp, err := s.queryDrivesInfo()
+		if err != nil {
+			return nil, fmt.Errorf("error collecting drives info: %w", err)
+		}
+		if err := s.collectMegaRaidDrives(mx, drivesResp); err != nil {
+			return nil, err
+		}
+	}
+
+	return mx, nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go
new file mode 100644
index 0000000000..259013e6c1
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_controllers.go
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+type (
+ // controllersInfoResponse mirrors the JSON document decoded by
+ // queryControllersInfo: one entry per controller, each with a command status
+ // and the controller details.
+ controllersInfoResponse struct {
+ Controllers []struct {
+ CommandStatus struct {
+ Controller int `json:"Controller"`
+ Status string `json:"Status"` // queryControllersInfo requires "Success" for the first controller
+ } `json:"Command Status"`
+ ResponseData controllerInfo `json:"Response Data"`
+ } `json:"Controllers"`
+ }
+ // controllerInfo holds the per-controller fields this collector decodes.
+ controllerInfo struct {
+ Basics struct {
+ Controller int `json:"Controller"` // controller number, used in chart and metric IDs
+ Model string `json:"Model"`
+ SerialNumber string `json:"Serial Number"`
+ } `json:"Basics"`
+ Version struct {
+ DriverName string `json:"Driver Name"` // collect compares this against "megaraid_sas"
+ } `json:"Version"`
+ Status struct {
+ ControllerStatus string `json:"Controller Status"`
+ BBUStatus storNumber `json:"BBU Status"` // may be a number or a string such as 'N/A'
+ } `json:"Status"`
+ BBUInfo []struct {
+ Model string `json:"Model"`
+ State string `json:"State"`
+ Temp string `json:"Temp"`
+ } `json:"BBU_Info"`
+ // PDList entries are decoded into empty structs; collect only uses the length.
+ PDList []struct {
+ } `json:"PD LIST"`
+ }
+)
+
+// collectControllersInfo writes controller and BBU status metrics for every
+// controller in resp into mx and adds charts for controllers seen for the first
+// time. For each status group all known dimensions are reset to 0 and the
+// dimension matching the reported value is set to 1. It always returns nil.
+func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersInfoResponse) error {
+	for _, v := range resp.Controllers {
+		cntrl := v.ResponseData
+
+		idx := strconv.Itoa(cntrl.Basics.Controller)
+		if !s.controllers[idx] {
+			s.controllers[idx] = true
+			s.addControllerCharts(cntrl)
+		}
+
+		px := fmt.Sprintf("cntrl_%s_", idx)
+
+		for _, st := range []string{"optimal", "degraded", "partially_degraded", "failed"} {
+			mx[px+"status_"+st] = 0
+		}
+		// Dimension IDs use underscores, so a multi-word status such as
+		// "Partially Degraded" must be normalized to "partially_degraded";
+		// lower-casing alone would produce a key no dimension matches.
+		status := strings.ReplaceAll(strings.ToLower(cntrl.Status.ControllerStatus), " ", "_")
+		mx[px+"status_"+status] = 1
+
+		for _, st := range []string{"healthy", "unhealthy", "na"} {
+			mx[px+"bbu_status_"+st] = 0
+		}
+		// storNumber may hold the raw JSON token, in which case string values
+		// still carry their surrounding quotes; strip them so "NA"/"N/A" match.
+		// https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/issues/27
+		switch strings.Trim(string(cntrl.Status.BBUStatus), `"`) {
+		case "0", "8", "4096": // 0 good, 8 charging
+			mx[px+"bbu_status_healthy"] = 1
+		case "NA", "N/A":
+			mx[px+"bbu_status_na"] = 1
+		default:
+			mx[px+"bbu_status_unhealthy"] = 1
+		}
+	}
+	return nil
+}
+
+// queryControllersInfo runs the controllers-info command and decodes its JSON
+// output. It fails when the command fails, the output is empty or not valid
+// JSON, no controllers are listed, or the first controller's command status is
+// not "Success".
+func (s *StorCli) queryControllersInfo() (*controllersInfoResponse, error) {
+	out, err := s.exec.controllersInfo()
+	switch {
+	case err != nil:
+		return nil, err
+	case len(out) == 0:
+		return nil, errors.New("empty response")
+	}
+
+	resp := new(controllersInfoResponse)
+	if err := json.Unmarshal(out, resp); err != nil {
+		return nil, err
+	}
+
+	if len(resp.Controllers) == 0 {
+		return nil, errors.New("no controllers found")
+	}
+
+	status := resp.Controllers[0].CommandStatus.Status
+	if status != "Success" {
+		return nil, fmt.Errorf("command status error: %s", status)
+	}
+
+	return resp, nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go
new file mode 100644
index 0000000000..353728d6dc
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/collect_drives.go
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// drivesInfoResponse mirrors the JSON document decoded by queryDrivesInfo.
+// "Response Data" is kept as raw JSON keyed by section name because the keys
+// embed the drive path (e.g. "Drive /c0/e252/s0 - Detailed Information") and
+// are looked up dynamically by the getDrive* helpers.
+type drivesInfoResponse struct {
+ Controllers []struct {
+ CommandStatus struct {
+ Controller int `json:"Controller"`
+ Status string `json:"Status"`
+ } `json:"Command Status"`
+ ResponseData map[string]json.RawMessage `json:"Response Data"`
+ } `json:"Controllers"`
+}
+
+type (
+ // driveInfo is one element of the "Drive <id>" array (see getDriveInfo).
+ driveInfo struct {
+ EIDSlt string `json:"EID:Slt"` // "<enclosure>:<slot>", split by addPhysDriveCharts
+ DID int `json:"DID"`
+ State string `json:"State"`
+ DG int `json:"DG"`
+ Size string `json:"Size"`
+ Intf string `json:"Intf"`
+ Med string `json:"Med"` // exposed as the "media_type" chart label
+ SED string `json:"SED"`
+ PI string `json:"PI"`
+ SeSz string `json:"SeSz"`
+ Model string `json:"Model"`
+ Sp string `json:"Sp"`
+ Type string `json:"Type"`
+ }
+ // driveState is the "Drive <id> State" section of a drive's detailed
+ // information (see getDriveState).
+ driveState struct {
+ MediaErrorCount storNumber `json:"Media Error Count"`
+ OtherErrorCount storNumber `json:"Other Error Count"`
+ DriveTemperature string `json:"Drive Temperature"` // e.g. ' 28C (82.40 F)', parsed by getDriveTemperature
+ PredictiveFailureCount storNumber `json:"Predictive Failure Count"`
+ SmartAlertFlagged string `json:"S.M.A.R.T alert flagged by drive"` // compared against "Yes"
+ }
+ // driveAttrs is the "Drive <id> Device attributes" section of a drive's
+ // detailed information (see getDriveAttrs).
+ driveAttrs struct {
+ WWN string `json:"WWN"` // used as the drive identifier in chart and metric IDs
+ DeviceSpeed string `json:"Device Speed"`
+ LinkSpeed string `json:"Link Speed"`
+ }
+)
+
+// storNumber holds a JSON value that is usually an integer but may also be a
+// string such as 'N/A'. The value is stored in textual form so callers can
+// either compare it against known strings or parse it as an integer.
+type storNumber string // some int values can be 'N/A'
+
+// UnmarshalJSON stores the JSON token as text. Surrounding double quotes of
+// string tokens are stripped, so "N/A" is stored as N/A (and "12" as 12);
+// without this, string values would keep their quotes and never match
+// comparisons like `case "NA", "N/A"` or parse as integers. It never fails.
+func (n *storNumber) UnmarshalJSON(b []byte) error {
+	*n = storNumber(strings.Trim(string(b), `"`))
+	return nil
+}
+
+// collectMegaRaidDrives writes per-drive metrics for every controller in resp
+// into mx and adds charts for drives (keyed by WWN) seen for the first time.
+//
+// Drive IDs are discovered from "Response Data" keys of the form
+// "Drive <id> - Detailed Information". Drives without a WWN are skipped.
+// Numeric metrics are written only when the reported value parses as an
+// integer. An error is returned as soon as any expected section of a drive is
+// missing or cannot be decoded.
+func (s *StorCli) collectMegaRaidDrives(mx map[string]int64, resp *drivesInfoResponse) error {
+	for _, cntrl := range resp.Controllers {
+		var ids []string
+		for k := range cntrl.ResponseData {
+			if !strings.HasSuffix(k, "Detailed Information") {
+				continue
+			}
+			parts := strings.Fields(k) // Drive /c0/e252/s0 - Detailed Information
+			if len(parts) < 2 {
+				continue
+			}
+			id := parts[1]
+			if strings.IndexByte(id, '/') == -1 {
+				continue
+			}
+			ids = append(ids, id)
+		}
+
+		cntrlIdx := cntrl.CommandStatus.Controller
+
+		for _, id := range ids {
+			info, err := getDriveInfo(cntrl.ResponseData, id)
+			if err != nil {
+				return err
+			}
+			data, err := getDriveDetailedInfo(cntrl.ResponseData, id)
+			if err != nil {
+				return err
+			}
+			state, err := getDriveState(data, id)
+			if err != nil {
+				return err
+			}
+			attrs, err := getDriveAttrs(data, id)
+			if err != nil {
+				return err
+			}
+
+			if attrs.WWN == "" {
+				continue
+			}
+
+			if !s.drives[attrs.WWN] {
+				s.drives[attrs.WWN] = true
+				s.addPhysDriveCharts(cntrlIdx, info, state, attrs)
+			}
+
+			px := fmt.Sprintf("phys_drive_%s_cntrl_%d_", attrs.WWN, cntrlIdx)
+
+			if v, ok := parseInt(string(state.MediaErrorCount)); ok {
+				mx[px+"media_error_count"] = v
+			}
+			if v, ok := parseInt(string(state.OtherErrorCount)); ok {
+				mx[px+"other_error_count"] = v
+			}
+			if v, ok := parseInt(string(state.PredictiveFailureCount)); ok {
+				mx[px+"predictive_failure_count"] = v
+			}
+			if v, ok := parseInt(getDriveTemperature(state.DriveTemperature)); ok {
+				mx[px+"temperature"] = v
+			}
+
+			// Exactly one of the two SMART alert dimensions is set to 1.
+			// Previously "inactive" was reset to 0 and never raised, so the
+			// chart showed no state at all for healthy drives.
+			for _, st := range []string{"active", "inactive"} {
+				mx[px+"smart_alert_status_"+st] = 0
+			}
+			if state.SmartAlertFlagged == "Yes" {
+				mx[px+"smart_alert_status_active"] = 1
+			} else {
+				mx[px+"smart_alert_status_inactive"] = 1
+			}
+		}
+	}
+
+	return nil
+}
+
+// queryDrivesInfo runs the drives-info command and decodes its JSON output.
+// It fails when the command fails, the output is empty or not valid JSON, no
+// controllers are listed, or the first controller's command status is not
+// "Success".
+func (s *StorCli) queryDrivesInfo() (*drivesInfoResponse, error) {
+	out, err := s.exec.drivesInfo()
+	switch {
+	case err != nil:
+		return nil, err
+	case len(out) == 0:
+		return nil, errors.New("empty response")
+	}
+
+	resp := new(drivesInfoResponse)
+	if err := json.Unmarshal(out, resp); err != nil {
+		return nil, err
+	}
+
+	if len(resp.Controllers) == 0 {
+		return nil, errors.New("no controllers found")
+	}
+
+	status := resp.Controllers[0].CommandStatus.Status
+	if status != "Success" {
+		return nil, fmt.Errorf("command status error: %s", status)
+	}
+
+	return resp, nil
+}
+
+// getDriveInfo decodes the "Drive <id>" section of respData, which is a JSON
+// array, and returns its first element. It returns an error when the section
+// is missing, cannot be decoded, or is an empty array.
+func getDriveInfo(respData map[string]json.RawMessage, id string) (*driveInfo, error) {
+	section, found := respData["Drive "+id]
+	if !found {
+		return nil, fmt.Errorf("drive info not found for '%s'", id)
+	}
+
+	var drives []driveInfo
+	err := json.Unmarshal(section, &drives)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(drives) == 0:
+		return nil, fmt.Errorf("drive info not found for '%s'", id)
+	}
+
+	return &drives[0], nil
+}
+
+// getDriveDetailedInfo decodes the "Drive <id> - Detailed Information" section
+// of respData into a map of still-raw subsections. It returns an error when
+// the section is missing or is not a JSON object.
+func getDriveDetailedInfo(respData map[string]json.RawMessage, id string) (map[string]json.RawMessage, error) {
+	section, found := respData["Drive "+id+" - Detailed Information"]
+	if !found {
+		return nil, fmt.Errorf("drive detailed info not found for '%s'", id)
+	}
+
+	var details map[string]json.RawMessage
+	err := json.Unmarshal(section, &details)
+	if err != nil {
+		return nil, err
+	}
+
+	return details, nil
+}
+
+// getDriveState decodes the "Drive <id> State" subsection of a drive's detailed
+// information. It returns an error when the subsection is missing or cannot be
+// decoded.
+func getDriveState(driveDetailedInfo map[string]json.RawMessage, id string) (*driveState, error) {
+	section, found := driveDetailedInfo["Drive "+id+" State"]
+	if !found {
+		return nil, fmt.Errorf("drive detailed info state not found for '%s'", id)
+	}
+
+	st := new(driveState)
+	if err := json.Unmarshal(section, st); err != nil {
+		return nil, err
+	}
+
+	return st, nil
+}
+
+// getDriveAttrs decodes the "Drive <id> Device attributes" subsection of a
+// drive's detailed information. It returns an error when the subsection is
+// missing or cannot be decoded.
+func getDriveAttrs(driveDetailedInfo map[string]json.RawMessage, id string) (*driveAttrs, error) {
+	k := fmt.Sprintf("Drive %s Device attributes", id)
+	raw, ok := driveDetailedInfo[k]
+	if !ok {
+		// The message names the attributes section; it used to repeat the
+		// "state not found" text of getDriveState, which made the two
+		// failures indistinguishable in logs.
+		return nil, fmt.Errorf("drive detailed info device attributes not found for '%s'", id)
+	}
+
+	var attrs driveAttrs
+	if err := json.Unmarshal(raw, &attrs); err != nil {
+		return nil, err
+	}
+
+	return &attrs, nil
+}
+
+// getDriveTemperature returns the text preceding the first 'C' in s with
+// surrounding whitespace removed, e.g. "28" for ' 28C (82.40 F)'.
+// It returns an empty string when s contains no 'C'.
+func getDriveTemperature(s string) string {
+	pos := strings.Index(s, "C")
+	if pos < 0 {
+		return ""
+	}
+	celsius := s[:pos]
+	return strings.TrimSpace(celsius)
+}
+
+// parseInt parses s as a base-10 signed 64-bit integer. The boolean reports
+// whether parsing succeeded; the integer is whatever strconv.ParseInt returned.
+func parseInt(s string) (int64, bool) {
+	v, err := strconv.ParseInt(s, 10, 64)
+	if err != nil {
+		return v, false
+	}
+	return v, true
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json b/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json
new file mode 100644
index 0000000000..226a370f43
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/config_schema.json
@@ -0,0 +1,35 @@
+{
+ "jsonSchema": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "StorCLI collector configuration.",
+ "type": "object",
+ "properties": {
+ "update_every": {
+ "title": "Update every",
+ "description": "Data collection interval, measured in seconds.",
+ "type": "integer",
+ "minimum": 1,
+ "default": 10
+ },
+ "timeout": {
+ "title": "Timeout",
+ "description": "Timeout for executing the binary, specified in seconds.",
+ "type": "number",
+ "minimum": 0.5,
+ "default": 2
+ }
+ },
+ "additionalProperties": false,
+ "patternProperties": {
+ "^name$": {}
+ }
+ },
+ "uiSchema": {
+ "uiOptions": {
+ "fullPage": true
+ },
+ "timeout": {
+ "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)."
+ }
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/exec.go b/src/go/collectors/go.d.plugin/modules/storcli/exec.go
new file mode 100644
index 0000000000..3375ddbe4f
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/exec.go
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import (
+ "context"
+ "fmt"
+ "os/exec"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/logger"
+)
+
+// newStorCliExec returns a storCliExec that invokes the binary at ndsudoPath,
+// limits each run to timeout, and logs through log.
+func newStorCliExec(ndsudoPath string, timeout time.Duration, log *logger.Logger) *storCliExec {
+	e := new(storCliExec)
+	e.Logger = log
+	e.ndsudoPath = ndsudoPath
+	e.timeout = timeout
+	return e
+}
+
+// storCliExec runs storcli queries by invoking the ndsudo helper with a
+// predefined command name (see controllersInfo and drivesInfo).
+type storCliExec struct {
+ *logger.Logger
+
+ ndsudoPath string // path of the executable started by execute
+ timeout time.Duration // context timeout applied to every run in execute
+}
+
+// controllersInfo runs the "storcli-controllers-info" command and returns its
+// standard output.
+func (e *storCliExec) controllersInfo() ([]byte, error) {
+	const command = "storcli-controllers-info"
+	return e.execute(command)
+}
+
+// drivesInfo runs the "storcli-drives-info" command and returns its standard
+// output.
+func (e *storCliExec) drivesInfo() ([]byte, error) {
+	const command = "storcli-drives-info"
+	return e.execute(command)
+}
+
+// execute runs the ndsudo binary with args and returns its standard output.
+// The run is bounded by e.timeout via a context. On failure the underlying
+// error is wrapped with %w, so callers can still reach it (for example an
+// *exec.ExitError) with errors.As / errors.Is.
+func (e *storCliExec) execute(args ...string) ([]byte, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, e.ndsudoPath, args...)
+	e.Debugf("executing '%s'", cmd)
+
+	bs, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("error on '%s': %w", cmd, err)
+	}
+
+	return bs, nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/init.go b/src/go/collectors/go.d.plugin/modules/storcli/init.go
new file mode 100644
index 0000000000..297f7c8c3e
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/init.go
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package storcli
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/executable"
+)
+
+// initStorCliExec builds the command executor used by the collector. It looks
+// for the "ndsudo" helper in executable.Directory and fails if os.Stat on that
+// path returns an error.
+func (s *StorCli) initStorCliExec() (storCli, error) {
+	path := filepath.Join(executable.Directory, "ndsudo")
+
+	_, err := os.Stat(path)
+	if err != nil {
+		return nil, fmt.Errorf("ndsudo executable not found: %v", err)
+	}
+
+	return newStorCliExec(path, s.Timeout.Duration(), s.Logger), nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml
new file mode 100644
index 0000000000..ecf97fb442
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml
@@ -0,0 +1,153 @@
+plugin_name: go.d.plugin
+modules:
+ - meta:
+ id: collector-go.d.plugin-storcli
+ plugin_name: go.d.plugin
+ module_name: storcli
+ monitored_instance:
+ name: StorCLI RAID
+ link: "https://docs.broadcom.com/doc/12352476"
+ icon_filename: "hard-drive.svg"
+ categories:
+ - data-collection.storage-mount-points-and-filesystems
+ keywords:
+ - storage
+ - raid-controller
+ - manage-disks
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ most_popular: false
+ overview:
+ data_collection:
+ metrics_description: |
+ Monitors the health of StorCLI Hardware RAID by tracking the status of RAID adapters, physical drives, and backup batteries in your storage system.
+ It relies on the [`storcli`](https://docs.broadcom.com/doc/12352476) CLI tool but avoids directly executing the binary.
+ Instead, it utilizes `ndsudo`, a Netdata helper specifically designed to run privileged commands securely within the Netdata environment.
+ This approach eliminates the need to use `sudo`, improving security and potentially simplifying permission management.
+
+ Executed commands:
+ - `storcli /cALL show all J nolog`
+ - `storcli /cALL/eALL/sALL show all J nolog`
+ method_description: ""
+ supported_platforms:
+ include: []
+ exclude: []
+ multi_instance: false
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: ""
+ limits:
+ description: ""
+ performance_impact:
+ description: ""
+ setup:
+ prerequisites:
+ list: []
+ configuration:
+ file:
+ name: go.d/storcli.conf
+ options:
+ description: |
+ The following options can be defined globally: update_every.
+ folding:
+ title: Config options
+ enabled: true
+ list:
+ - name: update_every
+ description: Data collection frequency.
+ default_value: 10
+ required: false
+ - name: timeout
+ description: storcli binary execution timeout.
+ default_value: 2
+ required: false
+ examples:
+ folding:
+ title: Config
+ enabled: true
+ list:
+ - name: Custom update_every
+ description: Allows you to override the default data collection interval.
+ config: |
+ jobs:
+ - name: storcli
+ update_every: 5 # Collect StorCLI RAID statistics every 5 seconds
+ troubleshooting:
+ problems:
+ list: []
+ alerts: []
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: ""
+ availability: []
+ scopes:
+ - name: controller
+ description: These metrics refer to the Controller.
+ labels:
+ - name: controller_number
+ description: Controller number (index)
+ - name: model
+ description: Controller model
+ metrics:
+ - name: storcli.controller_status
+ description: Controller status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: optimal
+ - name: degraded
+ - name: partially_degraded
+ - name: failed
+ - name: storcli.controller_bbu_status
+ description: Controller BBU status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: healthy
+ - name: unhealthy
+ - name: na
+ - name: physical drive
+ description: These metrics refer to the Physical Drive.
+ labels:
+ - name: controller_number
+ description: Controller number (index)
+ - name: enclosure_number
+ description: Enclosure number (index)
+ - name: slot_number
+ description: Slot number (index)
+ - name: media_type
+ description: Media type (e.g. HDD)
+ metrics:
+ - name: storcli.phys_drive_errors
+ description: Physical Drive media errors rate
+ unit: errors/s
+ chart_type: line
+ dimensions:
+ - name: media
+ - name: other
+ - name: storcli.phys_drive_predictive_failures
+ description: Physical Drive predictive failures rate
+ unit: failures/s
+ chart_type: line
+ dimensions:
+ - name: predictive_failures
+ - name: storcli.phys_drive_smart_alert_status
+ description: Physical Drive SMART alert status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: active
+ - name: inactive
+ - name: storcli.phys_drive_temperature
+ description: Physical Drive temperature
+ unit: Celsius
+ chart_type: line
+ dimensions:
+ - name: temperature
diff --git a/src/go/collectors/go.d.plugin/modules/storcli/storcli.go b/src/go/collectors/go.d.plugin/modules/storcli/storcli.go
new file mode 100644
index 0000000000..3122803b69
--- /dev/null
+++ b/