summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValentin Rakush <52716954+alpes214@users.noreply.github.com>2019-08-23 13:29:20 +0300
committerChris Akritidis <43294513+cakrit@users.noreply.github.com>2019-08-23 12:29:20 +0200
commit62bc37ed5f6cf64459deafc11c7d6d938158c447 (patch)
tree116b91e623b39af175613b2b4b555ee2d58a1070
parent81ff262cd8f57551c73bf97030d2ad474c18592d (diff)
Add alarm status counter api call (#6554)
##### Summary This is an implementation of a prerequisite for the requested feature #6536 (Generate an overall status badge/chart for the health of a category) ##### Component Name web/api/ health/ ##### Details Provide a new `alarm_count` API call that returns the total number of alarms for the given contexts and alarm status. The default is the total number of raised alarms, for all contexts.
-rw-r--r--health/health.h1
-rw-r--r--health/health_json.c37
-rw-r--r--web/api/netdata-swagger.json55
-rw-r--r--web/api/netdata-swagger.yaml32
-rw-r--r--web/api/web_api_v1.c45
-rw-r--r--web/api/web_api_v1.h1
6 files changed, 171 insertions, 0 deletions
diff --git a/health/health.h b/health/health.h
index 0bf3b8ffa8..6c000bf458 100644
--- a/health/health.h
+++ b/health/health.h
@@ -58,6 +58,7 @@ extern void *health_main(void *ptr);
extern void health_reload(void);
extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result);
+// Count the alarms of `host` that are in `status` and append the count, as a
+// decimal number, to `wb`.
+//   contexts - optional list of chart contexts separated by ',', ' ' or '|';
+//              when NULL the alarms of every context are counted.
+//              NOTE: the buffer content is tokenized by the callee, so the
+//              caller should not rely on it afterwards.
+//   status   - RRDCALC_STATUS_RAISED matches every alarm whose status is
+//              RRDCALC_STATUS_WARNING or above; any other value is matched exactly.
+extern void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER *contexts, RRDCALC_STATUS status);
extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after);
diff --git a/health/health_json.c b/health/health_json.c
index e923b05c6c..f6ff1b1a74 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -231,6 +231,43 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
//
//}
+// Return non-zero when an alarm in state `current` should be counted for the
+// requested state `wanted`. RRDCALC_STATUS_RAISED is a pseudo-status: it matches
+// every alarm whose status is RRDCALC_STATUS_WARNING or above.
+static inline int health_alarm_status_matches(RRDCALC_STATUS current, RRDCALC_STATUS wanted) {
+    if(wanted == RRDCALC_STATUS_RAISED)
+        return current >= RRDCALC_STATUS_WARNING;
+
+    return current == wanted;
+}
+
+// Count the alarms of `host` that match `status` and append the count, as a
+// decimal number, to `wb`.
+//
+// host     - the host whose alarm list is walked, under its read lock.
+// wb       - output buffer; only the number is appended (no brackets, no newline).
+// contexts - optional list of chart contexts separated by ',', ' ' or '|'.
+//            When NULL, alarms of all contexts are counted. The buffer is
+//            tokenized with mystrsep(), which presumably modifies it in place,
+//            so callers should not reuse its content afterwards.
+// status   - the status to count; see health_alarm_status_matches().
+//
+// Alarms that are not linked to a chart, or whose chart has a zero
+// last_collected_time.tv_sec, are never counted. An alarm is counted once per
+// matching token, so a context repeated in the list is counted repeatedly.
+void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCALC_STATUS status) {
+    RRDCALC *rc;
+    int alarm_count = 0;
+
+    rrdhost_rdlock(host);
+
+    if (contexts) {
+        char *p = (char*)buffer_tostring(contexts);
+        char *tok = NULL;
+
+        while(p && *p && (tok = mystrsep(&p, ", |"))) {
+            if(!*tok) continue;
+
+            // the hash depends only on the token, so compute it once per token
+            // instead of once per alarm
+            uint32_t tok_hash = simple_hash(tok);
+
+            for(rc = host->alarms; rc ; rc = rc->next) {
+                if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
+                    continue;
+
+                // cheap hash comparison first, strcmp() only to confirm
+                if(rc->rrdset->hash_context != tok_hash || strcmp(rc->rrdset->context, tok) != 0)
+                    continue;
+
+                if(unlikely(health_alarm_status_matches(rc->status, status)))
+                    alarm_count++;
+            }
+        }
+    }
+    else {
+        for(rc = host->alarms; rc ; rc = rc->next) {
+            if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
+                continue;
+
+            if(unlikely(health_alarm_status_matches(rc->status, status)))
+                alarm_count++;
+        }
+    }
+
+    buffer_sprintf(wb, "%d", alarm_count);
+    rrdhost_unlock(host);
+}
+
void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
int i;
diff --git a/web/api/netdata-swagger.json b/web/api/netdata-swagger.json
index 44e862d174..1a0ec86385 100644
--- a/web/api/netdata-swagger.json
+++ b/web/api/netdata-swagger.json
@@ -657,6 +657,61 @@
}
}
},
+ "/alarm_count": {
+ "get": {
+ "summary": "Get the number of alarms in a given status",
+ "description": "Checks all charts that have one of the given contexts and counts the alarms that are in the given status.",
+ "parameters": [
+ {
+ "in": "query",
+ "name": "context",
+ "description": "Specify context which should be checked",
+ "required": false,
+ "allowEmptyValue": true,
+ "type": "array",
+ "items": {
+ "type": "string",
+ "collectionFormat": "pipes"
+ },
+ "default": [
+ "system.cpu"
+ ]
+ },
+ {
+ "in": "query",
+ "name": "status",
+ "description": "Specify alarm status to count",
+ "required": false,
+ "allowEmptyValue": true,
+ "type": "string",
+ "enum": [
+ "REMOVED",
+ "UNDEFINED",
+ "UNINITIALIZED",
+ "CLEAR",
+ "RAISED",
+ "WARNING",
+ "CRITICAL"
+ ],
+ "default": "RAISED"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "An array containing the number of alarms with the given status for the given contexts",
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error. This usually means the server is out of memory."
+ }
+ }
+ }
+ },
"/manage/health": {
"get": {
"summary": "Accesses the health management API to control health checks and notifications at runtime.",
diff --git a/web/api/netdata-swagger.yaml b/web/api/netdata-swagger.yaml
index 0052e646c2..50e6614006 100644
--- a/web/api/netdata-swagger.yaml
+++ b/web/api/netdata-swagger.yaml
@@ -430,6 +430,38 @@ paths:
type: array
items:
$ref: '#/definitions/alarm_log_entry'
+ /alarm_count:
+ get:
+ summary: 'Get the number of alarms in a given status'
+ description: "Checks all charts that have one of the given contexts and counts the alarms that are in the given status."
+ parameters:
+ - in: query
+ name: context
+ description: "Specify context which should be checked"
+ required: false
+ allowEmptyValue: true
+ type: array
+ items:
+ type: string
+ collectionFormat: pipes
+ default: ['system.cpu']
+ - in: query
+ name: status
+ description: "Specify alarm status to count"
+ required: false
+ allowEmptyValue: true
+ type: string
+ enum: ['REMOVED', 'UNDEFINED', 'UNINITIALIZED', 'CLEAR', 'RAISED', 'WARNING', 'CRITICAL']
+ default: 'RAISED'
+ responses:
+ '200':
+ description: 'An array containing the number of alarms with the given status for the given contexts'
+ schema:
+ type: array
+ items:
+ type: number
+ '500':
+ description: 'Internal server error. This usually means the server is out of memory.'
/manage/health:
get:
summary: 'Accesses the health management API to control health checks and notifications at runtime.'
diff --git a/web/api/web_api_v1.c b/web/api/web_api_v1.c
index 2d12049632..f34f020090 100644
--- a/web/api/web_api_v1.c
+++ b/web/api/web_api_v1.c
@@ -213,6 +213,50 @@ inline int web_client_api_request_v1_alarms(RRDHOST *host, struct web_client *w,
return 200;
}
+// API v1 "alarm_count": respond with a JSON array holding a single number, the
+// count of alarms of `host` that are in the requested status.
+//
+// Query parameters parsed from `url` (which is tokenized in place):
+//   status         - CRITICAL, WARNING, UNINITIALIZED, UNDEFINED, REMOVED, CLEAR
+//                    or RAISED, case-insensitive. Defaults to RAISED. An
+//                    unrecognized value leaves the current selection unchanged.
+//   context / ctx  - one or more contexts to restrict the count to; may be given
+//                    multiple times, the values are joined with '|'.
+// Parameters with an empty name or an empty value are ignored.
+//
+// The response is marked as JSON and non-cacheable. Always returns 200.
+inline int web_client_api_request_v1_alarm_count(RRDHOST *host, struct web_client *w, char *url) {
+    RRDCALC_STATUS status = RRDCALC_STATUS_RAISED;
+    BUFFER *contexts = NULL;
+
+    buffer_flush(w->response.data);
+    buffer_sprintf(w->response.data, "[");
+
+    while(url) {
+        char *value = mystrsep(&url, "&");
+        if(!value || !*value) continue;
+
+        char *name = mystrsep(&value, "=");
+        if(!name || !*name) continue;
+        if(!value || !*value) continue;
+
+        debug(D_WEB_CLIENT, "%llu: API v1 alarm_count query param '%s' with value '%s'", w->id, name, value);
+
+        if(!strcmp(name, "status")) {
+            // The value comes from the request URL and may contain bytes >= 0x80.
+            // toupper() requires a value representable as unsigned char (or EOF),
+            // so convert before calling it to avoid undefined behaviour where
+            // plain char is signed.
+            for(char *p = value; *p; p++)
+                *p = (char)toupper((unsigned char)*p);
+
+            if (!strcmp("CRITICAL", value)) status = RRDCALC_STATUS_CRITICAL;
+            else if (!strcmp("WARNING", value)) status = RRDCALC_STATUS_WARNING;
+            else if (!strcmp("UNINITIALIZED", value)) status = RRDCALC_STATUS_UNINITIALIZED;
+            else if (!strcmp("UNDEFINED", value)) status = RRDCALC_STATUS_UNDEFINED;
+            else if (!strcmp("REMOVED", value)) status = RRDCALC_STATUS_REMOVED;
+            else if (!strcmp("CLEAR", value)) status = RRDCALC_STATUS_CLEAR;
+            // RAISED is the default, but accept it explicitly so that it can
+            // override a status given earlier in the same query string.
+            else if (!strcmp("RAISED", value)) status = RRDCALC_STATUS_RAISED;
+        }
+        else if(!strcmp(name, "context") || !strcmp(name, "ctx")) {
+            if(!contexts) contexts = buffer_create(255);
+
+            // the leading '|' separates this value from any previous one; the
+            // consumer skips empty tokens
+            buffer_strcat(contexts, "|");
+            buffer_strcat(contexts, value);
+        }
+    }
+
+    health_aggregate_alarms(host, w->response.data, contexts, status);
+
+    buffer_sprintf(w->response.data, "]\n");
+    w->response.data->contenttype = CT_APPLICATION_JSON;
+    buffer_no_cacheable(w->response.data);
+
+    buffer_free(contexts);
+    return 200;
+}
+
inline int web_client_api_request_v1_alarm_log(RRDHOST *host, struct web_client *w, char *url) {
uint32_t after = 0;
@@ -780,6 +824,7 @@ static struct api_command {
{ "alarms", 0, WEB_CLIENT_ACL_DASHBOARD, web_client_api_request_v1_alarms },
{ "alarm_log", 0, WEB_CLIENT_ACL_DASHBOARD, web_client_api_request_v1_alarm_log },
{ "alarm_variables", 0, WEB_CLIENT_ACL_DASHBOARD, web_client_api_request_v1_alarm_variables },
+ { "alarm_count", 0, WEB_CLIENT_ACL_DASHBOARD, web_client_api_request_v1_alarm_count },
{ "allmetrics", 0, WEB_CLIENT_ACL_DASHBOARD, web_client_api_request_v1_allmetrics },
{ "manage/health", 0, WEB_CLIENT_ACL_MGMT, web_client_api_request_v1_mgmt_health },
// terminator
diff --git a/web/api/web_api_v1.h b/web/api/web_api_v1.h
index 70b7817804..3edb47e3c7 100644
--- a/web/api/web_api_v1.h
+++ b/web/api/web_api_v1.h
@@ -16,6 +16,7 @@ extern int web_client_api_request_v1_alarms(RRDHOST *host, struct web_client *w,
extern int web_client_api_request_v1_alarm_log(RRDHOST *host, struct web_client *w, char *url);
extern int web_client_api_request_single_chart(RRDHOST *host, struct web_client *w, char *url, void callback(RRDSET *st, BUFFER *buf));
extern int web_client_api_request_v1_alarm_variables(RRDHOST *host, struct web_client *w, char *url);
+extern int web_client_api_request_v1_alarm_count(RRDHOST *host, struct web_client *w, char *url);
extern int web_client_api_request_v1_charts(RRDHOST *host, struct web_client *w, char *url);
extern int web_client_api_request_v1_chart(RRDHOST *host, struct web_client *w, char *url);
extern int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, char *url);