summaryrefslogtreecommitdiffstats
path: root/claim
diff options
context:
space:
mode:
authorMarkos Fountoulakis <44345837+mfundul@users.noreply.github.com>2019-12-19 11:01:59 +0200
committerAndrew Moss <1043609+amoss@users.noreply.github.com>2019-12-19 10:01:59 +0100
commitce9f70d7b3696a54f099628f3337f5c4d5bd1407 (patch)
treed8f0e7bbdd1a070c2dcf619e62c6e5ff4faaccfc /claim
parentc1436efd00c5b9afdb3779585eb194ea8388a932 (diff)
Agent claiming (#7525)
Initial infrastructure support for agent claiming. This feature is not currently enabled as we are still finalizing the details of the cloud infrastructure w.r.t. agent claiming. The feature will be enabled when we are ready to release it.
Diffstat (limited to 'claim')
-rw-r--r--claim/Makefile.am21
-rw-r--r--claim/README.md72
-rw-r--r--claim/claim.c103
-rw-r--r--claim/claim.h14
-rwxr-xr-xclaim/netdata-claim.sh.in216
5 files changed, 426 insertions, 0 deletions
diff --git a/claim/Makefile.am b/claim/Makefile.am
new file mode 100644
index 0000000000..c838db9b66
--- /dev/null
+++ b/claim/Makefile.am
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Automake rules for the agent claiming helper script and its documentation.
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+# netdata-claim.sh is listed here so that "make clean" removes it; it is
+# presumably generated from netdata-claim.sh.in (see the include below).
+CLEANFILES = \
+ netdata-claim.sh \
+ $(NULL)
+
+# NOTE(review): build/subst.inc is expected to provide the rule that turns
+# *.in templates into their final form -- confirm against that file.
+include $(top_srcdir)/build/subst.inc
+SUFFIXES = .in
+
+# Scripts installed into the sbin directory.
+sbin_SCRIPTS = \
+ netdata-claim.sh \
+ $(NULL)
+
+# Files shipped in the distribution but not installed.
+dist_noinst_DATA = \
+ netdata-claim.sh.in \
+ README.md \
+ $(NULL)
+
diff --git a/claim/README.md b/claim/README.md
new file mode 100644
index 0000000000..05de636155
--- /dev/null
+++ b/claim/README.md
@@ -0,0 +1,72 @@
+# Agent claiming
+
+Agent claiming is part of the onboarding process when creating a workspace in Netdata Cloud. Each workspace gets its own
+common invitation mechanism, which begins with the administrators of the workspace creating a **claiming-token**. They,
+or other users in their organization, can then use the claiming-token to add an agent to their workspace.
+
+To claim a Netdata agent, you first send a claiming request to Netdata Cloud (from the agent node). Once
+Netdata Cloud validates the agent's claiming request (based on the claiming token) and returns a successful
+result, the node is considered claimed.
+
+## Claiming script
+
+The user can claim an agent by directly calling the `netdata-claim.sh` script **as the netdata user** and passing the
+following arguments:
+
+```sh
+-token=TOKEN
+ where TOKEN is the workspace claiming-token.
+-rooms=ROOM1,ROOM2,...
+ where ROOMX is the workspace war-room to join. This list is optional.
+-url=URL_BASE
+ where URL_BASE is the Netdata Cloud endpoint base URL. By default, this is https://netdata.cloud.
+-id=AGENT_ID
+ where AGENT_ID is the unique identifier of the agent. This is the agent's MACHINE_GUID by default.
+-hostname=HOSTNAME
+ where HOSTNAME is the result of the hostname command by default.
+```
+
+For example, the following command claims an agent and adds it to rooms `room1` and `room2`:
+
+```sh
+netdata-claim.sh -token=MYTOKEN1234567 -rooms=room1,room2
+```
+
+You should then update the `netdata` service about the result with `netdatacli`:
+
+```sh
+netdatacli reload-claiming-state
+```
+
+This reloads the agent claiming state from disk.
+
+## Netdata agent command line
+
+The user can trigger agent claiming by calling the `netdata` service binary with the additional command line parameters:
+
+```sh
+-W "claim -token=TOKEN -rooms=ROOM1,ROOM2"
+```
+
+For example:
+
+```sh
+/usr/sbin/netdata -D -W "claim -token=MYTOKEN1234567 -rooms=room1,room2"
+```
+
+If need be, the user can override the agent's defaults by providing additional arguments like those described
+[here](#claiming-script).
+
+## Claiming directory
+
+Netdata stores the agent claiming-related state in the user configuration directory under `claim.d`, e.g. in
+`/etc/netdata/claim.d`. The user can put files in this directory to provide defaults to the `-token` and `-rooms`
+arguments. These files should be owned **by the `netdata` user**.
+
+The `claim.d/token` file should contain the claiming-token and the `claim.d/rooms` file should contain the list of
+war-rooms.
+
+The user can also put the Cloud endpoint's full certificate chain in `claim.d/cloud_fullchain.pem` so that the agent
+can trust the endpoint if necessary.
+
+[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fclaim%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>)
diff --git a/claim/claim.c b/claim/claim.c
new file mode 100644
index 0000000000..75f0a437d6
--- /dev/null
+++ b/claim/claim.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "claim.h"
+#include "../registry/registry_internals.h"
+
+char *claiming_pending_arguments = NULL;
+
+/*
+ * Human-readable messages for the outcome of the claiming command.
+ * The position of each entry is the exit code it describes (see the trailing
+ * comment on each line); claim_agent() uses the exit code of the claiming
+ * command directly as an index into this table, so the order must not change.
+ */
+static char *claiming_errors[] = {
+ "Agent claimed successfully", // 0
+ "Unknown argument", // 1
+ "Problems with claiming working directory", // 2
+ "Missing dependencies", // 3
+ "Failure to connect to endpoint", // 4
+ "Unknown HTTP error message", // 5
+ "invalid agent id", // 6
+ "invalid public key", // 7
+ "token has expired", // 8
+ "invalid token", // 9
+ "duplicate agent id", // 10
+ "claimed in another workspace", // 11
+ "internal server error" // 12
+};
+
+#define AGENT_UNCLAIMED 0
+#define AGENT_CLAIMED 1
+static uint8_t claiming_status = AGENT_UNCLAIMED;
+
+/* Tell whether the agent is currently marked as claimed: 1 if it is, 0 otherwise. */
+uint8_t is_agent_claimed(void)
+{
+    if (claiming_status != AGENT_CLAIMED)
+        return 0;
+
+    return 1;
+}
+
+#define CLAIMING_COMMAND_LENGTH 16384
+
+extern struct registry registry;
+
+/*
+ * Claim this agent by running the external netdata-claim.sh helper, and mark
+ * the agent as claimed when the helper exits with code 0.
+ *
+ * claiming_arguments: extra arguments appended verbatim to the helper's
+ *                     command line, after -hostname, -id and -url.
+ *
+ * Any other exit code is logged, using the claiming_errors table when the
+ * code is a known one.
+ *
+ * rrd_init() must have been called before this function.
+ *
+ * NOTE: the feature is not enabled yet; the function logs a notice and returns
+ * immediately, so everything after the early return is currently unreachable.
+ */
+void claim_agent(char *claiming_arguments)
+{
+    info("The claiming feature is under development and still subject to change before the next release");
+    return;
+
+    int exit_code;
+    pid_t command_pid;
+    char command_buffer[CLAIMING_COMMAND_LENGTH + 1];
+    FILE *fp;
+
+    snprintfz(command_buffer,
+              CLAIMING_COMMAND_LENGTH,
+              "exec netdata-claim.sh -hostname=%s -id=%s -url=%s %s",
+              netdata_configured_hostname,
+              localhost->machine_guid,
+              registry.cloud_base_url,
+              claiming_arguments);
+
+    info("Executing agent claiming command 'netdata-claim.sh'");
+    fp = mypopen(command_buffer, &command_pid);
+    if(!fp) {
+        error("Cannot popen(\"%s\").", command_buffer);
+        return;
+    }
+    info("Waiting for claiming command to finish.");
+    // Drain the helper's output; its content is discarded, only the exit code is used.
+    while (fgets(command_buffer, CLAIMING_COMMAND_LENGTH, fp) != NULL) {;}
+    exit_code = mypclose(fp, command_pid);
+    info("Agent claiming command returned with code %d", exit_code);
+    if (0 == exit_code) {
+        claiming_status = AGENT_CLAIMED;
+        info("Agent successfully claimed.");
+        return;
+    }
+    if (exit_code < 0) {
+        error("Agent claiming command failed to complete its run.");
+        return;
+    }
+    // NOTE(review): presumably cleared so that error() does not report a stale errno -- confirm.
+    errno = 0;
+    unsigned known_exit_codes = sizeof(claiming_errors) / sizeof(claiming_errors[0]);
+
+    // Valid indexes are 0 .. known_exit_codes - 1; a code equal to the count is already out of range.
+    if ((unsigned)exit_code >= known_exit_codes) {
+        error("Agent failed to be claimed with an unknown error.");
+        return;
+    }
+    error("Agent failed to be claimed with the following error message:");
+    error("\"%s\"", claiming_errors[exit_code]);
+}
+
+/*
+ * Refresh the in-memory claiming state from disk: the agent counts as claimed
+ * when the file claim.d/is_claimed exists under the user configuration
+ * directory, and as unclaimed otherwise.
+ *
+ * NOTE: the feature is not enabled yet; the function logs a notice and returns
+ * before looking at the disk.
+ */
+void load_claiming_state(void)
+{
+    info("The claiming feature is under development and still subject to change before the next release");
+    return;
+
+    struct stat marker_stat;
+    char marker_path[FILENAME_MAX + 1];
+
+    snprintfz(marker_path, FILENAME_MAX, "%s/claim.d/is_claimed", netdata_configured_user_config_dir);
+
+    // only the presence of the marker file matters, its content is never read
+    if (lstat(marker_path, &marker_stat) == 0) {
+        info("File '%s' was found. Setting state to AGENT_CLAIMED.", marker_path);
+        claiming_status = AGENT_CLAIMED;
+        return;
+    }
+
+    info("File '%s' was not found. Setting state to AGENT_UNCLAIMED.", marker_path);
+    claiming_status = AGENT_UNCLAIMED;
+}
diff --git a/claim/claim.h b/claim/claim.h
new file mode 100644
index 0000000000..3b2b867434
--- /dev/null
+++ b/claim/claim.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_CLAIM_H
+#define NETDATA_CLAIM_H 1
+
+#include "../daemon/common.h"
+
+// Claiming arguments kept for later use; defined (as NULL) in claim.c.
+// NOTE(review): presumably filled in from the command line and passed to claim_agent() later -- confirm with callers.
+extern char *claiming_pending_arguments;
+
+// Runs the netdata-claim.sh helper with the given extra arguments and records the outcome.
+// While the feature is disabled it only logs a notice.
+void claim_agent(char *claiming_arguments);
+// Returns non-zero when the agent is currently marked as claimed.
+uint8_t is_agent_claimed(void);
+// Re-reads the claimed/unclaimed state from the claim.d directory.
+// While the feature is disabled it only logs a notice.
+void load_claiming_state(void);
+
+#endif //NETDATA_CLAIM_H
diff --git a/claim/netdata-claim.sh.in b/claim/netdata-claim.sh.in
new file mode 100755
index 0000000000..e565e3de28
--- /dev/null
+++ b/claim/netdata-claim.sh.in
@@ -0,0 +1,216 @@
+#!/usr/bin/env bash
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Exit code: 0 - Success
+# Exit code: 1 - Unknown argument
+# Exit code: 2 - Problems with claiming working directory
+# Exit code: 3 - Missing dependencies
+# Exit code: 4 - Failure to connect to endpoint
+# Exit code: 5 - Unknown HTTP error message
+#
+# OK: Agent claimed successfully
+# HTTP Status code: 200
+# Exit code: 0
+#
+# Error: The agent id is invalid; it does not fulfill the constraints
+# HTTP Status code: 422
+# Error message: "invalid agent id"
+# Exit code: 6
+#
+# Error: Invalid public key; the public key is empty or not present
+# HTTP Status code: 422
+# Error message: "invalid public key"
+# Exit code: 7
+#
+# Error: Expired token
+# HTTP Status code: 403
+# Error message: "token has expired"
+# Exit code: 8
+#
+# Error: Invalid claiming token; missing, undecryptable, invalid payload...
+# HTTP Status code: 422
+# Error message: "invalid token"
+# Exit code: 9
+#
+# Error: Duplicate agent id; an agent with the same id but a different public key is already registered in the cloud
+# HTTP Status code: 409
+# Error message: "duplicate agent id"
+# Exit code: 10
+#
+# Error: Already claimed in another workspace;
+# this agent (same id, same public key) already belongs to another workspace
+# HTTP Status code: 403
+# Error message: "claimed in another workspace"
+# Exit code: 11
+#
+# Error: Internal server error. Any other unexpected error (DB problems, etc.)
+# HTTP Status code: 500
+# Error message: "internal server error"
+# Exit code: 12
+
+if command -v curl >/dev/null 2>&1 ; then
+ URLTOOL="curl"
+elif command -v wget >/dev/null 2>&1 ; then
+ URLTOOL="wget"
+else
+ echo >&2 "I need curl or wget to proceed, but neither is available on this system."
+ exit 3
+fi
+if ! command -v openssl >/dev/null 2>&1 ; then
+ echo >&2 "I need openssl to proceed, but neither is available on this system."
+ exit 3
+fi
+
+
+# -----------------------------------------------------------------------------
+# defaults to allow running this script by hand
+
+# Fall back to the build-time configuration directory when the caller did not provide one.
+if [ -z "${NETDATA_USER_CONFIG_DIR}" ] ; then
+    NETDATA_USER_CONFIG_DIR="@configdir_POST@"
+fi
+
+CLAIMING_DIR="${NETDATA_USER_CONFIG_DIR}/claim.d"
+CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem"
+MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id"
+
+URL_BASE="https://netdata.cloud"
+HOSTNAME=$(hostname)
+ID="unknown"
+TOKEN="unknown"
+ROOMS=""
+
+# The agent id defaults to the MACHINE_GUID, when that file is readable.
+[ -r "${MACHINE_GUID_FILE}" ] && ID="$(cat "${MACHINE_GUID_FILE}")"
+
+# claim.d/token and claim.d/rooms, when readable, provide the default token and room list.
+[ -r "${CLAIMING_DIR}/token" ] && TOKEN="$(cat "${CLAIMING_DIR}/token")"
+[ -r "${CLAIMING_DIR}/rooms" ] && ROOMS="$(cat "${CLAIMING_DIR}/rooms")"
+
+for arg in "$@"
+do
+ case $arg in
+ -token=*) TOKEN=${arg:7} ;;
+ -url=*) URL_BASE=${arg:5} ;;
+ -id=*) ID=${arg:4} ;;
+ -rooms=*) ROOMS=${arg:7} ;;
+ -hostname=*) HOSTNAME=${arg:10} ;;
+ *) echo >&2 "Unknown argument ${arg}"
+ exit 1 ;;
+ esac
+ shift 1
+done
+
+echo >&2 "Token: ****************"
+echo >&2 "Base URL: $URL_BASE"
+echo >&2 "Id: $ID"
+echo >&2 "Rooms: $ROOMS"
+echo >&2 "Hostname: $HOSTNAME"
+
+# create the claiming directory for this user
+if [ ! -d "${CLAIMING_DIR}" ] ; then
+ mkdir -p "${CLAIMING_DIR}" && chmod 0770 "${CLAIMING_DIR}"
+# shellcheck disable=SC2181
+ if [ $? -ne 0 ] ; then
+ echo >&2 "Failed to create claiming working directory ${CLAIMING_DIR}"
+ exit 2
+ fi
+fi
+if [ ! -w "${CLAIMING_DIR}" ] ; then
+ echo >&2 "No write permission in claiming working directory ${CLAIMING_DIR}"
+ exit 2
+fi
+
+if [ ! -f "${CLAIMING_DIR}/private.pem" ] ; then
+ echo >&2 "Generating private/public key for the first time."
+ if ! openssl genrsa -out "${CLAIMING_DIR}/private.pem" 2048 ; then
+ echo >&2 "Failed to generate private/public key pair."
+ exit 2
+ fi
+fi
+if [ ! -f "${CLAIMING_DIR}/public.pem" ] ; then
+ echo >&2 "Extracting public key from private key."
+ if ! openssl rsa -in "${CLAIMING_DIR}/private.pem" -outform PEM -pubout -out "${CLAIMING_DIR}/public.pem" ; then
+ echo >&2 "Failed to extract public key."
+ exit 2
+ fi
+fi
+
+TARGET_URL="${URL_BASE}/api/v1/workspaces/agents/${ID}"
+# shellcheck disable=SC2002
+KEY=$(cat "${CLAIMING_DIR}/public.pem" | tr '\n' '!' | sed -e 's/!/\\n/g')
+# shellcheck disable=SC2001
+[ -n "$ROOMS" ] && ROOMS=\"$(echo "$ROOMS" | sed s'/,/", "/g')\"
+
+cat > "${CLAIMING_DIR}/tmpin.txt" <<EMBED_JSON
+{
+ "agent": {
+ "id": "$ID",
+ "hostname": "$HOSTNAME"
+ },
+ "token": "$TOKEN",
+ "rooms" : [ $ROOMS ],
+ "publicKey" : "$KEY"
+}
+EMBED_JSON
+
+
+# Build the HTTP PUT command for the selected tool. Both variants print the
+# response headers together with the body, and send tmpin.txt as request body.
+if [ "${URLTOOL}" = "curl" ] ; then
+    URLCOMMAND="curl --connect-timeout 5 --retry 3 -s -i -X PUT -d \"@${CLAIMING_DIR}/tmpin.txt\""
+else
+    URLCOMMAND="wget -T 15 -O - -q --save-headers --content-on-error=on --method=PUT --body-file=\"${CLAIMING_DIR}/tmpin.txt\""
+fi
+
+# Trust the user supplied certificate chain of the cloud endpoint, when there is one.
+if [ -r "${CLOUD_CERTIFICATE_FILE}" ] ; then
+    if [ "${URLTOOL}" = "curl" ] ; then
+        URLCOMMAND="${URLCOMMAND} --cacert \"${CLOUD_CERTIFICATE_FILE}\""
+    else
+        URLCOMMAND="${URLCOMMAND} --ca-certificate \"${CLOUD_CERTIFICATE_FILE}\""
+    fi
+fi
+
+# Run the request and keep a copy of the response in tmpout.txt.
+# After a pipeline, $? is the exit status of its last command (tee), which would
+# hide a failed request; PIPESTATUS[0] is the status of the HTTP client itself.
+eval "${URLCOMMAND} \"${TARGET_URL}\"" | tee "${CLAIMING_DIR}/tmpout.txt"
+URLCOMMAND_EXIT_CODE=${PIPESTATUS[0]}
+if [ "${URLTOOL}" = "wget" ] && [ "${URLCOMMAND_EXIT_CODE}" -eq 8 ] ; then
+# We consider the server issuing an error response a successful attempt at communicating
+    URLCOMMAND_EXIT_CODE=0
+fi
+
+rm -f "${CLAIMING_DIR}/tmpin.txt"
+
+# Check if URLCOMMAND connected and received reply
+if [ "${URLCOMMAND_EXIT_CODE}" -ne 0 ] ; then
+    echo >&2 "Failed to connect to ${URL_BASE}"
+    rm -f "${CLAIMING_DIR}/tmpout.txt"
+    exit 4
+fi
+
+HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | awk -F " " '{print $2}')
+if [ "${HTTP_STATUS_CODE}" -ne 200 ] ; then
+ ERROR_MESSAGE=$(grep "\"error\":" "${CLAIMING_DIR}/tmpout.txt" | awk -F "error\":\"" '{print $2}' | sed s'/"}//g')
+ case ${ERROR_MESSAGE} in
+ "invalid agent id") EXIT_CODE=6 ;;
+ "invalid public key") EXIT_CODE=7 ;;
+ "token has expired") EXIT_CODE=8 ;;
+ "invalid token") EXIT_CODE=9 ;;
+ "duplicate agent id") EXIT_CODE=10 ;;
+ "claimed in another workspace") EXIT_CODE=11 ;;
+ "internal server error") EXIT_CODE=12 ;;
+ *) EXIT_CODE=5 ;;
+ esac
+ echo >&2 "Failed to claim agent."
+ rm -f "${CLAIMING_DIR}/tmpout.txt"
+ exit $EXIT_CODE
+fi
+
+rm -f "${CLAIMING_DIR}/tmpout.txt"
+touch "${CLAIMING_DIR}/is_claimed"
+rm -f "${CLAIMING_DIR}/token"
+echo >&2 "Agent was successfully claimed." \ No newline at end of file