summary | refs | log | tree | commit | diff | stats
path: root/claim/netdata-claim.sh.in
diff options
context:
space:
mode:
author: Andrew Moss <1043609+amoss@users.noreply.github.com> 2020-05-11 08:34:29 +0200
committer: James Mills <prologic@shortcircuit.net.au> 2020-05-11 16:37:27 +1000
commit: aa3ec552c896aebafd03b9d2c1864272dcb34749 (patch)
tree: 02f7cd95ed84d888c27fb4bfb55df2b251b97b7b /claim/netdata-claim.sh.in
parent: fd05e1d87751ecaa45ebd3aed2499435b1627cea (diff)
Enable support for Netdata Cloud.
This PR merges the feature-branch to make the cloud live. It contains the following work:

Co-authored-by: Andrew Moss <1043609+amoss@users.noreply.github.com>
Co-authored-by: Jacek Kolasa <jacek.kolasa@gmail.com>
Co-authored-by: Austin S. Hemmelgarn <austin@netdata.cloud>
Co-authored-by: James Mills <prologic@shortcircuit.net.au>
Co-authored-by: Markos Fountoulakis <44345837+mfundul@users.noreply.github.com>
Co-authored-by: Timotej S <6674623+underhood@users.noreply.github.com>
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>

* dashboard with new navbars, v1.0-alpha.9: PR #8478
* dashboard v1.0.11: netdata/dashboard#76
  Co-authored-by: Jacek Kolasa <jacek.kolasa@gmail.com>
* Added installer code to bundle JSON-c if it's not present. PR #8836
  Co-authored-by: James Mills <prologic@shortcircuit.net.au>
* Fix claiming config PR #8843
* Adds JSON-c as hard dep. for ACLK PR #8838
* Fix SSL renegotiation errors in old versions of openssl. PR #8840. Also - we have a transient problem with opensuse CI so this PR disables them with a commit from @prologic.
  Co-authored-by: James Mills <prologic@shortcircuit.net.au>
* Fix claiming error handling PR #8850
* Added CI to verify JSON-C bundling code in installer PR #8853
* Make cloud-enabled flag in web/api/v1/info be independent of ACLK build success PR #8866
* Reduce ACLK_STABLE_TIMEOUT from 10 to 3 seconds PR #8871
* remove old-cloud related UI from old dashboard (accessible now via /old suffix) PR #8858
* dashboard v1.0.13 PR #8870
* dashboard v1.0.14 PR #8904
* Provide feedback on proxy setting changes PR #8895
* Change the name of the connect message to update during an ongoing session PR #8927
* Fetch active alarms from alarm_log PR #8944
Diffstat (limited to 'claim/netdata-claim.sh.in')
-rwxr-xr-x claim/netdata-claim.sh.in 200
1 files changed, 130 insertions, 70 deletions
diff --git a/claim/netdata-claim.sh.in b/claim/netdata-claim.sh.in
index 50ee660d37..098b1a216d 100755
--- a/claim/netdata-claim.sh.in
+++ b/claim/netdata-claim.sh.in
@@ -9,74 +9,92 @@
# Exit code: 2 - Problems with claiming working directory
# Exit code: 3 - Missing dependencies
# Exit code: 4 - Failure to connect to endpoint
-# Exit code: 5 - Unknown HTTP error message
-# Exit code: 6 - The CLI didn't work
-# Exit code: 7 - Wrong user
+# Exit code: 5 - The CLI didn't work
+# Exit code: 6 - Wrong user
+# Exit code: 7 - Unknown HTTP error message
#
# OK: Agent claimed successfully
# HTTP Status code: 204
# Exit code: 0
#
+# Unknown HTTP error message
+# HTTP Status code: 422
+# Exit code: 7
+ERROR_KEYS[7]="None"
+ERROR_MESSAGES[7]="Unknown HTTP error message"
+
# Error: The agent id is invalid; it does not fulfill the constraints
# HTTP Status code: 422
-# Error key: "ErrInvalidNodeID"
-# Error message: "invalid node id"
-# Exit code: 6
+# Exit code: 8
+ERROR_KEYS[8]="ErrInvalidNodeID"
+ERROR_MESSAGES[8]="invalid node id"
# Error: The agent hostname is invalid; it does not fulfill the constraints
# HTTP Status code: 422
-# Error key: "ErrInvalidNodeName"
-# Error message: "invalid node name"
-# Exit code: 7
-#
+# Exit code: 9
+ERROR_KEYS[9]="ErrInvalidNodeName"
+ERROR_MESSAGES[9]="invalid node name"
+
# Error: At least one of the given rooms ids is invalid; it does not fulfill the constraints
# HTTP Status code: 422
-# Error key: "ErrInvalidRoomID"
-# Error message: "invalid room id"
-# Exit code: 8
-#
+# Exit code: 10
+ERROR_KEYS[10]="ErrInvalidRoomID"
+ERROR_MESSAGES[10]="invalid room id"
+
# Error: Invalid public key; the public key is empty or not present
# HTTP Status code: 422
-# Error key: "ErrInvalidPublicKey"
-# Error message: "invalid public key"
-# Exit code: 9
+# Exit code: 11
+ERROR_KEYS[11]="ErrInvalidPublicKey"
+ERROR_MESSAGES[11]="invalid public key"
#
# Error: Expired, missing or invalid token
# HTTP Status code: 403
-# Error key: "ErrForbidden"
-# Error message: "token expired" | "token not found" | "invalid token"
-# Exit code: 10
-#
+# Exit code: 12
+ERROR_KEYS[12]="ErrForbidden"
+ERROR_MESSAGES[12]="token expired/token not found/invalid token"
+
# Error: Duplicate agent id; an agent with the same id is already registered in the cloud
# HTTP Status code: 409
-# Error key: "ErrAlreadyClaimed"
-# Error message: "already claimed"
-# Exit code: 11
-#
+# Exit code: 13
+ERROR_KEYS[13]="ErrAlreadyClaimed"
+ERROR_MESSAGES[13]="already claimed"
+
# Error: The node claiming process is still in progress.
# HTTP Status code: 102
-# Error key: "ErrProcessingClaim"
-# Error message: "processing claiming"
-# Exit code: 12
-#
+# Exit code: 14
+ERROR_KEYS[14]="ErrProcessingClaim"
+ERROR_MESSAGES[14]="processing claiming"
+
# Error: Internal server error. Any other unexpected error (DB problems, etc.)
# HTTP Status code: 500
-# Error key: "ErrInternalServerError"
-# Error message: "Internal Server Error"
-# Exit code: 13
-#
+# Exit code: 15
+ERROR_KEYS[15]="ErrInternalServerError"
+ERROR_MESSAGES[15]="Internal Server Error"
+
# Error: There was a timeout processing the claim.
# HTTP Status code: 504
-# Error key: "ErrGatewayTimeout"
-# Error message: "Gateway Timeout"
-# Exit code: 14
-#
+# Exit code: 16
+ERROR_KEYS[16]="ErrGatewayTimeout"
+ERROR_MESSAGES[16]="Gateway Timeout"
+
# Error: The service cannot handle the claiming request at this time.
# HTTP Status code: 503
-# Error key: "ErrServiceUnavailable"
-# Error message: "Service Unavailable"
-# Exit code: 15
+# Exit code: 17
+ERROR_KEYS[17]="ErrServiceUnavailable"
+ERROR_MESSAGES[17]="Service Unavailable"
+get_config_value() {
+ conf_file="${1}"
+ section="${2}"
+ key_name="${3}"
+ config_result=$(@sbindir_POST@/netdatacli 2>/dev/null read-config "$conf_file|$section|$key_name"; exit $?)
+ # shellcheck disable=SC2181
+ if [ "$?" != "0" ]; then
+ echo >&2 "cli failed, assume netdata is not running and query the on-disk config"
+ config_result=$(@sbindir_POST@/netdata 2>/dev/null -W get2 "$conf_file" "$section" "$key_name" unknown_default)
+ fi
+ echo "$config_result"
+}
if command -v curl >/dev/null 2>&1 ; then
URLTOOL="curl"
elif command -v wget >/dev/null 2>&1 ; then
@@ -90,15 +108,26 @@ if ! command -v openssl >/dev/null 2>&1 ; then
exit 3
fi
+# shellcheck disable=SC2050
+if [ "@enable_cloud_POST@" = "no" ]; then
+ echo >&2 "This agent was built with --disable-cloud and cannot be claimed"
+ exit 3
+fi
+# shellcheck disable=SC2050
+if [ "@can_enable_aclk_POST@" != "yes" ]; then
+ echo >&2 "This agent was built without the dependencies for Cloud and cannot be claimed"
+ exit 3
+fi
# -----------------------------------------------------------------------------
# defaults to allow running this script by hand
-[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@"
+[ -z "${NETDATA_VARLIB_DIR}" ] && NETDATA_VARLIB_DIR="@varlibdir_POST@"
MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id"
-CLAIMING_DIR="${NETDATA_USER_CONFIG_DIR}/claim.d"
+CLAIMING_DIR="${NETDATA_VARLIB_DIR}/cloud.d"
TOKEN="unknown"
-URL_BASE="https://netdata.cloud"
+URL_BASE=$(get_config_value cloud global "cloud base url")
+[ -z "$URL_BASE" ] && URL_BASE="https://app.netdata.cloud" # Cover post-install with --dont-start
ID="unknown"
ROOMS=""
[ -z "$HOSTNAME" ] && HOSTNAME=$(hostname)
@@ -106,14 +135,9 @@ CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem"
VERBOSE=0
INSECURE=0
RELOAD=1
-NETDATA_USER=netdata
+NETDATA_USER=$(get_config_value netdata global "run as user")
[ -z "$EUID" ] && EUID="$(id -u)"
-CONF_USER=$(grep '^[ #]*run as user[ ]*=' "${NETDATA_USER_CONFIG_DIR}/netdata.conf" 2>/dev/null)
-if [ -n "$CONF_USER" ]; then
- NETDATA_USER=$(echo "$CONF_USER" | sed 's/^[^=]*=[ \t]*//' | sed 's/[ \t]*$//')
-fi
-
# get the MACHINE_GUID by default
if [ -r "${MACHINE_GUID_FILE}" ]; then
@@ -152,7 +176,7 @@ done
if [ "$EUID" != "0" ] && [ "$(whoami)" != "$NETDATA_USER" ]; then
echo >&2 "This script must be run by the $NETDATA_USER user account"
- exit 7
+ exit 6
fi
# if curl not installed give warning SOCKS can't be used
@@ -279,37 +303,73 @@ if [ "${VERBOSE}" == 1 ] ; then
cat "${CLAIMING_DIR}/tmpout.txt"
fi
-HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | awk -F " " '{print $2}')
+ERROR_KEY=$(grep "\"errorMsgKey\":" "${CLAIMING_DIR}/tmpout.txt" | awk -F "errorMsgKey\":\"" '{print $2}' | awk -F "\"" '{print $1}')
+case ${ERROR_KEY} in
+ "ErrInvalidNodeID") EXIT_CODE=8 ;;
+ "ErrInvalidNodeName") EXIT_CODE=9 ;;
+ "ErrInvalidRoomID") EXIT_CODE=10 ;;
+ "ErrInvalidPublicKey") EXIT_CODE=11 ;;
+ "ErrForbidden") EXIT_CODE=12 ;;
+ "ErrAlreadyClaimed") EXIT_CODE=13 ;;
+ "ErrProcessingClaim") EXIT_CODE=14 ;;
+ "ErrInternalServerError") EXIT_CODE=15 ;;
+ "ErrGatewayTimeout") EXIT_CODE=16 ;;
+ "ErrServiceUnavailable") EXIT_CODE=17 ;;
+ *) EXIT_CODE=7 ;;
+esac
+HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | awk -F " " '{print $2}')
if [ "${HTTP_STATUS_CODE}" = "204" ] ; then
+ EXIT_CODE=0
+fi
+
+if [ "${HTTP_STATUS_CODE}" = "204" ] || [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then
rm -f "${CLAIMING_DIR}/tmpout.txt"
echo -n "${ID}" >"${CLAIMING_DIR}/claimed_id" || (echo >&2 "Claiming failed"; set -e; exit 2)
rm -f "${CLAIMING_DIR}/token" || (echo >&2 "Claiming failed"; set -e; exit 2)
+
+ # Rewrite the cloud.conf on the disk
+ cat > "$CLAIMING_DIR/cloud.conf" <<HERE_DOC
+[global]
+ enabled = yes
+ cloud base url = $URL_BASE
+HERE_DOC
if [ "$EUID" == "0" ]; then
chown -R "${NETDATA_USER}:${NETDATA_USER}" ${CLAIMING_DIR} || (echo >&2 "Claiming failed"; set -e; exit 2)
fi
if [ "${RELOAD}" == "0" ] ; then
- exit 0
+ exit $EXIT_CODE
+ fi
+
+ if [ -z "${PROXY}" ]; then
+ PROXYMSG=""
+ else
+ PROXYMSG="You have attempted to claim this node through a proxy - please update your the proxy setting in your netdata.conf to ${PROXY}. "
fi
- netdatacli reload-claiming-state && echo >&2 "Node was successfully claimed." && exit 0
- echo "The claim was successful but the agent could not be notified ($?)- it requires a restart to connect to the cloud"
- exit 6
+ # Update cloud.conf in the agent memory
+ @sbindir_POST@/netdatacli write-config 'cloud|global|enabled|yes' && \
+ @sbindir_POST@/netdatacli write-config "cloud|global|cloud base url|$URL_BASE" && \
+ @sbindir_POST@/netdatacli reload-claiming-state && \
+ if [ "${HTTP_STATUS_CODE}" = "204" ] ; then
+ echo >&2 "${PROXYMSG}Node was successfully claimed."
+ else
+ echo >&2 "The agent cloud base url is set to the url provided."
+ echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
+ echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
+ fi && exit $EXIT_CODE
+
+ if [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then
+ echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
+ echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
+ exit $EXIT_CODE
+ fi
+ echo >&2 "${PROXYMSG}The claim was successful but the agent could not be notified ($?)- it requires a restart to connect to the cloud."
+ exit 5
fi
-ERROR_MESSAGE=$(grep "\"errorMsgKey\":" "${CLAIMING_DIR}/tmpout.txt" | awk -F "errorMsgKey\":\"" '{print $2}' | awk -F "\"" '{print $1}')
-case ${ERROR_MESSAGE} in
- "ErrInvalidNodeID") EXIT_CODE=6 ;;
- "ErrInvalidNodeName") EXIT_CODE=7 ;;
- "ErrInvalidRoomID") EXIT_CODE=8 ;;
- "ErrInvalidPublicKey") EXIT_CODE=9 ;;
- "ErrForbidden") EXIT_CODE=10 ;;
- "ErrAlreadyClaimed") EXIT_CODE=11 ;;
- "ErrProcessingClaim") EXIT_CODE=12 ;;
- "ErrInternalServerError") EXIT_CODE=13 ;;
- "ErrGatewayTimeout") EXIT_CODE=14 ;;
- "ErrServiceUnavailable") EXIT_CODE=15 ;;
- *) EXIT_CODE=5 ;;
-esac
-echo >&2 "Failed to claim node."
+echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
+if [ "${VERBOSE}" == 1 ]; then
+ echo >&2 "Error key was:\"${ERROR_KEYS[$EXIT_CODE]}\""
+fi
rm -f "${CLAIMING_DIR}/tmpout.txt"
exit $EXIT_CODE