summary refs log tree commit diff stats
path: root/packaging
diff options
context:
space:
mode:
author Austin S. Hemmelgarn <austin@netdata.cloud> 2020-01-07 06:23:00 -0500
committer GitHub <noreply@github.com> 2020-01-07 06:23:00 -0500
commit 42a72f61dbd6f66bc9cf9fa9e273cc47d466491b (patch)
tree e7c632800577d80c172399acf03e5a3076689228 /packaging
parent c0ae9b32f47a1af872af1888f53dedfda8a08df7 (diff)
Update handling of shutdown of the Netdata agent on update and uninstall. (#7595)
* Make the agent killing logic more reliable.
  This adjusts how we handle terminating the agent during an upgrade so that it does a better job of actually ensuring it's dead. The new logic works as follows:
  * After each signal is sent, we wait 5 seconds before checking for the agent again.
  * If it's been 60 seconds (12 cycles) since we started trying to kill the PID, we switch from SIGTERM to sending SIGKILL.
  * If it's been 120 seconds (24 cycles) since we started trying to kill the PID, we just give up.
* Attempt to use netdatacli to shut down Netdata.
  This is far more reliable than any other approach we have if it works.
* Update uninstaller to use same stop logic as installer.
  This improves its reliability significantly and makes it work correctly on FreeBSD.
* Fix Codacy issues.
Diffstat (limited to 'packaging')
-rw-r--r-- packaging/installer/functions.sh 26
-rwxr-xr-x packaging/installer/netdata-uninstaller.sh 105
2 files changed, 105 insertions, 26 deletions
diff --git a/packaging/installer/functions.sh b/packaging/installer/functions.sh
index d2b7b67616..1c1f556dea 100644
--- a/packaging/installer/functions.sh
+++ b/packaging/installer/functions.sh
@@ -424,7 +424,7 @@ install_netdata_service() {
pidisnetdata() {
if [ -d /proc/self ]; then
[ -z "$1" -o ! -f "/proc/$1/stat" ] && return 1
- [ "$(cat "/proc/$1/stat" | cut -d '(' -f 2 | cut -d ')' -f 1)" = "netdata" ] && return 0
+ [ "$(cut -d '(' -f 2 "/proc/$1/stat" | cut -d ')' -f 1)" = "netdata" ] && return 0
return 1
fi
return 0
@@ -437,17 +437,27 @@ stop_netdata_on_pid() {
printf >&2 "Stopping netdata on pid %s ..." "${pid}"
while [ -n "$pid" ] && [ ${ret} -eq 0 ]; do
- if [ ${count} -gt 45 ]; then
+ if [ ${count} -gt 24 ]; then
echo >&2 "Cannot stop the running netdata on pid ${pid}."
return 1
fi
count=$((count + 1))
- run kill "${pid}" 2>/dev/null
- ret=$?
+ pidisnetdata "${pid}" || ret=1
+ if [ ${ret} -eq 1 ] ; then
+ break
+ fi
+
+ if [ ${count} -lt 12 ] ; then
+ run kill "${pid}" 2>/dev/null
+ ret=$?
+ else
+ run kill -9 "${pid}" 2>/dev/null
+ ret=$?
+ fi
- test ${ret} -eq 0 && printf >&2 "." && sleep 2
+ test ${ret} -eq 0 && printf >&2 "." && sleep 5
done
@@ -480,6 +490,12 @@ netdata_pids() {
stop_all_netdata() {
local p
+
+ if [ -n $(netdata_pids) -a -n "$(builtin type -P netdatacli)" ] ; then
+ netdatacli shutdown-agent
+ sleep 20
+ fi
+
for p in $(netdata_pids); do
# shellcheck disable=SC2086
stop_netdata_on_pid ${p}
diff --git a/packaging/installer/netdata-uninstaller.sh b/packaging/installer/netdata-uninstaller.sh
index dc3a0eabb0..8925c123bf 100755
--- a/packaging/installer/netdata-uninstaller.sh
+++ b/packaging/installer/netdata-uninstaller.sh
@@ -258,25 +258,101 @@ rm_dir() {
fi
}
+safe_pidof() {
+ local pidof_cmd="$(command -v pidof 2>/dev/null)"
+ if [ -n "${pidof_cmd}" ]; then
+ ${pidof_cmd} "${@}"
+ return $?
+ else
+ ps -acxo pid,comm |
+ sed "s/^ *//g" |
+ grep netdata |
+ cut -d ' ' -f 1
+ return $?
+ fi
+}
+
+pidisnetdata() {
+ if [ -d /proc/self ]; then
+ [ -z "$1" -o ! -f "/proc/$1/stat" ] && return 1
+ [ "$(cut -d '(' -f 2 "/proc/$1/stat" | cut -d ')' -f 1)" = "netdata" ] && return 0
+ return 1
+ fi
+ return 0
+}
+
+stop_netdata_on_pid() {
+ local pid="${1}" ret=0 count=0
+
+ pidisnetdata "${pid}" || return 0
+
+ printf >&2 "Stopping netdata on pid %s ..." "${pid}"
+ while [ -n "$pid" ] && [ ${ret} -eq 0 ]; do
+ if [ ${count} -gt 24 ]; then
+ echo >&2 "Cannot stop the running netdata on pid ${pid}."
+ return 1
+ fi
+
+ count=$((count + 1))
+
+ pidisnetdata "${pid}" || ret=1
+ if [ ${ret} -eq 1 ] ; then
+ break
+ fi
+
+ if [ ${count} -lt 12 ] ; then
+ run kill "${pid}" 2>/dev/null
+ ret=$?
+ else
+ run kill -9 "${pid}" 2>/dev/null
+ ret=$?
+ fi
+
+ test ${ret} -eq 0 && printf >&2 "." && sleep 5
+
+ done
+
+ echo >&2
+ if [ ${ret} -eq 0 ]; then
+ echo >&2 "SORRY! CANNOT STOP netdata ON PID ${pid} !"
+ return 1
+ fi
+
+ echo >&2 "netdata on pid ${pid} stopped."
+ return 0
+}
+
netdata_pids() {
local p myns ns
+
myns="$(readlink /proc/self/ns/pid 2>/dev/null)"
+
for p in \
$(cat /var/run/netdata.pid 2>/dev/null) \
$(cat /var/run/netdata/netdata.pid 2>/dev/null) \
- $(pidof netdata 2>/dev/null); do
-
+ $(safe_pidof netdata 2>/dev/null); do
ns="$(readlink "/proc/${p}/ns/pid" 2>/dev/null)"
- #shellcheck disable=SC2002
+
if [ -z "${myns}" ] || [ -z "${ns}" ] || [ "${myns}" = "${ns}" ]; then
- name="$(cat "/proc/${p}/stat" 2>/dev/null | cut -d '(' -f 2 | cut -d ')' -f 1)"
- if [ "${name}" = "netdata" ]; then
- echo "${p}"
- fi
+ pidisnetdata "${p}" && echo "${p}"
fi
done
}
+stop_all_netdata() {
+ local p
+
+ if [ -n $(netdata_pids) -a -n "$(builtin type -P netdatacli)" ] ; then
+ netdatacli shutdown-agent
+ sleep 20
+ fi
+
+ for p in $(netdata_pids); do
+ # shellcheck disable=SC2086
+ stop_netdata_on_pid ${p}
+ done
+}
+
trap quit_msg EXIT
#shellcheck source=/dev/null
@@ -284,20 +360,7 @@ source "${ENVIRONMENT_FILE}" || exit 1
#### STOP NETDATA
echo >&2 "Stopping a possibly running netdata..."
-for p in $(netdata_pids); do
- i=0
- while kill "${p}" 2>/dev/null; do
- if [ "$i" -gt 30 ]; then
- echo >&2 "Forcefully stopping netdata with pid ${p}"
- run kill -9 "${p}"
- run sleep 2
- break
- fi
- sleep 1
- i=$((i + 1))
- done
-done
-sleep 2
+stop_all_netdata
#### REMOVE NETDATA FILES
rm_file /etc/logrotate.d/netdata