diff options
author | PradeepKiruvale <pradeepkumar.kj@softwareag.com> | 2022-06-16 19:14:21 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-16 19:14:21 +0530 |
commit | b865681acc2a086b817eb98b12f6b63ae7cd7c3e (patch) | |
tree | 6a4730d05d6d07d70a0b96e925a7138439b28f73 | |
parent | 3d35d4915256b451dcc03678de55de3e3ac7e023 (diff) |
Enable systemd watchdog monitoring for tedge-watchdog daemon (#1196)
* Closes #1174, add watchdog to tedge watchdog
Signed-off-by: Pradeep Kumar K J <pradeepkumar.kj@softwareag.com>
-rw-r--r-- | configuration/init/systemd/tedge-watchdog.service | 3 | ||||
-rw-r--r-- | crates/core/tedge_watchdog/src/error.rs | 2 | ||||
-rw-r--r-- | crates/core/tedge_watchdog/src/systemd_watchdog.rs | 36 |
3 files changed, 38 insertions, 3 deletions
diff --git a/configuration/init/systemd/tedge-watchdog.service b/configuration/init/systemd/tedge-watchdog.service index 43db63fc..bcea2e94 100644 --- a/configuration/init/systemd/tedge-watchdog.service +++ b/configuration/init/systemd/tedge-watchdog.service @@ -6,7 +6,8 @@ StartLimitIntervalSec=0 [Service] Type=notify ExecStart=/usr/bin/tedge_watchdog -Restart=on-failure +Restart=always +WatchdogSec=30 [Install] WantedBy=multi-user.target diff --git a/crates/core/tedge_watchdog/src/error.rs b/crates/core/tedge_watchdog/src/error.rs index ff5fb14b..f9763798 100644 --- a/crates/core/tedge_watchdog/src/error.rs +++ b/crates/core/tedge_watchdog/src/error.rs @@ -25,6 +25,6 @@ pub enum WatchdogError { #[error(transparent)] ParseSystemdFile(#[from] std::io::Error), - #[error("Did not find the WatchdogSec{file}")] + #[error("Did not find the WatchdogSec in {file}")] NoWatchdogSec { file: String }, } diff --git a/crates/core/tedge_watchdog/src/systemd_watchdog.rs b/crates/core/tedge_watchdog/src/systemd_watchdog.rs index 8682a9c1..43ed9af3 100644 --- a/crates/core/tedge_watchdog/src/systemd_watchdog.rs +++ b/crates/core/tedge_watchdog/src/systemd_watchdog.rs @@ -29,6 +29,40 @@ pub async fn start_watchdog(tedge_config_dir: PathBuf) -> Result<(), anyhow::Err // Send ready notification to systemd. notify_systemd(process::id(), "--ready")?; + // Send heart beat notifications to systemd, to notify about its own health status + start_watchdog_for_self().await?; + + // Monitor health of tedge services + start_watchdog_for_tedge_services(tedge_config_dir).await; + Ok(()) +} + +async fn start_watchdog_for_self() -> Result<(), WatchdogError> { + match get_watchdog_sec("/lib/systemd/system/tedge-watchdog.service") { + Ok(interval) => { + let _handle = tokio::spawn(async move { + loop { + let _ = notify_systemd(process::id(), "WATCHDOG=1").map_err(|e| { + eprintln!("Notifying systemd failed with {}", e); + }); + tokio::time::sleep(tokio::time::Duration::from_secs(interval / 4)).await; + } + }); + Ok(()) + } + + Err(WatchdogError::NoWatchdogSec { file }) => { + warn!( + "Watchdog is not enabled for tedge-watchdog : {}", + WatchdogError::NoWatchdogSec { file } + ); + Ok(()) + } + Err(e) => Err(e), + } +} + +async fn start_watchdog_for_tedge_services(tedge_config_dir: PathBuf) { let tedge_services = vec![ "tedge-mapper-c8y", "tedge-mapper-az", @@ -65,7 +99,6 @@ pub async fn start_watchdog(tedge_config_dir: PathBuf) -> Result<(), anyhow::Err } } futures::future::join_all(watchdog_tasks).await; - Ok(()) } async fn monitor_tedge_service( @@ -115,6 +148,7 @@ async fn monitor_tedge_service( let elapsed = start.elapsed(); if elapsed < tokio::time::Duration::from_secs(interval) { tokio::time::sleep(tokio::time::Duration::from_secs(interval) - elapsed).await; + warn!("tedge systemd watchdog not started because no services to monitor") } } } |