summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2024-07-10 14:23:29 +0300
committer GitHub <noreply@github.com> 2024-07-10 14:23:29 +0300
commit 166e9caffaf2ace7312fd02a2cfeef6e7ab09ea2 (patch)
tree 3c41fde86197b64b9d00fa19563c25f7a7e29877
parent 8e98857ebfc5222f5afa45ff951cadad2897f569 (diff)
spawn server (Windows support for external plugins) (#17866)
* listening ipv6 sockets may be both ipv4 and ipv6, depending on the IPV6_ONLY flag * working libmnl ipv46 detection and added latency and retransmits from TCP_INFO * fix aggregations for rtt and retrans * code cleanup * code cleanup * code cleanup again * restore field renames * count namespaces * run namespaces in parallel * add libmnl to buildinfo * lock around safe_fork() * libmnl ports are in network byte order * posix spawn server for both executables and callback functions * local-sockets and network-viewer using the new spawn server * cleanup spawn servers sockets * spawn server stdin and stdout are linked to /dev/null * no need for spinlock in spawn server * empty all parameters * new spawn server is now used for plugins.d plugins * fix for environ * claiming script runs via the new spawn server * tc.plugin uses the new spawn server * analytics, buildinfo and cgroups.plugin use the new spawn server * cgroup-discovery uses the new spawn server * added ability to wait or kill spawned processes * removed old spawn server and now alert notifications use the new one * remove left-overs * hide spawn server internals; started working on windows version of the spawn server * fixes for windows * more windows work * more work on windows * added debug log to spawn server * fix compilation warnings * enable static threads on windows * running external plugins * working spawn server on windows * spawn server logs to collectors.log * log windows last error together with errno * log updates * cleanup * decode_argv does not add an empty parameter * removed debug log * removed debug return * rework on close_range() * eliminate the need for waitid() * clear errno on the signal handler * added universal os_setproctitle() call to support FreeBSD too * os_get_pid_max() for windows and macos * isolate pids array from the rest of the code in apps.plugin so that it can be turned to a hashtable
-rw-r--r--CMakeLists.txt20
-rw-r--r--packaging/utils/compile-on-windows.sh8
-rw-r--r--src/aclk/aclk_rx_msgs.c6
-rw-r--r--src/claim/claim.c15
-rw-r--r--src/collectors/apps.plugin/apps_plugin.c39
-rw-r--r--src/collectors/apps.plugin/apps_plugin.h10
-rw-r--r--src/collectors/apps.plugin/apps_proc_pid_limits.c2
-rw-r--r--src/collectors/apps.plugin/apps_proc_pids.c40
-rw-r--r--src/collectors/cgroups.plugin/cgroup-discovery.c20
-rw-r--r--src/collectors/cgroups.plugin/cgroup-network.c30
-rw-r--r--src/collectors/cgroups.plugin/sys_fs_cgroup.c33
-rw-r--r--src/collectors/cups.plugin/cups_plugin.c2
-rw-r--r--src/collectors/ebpf.plugin/ebpf_apps.c2
-rw-r--r--src/collectors/freeipmi.plugin/freeipmi_plugin.c4
-rw-r--r--src/collectors/network-viewer.plugin/network-viewer.c324
-rw-r--r--src/collectors/nfacct.plugin/plugin_nfacct.c2
-rw-r--r--src/collectors/perf.plugin/perf_plugin.c2
-rw-r--r--src/collectors/plugins.d/local_listeners.c26
-rw-r--r--src/collectors/plugins.d/plugins_d.c34
-rw-r--r--src/collectors/plugins.d/plugins_d.h1
-rw-r--r--src/collectors/plugins.d/pluginsd_internals.c2
-rw-r--r--src/collectors/proc.plugin/proc_meminfo.c4
-rw-r--r--src/collectors/systemd-journal.plugin/systemd-journal.c2
-rw-r--r--src/collectors/tc.plugin/plugin_tc.c27
-rw-r--r--src/collectors/xenstat.plugin/xenstat_plugin.c2
-rw-r--r--src/daemon/analytics.c22
-rw-r--r--src/daemon/buildinfo.c21
-rw-r--r--src/daemon/common.c2
-rw-r--r--src/daemon/common.h3
-rw-r--r--src/daemon/daemon.c6
-rw-r--r--src/daemon/main.c79
-rw-r--r--src/daemon/main.h1
-rw-r--r--src/daemon/signals.c91
-rw-r--r--src/daemon/static_threads.c11
-rw-r--r--src/daemon/watcher.c2
-rw-r--r--src/daemon/watcher.h1
-rw-r--r--src/daemon/win_system-info.c2
-rw-r--r--src/daemon/winsvc.cc2
-rw-r--r--src/database/engine/rrdengine.c2
-rw-r--r--src/database/rrd.h3
-rw-r--r--src/database/rrdhost.c10
-rw-r--r--src/database/sqlite/sqlite_context.c2
-rw-r--r--src/database/sqlite/sqlite_db_migration.c2
-rw-r--r--src/database/sqlite/sqlite_health.c2
-rw-r--r--src/database/sqlite/sqlite_metadata.c4
-rw-r--r--src/exporting/send_data.c2
-rw-r--r--src/health/health_log.c2
-rw-r--r--src/health/health_notifications.c11
-rw-r--r--src/libnetdata/clocks/clocks.c4
-rw-r--r--src/libnetdata/libnetdata.c102
-rw-r--r--src/libnetdata/libnetdata.h21
-rw-r--r--src/libnetdata/log/log.c106
-rw-r--r--src/libnetdata/log/log.h5
-rw-r--r--src/libnetdata/maps/local-sockets.h664
-rw-r--r--src/libnetdata/os/close_range.c87
-rw-r--r--src/libnetdata/os/close_range.h12
-rw-r--r--src/libnetdata/os/get_pid_max.c45
-rw-r--r--src/libnetdata/os/os.h3
-rw-r--r--src/libnetdata/os/setproctitle.c31
-rw-r--r--src/libnetdata/os/setproctitle.h8
-rw-r--r--src/libnetdata/os/waitid.c72
-rw-r--r--src/libnetdata/os/waitid.h48
-rw-r--r--src/libnetdata/popen/README.md15
-rw-r--r--src/libnetdata/popen/popen.c446
-rw-r--r--src/libnetdata/popen/popen.h35
-rw-r--r--src/libnetdata/procfile/procfile.c2
-rw-r--r--src/libnetdata/socket/socket.c10
-rw-r--r--src/libnetdata/spawn_server/spawn_popen.c138
-rw-r--r--src/libnetdata/spawn_server/spawn_popen.h24
-rw-r--r--src/libnetdata/spawn_server/spawn_server.c1318
-rw-r--r--src/libnetdata/spawn_server/spawn_server.h48
-rw-r--r--src/logsmanagement/helper.h4
-rw-r--r--src/registry/registry_db.c2
-rw-r--r--src/spawn/README.md0
-rw-r--r--src/spawn/spawn.c288
-rw-r--r--src/spawn/spawn.h109
-rw-r--r--src/spawn/spawn_client.c250
-rw-r--r--src/spawn/spawn_server.c386
-rw-r--r--src/streaming/receiver.c2
-rw-r--r--src/streaming/rrdpush.c2
-rw-r--r--src/streaming/sender.c2
-rw-r--r--src/web/api/queries/query.c2
-rw-r--r--src/web/server/web_client.c2
83 files changed, 2778 insertions, 2455 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a3ac834448..5dee361775 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -668,8 +668,6 @@ set(LIBNETDATA_FILES
src/libnetdata/os/byteorder.h
src/libnetdata/onewayalloc/onewayalloc.c
src/libnetdata/onewayalloc/onewayalloc.h
- src/libnetdata/popen/popen.c
- src/libnetdata/popen/popen.h
src/libnetdata/procfile/procfile.c
src/libnetdata/procfile/procfile.h
src/libnetdata/query_progress/progress.c
@@ -720,8 +718,6 @@ set(LIBNETDATA_FILES
src/libnetdata/linked-lists.h
src/libnetdata/storage-point.h
src/libnetdata/bitmap64.h
- src/libnetdata/os/waitid.c
- src/libnetdata/os/waitid.h
src/libnetdata/os/gettid.c
src/libnetdata/os/gettid.h
src/libnetdata/os/adjtimex.c
@@ -750,6 +746,14 @@ set(LIBNETDATA_FILES
src/libnetdata/os/setenv.h
src/libnetdata/os/strndup.c
src/libnetdata/os/strndup.h
+ src/libnetdata/spawn_server/spawn_server.c
+ src/libnetdata/spawn_server/spawn_server.h
+ src/libnetdata/spawn_server/spawn_popen.c
+ src/libnetdata/spawn_server/spawn_popen.h
+ src/libnetdata/os/close_range.c
+ src/libnetdata/os/close_range.h
+ src/libnetdata/os/setproctitle.c
+ src/libnetdata/os/setproctitle.h
)
if(ENABLE_PLUGIN_EBPF)
@@ -1216,13 +1220,6 @@ set(CLAIM_PLUGIN_FILES
src/claim/claim.h
)
-set(SPAWN_PLUGIN_FILES
- src/spawn/spawn.c
- src/spawn/spawn_server.c
- src/spawn/spawn_client.c
- src/spawn/spawn.h
-)
-
set(ACLK_ALWAYS_BUILD
src/aclk/aclk_rrdhost_state.h
src/aclk/aclk_proxy.c
@@ -1410,7 +1407,6 @@ set(NETDATA_FILES
${STREAMING_PLUGIN_FILES}
${WEB_PLUGIN_FILES}
${CLAIM_PLUGIN_FILES}
- ${SPAWN_PLUGIN_FILES}
${ACLK_ALWAYS_BUILD}
${PROFILE_PLUGIN_FILES}
)
diff --git a/packaging/utils/compile-on-windows.sh b/packaging/utils/compile-on-windows.sh
index 103052be49..7e4e30eb34 100644
--- a/packaging/utils/compile-on-windows.sh
+++ b/packaging/utils/compile-on-windows.sh
@@ -26,6 +26,12 @@ then
exit 0
fi
+BUILD_FOR_PACKAGING="Off"
+if [ "${1}" = "package" ]
+then
+ BUILD_FOR_PACKAGING="On"
+fi
+
export PATH="/usr/local/bin:${PATH}"
WT_ROOT="$(pwd)"
@@ -54,7 +60,7 @@ fi
-DCMAKE_INSTALL_PREFIX="/opt/netdata" \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-DCMAKE_C_FLAGS="-fstack-protector-all -O0 -ggdb -Wall -Wextra -Wno-char-subscripts -Wa,-mbig-obj -pipe -DNETDATA_INTERNAL_CHECKS=1 -D_FILE_OFFSET_BITS=64 -D__USE_MINGW_ANSI_STDIO=1" \
- -DBUILD_FOR_PACKAGING=On \
+ -DBUILD_FOR_PACKAGING=${BUILD_FOR_PACKAGING} \
-DUSE_MOLD=Off \
-DNETDATA_USER="${USER}" \
-DDEFAULT_FEATURE_STATE=Off \
diff --git a/src/aclk/aclk_rx_msgs.c b/src/aclk/aclk_rx_msgs.c
index 60e421928e..432242f5ed 100644
--- a/src/aclk/aclk_rx_msgs.c
+++ b/src/aclk/aclk_rx_msgs.c
@@ -106,13 +106,13 @@ static inline int aclk_v2_payload_get_query(const char *payload, char **query_ur
else if(strncmp(payload, "DELETE /", 8) == 0)
start = payload + 7;
else {
- errno = 0;
+ errno_clear();
netdata_log_error("Only accepting requests that start with GET, POST, PUT, DELETE from CLOUD.");
return 1;
}
if(!(end = strstr(payload, HTTP_1_1 HTTP_ENDL))) {
- errno = 0;
+ errno_clear();
netdata_log_error("Doesn't look like HTTP GET request.");
return 1;
}
@@ -127,7 +127,7 @@ static int aclk_handle_cloud_http_request_v2(struct aclk_request *cloud_to_agent
{
aclk_query_t query;
- errno = 0;
+ errno_clear();
if (cloud_to_agent->version < ACLK_V_COMPRESSION) {
netdata_log_error(
"This handler cannot reply to request with version older than %d, received %d.",
diff --git a/src/claim/claim.c b/src/claim/claim.c
index 5f4ec9a433..16058b930b 100644
--- a/src/claim/claim.c
+++ b/src/claim/claim.c
@@ -53,11 +53,8 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
}
#ifndef DISABLE_CLOUD
- int exit_code;
- pid_t command_pid;
char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1];
char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1];
- FILE *fp_child_output, *fp_child_input;
// This is guaranteed to be set early in main via post_conf_load()
char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
@@ -92,17 +89,17 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
claiming_arguments);
netdata_log_info("Executing agent claiming command: %s", command_exec_buffer);
- fp_child_output = netdata_popen(command_line_buffer, &command_pid, &fp_child_input);
- if(!fp_child_output) {
+ POPEN_INSTANCE *instance = spawn_popen_run(command_exec_buffer);
+ if(!instance) {
netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer);
return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
}
netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer);
char read_buffer[100 + 1];
- while (fgets(read_buffer, 100, fp_child_output) != NULL) ;
+ while (fgets(read_buffer, 100, instance->child_stdout_fp) != NULL) ;
- exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
+ int exit_code = spawn_popen_wait(instance);
netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code);
if (0 == exit_code) {
@@ -113,7 +110,7 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer);
return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
}
- errno = 0;
+ errno_clear();
unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
if ((unsigned)exit_code > maximum_known_exit_code) {
@@ -214,7 +211,7 @@ void load_cloud_conf(int silent)
netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
char *filename;
- errno = 0;
+ errno_clear();
int ret = 0;
diff --git a/src/collectors/apps.plugin/apps_plugin.c b/src/collectors/apps.plugin/apps_plugin.c
index b660f8171c..8fe1ff0081 100644
--- a/src/collectors/apps.plugin/apps_plugin.c
+++ b/src/collectors/apps.plugin/apps_plugin.c
@@ -51,7 +51,6 @@ size_t
inodes_changed_counter = 0,
links_changed_counter = 0,
targets_assignment_counter = 0,
- all_pids_count = 0, // the number of processes running
apps_groups_targets_count = 0; // # of apps_groups.conf targets
int
@@ -136,20 +135,6 @@ struct target
size_t pagesize;
-struct pid_stat
- *root_of_pids = NULL, // global list of all processes running
- **all_pids = NULL; // to avoid allocations, we pre-allocate
- // a pointer for each pid in the entire pid space.
-
-#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
-// Another pre-allocated list of all possible pids.
-// We need it to pids and assign them a unique sortlist id, so that we
-// read parents before children. This is needed to prevent a situation where
-// a child is found running, but until we read its parent, it has exited and
-// its parent has accumulated its resources.
-pid_t *all_pids_sortlist = NULL;
-#endif
-
// ----------------------------------------------------------------------------
int managed_log(struct pid_stat *p, PID_LOG log, int status) {
@@ -208,7 +193,7 @@ int managed_log(struct pid_stat *p, PID_LOG log, int status) {
}
}
}
- errno = 0;
+ errno_clear();
}
else if(unlikely(p->log_thrown & log)) {
// netdata_log_error("unsetting log %u on pid %d", log, p->pid);
@@ -300,12 +285,14 @@ static void apply_apps_groups_targets_inheritance(void) {
}
// init goes always to default target
- if(all_pids[INIT_PID] && !all_pids[INIT_PID]->matched_by_config)
- all_pids[INIT_PID]->target = apps_groups_default_target;
+ struct pid_stat *pi = find_pid_entry(INIT_PID);
+ if(pi && !pi->matched_by_config)
+ pi->target = apps_groups_default_target;
// pid 0 goes always to default target
- if(all_pids[0] && !all_pids[INIT_PID]->matched_by_config)
- all_pids[0]->target = apps_groups_default_target;
+ pi = find_pid_entry(0);
+ if(pi && !pi->matched_by_config)
+ pi->target = apps_groups_default_target;
// give a default target on all top level processes
if(unlikely(debug_enabled)) loops++;
@@ -320,8 +307,9 @@ static void apply_apps_groups_targets_inheritance(void) {
p->sortlist = sortlist++;
}
- if(all_pids[1])
- all_pids[1]->sortlist = sortlist++;
+ pi = find_pid_entry(1);
+ if(pi)
+ pi->sortlist = sortlist++;
// give a target to all merged child processes
found = 1;
@@ -1052,12 +1040,7 @@ int main(int argc, char **argv) {
netdata_log_info("started on pid %d", getpid());
users_and_groups_init();
-
-#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
- all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1);
-#endif
-
- all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1);
+ pids_init();
// ------------------------------------------------------------------------
// the event loop for functions
diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h
index ce4d815adc..a085872d9f 100644
--- a/src/collectors/apps.plugin/apps_plugin.h
+++ b/src/collectors/apps.plugin/apps_plugin.h
@@ -17,9 +17,7 @@
#include <sys/proc_info.h>
#include <sys/sysctl.h>
#include <mach/mach_time.h> // For mach_timebase_info_data_t and mach_timebase_info
-#endif
-#if defined(__APPLE__)
extern mach_timebase_info_data_t mach_info;
#endif