summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: vkalintiris <vasilis@netdata.cloud> 2024-03-04 12:22:01 +0200
committer: vkalintiris <vasilis@netdata.cloud> 2024-03-19 12:13:16 +0200
commit: 39568ed90ffce40ef669a0f0d5ad1f9bdc7cff06 (patch)
tree: eb2fbbbb908efb5a43ad2fbd7dedcaab8e8987c4
parent: a890cfaa51854ed0c92cb169f0d2cd1652f6970d (diff)
Backtrace info when modifying refcount of metrics. (ref: bt)
Keep the last X backtraces when a metric's reference count is increased/decreased. To keep CPU and memory consumption low we enable this only for UUIDs starting with 0x0A.
-rw-r--r-- CMakeLists.txt | 30
-rwxr-xr-x contrib/debian/rules | 6
-rw-r--r-- packaging/cmake/config.cmake.h.in | 5
-rw-r--r-- src/daemon/daemon.c | 3
-rw-r--r-- src/daemon/main.c | 2
-rw-r--r-- src/daemon/sentry-native/sentry-native.c | 4
-rw-r--r-- src/database/engine/metric.c | 14
-rw-r--r-- src/libnetdata/bt/bt.cc | 208
-rw-r--r-- src/libnetdata/bt/bt.h | 20
-rw-r--r-- src/libnetdata/libnetdata.h | 1
10 files changed, 289 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c90e10a966..7c8355c730 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -125,6 +125,7 @@ option(ENABLE_BUNDLED_PROTOBUF "enable bundled protobuf" False)
option(ENABLE_LOGS_MANAGEMENT_TESTS "enable logs management tests" True)
+option(ENABLE_LIBBACKTRACE "enable libbacktrace" False)
option(ENABLE_SENTRY "enable sentry" False)
option(ENABLE_WEBRTC "enable webrtc" False)
@@ -136,6 +137,23 @@ if(ENABLE_PLUGIN_GO)
find_package(Go "${MIN_GO_VERSION}" REQUIRED)
endif()
+# When ENABLE_LIBBACKTRACE is set, fetch libbacktrace from git and build it
+# with its own configure/make, installing into the external project's
+# INSTALL_DIR. The resulting include directory and static library are exported
+# through LIBBACKTRACE_INCLUDE_DIR and LIBBACKTRACE_LIBRARY.
+# NOTE(review): GIT_TAG master follows a moving branch, so two builds of the
+# same netdata commit may use different libbacktrace sources - consider
+# pinning a commit hash.
+if(ENABLE_LIBBACKTRACE)
+ include(ExternalProject)
+
+ ExternalProject_Add(libbacktrace
+ GIT_REPOSITORY https://github.com/ianlancetaylor/libbacktrace.git
+ GIT_TAG master
+ CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
+ BUILD_COMMAND make
+ BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libbacktrace.a
+ UPDATE_DISCONNECTED True
+ INSTALL_COMMAND make install)
+
+ # Resolve the install prefix chosen by ExternalProject for the paths below.
+ ExternalProject_Get_Property(libbacktrace INSTALL_DIR)
+ set(LIBBACKTRACE_INCLUDE_DIR ${INSTALL_DIR}/include)
+ set(LIBBACKTRACE_LIBRARY ${INSTALL_DIR}/lib/libbacktrace.a)
+endif()
+
if(ENABLE_SENTRY)
include(FetchContent)
@@ -670,6 +688,12 @@ if(ENABLE_PLUGIN_EBPF)
)
endif()
+# The backtrace helper sources are compiled into libnetdata only when
+# ENABLE_LIBBACKTRACE is set.
+if(ENABLE_LIBBACKTRACE)
+ list(APPEND LIBNETDATA_FILES
+ src/libnetdata/bt/bt.cc
+ src/libnetdata/bt/bt.h)
+endif()
+
set(LIBH2O_FILES
src/web/server/h2o/libh2o/deps/cloexec/cloexec.c
src/web/server/h2o/libh2o/deps/libgkc/gkc.c
@@ -1506,6 +1530,12 @@ if(ENABLE_PLUGIN_EBPF)
target_link_libraries(libnetdata PUBLIC ${ELF_LIBRARIES})
endif()
+# Make libnetdata wait for the libbacktrace external project, then add its
+# headers and static library (both PRIVATE to libnetdata).
+if(ENABLE_LIBBACKTRACE)
+ add_dependencies(libnetdata libbacktrace)
+ target_include_directories(libnetdata PRIVATE ${LIBBACKTRACE_INCLUDE_DIR})
+ target_link_libraries(libnetdata PRIVATE ${LIBBACKTRACE_LIBRARY})
+endif()
+
# judy
target_link_libraries(libnetdata PUBLIC judy)
diff --git a/contrib/debian/rules b/contrib/debian/rules
index e9231cdca5..808edc1cd8 100755
--- a/contrib/debian/rules
+++ b/contrib/debian/rules
@@ -46,7 +46,8 @@ ifeq ($(ENABLE_SENTRY),true)
-DNETDATA_SENTRY_ENVIRONMENT=$(RELEASE_PIPELINE) \
-DNETDATA_SENTRY_RELEASE=$(VERSION) \
-DNETDATA_SENTRY_DIST=$(BUILD_DESTINATION) \
- -DNETDATA_SENTRY_DSN=$(SENTRY_DSN)
+ -DNETDATA_SENTRY_DSN=$(SENTRY_DSN) \
+ -DENABLE_LIBBACKTRACE=On
else
SENTRY_CONFIG := -DENABLE_SENTRY=Off
endif
@@ -69,7 +70,7 @@ override_dh_auto_configure:
packaging/bundle-ebpf-co-re.sh . ${TOP}/usr/libexec/netdata/plugins.d; \
fi
dh_auto_configure -- -G Ninja \
- -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+ -DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=/ \
-DWEB_DIR=/var/lib/netdata/www \
-DCMAKE_C_FLAGS='-ffile-prefix-map=${SRC_DIR}=${SRC_DIR}' \
@@ -105,7 +106,6 @@ override_dh_strip:
if [ "${ENABLE_SENTRY}" = "true" ] && [ "${UPLOAD_SENTRY}" = "true" ]; then \
sentry-cli debug-files upload -o netdata-inc -p netdata-agent --force-foreground --log-level=debug --wait --include-sources /usr/src/netdata/debian/netdata/usr/sbin/netdata; \
fi
- dh_strip
override_dh_install:
cp -v $(BASE_CONFIG) debian/netdata.conf
diff --git a/packaging/cmake/config.cmake.h.in b/packaging/cmake/config.cmake.h.in
index 276b051f9e..9d55cdb971 100644
--- a/packaging/cmake/config.cmake.h.in
+++ b/packaging/cmake/config.cmake.h.in
@@ -116,6 +116,11 @@
#cmakedefine NETDATA_SENTRY_RELEASE "@NETDATA_SENTRY_RELEASE@"
#cmakedefine NETDATA_SENTRY_DIST "@NETDATA_SENTRY_DIST@"
#cmakedefine NETDATA_SENTRY_DSN "@NETDATA_SENTRY_DSN@"
+
+// enabled libbacktrace
+
+#cmakedefine ENABLE_LIBBACKTRACE
+
// enabled bundling
#cmakedefine ENABLE_BUNDLED_JSONC
diff --git a/src/daemon/daemon.c b/src/daemon/daemon.c
index d9a4b81de5..ed8d677d94 100644
--- a/src/daemon/daemon.c
+++ b/src/daemon/daemon.c
@@ -26,6 +26,9 @@ void get_netdata_execution_path(void) {
}
netdata_exe_file[exepath_size] = '\0';
+#ifdef ENABLE_LIBBACKTRACE
+ bt_init(netdata_exe_file, netdata_configured_cache_dir);
+#endif
// macOS's dirname(3) does not modify passed string
char *tmpdir = strdupz(netdata_exe_file);
diff --git a/src/daemon/main.c b/src/daemon/main.c
index de22a90f43..a1c39a0471 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -1400,6 +1400,8 @@ int unittest_prepare_rrd(char **user) {
}
int main(int argc, char **argv) {
+ uv_setup_args(argc, argv);
+
// initialize the system clocks
clocks_init();
netdata_start_time = now_realtime_sec();
diff --git a/src/daemon/sentry-native/sentry-native.c b/src/daemon/sentry-native/sentry-native.c
index 3594c1fffa..04246f1f7d 100644
--- a/src/daemon/sentry-native/sentry-native.c
+++ b/src/daemon/sentry-native/sentry-native.c
@@ -38,6 +38,10 @@ void sentry_native_init(void)
sentry_options_set_debug(options, 1);
#endif
+#ifdef ENABLE_LIBBACKTRACE
+ sentry_options_add_attachment(options, bt_path);
+#endif
+
sentry_init(options);
}
diff --git a/src/database/engine/metric.c b/src/database/engine/metric.c
index 97db53efbc..8ae2b4b6fe 100644
--- a/src/database/engine/metric.c
+++ b/src/database/engine/metric.c
@@ -134,6 +134,10 @@ static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused,
}
static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
+#ifdef ENABLE_LIBBACKTRACE
+ bt_collect(&metric->uuid);
+#endif
+
spinlock_lock(&metric->refcount_spinlock);
if (metric->refcount >= 0)
@@ -154,10 +158,18 @@ static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
}
static inline void metric_release(MRG *mrg __maybe_unused, METRIC *metric) {
+#ifdef ENABLE_LIBBACKTRACE
+ bt_collect(&metric->uuid);
+#endif
+
spinlock_lock(&metric->refcount_spinlock);
- if (metric->refcount <= 0)
+ if (metric->refcount <= 0) {
+ #ifdef ENABLE_LIBBACKTRACE
+ bt_dump(&metric->uuid);
+ #endif
fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
+ }
metric->refcount -= 1;
REFCOUNT refcount = metric->refcount;
diff --git a/src/libnetdata/bt/bt.cc b/src/libnetdata/bt/bt.cc
new file mode 100644
index 0000000000..d462d7eaf2
--- /dev/null
+++ b/src/libnetdata/bt/bt.cc
@@ -0,0 +1,208 @@
+#include "bt.h"
+
+#include <backtrace.h>
+#include <backtrace-supported.h>
+
+#include <algorithm>
+#include <cstdio>
+#include <fstream>
+#include <mutex>
+#include <sstream>
+#include <unordered_map>
+#include <queue>
+
+static backtrace_state *State = nullptr;
+
+// Per-frame callback for backtrace_pcinfo(): appends one text line describing
+// a program counter to the output stream passed through `data`.
+//
+//   data     - must be a std::ostream* converted to void*
+//   pc       - program counter of the frame
+//   filename - source file of the frame, or NULL when it is not known
+//   lineno   - line number inside `filename`
+//   function - function name, or NULL when it is not known
+//
+// Always returns 0.
+static int pcinfo_callback(void *data, uintptr_t pc, const char *filename, int lineno, const char *function)
+{
+    // StackTrace::dump() hands over the address of a std::ostream, so convert
+    // back to exactly that type: a void* obtained from a base-class pointer
+    // must not be cast straight to a derived stream type.
+    std::ostream *OS = static_cast<std::ostream *>(data);
+
+    if (function)
+        *OS << function << "() @ ";
+
+    if (filename)
+        *OS << filename << ":" << lineno;
+    else
+        // Without symbol information the raw address is all we have; print
+        // it in hex so it can be matched against addr2line/objdump output.
+        *OS << "0x" << std::hex << pc << std::dec << " (information not available)";
+
+    *OS << "\n";
+    return 0;
+}
+
+// Error callback for libbacktrace calls whose `data` pointer is an output
+// stream: appends the error message and error number as one text line.
+//
+//   data   - must be a std::ostream* converted to void*
+//   msg    - error description supplied by libbacktrace
+//   errnum - error number supplied by libbacktrace
+static void error_callback(void *data, const char *msg, int errnum)
+{
+    // Convert back to the exact pointer type that was turned into void*
+    // (std::ostream*), not to a derived stream type.
+    std::ostream *OS = static_cast<std::ostream *>(data);
+    *OS << "Backtrace error: " << msg << " (error number " << errnum << ")\n";
+}
+
+// Hash-map key wrapping a 16-byte UUID.
+//
+// The key owns a private copy of the UUID bytes and `Inner` always points at
+// that copy. Keys stay in the map for the lifetime of the process, so they
+// must not refer to storage owned by the caller, which may be released while
+// the key is still in use.
+//
+// It remains constructible as `UuidKey K = { uuid_ptr };` and `Inner` keeps its
+// name and type, so existing users of the struct compile unchanged.
+struct UuidKey
+{
+    uuid_t Storage;
+    const uuid_t *Inner;
+
+    // Copies the 16 bytes behind `Uuid`; the pointer itself is not retained.
+    UuidKey(const uuid_t *Uuid) : Inner(&Storage)
+    {
+        std::copy(*Uuid, *Uuid + sizeof(uuid_t), Storage);
+    }
+
+    // Copying must re-point Inner at the new object's own Storage.
+    UuidKey(const UuidKey &Other) : Inner(&Storage)
+    {
+        std::copy(Other.Storage, Other.Storage + sizeof(uuid_t), Storage);
+    }
+
+    UuidKey &operator=(const UuidKey &Other)
+    {
+        if (this != &Other)
+            std::copy(Other.Storage, Other.Storage + sizeof(uuid_t), Storage);
+        return *this;
+    }
+
+    // Two keys are equal when uuid_compare() reports their UUIDs as equal.
+    bool operator==(const UuidKey& Other) const
+    {
+        return uuid_compare(*Inner, *Other.Inner) == 0;
+    }
+};
+
+namespace std
+{
+    // std::hash specialization that lets UuidKey be used as the key of
+    // unordered containers: the sizeof(uuid_t) bytes behind Key.Inner are
+    // hashed with XXH64 using seed 0.
+    template<>
+    struct hash<UuidKey>
+    {
+        size_t operator()(const UuidKey& Key) const
+        {
+            const uuid_t &Bytes = *Key.Inner;
+            const auto Digest = XXH64(Bytes, sizeof(uuid_t), 0);
+            return Digest;
+        }
+    };
+}
+
+// Fixed-capacity list of the program counters that make up one call stack.
+class StackTrace
+{
+public:
+    // Maximum number of frames a trace can hold.
+    static const size_t MAX_ITEMS = 128;
+    uintptr_t PCs[MAX_ITEMS] = { 0 };
+    // Number of valid entries at the front of PCs.
+    size_t Items = 0;
+
+    // Appends one program counter. A full trace silently drops the frame:
+    // the bound is enforced here rather than with assert(), which compiles
+    // out under NDEBUG and would leave release builds writing past PCs.
+    void append(uintptr_t PC)
+    {
+        if (Items >= MAX_ITEMS)
+            return;
+
+        PCs[Items++] = PC;
+    }
+
+    // Two traces are equal when they hold the same frames in the same order.
+    bool operator==(const StackTrace& Other) const
+    {
+        return Items == Other.Items && std::equal(PCs, PCs + Items, Other.PCs);
+    }
+
+    // Resolves every stored program counter through backtrace_pcinfo() and
+    // writes the result to OS, followed by an empty line. The callbacks
+    // receive the address of OS as their data pointer.
+    void dump(std::ostream &OS) const
+    {
+        for (size_t i = 0; i < Items; ++i)
+            backtrace_pcinfo(State, PCs[i], pcinfo_callback, error_callback, &OS);
+
+        // '\n' rather than std::endl: no need to force a flush per trace.
+        OS << '\n';
+    }
+};
+
+namespace std
+{
+    // std::hash specialization for StackTrace: hashes the first ST.Items
+    // program counters of ST.PCs with XXH64 using seed 0.
+    template<>
+    struct hash<StackTrace>
+    {
+        size_t operator()(const StackTrace& ST) const
+        {
+            const size_t UsedBytes = ST.Items * sizeof(uintptr_t);
+            const auto Digest = XXH64(ST.PCs, UsedBytes, 0);
+            return Digest;
+        }
+    };
+}
+
+// Every distinct stack trace seen so far, paired with its hash and kept
+// sorted by that hash so entries can be found with a binary search.
+static std::vector<std::pair<uint64_t, StackTrace>> InternedStackTraces;
+
+// Returns the ID (the std::hash value) of ST and stores a copy of ST in
+// InternedStackTraces when no entry with that ID exists yet. Traces are
+// matched by hash only, so two different traces with the same hash share
+// one entry.
+//
+// This function does no locking of its own.
+static size_t stackTraceID(const StackTrace &ST)
+{
+    const uint64_t K = std::hash<StackTrace>{}(ST);
+
+    // Compare stored entries against the bare key, so the search does not
+    // need a temporary pair holding a full copy of the trace.
+    auto Pred = [](const std::pair<uint64_t, StackTrace>& element, uint64_t value) {
+        return element.first < value;
+    };
+
+    auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), K, Pred);
+    if (It == InternedStackTraces.end() || It->first != K)
+        InternedStackTraces.emplace(It, K, ST);
+
+    return K;
+}
+
+// Returns the interned stack trace stored under ID, or an empty trace when
+// no entry with that ID exists.
+//
+// This function does no locking of its own.
+static const StackTrace &lookupStackTrace(uint64_t ID)
+{
+    // Returned for unknown IDs so callers never receive a reference obtained
+    // by dereferencing end() or belonging to a different ID.
+    static const StackTrace Empty;
+
+    auto Pred = [](const std::pair<uint64_t, StackTrace>& element, uint64_t value) {
+        return element.first < value;
+    };
+    auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), ID, Pred);
+
+    if (It == InternedStackTraces.end() || It->first != ID)
+        return Empty;
+
+    return It->second;
+}
+
+// Per-UUID history: for each tracked UUID, a queue of interned stack-trace IDs
+// filled by bt_collect() and read by bt_dump().
+static std::unordered_map<UuidKey, std::queue<uint64_t>> USTs;
+// Taken by bt_collect() and bt_dump() around their accesses to USTs.
+static std::mutex Mutex;
+
+// Per-frame callback for backtrace_simple(): stores `pc` in the StackTrace
+// passed through `data`.
+//
+// Returns 0 to continue with the next frame, or 1 once the trace is full.
+// A non-zero return makes libbacktrace stop unwinding, so an unusually deep
+// stack yields a truncated trace instead of taking down the whole process
+// from what is only a diagnostic facility.
+static int simple_callback(void *data, uintptr_t pc)
+{
+    StackTrace *ST = static_cast<StackTrace*>(data);
+    if (ST->Items >= StackTrace::MAX_ITEMS)
+        return 1;
+
+    ST->append(pc);
+    return 0;
+}
+
+// Path of the file bt_dump() writes to; NULL until bt_init() has run.
+const char *bt_path = NULL;
+
+// Error callback for backtrace_create_state(): reports the failure on stderr
+// instead of discarding it.
+static void bt_init_error_callback(void *data, const char *msg, int errnum)
+{
+    (void) data;
+    fprintf(stderr, "bt: cannot initialize backtrace state: %s (error number %d)\n", msg, errnum);
+}
+
+// Initializes the backtrace facility.
+//
+//   exepath   - path of the running executable, handed to libbacktrace
+//   cache_dir - directory in which the dump file "bt.log" is placed
+//
+// Creates the libbacktrace state (with threaded access enabled) and sets
+// bt_path to "<cache_dir>/bt.log". Only the first call has an effect, so a
+// repeated call cannot leak the previously allocated path or state.
+void bt_init(const char *exepath, const char *cache_dir)
+{
+    if (bt_path)
+        return;
+
+    State = backtrace_create_state(exepath, 1, bt_init_error_callback, nullptr);
+
+    char buf[FILENAME_MAX + 1];
+    snprintfz(buf, FILENAME_MAX, "%s/%s", cache_dir, "bt.log");
+    bt_path = strdupz(buf);
+}
+
+// Error callback for the backtrace_simple() call in bt_collect(). The data
+// pointer of that call is a StackTrace, not an output stream, so a callback
+// that writes to a stream must not be used there; errors are ignored and the
+// trace is simply left shorter (or empty).
+static void bt_collect_error_callback(void *data, const char *msg, int errnum)
+{
+    (void) data;
+    (void) msg;
+    (void) errnum;
+}
+
+// Records the current call stack in the history kept for `uuid`.
+//
+// Only UUIDs whose first byte is 0x0A are tracked, and at most
+// MaxTracesPerUuid traces are kept per UUID (the oldest is dropped first).
+// Does nothing when the backtrace state has not been created.
+void bt_collect(const uuid_t *uuid)
+{
+    static const size_t MaxTracesPerUuid = 128;
+
+    // Track only UUIDs starting with 0x0A - about 1 in 256 when first bytes
+    // are evenly distributed - to save on CPU and RAM consumption.
+    if ((*uuid)[0] != 0x0A)
+        return;
+
+    // Without a state (bt_init() not called, or it failed) nothing can be unwound.
+    if (!State)
+        return;
+
+    // Unwind before taking the lock so the critical section covers only the
+    // shared containers.
+    StackTrace ST;
+    backtrace_simple(State, 1, simple_callback, bt_collect_error_callback, &ST);
+
+    std::lock_guard<std::mutex> lock(Mutex);
+
+    UuidKey UK = { uuid };
+
+    auto& Q = USTs[UK];
+    if (Q.size() == MaxTracesPerUuid)
+        Q.pop();
+
+    Q.push(stackTraceID(ST));
+}
+
+// Writes every stack trace recorded for `uuid` to the file named by bt_path,
+// oldest first, each under a "Stack trace i/N:" heading. The file is opened
+// with std::ofstream's default mode. Does nothing when bt_path has not been
+// set or when no traces were recorded for `uuid`.
+void bt_dump(const uuid_t *uuid)
+{
+    // bt_path stays NULL until bt_init() runs; constructing a std::ofstream
+    // from a null path is undefined behavior.
+    if (!bt_path)
+        return;
+
+    std::lock_guard<std::mutex> lock(Mutex);
+
+    UuidKey UK = { uuid };
+
+    auto It = USTs.find(UK);
+    if (It == USTs.end())
+        return;
+
+    // std::queue cannot be iterated, so drain a copy and leave the recorded
+    // history untouched.
+    std::queue<uint64_t> Q = It->second;
+    const size_t Total = Q.size();
+    std::ostringstream OS;
+
+    for (size_t Idx = 1; !Q.empty(); ++Idx, Q.pop())
+    {
+        OS << "Stack trace " << Idx << "/" << Total << ":\n";
+        lookupStackTrace(Q.front()).dump(OS);
+    }
+
+    // The stream is closed by its destructor.
+    std::ofstream OF{bt_path};
+    if (OF.is_open())
+        OF << OS.str();
+}
diff --git a/src/libnetdata/bt/bt.h b/src/libnetdata/bt/bt.h
new file mode 100644
index 0000000000..dfc6c91434
--- /dev/null
+++ b/src/libnetdata/bt/bt.h
@@ -0,0 +1,20 @@
+#ifndef LIBNETDATA_BT
+#define LIBNETDATA_BT
+
+// C-callable interface of the backtrace helper implemented in bt.cc.
+
+#include "../libnetdata.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates the backtrace state for the executable at `exepath` and sets
+// bt_path to "<cache_dir>/bt.log".
+void bt_init(const char *exepath, const char *cache_dir);
+// Records the current call stack in the history of `uuid`. UUIDs whose first
+// byte is not 0x0A are ignored; at most 128 traces are kept per UUID.
+void bt_collect(const uuid_t *uuid);
+// Writes the stack traces recorded for `uuid` to the file named by bt_path.
+// Does nothing when no traces were recorded for `uuid`.
+void bt_dump(const uuid_t *uuid);
+
+// Path of the dump file; NULL until bt_init() has been called.
+extern const char *bt_path;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBNETDATA_BT */
diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h
index 4bade9b5bb..9f806f0ef4 100644
--- a/src/libnetdata/libnetdata.h
+++ b/src/libnetdata/libnetdata.h
@@ -753,6 +753,7 @@ extern char *netdata_configured_host_prefix;
#include "facets/facets.h"
#include "functions_evloop/functions_evloop.h"
#include "query_progress/progress.h"
+#include "bt/bt.h"
// BEWARE: this exists in alarm-notify.sh
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"