From 39568ed90ffce40ef669a0f0d5ad1f9bdc7cff06 Mon Sep 17 00:00:00 2001 From: vkalintiris Date: Mon, 4 Mar 2024 12:22:01 +0200 Subject: Backtrace info when modifying refcount of metrics. Keep the last X backtraces when a metric's reference count is increased/decreased. To keep CPU and memory consumption low we enable this only for UUIDs starting with 0x0A. --- CMakeLists.txt | 30 +++++ contrib/debian/rules | 6 +- packaging/cmake/config.cmake.h.in | 5 + src/daemon/daemon.c | 3 + src/daemon/main.c | 2 + src/daemon/sentry-native/sentry-native.c | 4 + src/database/engine/metric.c | 14 ++- src/libnetdata/bt/bt.cc | 208 +++++++++++++++++++++++++++++++ src/libnetdata/bt/bt.h | 20 +++ src/libnetdata/libnetdata.h | 1 + 10 files changed, 289 insertions(+), 4 deletions(-) create mode 100644 src/libnetdata/bt/bt.cc create mode 100644 src/libnetdata/bt/bt.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c90e10a966..7c8355c730 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,6 +125,7 @@ option(ENABLE_BUNDLED_PROTOBUF "enable bundled protobuf" False) option(ENABLE_LOGS_MANAGEMENT_TESTS "enable logs management tests" True) +option(ENABLE_LIBBACKTRACE "enable libbacktrace" False) option(ENABLE_SENTRY "enable sentry" False) option(ENABLE_WEBRTC "enable webrtc" False) @@ -136,6 +137,23 @@ if(ENABLE_PLUGIN_GO) find_package(Go "${MIN_GO_VERSION}" REQUIRED) endif() +if(ENABLE_LIBBACKTRACE) + include(ExternalProject) + + ExternalProject_Add(libbacktrace + GIT_REPOSITORY https://github.com/ianlancetaylor/libbacktrace.git + GIT_TAG master + CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> + BUILD_COMMAND make + BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libbacktrace.a + UPDATE_DISCONNECTED True + INSTALL_COMMAND make install) + + ExternalProject_Get_Property(libbacktrace INSTALL_DIR) + set(LIBBACKTRACE_INCLUDE_DIR ${INSTALL_DIR}/include) + set(LIBBACKTRACE_LIBRARY ${INSTALL_DIR}/lib/libbacktrace.a) +endif() + if(ENABLE_SENTRY) include(FetchContent) @@ -670,6 +688,12 @@ if(ENABLE_PLUGIN_EBPF) ) endif() 
+if(ENABLE_LIBBACKTRACE) + list(APPEND LIBNETDATA_FILES + src/libnetdata/bt/bt.cc + src/libnetdata/bt/bt.h) +endif() + set(LIBH2O_FILES src/web/server/h2o/libh2o/deps/cloexec/cloexec.c src/web/server/h2o/libh2o/deps/libgkc/gkc.c @@ -1506,6 +1530,12 @@ if(ENABLE_PLUGIN_EBPF) target_link_libraries(libnetdata PUBLIC ${ELF_LIBRARIES}) endif() +if(ENABLE_LIBBACKTRACE) + add_dependencies(libnetdata libbacktrace) + target_include_directories(libnetdata PRIVATE ${LIBBACKTRACE_INCLUDE_DIR}) + target_link_libraries(libnetdata PRIVATE ${LIBBACKTRACE_LIBRARY}) +endif() + # judy target_link_libraries(libnetdata PUBLIC judy) diff --git a/contrib/debian/rules b/contrib/debian/rules index e9231cdca5..808edc1cd8 100755 --- a/contrib/debian/rules +++ b/contrib/debian/rules @@ -46,7 +46,8 @@ ifeq ($(ENABLE_SENTRY),true) -DNETDATA_SENTRY_ENVIRONMENT=$(RELEASE_PIPELINE) \ -DNETDATA_SENTRY_RELEASE=$(VERSION) \ -DNETDATA_SENTRY_DIST=$(BUILD_DESTINATION) \ - -DNETDATA_SENTRY_DSN=$(SENTRY_DSN) + -DNETDATA_SENTRY_DSN=$(SENTRY_DSN) \ + -DENABLE_LIBBACKTRACE=On else SENTRY_CONFIG := -DENABLE_SENTRY=Off endif @@ -69,7 +70,7 @@ override_dh_auto_configure: packaging/bundle-ebpf-co-re.sh . 
${TOP}/usr/libexec/netdata/plugins.d; \ fi dh_auto_configure -- -G Ninja \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=/ \ -DWEB_DIR=/var/lib/netdata/www \ -DCMAKE_C_FLAGS='-ffile-prefix-map=${SRC_DIR}=${SRC_DIR}' \ @@ -105,7 +106,6 @@ override_dh_strip: if [ "${ENABLE_SENTRY}" = "true" ] && [ "${UPLOAD_SENTRY}" = "true" ]; then \ sentry-cli debug-files upload -o netdata-inc -p netdata-agent --force-foreground --log-level=debug --wait --include-sources /usr/src/netdata/debian/netdata/usr/sbin/netdata; \ fi - dh_strip override_dh_install: cp -v $(BASE_CONFIG) debian/netdata.conf diff --git a/packaging/cmake/config.cmake.h.in b/packaging/cmake/config.cmake.h.in index 276b051f9e..9d55cdb971 100644 --- a/packaging/cmake/config.cmake.h.in +++ b/packaging/cmake/config.cmake.h.in @@ -116,6 +116,11 @@ #cmakedefine NETDATA_SENTRY_RELEASE "@NETDATA_SENTRY_RELEASE@" #cmakedefine NETDATA_SENTRY_DIST "@NETDATA_SENTRY_DIST@" #cmakedefine NETDATA_SENTRY_DSN "@NETDATA_SENTRY_DSN@" + +// enabled libbacktrace + +#cmakedefine ENABLE_LIBBACKTRACE + // enabled bundling #cmakedefine ENABLE_BUNDLED_JSONC diff --git a/src/daemon/daemon.c b/src/daemon/daemon.c index d9a4b81de5..ed8d677d94 100644 --- a/src/daemon/daemon.c +++ b/src/daemon/daemon.c @@ -26,6 +26,9 @@ void get_netdata_execution_path(void) { } netdata_exe_file[exepath_size] = '\0'; +#ifdef ENABLE_LIBBACKTRACE + bt_init(netdata_exe_file, netdata_configured_cache_dir); +#endif // macOS's dirname(3) does not modify passed string char *tmpdir = strdupz(netdata_exe_file); diff --git a/src/daemon/main.c b/src/daemon/main.c index de22a90f43..a1c39a0471 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -1400,6 +1400,8 @@ int unittest_prepare_rrd(char **user) { } int main(int argc, char **argv) { + uv_setup_args(argc, argv); + // initialize the system clocks clocks_init(); netdata_start_time = now_realtime_sec(); diff --git a/src/daemon/sentry-native/sentry-native.c 
b/src/daemon/sentry-native/sentry-native.c
index 3594c1fffa..04246f1f7d 100644
--- a/src/daemon/sentry-native/sentry-native.c
+++ b/src/daemon/sentry-native/sentry-native.c
@@ -38,6 +38,10 @@ void sentry_native_init(void)
     sentry_options_set_debug(options, 1);
 #endif
 
+#ifdef ENABLE_LIBBACKTRACE
+    sentry_options_add_attachment(options, bt_path);
+#endif
+
     sentry_init(options);
 }
 
diff --git a/src/database/engine/metric.c b/src/database/engine/metric.c
index 97db53efbc..8ae2b4b6fe 100644
--- a/src/database/engine/metric.c
+++ b/src/database/engine/metric.c
@@ -134,6 +134,10 @@ static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused,
 }
 
 static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
+#ifdef ENABLE_LIBBACKTRACE
+    bt_collect(&metric->uuid);
+#endif
+
     spinlock_lock(&metric->refcount_spinlock);
 
     if (metric->refcount >= 0)
@@ -154,10 +158,18 @@ static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
 }
 
 static inline void metric_release(MRG *mrg __maybe_unused, METRIC *metric) {
+#ifdef ENABLE_LIBBACKTRACE
+    bt_collect(&metric->uuid);
+#endif
+
     spinlock_lock(&metric->refcount_spinlock);
 
-    if (metric->refcount <= 0)
+    if (metric->refcount <= 0) {
+        #ifdef ENABLE_LIBBACKTRACE
+        bt_dump(&metric->uuid);
+        #endif
         fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
+    }
 
     metric->refcount -= 1;
     REFCOUNT refcount = metric->refcount;
diff --git a/src/libnetdata/bt/bt.cc b/src/libnetdata/bt/bt.cc
new file mode 100644
index 0000000000..d462d7eaf2
--- /dev/null
+++ b/src/libnetdata/bt/bt.cc
@@ -0,0 +1,247 @@
+#include "bt.h"
+
+#include <backtrace.h>
+#include <backtrace-supported.h>
+
+#include <algorithm>
+#include <cstring>
+#include <fstream>
+#include <ios>
+#include <mutex>
+#include <queue>
+#include <sstream>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// libbacktrace state; stays null until bt_init() has created it.
+static backtrace_state *State = nullptr;
+
+// Upper bound on the number of stack trace IDs remembered per UUID.
+static const size_t MAX_TRACES_PER_UUID = 128;
+
+// backtrace_pcinfo() callback. `data` must point to a std::ostream; one line
+// describing the frame is appended to it.
+static int pcinfo_callback(void *data, uintptr_t pc, const char *filename, int lineno, const char *function)
+{
+    std::ostream *OS = static_cast<std::ostream *>(data);
+
+    if (function)
+        *OS << function << "() @ ";
+
+    if (filename)
+        *OS << filename << ":" << lineno;
+    else
+        *OS << "0x" << std::hex << pc << std::dec << " (information not available)";
+
+    *OS << "\n";
+    return 0;
+}
+
+// Error callback used while symbolizing. `data` must point to a std::ostream.
+static void error_callback(void *data, const char *msg, int errnum)
+{
+    std::ostream *OS = static_cast<std::ostream *>(data);
+    *OS << "Backtrace error: " << msg << " (error number " << errnum << ")\n";
+}
+
+// Error callback used while collecting. There `data` points to a StackTrace,
+// not to a stream, so the message has nowhere to go and is dropped.
+static void collect_error_callback(void *, const char *, int)
+{
+}
+
+// Map key that owns a copy of the UUID bytes, so hashing and comparing never
+// read through a pointer supplied by the caller of bt_collect().
+struct UuidKey
+{
+    uuid_t Inner;
+
+    explicit UuidKey(const uuid_t *UUID)
+    {
+        memcpy(Inner, *UUID, sizeof(uuid_t));
+    }
+
+    bool operator==(const UuidKey &Other) const
+    {
+        return uuid_compare(Inner, Other.Inner) == 0;
+    }
+};
+
+namespace std
+{
+    template<>
+    struct hash<UuidKey>
+    {
+        size_t operator()(const UuidKey &Key) const
+        {
+            return XXH64(Key.Inner, sizeof(uuid_t), 0);
+        }
+    };
+}
+
+// Fixed-capacity list of program counters for one call stack.
+class StackTrace
+{
+public:
+    static const size_t MAX_ITEMS = 128;
+    uintptr_t PCs[MAX_ITEMS] = { 0 };
+    size_t Items = 0;
+
+    // Stores PC and returns true, or returns false when the trace is full.
+    bool append(uintptr_t PC)
+    {
+        if (Items >= MAX_ITEMS)
+            return false;
+
+        PCs[Items++] = PC;
+        return true;
+    }
+
+    bool operator==(const StackTrace &Other) const
+    {
+        return Items == Other.Items && std::equal(PCs, PCs + Items, Other.PCs);
+    }
+
+    // Writes one symbolized line per frame, followed by a blank line.
+    void dump(std::ostream &OS) const
+    {
+        for (size_t i = 0; i < Items; ++i)
+            backtrace_pcinfo(State, PCs[i], pcinfo_callback, error_callback, &OS);
+        OS << '\n';
+    }
+};
+
+namespace std
+{
+    template<>
+    struct hash<StackTrace>
+    {
+        size_t operator()(const StackTrace &ST) const
+        {
+            return XXH64(ST.PCs, ST.Items * sizeof(uintptr_t), 0);
+        }
+    };
+}
+
+// Distinct stack traces, kept sorted by their hash (which doubles as the ID).
+using InternedStackTrace = std::pair<uint64_t, StackTrace>;
+static std::vector<InternedStackTrace> InternedStackTraces;
+
+// Returns the position of the first interned entry whose ID is >= ID.
+static std::vector<InternedStackTrace>::iterator lowerBoundByID(uint64_t ID)
+{
+    auto Pred = [](const InternedStackTrace &Element, uint64_t Value) {
+        return Element.first < Value;
+    };
+    return std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), ID, Pred);
+}
+
+// Interns ST if its hash is not known yet and returns that hash as the ID.
+// Two different traces with the same hash share the first one's entry.
+static uint64_t stackTraceID(const StackTrace &ST)
+{
+    const uint64_t K = std::hash<StackTrace>{}(ST);
+
+    auto It = lowerBoundByID(K);
+    if (It == InternedStackTraces.end() || It->first != K)
+        InternedStackTraces.emplace(It, K, ST);
+
+    return K;
+}
+
+// Returns the interned trace for ID, or nullptr when ID is unknown.
+static const StackTrace *lookupStackTrace(uint64_t ID)
+{
+    auto It = lowerBoundByID(ID);
+    if (It == InternedStackTraces.end() || It->first != ID)
+        return nullptr;
+
+    return &It->second;
+}
+
+// Most recent stack trace IDs per UUID; Mutex guards this map and the
+// interned traces above.
+static std::unordered_map<UuidKey, std::queue<uint64_t>> USTs;
+static std::mutex Mutex;
+
+// backtrace_simple() callback. `data` must point to a StackTrace. Returning
+// non-zero once the trace is full stops the walk, so deep stacks are
+// truncated instead of aborting the process.
+static int simple_callback(void *data, uintptr_t pc)
+{
+    StackTrace *ST = static_cast<StackTrace *>(data);
+    return ST->append(pc) ? 0 : 1;
+}
+
+const char *bt_path = NULL;
+
+// Creates the libbacktrace state for the executable at exepath and sets
+// bt_path to "<cache_dir>/bt.log".
+void bt_init(const char *exepath, const char *cache_dir)
+{
+    State = backtrace_create_state(exepath, 1, nullptr, nullptr);
+
+    char buf[FILENAME_MAX + 1];
+    snprintfz(buf, FILENAME_MAX, "%s/%s", cache_dir, "bt.log");
+    bt_path = strdupz(buf);
+}
+
+// Records the caller's stack trace for uuid, keeping only the most recent
+// MAX_TRACES_PER_UUID entries.
+void bt_collect(const uuid_t *uuid)
+{
+    // Only UUIDs whose first byte is 0x0A are tracked (1 in 256 of uniformly
+    // distributed UUIDs), to save on CPU and RAM consumption.
+    if (!uuid || (*uuid)[0] != 0x0A)
+        return;
+
+    // Nothing can be collected before bt_init() has created the state.
+    if (!State)
+        return;
+
+    // Walk the stack before taking the lock to keep the critical section short.
+    StackTrace ST;
+    backtrace_simple(State, 1, simple_callback, collect_error_callback, &ST);
+
+    std::lock_guard<std::mutex> lock(Mutex);
+
+    auto &Q = USTs[UuidKey(uuid)];
+    if (Q.size() == MAX_TRACES_PER_UUID)
+        Q.pop();
+
+    Q.push(stackTraceID(ST));
+}
+
+// Writes every stack trace recorded for uuid to bt_path, replacing the
+// file's previous contents. Does nothing when no trace was recorded.
+void bt_dump(const uuid_t *uuid)
+{
+    if (!uuid || !bt_path)
+        return;
+
+    std::lock_guard<std::mutex> lock(Mutex);
+
+    auto It = USTs.find(UuidKey(uuid));
+    if (It == USTs.end())
+        return;
+
+    // std::queue cannot be iterated, so drain a copy.
+    std::queue<uint64_t> Q = It->second;
+    const size_t Total = Q.size();
+    std::ostringstream OS;
+
+    for (size_t Idx = 1; !Q.empty(); ++Idx, Q.pop())
+    {
+        OS << "Stack trace " << Idx << "/" << Total << ":\n";
+
+        if (const StackTrace *ST = lookupStackTrace(Q.front()))
+            ST->dump(OS);
+        else
+            OS << "(stack trace not available)\n\n";
+    }
+
+    std::ofstream OF{bt_path};
+    if (OF.is_open())
+        OF << OS.str();
+}
diff --git a/src/libnetdata/bt/bt.h b/src/libnetdata/bt/bt.h
new file mode 100644
index 0000000000..dfc6c91434
--- /dev/null
+++ b/src/libnetdata/bt/bt.h
@@ -0,0 +1,20 @@
+#ifndef LIBNETDATA_BT
+#define LIBNETDATA_BT
+
+#include "../libnetdata.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void bt_init(const char *exepath, const char *cache_dir);
+void bt_collect(const uuid_t *uuid);
+void bt_dump(const uuid_t *uuid);
+
+extern const char *bt_path;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBNETDATA_BT */
diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h
index 4bade9b5bb..9f806f0ef4 100644
--- a/src/libnetdata/libnetdata.h
+++ b/src/libnetdata/libnetdata.h
@@ -753,6 +753,7 @@ extern char *netdata_configured_host_prefix;
 #include "facets/facets.h"
 #include "functions_evloop/functions_evloop.h"
 #include "query_progress/progress.h"
+#include "bt/bt.h"
 
 // BEWARE: this exists in alarm-notify.sh
 #define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"
-- 
cgit v1.2.3