summaryrefslogtreecommitdiffstats
path: root/sample_files/huge_cpp_1.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'sample_files/huge_cpp_1.cpp')
-rw-r--r--sample_files/huge_cpp_1.cpp587763
1 files changed, 587763 insertions, 0 deletions
diff --git a/sample_files/huge_cpp_1.cpp b/sample_files/huge_cpp_1.cpp
new file mode 100644
index 000000000..6e00908ca
--- /dev/null
+++ b/sample_files/huge_cpp_1.cpp
@@ -0,0 +1,587763 @@
+/*
+ * Copyright (c) 2020-2021 Xuhpclab. All rights reserved.
+ * Licensed under the MIT License.
+ * See LICENSE file for more information.
+ */
+//3159 module path -> file path
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <cinttypes>
+#include <vector>
+
+#include "libelf.h"
+
+#include "dr_api.h"
+#include "drmgr.h"
+#include "drreg.h"
+#include "drsyms.h"
+#include "drutil.h"
+#include "drwrap.h"
+#include "hashtable.h"
+
+#include "drcctlib.h"
+#include "drcctlib_ext.h"
+#include "drcctlib_priv_share.h"
+#include "splay_tree.h"
+#include "shadow_memory.h"
+#include "memory_cache.h"
+
+#define DRCCTLIB_PRINTF(_FORMAT, _ARGS...) DRCCTLIB_PRINTF_TEMPLATE("fwk", _FORMAT, ##_ARGS) // forwards to the shared print template with the "fwk" tag
+#define DRCCTLIB_EXIT_PROCESS(_FORMAT, _ARGS...) \
+ DRCCTLIB_CLIENT_EXIT_PROCESS_TEMPLATE("fwk", _FORMAT, ##_ARGS) // forwards to the shared exit template with the "fwk" tag
+
+#ifdef ARM32_CCTLIB // choose the disassembly-syntax constant for the build target
+# define DR_DISASM_DRCCTLIB DR_DISASM_ARM
+#elif defined(ARM64_CCTLIB)
+# define DR_DISASM_DRCCTLIB DR_DISASM_DR
+#else
+# define DR_DISASM_DRCCTLIB DR_DISASM_INTEL
+#endif
+
+#ifdef ARM_CCTLIB // register read by per_thread_init_stack_cache to record pt->stack_base
+# define DR_STACK_REG DR_REG_SP
+#else
+# define DR_STACK_REG DR_REG_RSP
+#endif
+
+#ifdef x86_CCTLIB // immediate-operand constructors; NOTE(review): no #else, so these stay undefined unless x86_CCTLIB or ARM_CCTLIB is set
+# define OPND_CREATE_SLOT OPND_CREATE_INT32
+# define OPND_CREATE_STATE OPND_CREATE_INT32
+# define OPND_CREATE_SHADOWPRT OPND_CREATE_INTPTR
+#elif defined(ARM_CCTLIB)
+# define OPND_CREATE_SLOT OPND_CREATE_INT
+# define OPND_CREATE_STATE OPND_CREATE_INT
+# define OPND_CREATE_SHADOWPRT OPND_CREATE_INT
+#endif
+#define OPND_CREATE_PT_CUR_SLOT OPND_CREATE_MEM32 // 32-bit memory operand on every target
+#define OPND_CREATE_PT_CUR_STATE OPND_CREATE_MEM32 // 32-bit memory operand on every target
+
+#ifdef CCTLIB_64 // memory-operand width for context handles follows the build's pointer width
+# define OPND_CREATE_CTXT_HNDL_MEM OPND_CREATE_MEM64
+#else
+# define OPND_CREATE_CTXT_HNDL_MEM OPND_CREATE_MEM32
+#endif
+
+#ifdef ARM_CCTLIB // generic immediate constructor: OPND_CREATE_INT on ARM, else sized by CCTLIB_64
+# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT
+#else
+# ifdef CCTLIB_64
+# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT64
+# else
+# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT32
+# endif
+#endif
+
+// mem_cache and tls_mem_cache config (bb_node_cache && splay_node_cache)
+#ifdef FOR_SPEC_TEST
+# define MEM_CACHE_PAGE1_BIT 11 // 8KB max cost 56GB
+# define MEM_CACHE_PAGE2_BIT 20 // 28MB
+#else
+# define MEM_CACHE_PAGE1_BIT 4 // 128B max cost 447MB
+# define MEM_CACHE_PAGE2_BIT 20 // 28MB
+#endif
+#define TLS_MEM_CACHE_MIN_NUM 8192 // 2^13
+#define MEM_CACHE_DEBRIS_SIZE 1024 // 2^10
+
+// THREAD_SHARED_MEMORY(TSM) (bb_shadow_t)
+#define TSM_CACHE_PAGE1_BIT 4 // max support 1,048,576
+#define TSM_CACHE_PAGE2_BIT 16 // 65536
+
+// per-thread caches, roughly 100KB per thread in total (pt->bb_cache && pt->inner_mem_ref_cache)
+#define BB_CACHE_MESSAGE_MAX_NUM 256 // 2^8 * 16B = 4KB
+#define INNER_MEM_REF_CACHE_MAX 4096 // 2^12 * 24B = 96KB
+
+#define INVALID_CTXT_HNDL 0 // handle value 0 is never treated as valid by ctxt_hndl_is_valid
+#define THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE 1 // caller handle reported for a bb node that has no parent
+#define VALID_START_CTXT_HNDL 2 // initial value of global_ip_node_buff_idle_idx
+
+#define THREAD_ROOT_BB_SHARED_BB_KEY 0
+
+#define STRING_POOL_NODES_MAX 7483647L
+// #define STRING_POOL_NODES_MAX 2147483647L // 2^31 - 1
+
+#define ATOMIC_ADD_CTXT_HNDL(origin, val) dr_atomic_add32_return_sum(&origin, val) // atomic add; callers subtract val from the result to get the old value
+#define ATOMIC_ADD_THREAD_ID_MAX(origin) dr_atomic_add32_return_sum(&origin, 1) // atomic increment by one
+
+typedef struct _bb_shadow_t { // shadow record describing one instrumented basic block
+ bb_key_t key; // set from the index passed to bb_shadow_create; used as the splay-tree key during refresh
+ slot_t slot_num; // element count of the per-slot arrays below
+ state_t end_ins_state; // state flags consulted during refresh to choose the next caller node
+ int32_t mem_ref_num; // memory-reference count; advances the mem-ref cache offset during refresh
+ app_pc *ip_shadow; // slot_num entries, allocated in bb_shadow_create_cache
+ state_t *state_shadow; // slot_num entries, allocated in bb_shadow_create_cache
+ char *disasm_shadow; // slot_num * DISASM_CACHE_SIZE chars, allocated in bb_shadow_create_cache
+#ifdef IN_PROCESS_SPEEDUP
+ cct_bb_node_t **last_same_key_bb_pt_list; // SPEEDUP_SUPPORT_THREAD_MAX_NUM entries, indexed by pt->speedup_cache_index
+#endif
+} bb_shadow_t;
+
+typedef struct _client_cb_t { // client-supplied hooks, stored in global_client_cb
+ void (*func_instr_analysis)(void *, instr_instrument_msg_t *); // called as (drcontext, msg) by instr_instrument_client_cb when non-NULL
+ void (*func_insert_bb_start)(void *, int32_t, int32_t); // NOTE(review): call site is not in this part of the file
+ void (*func_insert_bb_end)(void *, context_handle_t, int32_t, int32_t,
+ mem_ref_msg_t *, void **); // called by per_thread_bb_end_cb when non-NULL
+} client_cb_t;
+
+typedef struct _bb_instrument_msg_t { // plain record filled field-by-field by bb_instrument_msg_create
+ slot_t slot_max;
+ bb_key_t bb_key;
+ state_t bb_end_state;
+ int32_t mem_ref_num;
+ bb_shadow_t *bb_shadow; // stored as given; bb_instrument_msg_delete does not free it
+} bb_instrument_msg_t;
+
+#ifdef CCTLIB_64 // the bb message cache exists only in CCTLIB_64 builds
+# define thread_aligned_num_t int64_t
+typedef struct _bb_cache_message_t { // one entry of pt->bb_cache
+ thread_aligned_num_t index;
+ bb_shadow_t *bb_shadow; // NULL marks an empty entry; the refresh loops stop at the first NULL
+} bb_cache_message_t;
+#endif
+
+#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO // debug-only statistics
+typedef struct _per_thread_cct_info_t { // counters updated by the per-thread refresh routines
+ uint64_t call_num; // incremented when the previous block ended in a direct or indirect call
+ uint64_t return_num; // incremented when the previous block ended in a return
+ uint64_t tree_high; // running maximum of cur_tree_high
+ uint64_t cur_tree_high; // incremented on calls, decremented on returns that go above the parent
+ uint64_t ins_num; // sum of slot_num over processed blocks
+ uint64_t bb_node_num; // number of processed cache entries
+ uint64_t real_node_num; // number of bb nodes actually created
+ uint64_t mem_ref_num; // sum of mem_ref_num over processed blocks (mem-ref refresh path only)
+ uint64_t splay_tree_search_num;
+ uint64_t cct_create_clean_call_num; // incremented once per refresh that did work
+} per_thread_cct_info_t;
+
+typedef struct _cct_info_t { // type of global_cct_info; presumably the cross-thread aggregate — confirm at use site
+ uint64_t ins_num;
+ uint64_t bb_node_num;
+ uint64_t real_node_num;
+ uint64_t mem_ref_num;
+ uint64_t splay_tree_search_num;
+ uint64_t cct_create_clean_call_num;
+} cct_info_t;
+#endif
+
+// TLS (thread-local storage): per-thread state, fetched with drmgr_get_tls_field(drcontext, tls_idx)
+typedef struct _per_thread_t {
+ int id;
+ // root of this thread's tree; used as the caller node after a thread-root virtual block
+ cct_bb_node_t *root_bb_node;
+ // bb node of the most recently processed block
+ cct_bb_node_t *cur_bb_node;
+
+ void *cur_buf1;
+ tls_memory_cache_t<cct_bb_node_t> *bb_node_cache; // source of new bb nodes (get_next_object)
+ tls_memory_cache_t<splay_node_t> *splay_node_cache; // source of new splay nodes (get_next_object)
+ splay_node_t *next_splay_node; // spare node handed to splay_tree_update; replaced after it is consumed
+ splay_node_t *dummy_splay_node; // scratch node handed to splay_tree_update
+
+ aligned_ctxt_hndl_t cur_bb_child_ctxt_start_idx; // copy of cur_bb_node->child_ctxt_start_idx, written at the end of a refresh
+ state_t pre_instr_state; // copy of pre_bb_shadow->end_ins_state, written at the end of a refresh
+ slot_t cur_slot;
+ state_t cur_state;
+
+ // Signal
+ cct_bb_node_t *signal_raise_bb_node;
+ slot_t signal_raise_slot;
+ state_t signal_raise_state;
+
+ // DO_DATA_CENTRIC
+ void *stack_base; // stack-register value captured once by per_thread_init_stack_cache
+ void *stack_size;
+ bool init_stack_cache; // true once stack_base has been captured
+ bool stack_unlimited;
+
+ size_t dmem_alloc_size;
+ context_handle_t dmem_alloc_ctxt_hndl;
+
+#ifdef CCTLIB_64
+ // For cache control
+ void *cur_buf2; // TLS base used with BUF_PTR2 to reset the bb-cache write pointer
+ bb_cache_message_t *bb_cache; // refresh reads entries 1..BB_CACHE_MESSAGE_MAX_NUM-1; entry 0 is handled by per_thread_init_stack_cache
+ // For mem access cache control
+ void *cur_buf3; // TLS base used with BUF_PTR3 to reset the mem-ref cache write pointer
+ mem_ref_msg_t *inner_mem_ref_cache; // indexed up to INNER_MEM_REF_CACHE_MAX; addr == 0 marks an unused entry
+ // For cache run
+ bb_shadow_t *pre_bb_shadow; // shadow of the most recently processed block
+ void *bb_call_back_cache_data; // its address is passed to the client's bb-end callback
+#endif
+#ifdef IN_PROCESS_SPEEDUP
+ int speedup_cache_index; // index into bb_shadow_t::last_same_key_bb_pt_list; negative disables the shortcut
+#endif
+ IF_DRCCTLIB_DEBUG(file_t log_file_bb;)
+ IF_DRCCTLIB_DEBUG(file_t log_file_instr;)
+#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ per_thread_cct_info_t cct_info;
+#endif
+ std::vector<datacentric_node_t> *thread_dynamic_datacentric_nodes;
+} per_thread_t;
+
+#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+// Debug-only statistics storage. global_cct_info_lock is NOT defined here: it
+// is already defined unconditionally further down in this file, and a second
+// namespace-scope definition is a redefinition error in C++ whenever
+// DRCCTLIB_DEBUG_LOG_CCT_INFO is enabled.
+static cct_info_t global_cct_info;
+#endif
+
+static per_thread_t **global_pt_cache_buff;
+static int global_thread_id_max = 0; // presumably the counter bumped through ATOMIC_ADD_THREAD_ID_MAX — confirm at use site
+
+static int init_count = 0;
+
+enum {
+ INSTRACE_TLS_OFFS_BUF_PTR, // the single slot addressed through the BUF_PTRn macros
+ INSTRACE_TLS_COUNT, /* total number of TLS slots allocated */
+};
+
+static reg_id_t tls_seg1;
+static uint tls_offs1; // byte offset applied by BUF_PTR1
+static reg_id_t tls_seg2;
+static uint tls_offs2; // byte offset applied by BUF_PTR2
+static reg_id_t tls_seg3;
+static uint tls_offs3; // byte offset applied by BUF_PTR3
+#define TLS_SLOT(tls_base, tls_offs, enum_val) \
+ (void **)((byte *)(tls_base) + (tls_offs) + (enum_val)) // byte arithmetic: base + offset + enum value
+#define BUF_PTR1(tls_base, enum_val) \
+ *(aligned_ctxt_hndl_t **)TLS_SLOT(tls_base, tls_offs1, enum_val) // lvalue: slot viewed as aligned_ctxt_hndl_t *
+#define BUF_PTR2(tls_base, enum_val) \
+ *(bb_cache_message_t **)TLS_SLOT(tls_base, tls_offs2, enum_val) // lvalue: bb-cache write pointer
+#define BUF_PTR3(tls_base, enum_val) \
+ *(mem_ref_msg_t **)TLS_SLOT(tls_base, tls_offs3, enum_val) // lvalue: mem-ref cache write pointer
+#define MINSERT instrlist_meta_preinsert
+
+static int tls_idx; // drmgr TLS field index under which per_thread_t is stored
+static file_t global_log_file;
+
+static client_cb_t global_client_cb; // client hooks; each pointer is NULL-checked before it is called
+
+// static file_t global_debug_file;
+static char global_flags = DRCCTLIB_DEFAULT; // bit flags tested against DRCCTLIB_CACHE_MODE etc.
+
+static bool (*global_instr_filter)(instr_t *) = DRCCTLIB_FILTER_ZERO_INSTR; // returns true for instructions of client interest
+
+static cct_ip_node_t *global_ip_node_buff; // base of the ip-node array; a context handle is an index into it
+static context_handle_t global_ip_node_buff_idle_idx = VALID_START_CTXT_HNDL; // first unassigned handle; advanced atomically by cur_child_ctxt_start_idx
+
+#define BB_TABLE_HASH_BITS 10
+static hashtable_t global_bb_key_table;
+
+#ifdef DRCCTLIB_SUPPORT_ATTACH_DETACH
+static void *thread_sync_lock;
+#endif
+static void *bb_shadow_lock;
+static void *bb_node_cache_lock;
+static void *splay_node_cache_lock;
+static void *bb_shadow_cache_lock;
+static memory_cache_t<cct_bb_node_t> *global_bb_node_cache; // queried by index when IPNODE_STORE_BNODE_IDX is defined
+static memory_cache_t<splay_node_t> *global_splay_node_cache;
+static thread_shared_memory_cache_t<bb_shadow_t> *global_bb_shadow_cache;
+
+static char *global_string_pool;
+static int global_string_pool_idle_idx = 0;
+static context_handle_t global_static_datacentric_node_idx = 0;
+#define ATOMIC_ADD_STRING_POOL_INDEX(origin, val) dr_atomic_add32_return_sum(&origin, val)
+#define ATOMIC_ADD_STATIC_DC_NODE_INDEX(origin, val) dr_atomic_add32_return_sum(&origin, val)
+
+static ConcurrentShadowMemory<data_handle_t> *global_shadow_memory;
+
+void *global_cct_info_lock; // NOTE(review): also defined inside the DRCCTLIB_DEBUG_LOG_CCT_INFO block earlier in this file
+void *dynamic_datacentric_nodes_lock;
+static std::vector<datacentric_node_t> *dynamic_datacentric_nodes;
+static std::vector<datacentric_node_t> *static_datacentric_nodes;
+
+// ip node -> context handle: the handle is the node's element index within
+// global_ip_node_buff.
+static inline context_handle_t
+ip_node_to_ctxt_hndl(cct_ip_node_t *ip)
+{
+    cct_ip_node_t *const buff_start = global_ip_node_buff;
+    return (context_handle_t)(ip - buff_start);
+}
+// context handle -> ip node: indexes global_ip_node_buff with the handle.
+// No range check is performed here.
+static inline cct_ip_node_t *
+ctxt_hndl_to_ip_node(context_handle_t ctxt_hndl)
+{
+    return &global_ip_node_buff[ctxt_hndl];
+}
+
+// True when ctxt_hndl lies in
+// [THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE, global_ip_node_buff_idle_idx).
+static inline bool
+ctxt_hndl_is_valid(context_handle_t ctxt_hndl)
+{
+    if (ctxt_hndl < THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE) {
+        return false;
+    }
+    return ctxt_hndl < global_ip_node_buff_idle_idx;
+}
+
+// True when the handle derived from ip passes ctxt_hndl_is_valid.
+static inline bool
+ip_node_is_valid(cct_ip_node_t *ip)
+{
+    return ctxt_hndl_is_valid(ip_node_to_ctxt_hndl(ip));
+}
+
+// Returns the bb node that owns ip. With IPNODE_STORE_BNODE_IDX the ip node
+// stores a cache index that is resolved through global_bb_node_cache;
+// otherwise it stores the pointer directly.
+static inline cct_bb_node_t *
+ip_node_parent_bb_node(cct_ip_node_t *ip)
+{
+#ifdef IPNODE_STORE_BNODE_IDX
+    cct_bb_node_t *owner =
+        global_bb_node_cache->get_object_by_index(ip->parent_bb_node_cache_index);
+#else
+    cct_bb_node_t *owner = ip->parent_bb_node;
+#endif
+    return owner;
+}
+
+// Returns the bb node that owns the ip node identified by ctxt_hndl.
+static inline cct_bb_node_t *
+ctxt_hndl_parent_bb_node(context_handle_t ctxt_hndl)
+{
+    cct_ip_node_t *ip = ctxt_hndl_to_ip_node(ctxt_hndl);
+    return ip_node_parent_bb_node(ip);
+}
+
+// Records index as bb_node's cache index. This is a no-op unless
+// IPNODE_STORE_BNODE_IDX is defined.
+static inline void
+bb_node_init_cache_index(cct_bb_node_t *bb_node, int32_t index)
+{
+#ifndef IPNODE_STORE_BNODE_IDX
+    (void)bb_node;
+    (void)index;
+#else
+    bb_node->cache_index = index;
+#endif
+}
+
+// Handle of the last slot of bb_node: child_ctxt_start_idx + max_slots - 1.
+static inline context_handle_t
+bb_node_end_ctxt(cct_bb_node_t *bb_node)
+{
+    const slot_t last_offset = bb_node->max_slots - 1;
+    return bb_node->child_ctxt_start_idx + last_offset;
+}
+
+// ip node of the last slot of bb_node.
+static inline cct_ip_node_t *
+bb_node_end_ip(cct_bb_node_t *bb_node)
+{
+    const context_handle_t last_hndl = bb_node_end_ctxt(bb_node);
+    return ctxt_hndl_to_ip_node(last_hndl);
+}
+
+// Accessor for bb_node->parent_bb.
+static inline cct_bb_node_t *
+bb_node_parent_bb(cct_bb_node_t *bb_node)
+{
+    cct_bb_node_t *parent = bb_node->parent_bb;
+    return parent;
+}
+
+// Caller context of bb_node: the last slot of its parent block, or
+// THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE when there is no parent.
+static inline context_handle_t
+bb_node_caller_ctxt_hndl(cct_bb_node_t *bb_node)
+{
+    cct_bb_node_t *caller_bb = bb_node_parent_bb(bb_node);
+    return caller_bb != NULL ? bb_node_end_ctxt(caller_bb)
+                             : THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE;
+}
+
+// Reserves num consecutive context handles by atomically advancing
+// global_ip_node_buff_idle_idx, and returns the first reserved handle.
+// Exits the process when the advanced index reaches CONTEXT_HANDLE_MAX.
+static inline context_handle_t
+cur_child_ctxt_start_idx(slot_t num)
+{
+    const context_handle_t range_end =
+        ATOMIC_ADD_CTXT_HNDL(global_ip_node_buff_idle_idx, num);
+    if (range_end >= CONTEXT_HANDLE_MAX) {
+        DRCCTLIB_EXIT_PROCESS("Preallocated IPNodes exhausted. CCTLib couldn't fit your "
+                              "application in its memory. Try a smaller program.");
+    }
+    // The atomic add yields the new sum, so the reserved range starts num earlier.
+    return range_end - num;
+}
+
+#ifdef ARM_CCTLIB
+
+// True when instr's opcode is OP_ldstex.
+static bool
+instr_is_ldstex(instr_t *instr)
+{
+    return instr_get_opcode(instr) == OP_ldstex;
+}
+#endif
+
+// instr state flag
+// True when instr_state_flag and state share at least one set bit.
+// The result is compared with != 0 rather than > 0 so the answer does not
+// depend on the signedness of state_t or on whether the top bit is a flag.
+static inline bool
+instr_state_contain(state_t instr_state_flag, state_t state)
+{
+    return (instr_state_flag & state) != 0;
+}
+
+// True when the state flags mark the instruction as client-interesting, a
+// direct call, an indirect call, or a return.
+static inline bool
+instr_need_instrument_check_flag(state_t instr_state_flag)
+{
+    if (instr_state_contain(instr_state_flag, INSTR_STATE_CLIENT_INTEREST)) {
+        return true;
+    }
+    if (instr_state_contain(instr_state_flag, INSTR_STATE_CALL_DIRECT)) {
+        return true;
+    }
+    if (instr_state_contain(instr_state_flag, INSTR_STATE_CALL_IN_DIRECT)) {
+        return true;
+    }
+    return instr_state_contain(instr_state_flag, INSTR_STATE_RETURN);
+}
+
+// True for calls (direct or indirect), returns, and any instruction accepted
+// by the client filter. The filter is consulted last, and only when the
+// instruction is not a call or return.
+static inline bool
+instr_need_instrument(instr_t *instr)
+{
+    return instr_is_call_direct(instr) || instr_is_call_indirect(instr) ||
+        instr_is_return(instr) || global_instr_filter(instr);
+}
+
+// Builds the state flags of instr: client interest (from the filter), memory
+// access (reads or writes memory), and at most one of direct call, indirect
+// call, or return.
+static inline state_t
+instr_get_state(instr_t *instr)
+{
+    state_t bits = 0;
+    if (global_instr_filter(instr)) {
+        bits |= INSTR_STATE_CLIENT_INTEREST;
+    }
+    if (instr_reads_memory(instr) || instr_writes_memory(instr)) {
+        bits |= INSTR_STATE_MEM_ACCESS;
+    }
+    // The control-transfer kinds are mutually exclusive; the first match wins.
+    if (instr_is_call_direct(instr)) {
+        bits |= INSTR_STATE_CALL_DIRECT;
+    } else if (instr_is_call_indirect(instr)) {
+        bits |= INSTR_STATE_CALL_IN_DIRECT;
+    } else if (instr_is_return(instr)) {
+        bits |= INSTR_STATE_RETURN;
+    }
+    return bits;
+}
+
+// Scans the app instructions of bb and accumulates into the caller-provided
+// counters:
+//   *interest_ins_num  +1 for every instruction whose state needs instrumenting
+//   *end_state         state of the last such instruction
+//   *mem_ref_num       +1 for every memory-reference operand (source or
+//                      destination) of a client-interest instruction
+// The outputs are only added to or overwritten, never reset, so the caller
+// must initialise them. On ARM builds, instructions from an exclusive load up
+// to and including the matching exclusive store are skipped, and a block that
+// starts with an exclusive store is ignored entirely. ARM32 builds start at
+// the second app instruction.
+//
+// A block with no usable first instruction returns immediately instead of
+// passing NULL to instr_get_next_app / instr_is_exclusive_store.
+static inline void
+bb_init_shadow_config(instrlist_t *bb, slot_t *interest_ins_num, state_t *end_state,
+                      int32_t *mem_ref_num)
+{
+    instr_t *bb_first = instrlist_first_app(bb);
+#ifdef ARM32_CCTLIB
+    if (bb_first != NULL) {
+        bb_first = instr_get_next_app(bb_first);
+    }
+#endif
+    if (bb_first == NULL) {
+        return;
+    }
+
+#ifdef ARM_CCTLIB
+    if (instr_is_exclusive_store(bb_first)) {
+        return;
+    }
+    bool skip = false;
+#endif
+    for (instr_t *instr = bb_first; instr != NULL; instr = instr_get_next_app(instr)) {
+#ifdef ARM_CCTLIB
+        if (!skip && (instr_is_exclusive_load(instr) || instr_is_ldstex(instr))) {
+            skip = true;
+        }
+        if (!skip) {
+#endif
+            state_t state = instr_get_state(instr);
+            if (instr_need_instrument_check_flag(state)) {
+                *end_state = state;
+                (*interest_ins_num)++;
+            }
+            if (instr_state_contain(state, INSTR_STATE_CLIENT_INTEREST)) {
+                for (int i = 0; i < instr_num_srcs(instr); i++) {
+                    if (opnd_is_memory_reference(instr_get_src(instr, i))) {
+                        (*mem_ref_num)++;
+                    }
+                }
+                for (int i = 0; i < instr_num_dsts(instr); i++) {
+                    if (opnd_is_memory_reference(instr_get_dst(instr, i))) {
+                        (*mem_ref_num)++;
+                    }
+                }
+            }
+#ifdef ARM_CCTLIB
+        }
+        if (skip && (instr_is_exclusive_store(instr) || instr_is_ldstex(instr))) {
+            skip = false;
+        }
+#endif
+    }
+    return;
+}
+
+// Puts a shadow record into its initial state: key taken from index, all
+// array pointers NULL. slot_num, end_ins_state and mem_ref_num are not
+// touched here.
+static inline void
+bb_shadow_create(bb_shadow_t *bb_shadow, int32_t index)
+{
+#ifdef IN_PROCESS_SPEEDUP
+    bb_shadow->last_same_key_bb_pt_list = NULL;
+#endif
+    bb_shadow->disasm_shadow = NULL;
+    bb_shadow->state_shadow = NULL;
+    bb_shadow->ip_shadow = NULL;
+    bb_shadow->key = index;
+}
+
+// Stores the three per-block configuration values in the shadow record.
+static inline void
+bb_shadow_init_config(bb_shadow_t *bb_shadow, slot_t slot_num, state_t end_ins_state,
+                      int32_t mem_ref_num)
+{
+    bb_shadow->mem_ref_num = mem_ref_num;
+    bb_shadow->end_ins_state = end_ins_state;
+    bb_shadow->slot_num = slot_num;
+}
+
+// Allocates the per-slot arrays of a shadow record with dr_raw_mem_alloc:
+// slot_num app_pc entries, slot_num state_t entries and
+// slot_num * DISASM_CACHE_SIZE chars. With IN_PROCESS_SPEEDUP it also
+// allocates SPEEDUP_SUPPORT_THREAD_MAX_NUM bb-node pointers.
+// Does nothing when slot_num is not positive. Allocation results are not
+// checked.
+static inline void
+bb_shadow_create_cache(bb_shadow_t *bb_shadow)
+{
+    if (bb_shadow->slot_num <= 0) {
+        return;
+    }
+    const uint rw_prot = DR_MEMPROT_READ | DR_MEMPROT_WRITE;
+
+    void *ip_mem =
+        dr_raw_mem_alloc(bb_shadow->slot_num * sizeof(app_pc), rw_prot, NULL);
+    bb_shadow->ip_shadow = (app_pc *)ip_mem;
+
+    void *state_mem =
+        dr_raw_mem_alloc(bb_shadow->slot_num * sizeof(state_t), rw_prot, NULL);
+    bb_shadow->state_shadow = (state_t *)state_mem;
+
+    void *disasm_mem = dr_raw_mem_alloc(
+        bb_shadow->slot_num * DISASM_CACHE_SIZE * sizeof(char), rw_prot, NULL);
+    bb_shadow->disasm_shadow = (char *)disasm_mem;
+#ifdef IN_PROCESS_SPEEDUP
+    void *list_mem = dr_raw_mem_alloc(
+        SPEEDUP_SUPPORT_THREAD_MAX_NUM * sizeof(cct_bb_node_t *), rw_prot, NULL);
+    bb_shadow->last_same_key_bb_pt_list = (cct_bb_node_t **)list_mem;
+#endif
+}
+
+// Releases the arrays allocated by bb_shadow_create_cache, using the same
+// sizes that were used to allocate them.
+//
+// Does nothing when slot_num is not positive or ip_shadow is NULL. Every
+// pointer is reset to NULL after it is freed, so the ip_shadow guard really
+// does make a repeated call a no-op instead of a double free.
+static inline void
+bb_shadow_free_cache(bb_shadow_t *bb_shadow)
+{
+    if (bb_shadow->slot_num <= 0 || bb_shadow->ip_shadow == NULL) {
+        return;
+    }
+    dr_raw_mem_free(bb_shadow->ip_shadow, bb_shadow->slot_num * sizeof(app_pc));
+    bb_shadow->ip_shadow = NULL;
+    dr_raw_mem_free(bb_shadow->state_shadow, bb_shadow->slot_num * sizeof(state_t));
+    bb_shadow->state_shadow = NULL;
+    dr_raw_mem_free(bb_shadow->disasm_shadow,
+                    bb_shadow->slot_num * DISASM_CACHE_SIZE * sizeof(char));
+    bb_shadow->disasm_shadow = NULL;
+#ifdef IN_PROCESS_SPEEDUP
+    dr_raw_mem_free(bb_shadow->last_same_key_bb_pt_list,
+                    SPEEDUP_SUPPORT_THREAD_MAX_NUM * sizeof(cct_bb_node_t *));
+    bb_shadow->last_same_key_bb_pt_list = NULL;
+#endif
+}
+
+// Takes the next bb node from tls_cache, initialises it for (key, parent_bb),
+// reserves num consecutive context handles for its slots, and points each of
+// those ip nodes back at the new bb node (by cache index when
+// IPNODE_STORE_BNODE_IDX is defined, by pointer otherwise).
+static inline cct_bb_node_t *
+bb_node_create(tls_memory_cache_t<cct_bb_node_t> *tls_cache, bb_key_t key,
+               cct_bb_node_t *parent_bb, slot_t num)
+{
+    cct_bb_node_t *node = tls_cache->get_next_object();
+    node->parent_bb = parent_bb;
+    node->key = key;
+    node->child_ctxt_start_idx = cur_child_ctxt_start_idx(num);
+    node->max_slots = num;
+    node->callee_splay_tree_root = NULL;
+#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+    node->callee_tree_size = 0;
+#endif
+    cct_ip_node_t *slot_node = ctxt_hndl_to_ip_node(node->child_ctxt_start_idx);
+    for (slot_t remaining = num; remaining > 0; --remaining, ++slot_node) {
+#ifdef IPNODE_STORE_BNODE_IDX
+        slot_node->parent_bb_node_cache_index = node->cache_index;
+#else
+        slot_node->parent_bb_node = node;
+#endif
+    }
+    return node;
+}
+
+// Forwards instrument_msg to the client's instruction-analysis hook, but only
+// for client-interest instructions and only when a hook is registered.
+static inline void
+instr_instrument_client_cb(void *drcontext, instr_instrument_msg_t *instrument_msg)
+{
+    if (!instr_state_contain(instrument_msg->state, INSTR_STATE_CLIENT_INTEREST)) {
+        return;
+    }
+    if (global_client_cb.func_instr_analysis == NULL) {
+        return;
+    }
+    global_client_cb.func_instr_analysis(drcontext, instrument_msg);
+}
+
+// Allocates an instr_instrument_msg_t with dr_global_alloc and fills it from
+// the arguments; next starts out NULL. Release the result with
+// instr_instrument_msg_delete.
+static inline instr_instrument_msg_t *
+instr_instrument_msg_create(instrlist_t *bb, instr_t *instr, bool interest_start,
+                            slot_t slot, state_t state)
+{
+    void *raw = dr_global_alloc(sizeof(instr_instrument_msg_t));
+    instr_instrument_msg_t *created = (instr_instrument_msg_t *)raw;
+    created->next = NULL;
+    created->state = state;
+    created->slot = slot;
+    created->interest_start = interest_start;
+    created->instr = instr;
+    created->bb = bb;
+    return created;
+}
+
+// Frees a message obtained from instr_instrument_msg_create. NULL is accepted
+// and ignored.
+static inline void
+instr_instrument_msg_delete(instr_instrument_msg_t *msg)
+{
+    if (msg != NULL) {
+        dr_global_free(msg, sizeof(instr_instrument_msg_t));
+    }
+}
+
+// Allocates a bb_instrument_msg_t with dr_global_alloc and fills it from the
+// arguments. Release the result with bb_instrument_msg_delete; bb_shadow is
+// stored as given and is not owned by the message.
+static inline bb_instrument_msg_t *
+bb_instrument_msg_create(bb_key_t bb_key, slot_t slot_max, state_t bb_end_state,
+                         int32_t mem_ref_num, bb_shadow_t *bb_shadow)
+{
+    void *raw = dr_global_alloc(sizeof(bb_instrument_msg_t));
+    bb_instrument_msg_t *created = (bb_instrument_msg_t *)raw;
+    created->bb_shadow = bb_shadow;
+    created->mem_ref_num = mem_ref_num;
+    created->bb_end_state = bb_end_state;
+    created->bb_key = bb_key;
+    created->slot_max = slot_max;
+    return created;
+}
+
+// Frees a message obtained from bb_instrument_msg_create. NULL is accepted
+// and ignored.
+static inline void
+bb_instrument_msg_delete(bb_instrument_msg_t *bb_msg)
+{
+    if (bb_msg != NULL) {
+        dr_global_free(bb_msg, sizeof(bb_instrument_msg_t));
+    }
+}
+
+#ifdef CCTLIB_64
+// Invokes the client's bb-end hook with the given arguments, if one is
+// registered; otherwise does nothing.
+static inline void
+per_thread_bb_end_cb(void *drcontext, context_handle_t bb_child_ctxt_start_idx,
+                     slot_t slot_num, int32_t memory_ref_num,
+                     mem_ref_msg_t *mem_ref_cache, void **bb_call_back_cache_data_ptr)
+{
+    if (global_client_cb.func_insert_bb_end == NULL) {
+        return;
+    }
+    global_client_cb.func_insert_bb_end(drcontext, bb_child_ctxt_start_idx, slot_num,
+                                        memory_ref_num, mem_ref_cache,
+                                        bb_call_back_cache_data_ptr);
+}
+
+// When entry 0 of the thread's bb cache holds a shadow, moves it to entry 1
+// and clears entry 0. The first time this happens for a thread it also
+// records the current stack-register value as pt->stack_base.
+static inline void
+per_thread_init_stack_cache(void *drcontext, per_thread_t *pt)
+{
+    bb_cache_message_t *const head = &pt->bb_cache[0];
+    if (head->bb_shadow == NULL) {
+        return;
+    }
+    if (!pt->init_stack_cache) {
+        dr_mcontext_t mcontext = {
+            sizeof(mcontext),
+            DR_MC_ALL,
+        };
+        dr_get_mcontext(drcontext, &mcontext);
+        pt->stack_base = (void *)(ptr_int_t)reg_get_value(DR_STACK_REG, &mcontext);
+        pt->init_stack_cache = true;
+    }
+    pt->bb_cache[1].bb_shadow = head->bb_shadow;
+    head->bb_shadow = NULL;
+}
+
+static inline void // drains the thread's queued bb messages into its calling-context tree (no memory-reference data)
+per_thread_refresh_bb_cache(void *drcontext, per_thread_t *pt)
+{
+ if (pt->bb_cache[1].bb_shadow == NULL) { // nothing queued since the last refresh
+ return;
+ }
+ // read & write: working copies, stored back into pt after the loop
+ cct_bb_node_t *cur_bb_node = pt->cur_bb_node;
+ splay_node_t *next_splay_node = pt->next_splay_node;
+ bb_shadow_t *pre_bb_shadow = pt->pre_bb_shadow;
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ per_thread_cct_info_t temp_cct_info = pt->cct_info;
+ int64_t splay_tree_search_num = 0;
+# endif
+
+ // read only: not written back to pt
+ bb_cache_message_t *bb_cache = pt->bb_cache;
+ cct_bb_node_t *root_node = pt->root_bb_node;
+ void **bb_call_back_cache_data_ptr = &pt->bb_call_back_cache_data;
+ tls_memory_cache_t<cct_bb_node_t> *bb_node_cache = pt->bb_node_cache;
+ tls_memory_cache_t<splay_node_t> *splay_node_cache = pt->splay_node_cache;
+ splay_node_t *dummy_splay_node = pt->dummy_splay_node;
+# ifdef IN_PROCESS_SPEEDUP
+ int speedup_cache_index = pt->speedup_cache_index;
+# endif
+
+ for (thread_aligned_num_t i = 1; i < BB_CACHE_MESSAGE_MAX_NUM; i++) { // entries are consumed in order, starting at index 1
+ if (bb_cache[i].bb_shadow != NULL) {
+ per_thread_bb_end_cb(drcontext, cur_bb_node->child_ctxt_start_idx, // report the previous block as finished
+ pre_bb_shadow->slot_num, 0, NULL, // this variant passes no memory references
+ bb_call_back_cache_data_ptr);
+
+ bb_shadow_t *cur_bb_shadow = bb_cache[i].bb_shadow;
+ cct_bb_node_t *new_caller_bb_node = NULL; // tree node under which the current block is looked up
+ if (instr_state_contain(pre_bb_shadow->end_ins_state,
+ INSTR_STATE_THREAD_ROOT_VIRTUAL)) {
+ new_caller_bb_node = root_node; // previous block was the thread-root virtual block
+ } else if (instr_state_contain(pre_bb_shadow->end_ins_state,
+ INSTR_STATE_CALL_DIRECT) ||
+ instr_state_contain(pre_bb_shadow->end_ins_state,
+ INSTR_STATE_CALL_IN_DIRECT)) {
+ new_caller_bb_node = cur_bb_node; // previous block ended in a call: go one level down
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.call_num++;
+ temp_cct_info.cur_tree_high++;
+# endif
+ } else if (instr_state_contain(pre_bb_shadow->end_ins_state,
+ INSTR_STATE_RETURN)) {
+ if (bb_node_parent_bb(cur_bb_node) == root_node) { // already directly under the root: stay there
+ new_caller_bb_node = bb_node_parent_bb(cur_bb_node);
+ } else {
+ new_caller_bb_node =
+ bb_node_parent_bb(bb_node_parent_bb(cur_bb_node)); // NOTE(review): assumes cur_bb_node has a non-NULL parent here — confirm
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.cur_tree_high--;
+# endif
+ }
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.return_num++;
+# endif
+ } else {
+ new_caller_bb_node = bb_node_parent_bb(cur_bb_node); // neither call nor return: same caller as the previous block
+ }
+
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.ins_num += cur_bb_shadow->slot_num;
+ temp_cct_info.bb_node_num++;
+ if (temp_cct_info.tree_high < temp_cct_info.cur_tree_high) {
+ temp_cct_info.tree_high = temp_cct_info.cur_tree_high;
+ }
+# endif
+# ifdef IN_PROCESS_SPEEDUP
+ if (speedup_cache_index >= 0 &&
+ cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] != NULL) {
+ if (bb_node_parent_bb(
+ cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]) ==
+ new_caller_bb_node) { // remembered node has the same caller: reuse it and skip the splay lookup
+ cur_bb_node =
+ cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index];
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ splay_tree_search_num++;
+# endif
+ pre_bb_shadow = cur_bb_shadow;
+ bb_cache[i].bb_shadow = NULL;
+ continue;
+ }
+ }
+# endif
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ splay_node_t *new_root = splay_tree_update_test(
+ new_caller_bb_node->callee_splay_tree_root,
+ (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node,
+ &splay_tree_search_num);
+# else
+ splay_node_t *new_root = splay_tree_update(
+ new_caller_bb_node->callee_splay_tree_root,
+ (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node);
+# endif
+ if (new_root->payload == NULL) { // no bb node yet for this key under this caller
+ new_root->payload =
+ (void *)bb_node_create(bb_node_cache, cur_bb_shadow->key,
+ new_caller_bb_node, cur_bb_shadow->slot_num);
+ next_splay_node = splay_node_cache->get_next_object(); // fetch a fresh spare for the next update
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.real_node_num++;
+ new_caller_bb_node->callee_tree_size++;
+# endif
+ }
+ new_caller_bb_node->callee_splay_tree_root = new_root;
+ cur_bb_node = (cct_bb_node_t *)(new_root->payload);
+# ifdef IN_PROCESS_SPEEDUP
+ if (speedup_cache_index >= 0) {
+ cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] =
+ cur_bb_node;
+ }
+# endif
+ pre_bb_shadow = cur_bb_shadow;
+ bb_cache[i].bb_shadow = NULL; // mark the entry as consumed
+ } else {
+ break; // first empty entry ends the queued run
+ }
+ }
+ pt->cur_bb_node = cur_bb_node;
+ pt->next_splay_node = next_splay_node;
+ pt->pre_bb_shadow = pre_bb_shadow;
+# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+ temp_cct_info.cct_create_clean_call_num++;
+ temp_cct_info.splay_tree_search_num += splay_tree_search_num;
+ pt->cct_info = temp_cct_info;
+# endif
+
+ pt->cur_bb_child_ctxt_start_idx = pt->cur_bb_node->child_ctxt_start_idx;
+ pt->pre_instr_state = pt->pre_bb_shadow->end_ins_state;
+ BUF_PTR2(pt->cur_buf2, INSTRACE_TLS_OFFS_BUF_PTR) = pt->bb_cache + 1; // reset the bb-cache write pointer to entry 1
+}
+
+// Looks up the calling thread's per_thread_t and drains its bb cache
+// (variant without memory-reference data).
+static inline void
+per_thread_update_cct_tree()
+{
+    void *cur_drcontext = dr_get_current_drcontext();
+    per_thread_t *thread_data =
+        (per_thread_t *)drmgr_get_tls_field(cur_drcontext, tls_idx);
+    per_thread_init_stack_cache(cur_drcontext, thread_data);
+    per_thread_refresh_bb_cache(cur_drcontext, thread_data);
+}
+
+// Drains the thread's queued bb messages into its calling-context tree and
+// hands each finished block's memory references to the client.
+//
+// For every non-NULL entry bb_cache[i], i = 1.., up to the first NULL entry:
+//   1. The previous block is reported through per_thread_bb_end_cb together
+//      with its mem_ref_num records, which start at offset pre_bb_start_index
+//      in inner_mem_ref_cache.
+//   2. The caller node for the current block is chosen from the previous
+//      block's end state: thread-root virtual -> root; call -> current node;
+//      return -> parent if the parent is the root, else grandparent;
+//      anything else -> parent.
+//   3. The current block's node is found or created in the caller's splay
+//      tree (or taken from the IN_PROCESS_SPEEDUP shortcut).
+// Afterwards the working state is stored back into pt, the bb-cache write
+// pointer is reset to entry 1, and the records already written for the block
+// still in flight are compacted to the front of inner_mem_ref_cache.
+//
+// Returns immediately when bb_cache[1] is empty.
+static inline void
+per_thread_refresh_bb_cache_and_mem_ref_cache(void *drcontext, per_thread_t *pt)
+{
+    if (pt->bb_cache[1].bb_shadow == NULL) {
+        return;
+    }
+    // read & write: working copies, stored back into pt after the loop
+    cct_bb_node_t *cur_bb_node = pt->cur_bb_node;
+    splay_node_t *next_splay_node = pt->next_splay_node;
+    bb_shadow_t *pre_bb_shadow = pt->pre_bb_shadow;
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+    per_thread_cct_info_t temp_cct_info = pt->cct_info;
+    int64_t splay_tree_search_num = 0;
+#    endif
+
+    // read only
+    bb_cache_message_t *bb_cache = pt->bb_cache;
+    mem_ref_msg_t *inner_mem_ref_cache = pt->inner_mem_ref_cache;
+    cct_bb_node_t *root_node = pt->root_bb_node;
+    void **bb_call_back_cache_data_ptr = &pt->bb_call_back_cache_data;
+    tls_memory_cache_t<cct_bb_node_t> *bb_node_cache = pt->bb_node_cache;
+    tls_memory_cache_t<splay_node_t> *splay_node_cache = pt->splay_node_cache;
+    splay_node_t *dummy_splay_node = pt->dummy_splay_node;
+#    ifdef IN_PROCESS_SPEEDUP
+    int speedup_cache_index = pt->speedup_cache_index;
+#    endif
+
+    // Offset in inner_mem_ref_cache of the first record of the previous block.
+    thread_aligned_num_t pre_bb_start_index = 0;
+
+    for (thread_aligned_num_t i = 1; i < BB_CACHE_MESSAGE_MAX_NUM; i++) {
+        if (bb_cache[i].bb_shadow != NULL) {
+            per_thread_bb_end_cb(drcontext, cur_bb_node->child_ctxt_start_idx,
+                                 pre_bb_shadow->slot_num, pre_bb_shadow->mem_ref_num,
+                                 inner_mem_ref_cache + pre_bb_start_index,
+                                 bb_call_back_cache_data_ptr);
+
+            pre_bb_start_index += pre_bb_shadow->mem_ref_num;
+            bb_shadow_t *cur_bb_shadow = bb_cache[i].bb_shadow;
+            cct_bb_node_t *new_caller_bb_node = NULL;
+            if (instr_state_contain(pre_bb_shadow->end_ins_state,
+                                    INSTR_STATE_THREAD_ROOT_VIRTUAL)) {
+                new_caller_bb_node = root_node;
+            } else if (instr_state_contain(pre_bb_shadow->end_ins_state,
+                                           INSTR_STATE_CALL_DIRECT) ||
+                       instr_state_contain(pre_bb_shadow->end_ins_state,
+                                           INSTR_STATE_CALL_IN_DIRECT)) {
+                new_caller_bb_node = cur_bb_node;
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+                temp_cct_info.call_num++;
+                temp_cct_info.cur_tree_high++;
+#    endif
+            } else if (instr_state_contain(pre_bb_shadow->end_ins_state,
+                                           INSTR_STATE_RETURN)) {
+                if (bb_node_parent_bb(cur_bb_node) == root_node) {
+                    new_caller_bb_node = bb_node_parent_bb(cur_bb_node);
+                } else {
+                    new_caller_bb_node =
+                        bb_node_parent_bb(bb_node_parent_bb(cur_bb_node));
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+                    temp_cct_info.cur_tree_high--;
+#    endif
+                }
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+                temp_cct_info.return_num++;
+#    endif
+            } else {
+                new_caller_bb_node = bb_node_parent_bb(cur_bb_node);
+            }
+
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+            temp_cct_info.ins_num += cur_bb_shadow->slot_num;
+            temp_cct_info.mem_ref_num += cur_bb_shadow->mem_ref_num;
+            temp_cct_info.bb_node_num++;
+            if (temp_cct_info.tree_high < temp_cct_info.cur_tree_high) {
+                temp_cct_info.tree_high = temp_cct_info.cur_tree_high;
+            }
+#    endif
+#    ifdef IN_PROCESS_SPEEDUP
+            if (speedup_cache_index >= 0 &&
+                cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] != NULL) {
+                if (bb_node_parent_bb(
+                        cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]) ==
+                    new_caller_bb_node) {
+                    // The remembered node has the same caller: reuse it.
+                    cur_bb_node =
+                        cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index];
+#        ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+                    splay_tree_search_num++;
+#        endif
+                    pre_bb_shadow = cur_bb_shadow;
+                    bb_cache[i].bb_shadow = NULL;
+                    continue;
+                }
+            }
+#    endif
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+            splay_node_t *new_root = splay_tree_update_test(
+                new_caller_bb_node->callee_splay_tree_root,
+                (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node,
+                &splay_tree_search_num);
+#    else
+            splay_node_t *new_root = splay_tree_update(
+                new_caller_bb_node->callee_splay_tree_root,
+                (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node);
+#    endif
+            if (new_root->payload == NULL) {
+                new_root->payload =
+                    (void *)bb_node_create(bb_node_cache, cur_bb_shadow->key,
+                                           new_caller_bb_node, cur_bb_shadow->slot_num);
+                next_splay_node = splay_node_cache->get_next_object();
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+                temp_cct_info.real_node_num++;
+                new_caller_bb_node->callee_tree_size++;
+#    endif
+            }
+            new_caller_bb_node->callee_splay_tree_root = new_root;
+            cur_bb_node = (cct_bb_node_t *)(new_root->payload);
+#    ifdef IN_PROCESS_SPEEDUP
+            if (speedup_cache_index >= 0) {
+                cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] =
+                    cur_bb_node;
+            }
+#    endif
+            pre_bb_shadow = cur_bb_shadow;
+            bb_cache[i].bb_shadow = NULL;
+        } else {
+            break;
+        }
+    }
+    pt->cur_bb_node = cur_bb_node;
+    pt->next_splay_node = next_splay_node;
+    pt->pre_bb_shadow = pre_bb_shadow;
+#    ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO
+    temp_cct_info.cct_create_clean_call_num++;
+    temp_cct_info.splay_tree_search_num += splay_tree_search_num;
+    pt->cct_info = temp_cct_info;
+#    endif
+
+    pt->cur_bb_child_ctxt_start_idx = cur_bb_node->child_ctxt_start_idx;
+    pt->pre_instr_state = pre_bb_shadow->end_ins_state;
+    BUF_PTR2(pt->cur_buf2, INSTRACE_TLS_OFFS_BUF_PTR) = bb_cache + 1;
+
+    // Compact the records already written for the in-flight block (the final
+    // pre_bb_shadow) down to the front of the cache. They occupy
+    // [pre_bb_start_index, pre_bb_end_index), capped at the cache capacity,
+    // and end at the first record whose addr is 0.
+    thread_aligned_num_t pre_bb_end_index =
+        pre_bb_start_index + pre_bb_shadow->mem_ref_num;
+    thread_aligned_num_t max_index = INNER_MEM_REF_CACHE_MAX >= pre_bb_end_index
+        ? pre_bb_end_index
+        : INNER_MEM_REF_CACHE_MAX;
+    thread_aligned_num_t last_index = pre_bb_start_index;
+    for (; last_index < max_index; last_index++) {
+        if (inner_mem_ref_cache[last_index].addr == 0) {
+            break;
+        }
+        // With a zero offset the records are already in place. Moving them
+        // would copy each record onto itself and then clear its addr, which
+        // discards the reference, so only count them.
+        if (pre_bb_start_index != 0) {
+            inner_mem_ref_cache[last_index - pre_bb_start_index].slot =
+                inner_mem_ref_cache[last_index].slot;
+            inner_mem_ref_cache[last_index - pre_bb_start_index].addr =
+                inner_mem_ref_cache[last_index].addr;
+            inner_mem_ref_cache[last_index].addr = 0;
+        }
+    }
+    // New records continue right after the compacted ones.
+    BUF_PTR3(pt->cur_buf3, INSTRACE_TLS_OFFS_BUF_PTR) =
+        inner_mem_ref_cache + last_index - pre_bb_start_index;
+    // Clear what is left of the consumed region between the compacted records
+    // and the old start offset.
+    for (thread_aligned_num_t index = last_index - pre_bb_start_index;
+         index < pre_bb_start_index; index++) {
+        inner_mem_ref_cache[index].addr = 0;
+    }
+}
+
+// Looks up the calling thread's per_thread_t and drains both its bb cache and
+// its memory-reference cache.
+static inline void
+per_thread_update_cct_tree_memory_cache()
+{
+    void *cur_drcontext = dr_get_current_drcontext();
+    per_thread_t *thread_data =
+        (per_thread_t *)drmgr_get_tls_field(cur_drcontext, tls_idx);
+    per_thread_init_stack_cache(cur_drcontext, thread_data);
+    per_thread_refresh_bb_cache_and_mem_ref_cache(cur_drcontext, thread_data);
+}
+
+// Brings pt's calling-context tree up to date when cache mode is enabled; a
+// no-op otherwise. The refresh variant is chosen by the
+// DRCCTLIB_CACHE_MEMEORY_ACCESS_ADDR flag.
+static inline void
+refresh_per_thread_cct_tree(void *drcontext, per_thread_t *pt)
+{
+    const bool cache_mode_on = (global_flags & DRCCTLIB_CACHE_MODE) != 0;
+    if (!cache_mode_on) {
+        return;
+    }
+    per_thread_init_stack_cache(drcontext, pt);
+    const bool caches_mem_addr =
+        (global_flags & DRCCTLIB_CACHE_MEMEORY_ACCESS_ADDR) != 0;
+    if (!caches_mem_addr) {
+        per_thread_refresh_bb_cache(drcontext, pt);
+        return;
+    }
+    per_thread_refresh_bb_cache_and_mem_ref_cache(drcontext, pt);
+}
+
+static inline void
+per_thread_end_bb_cache_refresh(void *drcontext, per_thread_t *pt)</