diff options
Diffstat (limited to 'sample_files/huge_cpp_1.cpp')
-rw-r--r-- | sample_files/huge_cpp_1.cpp | 587763 |
1 files changed, 587763 insertions, 0 deletions
diff --git a/sample_files/huge_cpp_1.cpp b/sample_files/huge_cpp_1.cpp new file mode 100644 index 000000000..6e00908ca --- /dev/null +++ b/sample_files/huge_cpp_1.cpp @@ -0,0 +1,587763 @@ +/* + * Copyright (c) 2020-2021 Xuhpclab. All rights reserved. + * Licensed under the MIT License. + * See LICENSE file for more information. + */ +//3159 module path -> file path +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <signal.h> +#include <cinttypes> +#include <vector> + +#include "libelf.h" + +#include "dr_api.h" +#include "drmgr.h" +#include "drreg.h" +#include "drsyms.h" +#include "drutil.h" +#include "drwrap.h" +#include "hashtable.h" + +#include "drcctlib.h" +#include "drcctlib_ext.h" +#include "drcctlib_priv_share.h" +#include "splay_tree.h" +#include "shadow_memory.h" +#include "memory_cache.h" + +#define DRCCTLIB_PRINTF(_FORMAT, _ARGS...) DRCCTLIB_PRINTF_TEMPLATE("fwk", _FORMAT, ##_ARGS) +#define DRCCTLIB_EXIT_PROCESS(_FORMAT, _ARGS...) \ + DRCCTLIB_CLIENT_EXIT_PROCESS_TEMPLATE("fwk", _FORMAT, ##_ARGS) + +#ifdef ARM32_CCTLIB +# define DR_DISASM_DRCCTLIB DR_DISASM_ARM +#elif defined(ARM64_CCTLIB) +# define DR_DISASM_DRCCTLIB DR_DISASM_DR +#else +# define DR_DISASM_DRCCTLIB DR_DISASM_INTEL +#endif + +#ifdef ARM_CCTLIB +# define DR_STACK_REG DR_REG_SP +#else +# define DR_STACK_REG DR_REG_RSP +#endif + +#ifdef x86_CCTLIB +# define OPND_CREATE_SLOT OPND_CREATE_INT32 +# define OPND_CREATE_STATE OPND_CREATE_INT32 +# define OPND_CREATE_SHADOWPRT OPND_CREATE_INTPTR +#elif defined(ARM_CCTLIB) +# define OPND_CREATE_SLOT OPND_CREATE_INT +# define OPND_CREATE_STATE OPND_CREATE_INT +# define OPND_CREATE_SHADOWPRT OPND_CREATE_INT +#endif +#define OPND_CREATE_PT_CUR_SLOT OPND_CREATE_MEM32 +#define OPND_CREATE_PT_CUR_STATE OPND_CREATE_MEM32 + +#ifdef CCTLIB_64 +# define OPND_CREATE_CTXT_HNDL_MEM OPND_CREATE_MEM64 +#else +# define OPND_CREATE_CTXT_HNDL_MEM OPND_CREATE_MEM32 +#endif + +#ifdef ARM_CCTLIB +# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT +#else +# ifdef CCTLIB_64 +# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT64 +# else +# define OPND_CREATE_IMMEDIATE_INT OPND_CREATE_INT32 +# endif +#endif + +// mem_cache and tls_mem_cache config (bb_node_cache && splay_node_cache) +#ifdef FOR_SPEC_TEST +# define MEM_CACHE_PAGE1_BIT 11 // 8KB max cost 56GB +# define MEM_CACHE_PAGE2_BIT 20 // 28MB +#else +# define MEM_CACHE_PAGE1_BIT 4 // 128B max cost 447MB +# define MEM_CACHE_PAGE2_BIT 20 // 28MB +#endif +#define TLS_MEM_CACHE_MIN_NUM 8192 // 2^13 +#define MEM_CACHE_DEBRIS_SIZE 1024 // 2^10 + +// THREAD_SHARED_MEMORY(TSM) (bb_shadow_t) +#define TSM_CACHE_PAGE1_BIT 4 // max support 1,048,576 +#define TSM_CACHE_PAGE2_BIT 16 // 65536 + +// cache global 100KB per thread (pt->bb_cache && pt->inner_mem_ref_cache) +#define BB_CACHE_MESSAGE_MAX_NUM 256 // 2^8 * 16B = 4KB +#define INNER_MEM_REF_CACHE_MAX 4096 // 2^12 * 24B = 96KB + +#define INVALID_CTXT_HNDL 0 +#define THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE 1 +#define VALID_START_CTXT_HNDL 2 + +#define THREAD_ROOT_BB_SHARED_BB_KEY 0 + +#define STRING_POOL_NODES_MAX 7483647L +// #define STRING_POOL_NODES_MAX 2147483647L // 1^31 - 1 + +#define ATOMIC_ADD_CTXT_HNDL(origin, val) dr_atomic_add32_return_sum(&origin, val) +#define ATOMIC_ADD_THREAD_ID_MAX(origin) dr_atomic_add32_return_sum(&origin, 1) + +typedef struct _bb_shadow_t { + bb_key_t key; + slot_t slot_num; + state_t end_ins_state; + int32_t mem_ref_num; + app_pc *ip_shadow; + state_t *state_shadow; + char *disasm_shadow; +#ifdef IN_PROCESS_SPEEDUP + cct_bb_node_t **last_same_key_bb_pt_list; +#endif +} bb_shadow_t; + +typedef struct _client_cb_t { + void (*func_instr_analysis)(void *, instr_instrument_msg_t *); + void (*func_insert_bb_start)(void *, int32_t, int32_t); + void (*func_insert_bb_end)(void *, context_handle_t, int32_t, int32_t, + mem_ref_msg_t *, void **); +} client_cb_t; + +typedef struct _bb_instrument_msg_t { + slot_t slot_max; + bb_key_t bb_key; + state_t bb_end_state; + int32_t mem_ref_num; + bb_shadow_t *bb_shadow; +} bb_instrument_msg_t; + +#ifdef CCTLIB_64 +# define thread_aligned_num_t int64_t +typedef struct _bb_cache_message_t { + thread_aligned_num_t index; + bb_shadow_t *bb_shadow; +} bb_cache_message_t; +#endif + +#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO +typedef struct _per_thread_cct_info_t { + uint64_t call_num; + uint64_t return_num; + uint64_t tree_high; + uint64_t cur_tree_high; + uint64_t ins_num; + uint64_t bb_node_num; + uint64_t real_node_num; + uint64_t mem_ref_num; + uint64_t splay_tree_search_num; + uint64_t cct_create_clean_call_num; +} per_thread_cct_info_t; + +typedef struct _cct_info_t { + uint64_t ins_num; + uint64_t bb_node_num; + uint64_t real_node_num; + uint64_t mem_ref_num; + uint64_t splay_tree_search_num; + uint64_t cct_create_clean_call_num; +} cct_info_t; +#endif + +// TLS(thread local storage) +typedef struct _per_thread_t { + int id; + // for root + cct_bb_node_t *root_bb_node; + // for current handle + cct_bb_node_t *cur_bb_node; + + void *cur_buf1; + tls_memory_cache_t<cct_bb_node_t> *bb_node_cache; + tls_memory_cache_t<splay_node_t> *splay_node_cache; + splay_node_t *next_splay_node; + splay_node_t *dummy_splay_node; + + aligned_ctxt_hndl_t cur_bb_child_ctxt_start_idx; + state_t pre_instr_state; + slot_t cur_slot; + state_t cur_state; + + // Signal + cct_bb_node_t *signal_raise_bb_node; + slot_t signal_raise_slot; + state_t signal_raise_state; + + // DO_DATA_CENTRIC + void *stack_base; + void *stack_size; + bool init_stack_cache; + bool stack_unlimited; + + size_t dmem_alloc_size; + context_handle_t dmem_alloc_ctxt_hndl; + +#ifdef CCTLIB_64 + // For cache control + void *cur_buf2; + bb_cache_message_t *bb_cache; + // For mem access cache control + void *cur_buf3; + mem_ref_msg_t *inner_mem_ref_cache; + // For cache run + bb_shadow_t *pre_bb_shadow; + void *bb_call_back_cache_data; +#endif +#ifdef IN_PROCESS_SPEEDUP + int speedup_cache_index; +#endif + IF_DRCCTLIB_DEBUG(file_t log_file_bb;) + IF_DRCCTLIB_DEBUG(file_t log_file_instr;) +#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + per_thread_cct_info_t cct_info; +#endif + std::vector<datacentric_node_t> *thread_dynamic_datacentric_nodes; +} per_thread_t; + +#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO +void *global_cct_info_lock; +static cct_info_t global_cct_info; +#endif + +static per_thread_t **global_pt_cache_buff; +static int global_thread_id_max = 0; + +static int init_count = 0; + +enum { + INSTRACE_TLS_OFFS_BUF_PTR, + INSTRACE_TLS_COUNT, /* total number of TLS slots allocated */ +}; + +static reg_id_t tls_seg1; +static uint tls_offs1; +static reg_id_t tls_seg2; +static uint tls_offs2; +static reg_id_t tls_seg3; +static uint tls_offs3; +#define TLS_SLOT(tls_base, tls_offs, enum_val) \ + (void **)((byte *)(tls_base) + (tls_offs) + (enum_val)) +#define BUF_PTR1(tls_base, enum_val) \ + *(aligned_ctxt_hndl_t **)TLS_SLOT(tls_base, tls_offs1, enum_val) +#define BUF_PTR2(tls_base, enum_val) \ + *(bb_cache_message_t **)TLS_SLOT(tls_base, tls_offs2, enum_val) +#define BUF_PTR3(tls_base, enum_val) \ + *(mem_ref_msg_t **)TLS_SLOT(tls_base, tls_offs3, enum_val) +#define MINSERT instrlist_meta_preinsert + +static int tls_idx; +static file_t global_log_file; + +static client_cb_t global_client_cb; + +// static file_t global_debug_file; +static char global_flags = DRCCTLIB_DEFAULT; + +static bool (*global_instr_filter)(instr_t *) = DRCCTLIB_FILTER_ZERO_INSTR; + +static cct_ip_node_t *global_ip_node_buff; +static context_handle_t global_ip_node_buff_idle_idx = VALID_START_CTXT_HNDL; + +#define BB_TABLE_HASH_BITS 10 +static hashtable_t global_bb_key_table; + +#ifdef DRCCTLIB_SUPPORT_ATTACH_DETACH +static void *thread_sync_lock; +#endif +static void *bb_shadow_lock; +static void *bb_node_cache_lock; +static void *splay_node_cache_lock; +static void *bb_shadow_cache_lock; +static memory_cache_t<cct_bb_node_t> *global_bb_node_cache; +static memory_cache_t<splay_node_t> *global_splay_node_cache; +static thread_shared_memory_cache_t<bb_shadow_t> *global_bb_shadow_cache; + +static char *global_string_pool; +static int global_string_pool_idle_idx = 0; +static context_handle_t global_static_datacentric_node_idx = 0; +#define ATOMIC_ADD_STRING_POOL_INDEX(origin, val) dr_atomic_add32_return_sum(&origin, val) +#define ATOMIC_ADD_STATIC_DC_NODE_INDEX(origin, val) dr_atomic_add32_return_sum(&origin, val) + +static ConcurrentShadowMemory<data_handle_t> *global_shadow_memory; + +void *global_cct_info_lock; +void *dynamic_datacentric_nodes_lock; +static std::vector<datacentric_node_t> *dynamic_datacentric_nodes; +static std::vector<datacentric_node_t> *static_datacentric_nodes; + +// ctxt to ipnode +static inline context_handle_t +ip_node_to_ctxt_hndl(cct_ip_node_t *ip) +{ + return (context_handle_t)(ip - global_ip_node_buff); +} +// ipnode to ctxt +static inline cct_ip_node_t * +ctxt_hndl_to_ip_node(context_handle_t ctxt_hndl) +{ + return global_ip_node_buff + ctxt_hndl; +} + +static inline bool +ctxt_hndl_is_valid(context_handle_t ctxt_hndl) +{ + return ctxt_hndl >= THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE && + ctxt_hndl < global_ip_node_buff_idle_idx; +} + +static inline bool +ip_node_is_valid(cct_ip_node_t *ip) +{ + context_handle_t ctxt_hndl = ip_node_to_ctxt_hndl(ip); + return ctxt_hndl_is_valid(ctxt_hndl); +} + +static inline cct_bb_node_t * +ip_node_parent_bb_node(cct_ip_node_t *ip) +{ +#ifdef IPNODE_STORE_BNODE_IDX + return global_bb_node_cache->get_object_by_index(ip->parent_bb_node_cache_index); +#else + return ip->parent_bb_node; +#endif +} + +static inline cct_bb_node_t * +ctxt_hndl_parent_bb_node(context_handle_t ctxt_hndl) +{ + return ip_node_parent_bb_node(ctxt_hndl_to_ip_node(ctxt_hndl)); +} + +static inline void +bb_node_init_cache_index(cct_bb_node_t *bb_node, int32_t index) +{ +#ifdef IPNODE_STORE_BNODE_IDX + bb_node->cache_index = index; +#endif +} + +static inline context_handle_t +bb_node_end_ctxt(cct_bb_node_t *bb_node) +{ + return bb_node->child_ctxt_start_idx + bb_node->max_slots - 1; +} + +static inline cct_ip_node_t * +bb_node_end_ip(cct_bb_node_t *bb_node) +{ + return ctxt_hndl_to_ip_node(bb_node_end_ctxt(bb_node)); +} + +static inline cct_bb_node_t * +bb_node_parent_bb(cct_bb_node_t *bb_node) +{ + return bb_node->parent_bb; +} + +static inline context_handle_t +bb_node_caller_ctxt_hndl(cct_bb_node_t *bb_node) +{ + cct_bb_node_t *parent_bb = bb_node_parent_bb(bb_node); + if (parent_bb == NULL) { + return THREAD_ROOT_SHARDED_CALLER_CONTEXT_HANDLE; + } + return bb_node_end_ctxt(parent_bb); +} + +static inline context_handle_t +cur_child_ctxt_start_idx(slot_t num) +{ + context_handle_t next_start_idx = + ATOMIC_ADD_CTXT_HNDL(global_ip_node_buff_idle_idx, num); + if (next_start_idx >= CONTEXT_HANDLE_MAX) { + DRCCTLIB_EXIT_PROCESS("Preallocated IPNodes exhausted. CCTLib couldn't fit your " + "application in its memory. Try a smaller program."); + } + + return next_start_idx - num; +} + +#ifdef ARM_CCTLIB + +static bool +instr_is_ldstex(instr_t *instr) +{ + if (instr_get_opcode(instr) == OP_ldstex) { + return true; + } + return false; +} +#endif + +// instr state flag +static inline bool +instr_state_contain(state_t instr_state_flag, state_t state) +{ + return (instr_state_flag & state) > 0; +} + +static inline bool +instr_need_instrument_check_flag(state_t instr_state_flag) +{ + return instr_state_contain(instr_state_flag, INSTR_STATE_CLIENT_INTEREST) || + instr_state_contain(instr_state_flag, INSTR_STATE_CALL_DIRECT) || + instr_state_contain(instr_state_flag, INSTR_STATE_CALL_IN_DIRECT) || + instr_state_contain(instr_state_flag, INSTR_STATE_RETURN); +} + +static inline bool +instr_need_instrument(instr_t *instr) +{ + if (instr_is_call_direct(instr) || instr_is_call_indirect(instr) || + instr_is_return(instr)) { + return true; + } + if (global_instr_filter(instr)) { + return true; + } + return false; +} + +static inline state_t +instr_get_state(instr_t *instr) +{ + state_t flag = 0; + if (global_instr_filter(instr)) { + flag = flag | INSTR_STATE_CLIENT_INTEREST; + } + if (instr_reads_memory(instr) || instr_writes_memory(instr)) { + flag = flag | INSTR_STATE_MEM_ACCESS; + } + if (instr_is_call_direct(instr)) { + flag = flag | INSTR_STATE_CALL_DIRECT; + } else if (instr_is_call_indirect(instr)) { + flag = flag | INSTR_STATE_CALL_IN_DIRECT; + } else if (instr_is_return(instr)) { + flag = flag | INSTR_STATE_RETURN; + } + return flag; +} + +static inline void +bb_init_shadow_config(instrlist_t *bb, slot_t *interest_ins_num, state_t *end_state, + int32_t *mem_ref_num) +{ +#ifdef ARM32_CCTLIB + instr_t *bb_first = instr_get_next_app(instrlist_first_app(bb)); +#else + instr_t *bb_first = instrlist_first_app(bb); +#endif + +#ifdef ARM_CCTLIB + if (instr_is_exclusive_store(bb_first)) { + return; + } + bool skip = false; +#endif + for (instr_t *instr = bb_first; instr != NULL; instr = instr_get_next_app(instr)) { +#ifdef ARM_CCTLIB + if (!skip && (instr_is_exclusive_load(instr) || instr_is_ldstex(instr))) { + skip = true; + } + if (!skip) { +#endif + state_t state = instr_get_state(instr); + if (instr_need_instrument_check_flag(state)) { + *end_state = state; + (*interest_ins_num)++; + } + if (instr_state_contain(state, INSTR_STATE_CLIENT_INTEREST)) { + for (int i = 0; i < instr_num_srcs(instr); i++) { + if (opnd_is_memory_reference(instr_get_src(instr, i))) { + (*mem_ref_num)++; + } + } + for (int i = 0; i < instr_num_dsts(instr); i++) { + if (opnd_is_memory_reference(instr_get_dst(instr, i))) { + (*mem_ref_num)++; + } + } + } +#ifdef ARM_CCTLIB + } + if (skip && (instr_is_exclusive_store(instr) || instr_is_ldstex(instr))) { + skip = false; + } +#endif + } + return; +} + +static inline void +bb_shadow_create(bb_shadow_t *bb_shadow, int32_t index) +{ + bb_shadow->key = index; + bb_shadow->ip_shadow = NULL; + bb_shadow->state_shadow = NULL; + bb_shadow->disasm_shadow = NULL; +#ifdef IN_PROCESS_SPEEDUP + bb_shadow->last_same_key_bb_pt_list = NULL; +#endif +} + +static inline void +bb_shadow_init_config(bb_shadow_t *bb_shadow, slot_t slot_num, state_t end_ins_state, + int32_t mem_ref_num) +{ + bb_shadow->slot_num = slot_num; + bb_shadow->end_ins_state = end_ins_state; + bb_shadow->mem_ref_num = mem_ref_num; +} + +static inline void +bb_shadow_create_cache(bb_shadow_t *bb_shadow) +{ + if (bb_shadow->slot_num <= 0) { + return; + } + bb_shadow->ip_shadow = (app_pc *)dr_raw_mem_alloc( + bb_shadow->slot_num * sizeof(app_pc), DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); + bb_shadow->state_shadow = (state_t *)dr_raw_mem_alloc( + bb_shadow->slot_num * sizeof(state_t), DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); + bb_shadow->disasm_shadow = + (char *)dr_raw_mem_alloc(bb_shadow->slot_num * DISASM_CACHE_SIZE * sizeof(char), + DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); +#ifdef IN_PROCESS_SPEEDUP + bb_shadow->last_same_key_bb_pt_list = (cct_bb_node_t **)dr_raw_mem_alloc( + SPEEDUP_SUPPORT_THREAD_MAX_NUM * sizeof(cct_bb_node_t *), + DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); +#endif +} + +static inline void +bb_shadow_free_cache(bb_shadow_t *bb_shadow) +{ + if (bb_shadow->slot_num <= 0 || bb_shadow->ip_shadow == NULL) { + return; + } + dr_raw_mem_free(bb_shadow->ip_shadow, bb_shadow->slot_num * sizeof(app_pc)); + dr_raw_mem_free(bb_shadow->state_shadow, bb_shadow->slot_num * sizeof(state_t)); + dr_raw_mem_free(bb_shadow->disasm_shadow, + bb_shadow->slot_num * DISASM_CACHE_SIZE * sizeof(char)); +#ifdef IN_PROCESS_SPEEDUP + dr_raw_mem_free(bb_shadow->last_same_key_bb_pt_list, + SPEEDUP_SUPPORT_THREAD_MAX_NUM * sizeof(cct_bb_node_t *)); +#endif +} + +static inline cct_bb_node_t * +bb_node_create(tls_memory_cache_t<cct_bb_node_t> *tls_cache, bb_key_t key, + cct_bb_node_t *parent_bb, slot_t num) +{ + cct_bb_node_t *new_node = tls_cache->get_next_object(); + new_node->parent_bb = parent_bb; + new_node->key = key; + new_node->child_ctxt_start_idx = cur_child_ctxt_start_idx(num); + new_node->max_slots = num; + new_node->callee_splay_tree_root = NULL; +#ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + new_node->callee_tree_size = 0; +#endif + cct_ip_node_t *children = ctxt_hndl_to_ip_node(new_node->child_ctxt_start_idx); + for (slot_t i = 0; i < num; ++i) { +#ifdef IPNODE_STORE_BNODE_IDX + children[i].parent_bb_node_cache_index = new_node->cache_index; +#else + children[i].parent_bb_node = new_node; +#endif + } + return new_node; +} + +static inline void +instr_instrument_client_cb(void *drcontext, instr_instrument_msg_t *instrument_msg) +{ + if (instr_state_contain(instrument_msg->state, INSTR_STATE_CLIENT_INTEREST) && + global_client_cb.func_instr_analysis != NULL) { + (*global_client_cb.func_instr_analysis)(drcontext, instrument_msg); + } +} + +static inline instr_instrument_msg_t * +instr_instrument_msg_create(instrlist_t *bb, instr_t *instr, bool interest_start, + slot_t slot, state_t state) +{ + instr_instrument_msg_t *msg = + (instr_instrument_msg_t *)dr_global_alloc(sizeof(instr_instrument_msg_t)); + msg->bb = bb; + msg->instr = instr; + msg->interest_start = interest_start; + msg->slot = slot; + msg->state = state; + msg->next = NULL; + return msg; +} + +static inline void +instr_instrument_msg_delete(instr_instrument_msg_t *msg) +{ + if (msg == NULL) { + return; + } + dr_global_free(msg, sizeof(instr_instrument_msg_t)); +} + +static inline bb_instrument_msg_t * +bb_instrument_msg_create(bb_key_t bb_key, slot_t slot_max, state_t bb_end_state, + int32_t mem_ref_num, bb_shadow_t *bb_shadow) +{ + bb_instrument_msg_t *bb_msg = + (bb_instrument_msg_t *)dr_global_alloc(sizeof(bb_instrument_msg_t)); + bb_msg->slot_max = slot_max; + bb_msg->bb_key = bb_key; + bb_msg->bb_end_state = bb_end_state; + bb_msg->mem_ref_num = mem_ref_num; + bb_msg->bb_shadow = bb_shadow; + return bb_msg; +} + +static inline void +bb_instrument_msg_delete(bb_instrument_msg_t *bb_msg) +{ + if (bb_msg == NULL) { + return; + } + dr_global_free(bb_msg, sizeof(bb_instrument_msg_t)); +} + +#ifdef CCTLIB_64 +static inline void +per_thread_bb_end_cb(void *drcontext, context_handle_t bb_child_ctxt_start_idx, + slot_t slot_num, int32_t memory_ref_num, + mem_ref_msg_t *mem_ref_cache, void **bb_call_back_cache_data_ptr) +{ + if (global_client_cb.func_insert_bb_end != NULL) { + (*global_client_cb.func_insert_bb_end)(drcontext, bb_child_ctxt_start_idx, + slot_num, memory_ref_num, mem_ref_cache, + bb_call_back_cache_data_ptr); + } +} + +static inline void +per_thread_init_stack_cache(void *drcontext, per_thread_t *pt) +{ + if (pt->bb_cache[0].bb_shadow != NULL) { + if (!pt->init_stack_cache) { + dr_mcontext_t mcontext = { + sizeof(mcontext), + DR_MC_ALL, + }; + dr_get_mcontext(drcontext, &mcontext); + pt->stack_base = (void *)(ptr_int_t)reg_get_value(DR_STACK_REG, &mcontext); + // DRCCTLIB_PRINTF("pt %d stack_base %p stack size %p stack_end %p", pt->id, + // pt->stack_base, (ptr_int_t)pt->stack_size, + // (ptr_int_t)pt->stack_base - (ptr_int_t)pt->stack_size); + pt->init_stack_cache = true; + } + pt->bb_cache[1].bb_shadow = pt->bb_cache[0].bb_shadow; + pt->bb_cache[0].bb_shadow = NULL; + } +} + +static inline void +per_thread_refresh_bb_cache(void *drcontext, per_thread_t *pt) +{ + if (pt->bb_cache[1].bb_shadow == NULL) { + return; + } + // read & write + cct_bb_node_t *cur_bb_node = pt->cur_bb_node; + splay_node_t *next_splay_node = pt->next_splay_node; + bb_shadow_t *pre_bb_shadow = pt->pre_bb_shadow; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + per_thread_cct_info_t temp_cct_info = pt->cct_info; + int64_t splay_tree_search_num = 0; +# endif + + // read only + bb_cache_message_t *bb_cache = pt->bb_cache; + cct_bb_node_t *root_node = pt->root_bb_node; + void **bb_call_back_cache_data_ptr = &pt->bb_call_back_cache_data; + tls_memory_cache_t<cct_bb_node_t> *bb_node_cache = pt->bb_node_cache; + tls_memory_cache_t<splay_node_t> *splay_node_cache = pt->splay_node_cache; + splay_node_t *dummy_splay_node = pt->dummy_splay_node; +# ifdef IN_PROCESS_SPEEDUP + int speedup_cache_index = pt->speedup_cache_index; +# endif + + for (thread_aligned_num_t i = 1; i < BB_CACHE_MESSAGE_MAX_NUM; i++) { + if (bb_cache[i].bb_shadow != NULL) { + per_thread_bb_end_cb(drcontext, cur_bb_node->child_ctxt_start_idx, + pre_bb_shadow->slot_num, 0, NULL, + bb_call_back_cache_data_ptr); + + bb_shadow_t *cur_bb_shadow = bb_cache[i].bb_shadow; + cct_bb_node_t *new_caller_bb_node = NULL; + if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_THREAD_ROOT_VIRTUAL)) { + new_caller_bb_node = root_node; + } else if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_CALL_DIRECT) || + instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_CALL_IN_DIRECT)) { + new_caller_bb_node = cur_bb_node; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.call_num++; + temp_cct_info.cur_tree_high++; +# endif + } else if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_RETURN)) { + if (bb_node_parent_bb(cur_bb_node) == root_node) { + new_caller_bb_node = bb_node_parent_bb(cur_bb_node); + } else { + new_caller_bb_node = + bb_node_parent_bb(bb_node_parent_bb(cur_bb_node)); +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.cur_tree_high--; +# endif + } +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.return_num++; +# endif + } else { + new_caller_bb_node = bb_node_parent_bb(cur_bb_node); + } + +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.ins_num += cur_bb_shadow->slot_num; + temp_cct_info.bb_node_num++; + if (temp_cct_info.tree_high < temp_cct_info.cur_tree_high) { + temp_cct_info.tree_high = temp_cct_info.cur_tree_high; + } +# endif +# ifdef IN_PROCESS_SPEEDUP + if (speedup_cache_index >= 0 && + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] != NULL) { + if (bb_node_parent_bb( + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]) == + new_caller_bb_node) { + cur_bb_node = + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + splay_tree_search_num++; +# endif + pre_bb_shadow = cur_bb_shadow; + bb_cache[i].bb_shadow = NULL; + continue; + } + } +# endif +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + splay_node_t *new_root = splay_tree_update_test( + new_caller_bb_node->callee_splay_tree_root, + (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node, + &splay_tree_search_num); +# else + splay_node_t *new_root = splay_tree_update( + new_caller_bb_node->callee_splay_tree_root, + (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node); +# endif + if (new_root->payload == NULL) { + new_root->payload = + (void *)bb_node_create(bb_node_cache, cur_bb_shadow->key, + new_caller_bb_node, cur_bb_shadow->slot_num); + next_splay_node = splay_node_cache->get_next_object(); +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.real_node_num++; + new_caller_bb_node->callee_tree_size++; +# endif + } + new_caller_bb_node->callee_splay_tree_root = new_root; + cur_bb_node = (cct_bb_node_t *)(new_root->payload); +# ifdef IN_PROCESS_SPEEDUP + if (speedup_cache_index >= 0) { + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] = + cur_bb_node; + } +# endif + pre_bb_shadow = cur_bb_shadow; + bb_cache[i].bb_shadow = NULL; + } else { + break; + } + } + pt->cur_bb_node = cur_bb_node; + pt->next_splay_node = next_splay_node; + pt->pre_bb_shadow = pre_bb_shadow; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.cct_create_clean_call_num++; + temp_cct_info.splay_tree_search_num += splay_tree_search_num; + pt->cct_info = temp_cct_info; +# endif + + pt->cur_bb_child_ctxt_start_idx = pt->cur_bb_node->child_ctxt_start_idx; + pt->pre_instr_state = pt->pre_bb_shadow->end_ins_state; + BUF_PTR2(pt->cur_buf2, INSTRACE_TLS_OFFS_BUF_PTR) = pt->bb_cache + 1; +} + +static inline void +per_thread_update_cct_tree() +{ + void *drcontext = dr_get_current_drcontext(); + per_thread_t *pt = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx); + per_thread_init_stack_cache(drcontext, pt); + per_thread_refresh_bb_cache(drcontext, pt); +} + +static inline void +per_thread_refresh_bb_cache_and_mem_ref_cache(void *drcontext, per_thread_t *pt) +{ + if (pt->bb_cache[1].bb_shadow == NULL) { + return; + } + // read & write + cct_bb_node_t *cur_bb_node = pt->cur_bb_node; + splay_node_t *next_splay_node = pt->next_splay_node; + bb_shadow_t *pre_bb_shadow = pt->pre_bb_shadow; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + per_thread_cct_info_t temp_cct_info = pt->cct_info; + int64_t splay_tree_search_num = 0; +# endif + + // read only + bb_cache_message_t *bb_cache = pt->bb_cache; + mem_ref_msg_t *inner_mem_ref_cache = pt->inner_mem_ref_cache; + cct_bb_node_t *root_node = pt->root_bb_node; + void **bb_call_back_cache_data_ptr = &pt->bb_call_back_cache_data; + tls_memory_cache_t<cct_bb_node_t> *bb_node_cache = pt->bb_node_cache; + tls_memory_cache_t<splay_node_t> *splay_node_cache = pt->splay_node_cache; + splay_node_t *dummy_splay_node = pt->dummy_splay_node; +# ifdef IN_PROCESS_SPEEDUP + int speedup_cache_index = pt->speedup_cache_index; +# endif + + thread_aligned_num_t pre_bb_start_index = 0; + + for (thread_aligned_num_t i = 1; i < BB_CACHE_MESSAGE_MAX_NUM; i++) { + if (bb_cache[i].bb_shadow != NULL) { + per_thread_bb_end_cb(drcontext, cur_bb_node->child_ctxt_start_idx, + pre_bb_shadow->slot_num, pre_bb_shadow->mem_ref_num, + inner_mem_ref_cache + pre_bb_start_index, + bb_call_back_cache_data_ptr); + + pre_bb_start_index += pre_bb_shadow->mem_ref_num; + bb_shadow_t *cur_bb_shadow = bb_cache[i].bb_shadow; + cct_bb_node_t *new_caller_bb_node = NULL; + if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_THREAD_ROOT_VIRTUAL)) { + new_caller_bb_node = root_node; + } else if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_CALL_DIRECT) || + instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_CALL_IN_DIRECT)) { + new_caller_bb_node = cur_bb_node; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.call_num++; + temp_cct_info.cur_tree_high++; +# endif + } else if (instr_state_contain(pre_bb_shadow->end_ins_state, + INSTR_STATE_RETURN)) { + if (bb_node_parent_bb(cur_bb_node) == root_node) { + new_caller_bb_node = bb_node_parent_bb(cur_bb_node); + } else { + new_caller_bb_node = + bb_node_parent_bb(bb_node_parent_bb(cur_bb_node)); +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.cur_tree_high--; +# endif + } +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.return_num++; +# endif + } else { + new_caller_bb_node = bb_node_parent_bb(cur_bb_node); + } + +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.ins_num += cur_bb_shadow->slot_num; + temp_cct_info.mem_ref_num += cur_bb_shadow->mem_ref_num; + temp_cct_info.bb_node_num++; + if (temp_cct_info.tree_high < temp_cct_info.cur_tree_high) { + temp_cct_info.tree_high = temp_cct_info.cur_tree_high; + } +# endif +# ifdef IN_PROCESS_SPEEDUP + if (speedup_cache_index >= 0 && + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] != NULL) { + if (bb_node_parent_bb( + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]) == + new_caller_bb_node) { + cur_bb_node = + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index]; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + splay_tree_search_num++; +# endif + pre_bb_shadow = cur_bb_shadow; + bb_cache[i].bb_shadow = NULL; + continue; + } + } +# endif +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + splay_node_t *new_root = splay_tree_update_test( + new_caller_bb_node->callee_splay_tree_root, + (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node, + &splay_tree_search_num); +# else + splay_node_t *new_root = splay_tree_update( + new_caller_bb_node->callee_splay_tree_root, + (splay_node_key_t)cur_bb_shadow->key, dummy_splay_node, next_splay_node); +# endif + if (new_root->payload == NULL) { + new_root->payload = + (void *)bb_node_create(bb_node_cache, cur_bb_shadow->key, + new_caller_bb_node, cur_bb_shadow->slot_num); + next_splay_node = splay_node_cache->get_next_object(); +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.real_node_num++; + new_caller_bb_node->callee_tree_size++; +# endif + } + new_caller_bb_node->callee_splay_tree_root = new_root; + cur_bb_node = (cct_bb_node_t *)(new_root->payload); +# ifdef IN_PROCESS_SPEEDUP + if (speedup_cache_index >= 0) { + cur_bb_shadow->last_same_key_bb_pt_list[speedup_cache_index] = + cur_bb_node; + } +# endif + pre_bb_shadow = cur_bb_shadow; + bb_cache[i].bb_shadow = NULL; + } else { + break; + } + } + pt->cur_bb_node = cur_bb_node; + pt->next_splay_node = next_splay_node; + pt->pre_bb_shadow = pre_bb_shadow; +# ifdef DRCCTLIB_DEBUG_LOG_CCT_INFO + temp_cct_info.cct_create_clean_call_num++; + temp_cct_info.splay_tree_search_num += splay_tree_search_num; + pt->cct_info = temp_cct_info; +# endif + + pt->cur_bb_child_ctxt_start_idx = cur_bb_node->child_ctxt_start_idx; + pt->pre_instr_state = pre_bb_shadow->end_ins_state; + BUF_PTR2(pt->cur_buf2, INSTRACE_TLS_OFFS_BUF_PTR) = bb_cache + 1; + + thread_aligned_num_t pre_bb_end_index = + pre_bb_start_index + pre_bb_shadow->mem_ref_num; + thread_aligned_num_t max_index = INNER_MEM_REF_CACHE_MAX >= pre_bb_end_index + ? pre_bb_end_index + : INNER_MEM_REF_CACHE_MAX; + thread_aligned_num_t last_index = pre_bb_start_index; + for (; last_index < max_index; last_index++) { + if (inner_mem_ref_cache[last_index].addr != 0) { + inner_mem_ref_cache[last_index - pre_bb_start_index].slot = + inner_mem_ref_cache[last_index].slot; + inner_mem_ref_cache[last_index - pre_bb_start_index].addr = + inner_mem_ref_cache[last_index].addr; + inner_mem_ref_cache[last_index].addr = 0; + } else { + break; + } + } + BUF_PTR3(pt->cur_buf3, INSTRACE_TLS_OFFS_BUF_PTR) = + inner_mem_ref_cache + last_index - pre_bb_start_index; + for (thread_aligned_num_t index = last_index - pre_bb_start_index; + index < pre_bb_start_index; index++) { + inner_mem_ref_cache[index].addr = 0; + } +} + +static inline void +per_thread_update_cct_tree_memory_cache() +{ + void *drcontext = dr_get_current_drcontext(); + per_thread_t *pt = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx); + per_thread_init_stack_cache(drcontext, pt); + per_thread_refresh_bb_cache_and_mem_ref_cache(drcontext, pt); +} + +static inline void +refresh_per_thread_cct_tree(void *drcontext, per_thread_t *pt) +{ + if ((global_flags & DRCCTLIB_CACHE_MODE) == 0) { + return; + } + per_thread_init_stack_cache(drcontext, pt); + if ((global_flags & DRCCTLIB_CACHE_MEMEORY_ACCESS_ADDR) != 0) { + per_thread_refresh_bb_cache_and_mem_ref_cache(drcontext, pt); + } else { + per_thread_refresh_bb_cache(drcontext, pt); + } +} + +static inline void +per_thread_end_bb_cache_refresh(void *drcontext, per_thread_t *pt) |