summary refs log tree commit diff stats
path: root/src/whatfiles.c
blob: bc054082b7fdda3a61997afd28fb324da2715784 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#include <dirent.h>
#include <signal.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/user.h>
#include <sys/wait.h>

#include "whatfiles.h"
#include "hashmap.h"
#include "strings.h"

// Output stream handle. Starts out NULL; main() points it at stdout when the stdout override flag is set.
// NOTE(review): presumably the stream the OUTPUT() macro writes to — confirm in whatfiles.h.
FILE *Handle = (FILE*)NULL;
// Debug switch, off (0) by default.
// NOTE(review): presumably consulted by the DEBUG() macro and enabled during flag parsing — confirm.
int Debug = 0;

// Looks at the syscall the tracee is currently stopped at and reports it if it is one we're interested in.
//
// current_pid: thread/process ID of the stopped tracee.
// regs:        the tracee's registers as read by the caller. orig_rax selects the syscall;
//              rdi, rsi, rdx and r10 carry its first four arguments (x86-64 syscall convention).
// map:         process table. Used to look up current_pid, passed on to build_output(), and
//              updated with the new program name on execve.
//
// execve stores the exec'd path as the name for current_pid; fork and clone are only logged via DEBUG();
// creat/open/openat/unlink/unlinkat each build one line with build_output() and emit it via OUTPUT().
void check_syscall(pid_t current_pid, struct user_regs_struct regs, HashMap map)
{
    struct String filename = {0};
    struct String output = {0};
    init_string(&filename, 64);
    init_string(&output, 64);
    char mode[MODE_LEN] = {0};

    // The lookup result is used for diagnostics only: an unknown PID is logged and processing continues.
    size_t index;
    HashError err = find_index(current_pid, map, &index);
    if (err) {
        DEBUG("unknown pid %d, syscall %lld\n", current_pid, regs.orig_rax);
    }

    switch (regs.orig_rax)
    {
    case SYS_execve:
        DEBUG("PID %d exec'd. orig_rax: %lld, rax: %lld\n", current_pid, regs.orig_rax, regs.rax);
        // rdi points at the path being exec'd; only rename the process if the path could be read
        if (peek_filename(current_pid, regs.rdi, &filename)) {
            DEBUG("associated process %d with name \"%s\"\n", current_pid, filename.data);
            set_name(current_pid, filename.data, map);
        }
        break;
    case SYS_fork:
        DEBUG("PID %d forked. orig_rax: %lld, rax: %lld\n", current_pid, regs.orig_rax, regs.rax);
        break;
    case SYS_clone: {
        // raw clone() on x86-64: rdi = flags, rsi = new stack pointer, rdx = parent_tid pointer, r10 = child_tid pointer
        unsigned long flags = regs.rdi;
        unsigned long newsp = regs.rsi;
        // Read the words the two tid pointers refer to. The results are not error-checked and are only logged.
        // NOTE(review): the pointers may be NULL or not yet written by the kernel at this point, so treat
        // the logged tids as best-effort — confirm.
        pid_t parent_tid = (pid_t)ptrace(PTRACE_PEEKDATA, current_pid, (void*)regs.rdx, 0);
        pid_t child_tid = (pid_t)ptrace(PTRACE_PEEKDATA, current_pid, (void*)regs.r10, 0);
        // flags and newsp are unsigned long and carry a "0x" prefix, so they must be printed with %lx
        DEBUG("PID %d cloned. orig_rax: %lld, rax: %lld, flags: 0x%lx, newsp: 0x%lx, parent pid: %d, child pid: %d\n",
            current_pid, regs.orig_rax, regs.rax, flags, newsp, parent_tid, child_tid);
        break;
    }
    // NOTE(review): for creat/open/openat the register handed to get_mode()/build_output() below is the
    // syscall's mode_t argument (creat: rsi, open: rdx, openat: r10). The open flags (O_RDONLY, O_WRONLY, ...)
    // live in rsi for open and rdx for openat. Confirm which of the two get_mode() expects.
    case SYS_creat:
        peek_filename(current_pid, regs.rdi, &filename);
        get_mode(regs.rsi, mode);
        build_output(mode, "creat()", regs.rsi, current_pid, &filename, &output, map);
        OUTPUT("%s", output.data);
        break;
    case SYS_open:
        peek_filename(current_pid, regs.rdi, &filename);
        get_mode(regs.rdx, mode);
        build_output(mode, "open()", regs.rdx, current_pid, &filename, &output, map);
        OUTPUT("%s", output.data);
        break;
    case SYS_openat:
        // the path is the second argument (rsi); rdi holds the directory fd
        peek_filename(current_pid, regs.rsi, &filename);
        get_mode(regs.r10, mode);
        build_output(mode, "openat()", regs.r10, current_pid, &filename, &output, map);
        OUTPUT("%s", output.data);
        break;
    case SYS_unlink:
        peek_filename(current_pid, regs.rdi, &filename);
        build_output("delete", "unlink()", 0, current_pid, &filename, &output, map);
        OUTPUT("%s", output.data);
        break;
    case SYS_unlinkat:
        // the path is the second argument (rsi); rdi holds the directory fd
        peek_filename(current_pid, regs.rsi, &filename);
        build_output("delete", "unlinkat()", 0, current_pid, &filename, &output, map);
        OUTPUT("%s", output.data);
        break;
    default:
        // every other syscall is ignored
        break;
    }
    free(filename.data);
    free(output.data);
}

// Handles ptrace event stops. Tasks newly created by fork, clone, or vfork are inserted into the hashmap
// and given a name obtained via read_task(); a task that has just exec'd is (re)inserted under its current PID.
// Any other stop is ignored.
void check_ptrace_event(pid_t current_pid, int proc_status, HashMap map)
{
    struct String task_name = {0};
    init_string(&task_name, 128);

    // The event message is fetched on every call, before the kind of stop is known.
    unsigned long event_msg;
    if (ptrace(PTRACE_GETEVENTMSG, current_pid, (char*)0, &event_msg) == -1L) {
        SYS_ERR("ptrace() failed to get event msg");
    }

    // An event stop shows up as SIGTRAP | (PTRACE_EVENT_x << 8) once the low byte of the wait status is shifted out.
    const int stop_code = proc_status >> 8;
    int task_created = 0;

    if (stop_code == (SIGTRAP | (PTRACE_EVENT_FORK << 8))) {
        DEBUG("caught PTRACE_EVENT_FORK from pid %d. new pid: %ld\n", current_pid, event_msg);
        task_created = 1;
    } else if (stop_code == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
        DEBUG("caught PTRACE_EVENT_CLONE from pid %d. new pid: %ld\n", current_pid, event_msg);
        task_created = 1;
    } else if (stop_code == (SIGTRAP | (PTRACE_EVENT_VFORK << 8))) {
        DEBUG("caught PTRACE_EVENT_VFORK from pid %d. new pid: %ld\n", current_pid, event_msg);
        task_created = 1;
    } else if (stop_code == (SIGTRAP | (PTRACE_EVENT_EXEC << 8))) {
        DEBUG("caught PTRACE_EVENT_EXEC from pid %d. former pid: %ld\n", current_pid, event_msg);
        // Here the event message is the *former* thread ID, so it must not be inserted.
        // From the ptrace man page, "execve(2) under ptrace": when one thread of a multithreaded process
        // calls execve(2), the kernel destroys all other threads and resets the execing thread's ID to the
        // thread group ID (process ID). The tracee changes its thread ID while it is still inside execve(2),
        // and only then does the PTRACE_EVENT_EXEC stop happen (if PTRACE_O_TRACEEXEC was turned on).
        // By the time the tracer sees this event the PID has already changed, so current_pid is the one to track.
        insert(current_pid, ENTRY, map);
    }

    if (task_created) {
        // for fork/clone/vfork the event message carries the new task's PID
        const pid_t new_pid = (pid_t)event_msg;
        insert(new_pid, ENTRY, map);
        read_task(new_pid, &task_name);
        set_name(new_pid, task_name.data, map);
    }

    free(task_name.data);
}

// Processes one stop of the tracee: reads its registers, reports the syscall if this stop is an entry,
// handles any ptrace event carried by proc_status, then resumes the tracee until its next syscall entry or exit.
void step_syscall(pid_t current_pid, int proc_status, HashMap map)
{
    struct user_regs_struct regs;
    long rc;

    // Fetch the current register values, retrying for as long as ptrace reports ESRCH.
    // NOTE(review): the retry is unbounded, so this spins if ESRCH never clears — confirm that is acceptable.
    for (;;) {
        rc = ptrace(PTRACE_GETREGS, current_pid, &regs, &regs);
        if (rc != -1L || errno != ESRCH) {
            break;
        }
    }
    // leaving the loop with -1 means the failure was something other than ESRCH
    if (rc == -1L) {
        SYS_ERR("ptrace() failed to get registers");
    }

    // The same PID reporting the same syscall (same orig_rax) as last time is treated as the exit from that
    // syscall and is not reported again. Known limitations of this heuristic:
    //   - output may be missed when two threads of one process enter the same syscall before either exits,
    //     since both report the same PID to wait() on the SIGTRAP of the syscall enter/exit loop;
    //   - output may be printed twice when two such threads enter two different syscalls before either exits,
    //     since the "last" syscall recorded for the PID is then not that thread's own entry.
    const int syscall_exit = is_exiting(current_pid, regs.orig_rax);
    if (!syscall_exit) {
        check_syscall(current_pid, regs, map);
    }

    // remember this stop so the next one can be compared against it
    LastSyscall.pid = current_pid;
    LastSyscall.syscall = regs.orig_rax;

    check_ptrace_event(current_pid, proc_status, map);

    // let the tracee run until it next enters or leaves a syscall
    if (ptrace(PTRACE_SYSCALL, current_pid, 0, 0) == -1L) {
        SYS_ERR("ptrace() failed to resume");
    }
    fflush(stdout);
}

int main(int argc, char* argv[])
{
    int pid, status;
    HashError err;
    int sys_err;
    bool stdout_override = false;
    bool attach = false;

    struct HashMap hm = {0};
    HashMap hashmap = &hm;
    init_hashmap(hashmap);

    int start_of_user_command = discover_flags(argc, argv);
    char *user_filename = parse_flags(start_of_user_command, argv, &pid, &stdout_override, &attach);
    if (start_of_user_command == argc && !attach) {
        fprintf(stderr, "Must specify a command to be run (after whatfiles arguments) or use the -p flag followed by a PID to attach to an existing process.\n");
        usage();
    }
    if (stdout_override) {
        Handle = stdout;
    } else {
        if (!user_filename) { // if filename is still empty string, make default
            char default_filename[64];