summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/attach.c271
-rw-r--r--src/hashmap.c193
-rw-r--r--src/hashmap.h47
-rw-r--r--src/strings.c54
-rw-r--r--src/strings.h17
-rw-r--r--src/utilities.c230
-rw-r--r--src/whatfiles.c342
-rw-r--r--src/whatfiles.h59
8 files changed, 1213 insertions, 0 deletions
diff --git a/src/attach.c b/src/attach.c
new file mode 100644
index 0000000..ebf02a8
--- /dev/null
+++ b/src/attach.c
@@ -0,0 +1,271 @@
+#include <dirent.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+
+#include "whatfiles.h"
+
+// Most of this file gratefully adapted from Nominal Animal's answer at
+// https://stackoverflow.com/questions/18577956/how-to-use-ptrace-to-get-a-consistent-view-of-multiple-threads
+
+// Appends up to `size` bytes read from `file` to `str`, stopping early at end-of-file
+// or on a read error.
+//   str:  initialized String that receives the bytes (append_char() grows it as needed)
+//   size: maximum number of bytes to consume from the stream
+//   file: open stream to read from; it is not closed here
+void read_file(struct String *str, size_t size, FILE *file)
+{
+    // fgetc() returns an int so that EOF stays distinguishable from every valid byte;
+    // holding the result in a plain char either cuts the input short at a 0xFF byte or
+    // never matches EOF, depending on the signedness of char
+    int c;
+    for (size_t count = 0; count < size && (c = fgetc(file)) != EOF; count++) {
+        append_char((char)c, str);
+    }
+}
+
+// Reads the "State:" line of /proc/[pid]/status.
+// Returns the one-letter state code found there, or 0 if the status file could not be
+// opened or no state line was recognized in its first 4096 bytes.
+char read_status(pid_t pid)
+{
+    char c = 0;
+    char path[128] = {0};
+    snprintf(path, sizeof path, "/proc/%d/status", pid);
+
+    // open the file before allocating anything, so the failure path has nothing to release
+    FILE *h_status = fopen(path, "rb");
+    if (!h_status) return 0;
+
+    struct String string = {0};
+    struct String *str = &string;
+    init_string(str, 4096);
+    read_file(str, 4096, h_status);
+    // callers poll this function in a loop, so the handle must not be left open
+    fclose(h_status);
+
+    regex_t regex;
+    int err;
+    regmatch_t pmatch[2];
+    if (regcomp(&regex, "State:\\W+([A-Za-z])", REG_EXTENDED) != 0) {
+        SYS_ERR("regex compilation error");
+    }
+    err = regexec(&regex, str->data, 2, pmatch, 0);
+    regfree(&regex);
+    if (err) {
+        DEBUG("failed to find regex match in /proc/%d/status file\n", pid);
+    } else {
+        // pmatch[1] is the capture group holding the single state letter
+        c = str->data[pmatch[1].rm_so];
+    }
+    free(str->data);
+    return c;
+}
+
+// Appends the contents of /proc/[tid]/comm (the task's name) to `str`, dropping the
+// trailing newline if there is one.
+// Prints a message to stderr and exits the program if the comm file cannot be opened.
+void read_task(pid_t tid, struct String *str)
+{
+    char path[128] = {0};
+    snprintf(path, sizeof path, "/proc/%d/comm", tid);
+    FILE *h_comm = fopen(path, "rb");
+    if (!h_comm) {
+        fprintf(stderr, "tried to read nonexistent /proc/%d/comm\n", tid);
+        exit(1);
+    }
+    read_file(str, 4096, h_comm);
+    fclose(h_comm);
+    // nothing may have been read; check the length first so the index cannot wrap around
+    if (str->len > 0 && str->data[str->len-1] == '\n') {
+        delete_char(str); // remove newline if present
+    }
+}
+
+// Attaches to an already-running process and all of its current tasks.
+// Sends SIGSTOP and polls until the process reports a stopped state, ptrace-attaches to
+// every task listed by get_tids(), records each task and its name in `map`, sets the
+// ptrace options on `pid`, resumes every task with PTRACE_SYSCALL and sends SIGCONT.
+// Returns 0 on success. If a PTRACE_ATTACH fails for good, the tasks attached so far are
+// detached again and the errno of the failed attach is returned (and left in errno).
+int attach_to_process(pid_t pid, HashMap map)
+{
+    pid_t *tid = NULL;
+    size_t tids = 0;
+    size_t tids_max = 0;
+    size_t t;
+    long r, sys_err;
+
+    // stop the process and its threads
+    kill(pid, SIGSTOP);
+    // can't wait() on a process that's not a child, so poll its status file instead
+    while (1) {
+        char status = read_status(pid);
+        if (status == 'T' || status == 't') break;
+        struct timespec ts = {0, 1000000 * 250}; // quarter second
+        nanosleep(&ts, &ts);
+        DEBUG("waiting for PID %d to stop\n", pid);
+    }
+
+    // get thread IDs from /proc/[PID]/task/
+    tids = get_tids(&tid, &tids_max, pid);
+    if (!tids) {
+        DEBUG("process %d has no threads\n", pid);
+    } else {
+        DEBUG("Process %d has %d tasks\n", (int)pid, (int)tids);
+    }
+
+    /* Attach to all tasks. */
+    for (t = 0; t < tids; t++) {
+        do {
+            r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0);
+        } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
+        if (r == -1L) {
+            // capture errno before anything else (even debug output) can overwrite it
+            const int saved_errno = errno;
+            DEBUG("ptrace attach error\n");
+            // undo the attaches that did succeed
+            while (t-- > 0) {
+                do {
+                    r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
+                } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
+            }
+            free(tid);
+            errno = saved_errno;
+            return saved_errno;
+        }
+        // if successfully attached, add to map
+        insert(tid[t], 0, map);
+        struct String str = {0};
+        init_string(&str, 4096);
+        read_task(tid[t], &str);
+        set_name(tid[t], str.data, map);
+        free(str.data);
+    }
+
+    // register for the ptrace events we want to catch
+    sys_err = ptrace(PTRACE_SETOPTIONS, pid, (char*)0, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC);
+    if (sys_err == -1) SYS_ERR("ptrace() failed to set options");
+
+    for (t = 0; t < tids; t++) {
+        sys_err = ptrace(PTRACE_SYSCALL, tid[t], 0, 0);
+        if (sys_err == -1) SYS_ERR("ptrace() failed to resume thread");
+    }
+    // the list allocated by get_tids() is only needed while attaching
+    free(tid);
+    kill(pid, SIGCONT);
+    return 0;
+}
+
+// Used upon exit as signal handler when whatfiles was used to attach to a process already
+// in progress: walks every slot of `map` and detaches from each task recorded there.
+// For every task it sends SIGSTOP, polls the task's status (up to 11 times, a tenth of a
+// second apart) until it reports stopped, detaches with PTRACE_DETACH and sends SIGCONT.
+// Tasks that never report a stopped state are skipped.
+void detatch_from_process(HashMap map)
+{
+    for (size_t i = 0; i < map->size; i++) {
+        pid_t pid = map->keys[i];
+        if (!pid) continue; // key 0 marks an unused slot
+
+        int r;
+        // tracked separately from the poll count, so that a task which stops on the very
+        // last poll is still detached instead of being treated as a timeout
+        int stopped = 0;
+
+        // make sure the thread is stopped
+        kill(pid, SIGSTOP);
+        for (int polls = 0; ; polls++) {
+            char status = read_status(pid);
+            if (status == 'T' || status == 't') {
+                // thread stopped, due to kill(SIGSTOP) above or ptrace syscall SIGTRAP
+                stopped = 1;
+                break;
+            }
+            struct timespec ts = {0, 100000000}; // 100 million nanoseconds = tenth of a second
+            nanosleep(&ts, &ts);
+            DEBUG("waiting for PID %d to stop\n", pid);
+            if (polls > 9) {
+                DEBUG("could not detatch from PID %d\n", pid);
+                break;
+            }
+        }
+        if (!stopped) continue; // if we weren't able to detatch from this process, move on
+
+        do {
+            r = ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+        } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
+        if (r == -1) fprintf(stderr, "error detatching from PID %d\n", pid);
+        else DEBUG("detatched from process %d\n", pid);
+        kill(pid, SIGCONT);
+    }
+}
+
+// Collects the task (thread) IDs of `pid` by listing the numeric entries of /proc/[pid]/task/.
+//   listptr: in/out pointer to the TID array; it is grown with realloc() as needed
+//   sizeptr: in/out capacity of that array in elements; a value of 0 means "no buffer yet",
+//            in which case *listptr is reset to NULL before use
+//   pid:     process whose tasks are listed; must be at least 1
+// On success the array is terminated with a (pid_t)0 entry, errno is set to 0 and the
+// number of TIDs (not counting the terminator) is returned.
+// On failure 0 is returned and errno is set: EINVAL for bad arguments, ENOTSUP if the
+// directory name does not fit, ESRCH if the directory cannot be opened or holds no TIDs,
+// ENOMEM if realloc() fails, EIO if closedir() fails, or whatever readdir() reported.
+size_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid)
+{
+ char dirname[64];
+ DIR *dir;
+ pid_t *list;
+ size_t size, used = 0;
+
+ // make sure we've been given non-null pointers and a valid pid
+ if (!listptr || !sizeptr || pid < (pid_t)1) {
+ errno = EINVAL;
+ return (size_t)0;
+ }
+
+ // if sizeptr points to 0 or less, null contents of listptr and sizeptr
+ if (*sizeptr > 0) {
+ list = *listptr;
+ size = *sizeptr;
+ } else {
+ list = *listptr = NULL;
+ size = *sizeptr = 0;
+ }
+
+ // snprintf() returns the length it wanted to write; anything >= the buffer size was truncated
+ if (snprintf(dirname, sizeof dirname, "/proc/%d/task/", (int)pid) >= (int)sizeof dirname) {
+ errno = ENOTSUP;
+ return (size_t)0;
+ }
+
+ dir = opendir(dirname);
+ if (!dir) {
+ errno = ESRCH;
+ return (size_t)0;
+ }
+
+ while (1) {
+ struct dirent *ent;
+ int value;
+ char dummy;
+
+ // readdir() returns NULL both at the end of the directory and on error;
+ // clearing errno first is what lets the check after the loop tell them apart
+ errno = 0;
+ ent = readdir(dir);
+ if (!ent) break;
+
+ /* Parse TIDs. Ignore non-numeric entries. */
+ // exactly one conversion means the name was a number with nothing following it
+ if (sscanf(ent->d_name, "%d%c", &value, &dummy) != 1) continue;
+
+ /* Ignore obviously invalid entries. */
+ if (value < 1) continue;
+
+ /* Make sure there is room for another TID. */
+ if (used >= size) {
+ // grows the capacity by at least 128 entries at a time
+ size = (used | 127) + 128;
+ list = realloc(list, size * sizeof list[0]);
+ if (!list) {
+ closedir(dir);
+ errno = ENOMEM;
+ return (size_t)0;
+ }
+ *listptr = list;
+ *sizeptr = size;
+ }
+
+ /* Add to list. */
+ list[used++] = (pid_t)value;
+ }
+
+ // a non-zero errno here means readdir() failed; keep its value across closedir()
+ if (errno) {
+ const int saved_errno = errno;
+ closedir(dir);
+ errno = saved_errno;
+ return (size_t)0;
+ }
+ if (closedir(dir)) {
+ errno = EIO;
+ return (size_t)0;
+ }
+
+ /* None? */
+ if (used < 1) {
+ errno = ESRCH;
+ return (size_t)0;
+ }
+
+ /* Make sure there is room for a terminating (pid_t)0. */
+ if (used >= size) {
+ size = used + 1;
+ list = realloc(list, size * sizeof list[0]);
+ if (!list) {
+ errno = ENOMEM;
+ return (size_t)0;
+ }
+ *listptr = list;
+ *sizeptr = size;
+ }
+
+ /* Terminate list; done. */
+ list[used] = (pid_t)0;
+ errno = 0;
+ return used;
+}
diff --git a/src/hashmap.c b/src/hashmap.c
new file mode 100644
index 0000000..106c65a
--- /dev/null
+++ b/src/hashmap.c
@@ -0,0 +1,193 @@
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+
+#include "hashmap.h"
+#include "strings.h"
+
+/*
+Not the best hash map. Open addressing, quadratic probing
+(which doesn't seem to perform much better than linear for random int inputs),
+and wastes three quarters of its space to minimize collisions.
+Would benefit from separate chaining with linked lists. But good enough for now.
+Also not using all of the functionality in whatfiles, but keeping so that it can
+be easily modified and reused as a generic hashmap later.
+*/
+
+// Sets up `map` as an empty table with INITIAL_SIZE slots by allocating its three
+// parallel, zero-filled arrays (keys, status, names).
+// Prints an error and exits the program if any allocation fails.
+void init_hashmap(HashMap map)
+{
+    map->size = INITIAL_SIZE;
+    map->used = 0;
+    // size every array from the element it holds, the same way resize_hashmap() sizes
+    // them, so the element type and the allocation cannot drift apart
+    map->keys = calloc(INITIAL_SIZE, sizeof *map->keys);
+    map->status = calloc(INITIAL_SIZE, sizeof *map->status);
+    map->names = calloc(INITIAL_SIZE, sizeof *map->names);
+    if (!(map->keys && map->status && map->names)) {
+        perror("calloc() error");
+        exit(errno);
+    }
+}
+
+// Stores a copy of `name` as the name of `pid`, replacing any name stored before.
+// Returns OK, or the error from find_index() when `pid` is not in the map (in which
+// case nothing is changed).
+HashError set_name(pid_t pid, char *name, HashMap map)
+{
+    size_t slot;
+    const HashError outcome = find_index(pid, map, &slot);
+    if (outcome != OK) {
+        return outcome;
+    }
+
+    // init_string() releases the previous buffer, if any, before allocating a new one
+    struct String *entry = &map->names[slot];
+    init_string(entry, 64);
+    append_str(name, strlen(name), entry);
+    return outcome;
+}
+
+// Adds `pid` with the given `status`; if `pid` is already stored, only its status is
+// overwritten. The table is grown first once a quarter of its slots are in use.
+// Always returns OK.
+HashError insert(pid_t pid, Status status, HashMap map)
+{
+    if (map->used >= map->size / 4) {
+        resize_hashmap(map);
+    }
+
+    size_t slot = pid % map->size;
+    int probes = 0;
+
+    // walk forward until an empty slot (key 0) or the key itself turns up; each step is
+    // the next square (1, 4, 9, ...) further than the last
+    for (int step = 1; map->keys[slot] != 0; step++) {
+        if (map->keys[slot] == pid) {
+            map->status[slot] = status;
+            return OK;
+        }
+        probes++;
+        DebugStats.steps++;
+        slot = (slot + step * step) % map->size;
+    }
+
+    // count the insertion as a collision if its home slot was taken
+    if (probes) DebugStats.collisions++;
+
+    map->keys[slot] = pid;
+    map->status[slot] = status;
+    map->used++;
+    return OK;
+}
+
+// Finds the slot holding `pid`.
+// On success stores the slot index in `*result` and returns OK. Returns KEY_NOT_FOUND,
+// leaving `*result` untouched, when `pid` is not stored, when `pid` is 0, or when the
+// map has no slots.
+// The search starts at the key's home slot and scans every slot linearly, wrapping
+// around, so a lookup of a missing key visits the whole table.
+HashError find_index(pid_t pid, HashMap map, size_t *result)
+{
+    // 0 is the marker for an unused slot, so it can never be a stored key; a map with
+    // size 0 (for instance after destroy()) would make the modulo below divide by zero
+    if (pid == 0 || map->size == 0) {
+        return KEY_NOT_FOUND;
+    }
+
+    size_t start = pid % map->size;
+    size_t index = start;
+    while (map->keys[index] != pid) {
+        index = (index + 1) % map->size;
+        if (index == start) {
+            return KEY_NOT_FOUND;
+        }
+    }
+    *result = index;
+    return OK;
+}
+
+// Releases every name buffer and the three arrays of `map`, leaving it empty and
+// unallocated (size 0, used 0, all array pointers NULL).
+// Returns NULL_PTR_IN_MAP without freeing anything if any of the array pointers is NULL,
+// which is also the result of calling destroy() a second time; returns OK otherwise.
+HashError destroy(HashMap map)
+{
+    if (!(map->keys && map->status && map->names)) {
+        return NULL_PTR_IN_MAP;
+    }
+    // the names have to be released while map->size still holds the slot count
+    for (size_t i = 0; i < map->size; i++) {
+        free(map->names[i].data); // free(NULL) is a no-op, so unused slots are fine
+    }
+    free(map->keys);
+    free(map->status);
+    free(map->names);
+    // clear the pointers so that a repeated destroy() is rejected by the check above
+    // instead of freeing the same memory twice
+    map->keys = NULL;
+    map->status = NULL;
+    map->names = NULL;
+    map->size = 0;
+    map->used = 0;
+    return OK;
+}
+
+// Doubles the number of slots and re-inserts every stored pid, with its status and
+// name, into the new arrays. The old arrays and name buffers are freed afterwards.
+// Prints an error and exits the program if an allocation fails.
+void resize_hashmap(HashMap map)
+{
+    // hold on to the old storage while its contents are moved over
+    const size_t old_size = map->size;
+    pid_t *old_keys = map->keys;
+    Status *old_status = map->status;
+    struct String *old_names = map->names;
+
+    map->size *= 2;
+    map->used = 0; // insert() counts the entries again as they are re-added
+    map->keys = calloc(map->size, sizeof(pid_t));
+    map->status = calloc(map->size, sizeof(Status));
+    map->names = calloc(map->size, sizeof(struct String));
+    if (!(map->keys && map->status && map->names)) {
+        perror("calloc() error");
+        exit(errno);
+    }
+
+    for (int slot = 0; slot < old_size; slot++) {
+        const pid_t key = old_keys[slot];
+        if (key) {
+            insert(key, old_status[slot], map);
+            char *old_name = old_names[slot].data;
+            if (old_name) {
+                // set_name() stores its own copy, so the old buffer can go right away
+                set_name(key, old_name, map);
+                free(old_name);
+            }
+        }
+    }
+
+    free(old_keys);
+    free(old_status);
+    free(old_names);
+}
+
+// Looks up the name recorded for `pid`.
+// On success `*name` receives a copy of the map's String for that pid. The copy shares
+// the map's `data` buffer: the caller must not free it, and it stops being valid once
+// the entry is renamed or removed or the map is resized or destroyed.
+// `*name` is left untouched when `pid` is not found.
+// Returns OK, or the error from find_index().
+HashError get_name(pid_t pid, HashMap map, struct String *name)
+{
+    size_t index;
+    HashError err = find_index(pid, map, &index);
+    if (err == OK && name) {
+        // write through the pointer: assigning to `name` itself would only change this
+        // function's local copy of it and the caller would never see the result
+        *name = map->names[index];
+    }
+    return err;
+}
+
+// Copies the status stored for `pid` into `*result`.
+// Returns OK, or the error from find_index() (leaving `*result` untouched) when `pid`
+// is not in the map.
+HashError get_status(pid_t pid, HashMap map, size_t *result)
+{
+    size_t slot = 0;
+    const HashError outcome = find_index(pid, map, &slot);
+    if (outcome != OK) {
+        return outcome;
+    }
+    *result = map->status[slot];
+    return outcome;
+}
+
+// Adds one to the status value stored for `pid`, unless it has already reached
+// __INT_MAX__. Returns OK, or the error from find_index() when `pid` is not in the map.
+HashError increment(pid_t pid, HashMap map)
+{
+    size_t slot = 0;
+    const HashError outcome = find_index(pid, map, &slot);
+    if (outcome != OK) {
+        return outcome;
+    }
+    if (map->status[slot] < __INT_MAX__) {
+        map->status[slot]++;
+    }
+    return outcome;
+}
+
+// Subtracts one from the status value stored for `pid`, unless it is not greater than 0.
+// Returns OK, or the error from find_index() when `pid` is not in the map.
+HashError decrement(pid_t pid, HashMap map)
+{
+    size_t slot = 0;
+    const HashError outcome = find_index(pid, map, &slot);
+    if (outcome != OK) {
+        return outcome;
+    }
+    if (map->status[slot] > 0) {
+        map->status[slot]--;
+    }
+    return outcome;
+}
+
+// Removes `pid` from the map: its slot is marked unused (key 0), its status is reset,
+// its name buffer is freed and the used count drops by one.
+// Returns OK, or the error from find_index() when `pid` is not in the map.
+HashError remove_pid(pid_t pid, HashMap map)
+{
+    size_t slot = 0;
+    const HashError outcome = find_index(pid, map, &slot);
+    if (outcome != OK) {
+        return outcome;
+    }
+
+    map->keys[slot] = 0;
+    map->status[slot] = 0;
+    // free(NULL) is a no-op, so a slot that never had a name needs no special case
+    free(map->names[slot].data);
+    map->names[slot] = (struct String){0};
+    map->used--;
+    return outcome;
+}
diff --git a/src/hashmap.h b/src/hashmap.h
new file mode 100644
index 0000000..920ebcb
--- /dev/null
+++ b/src/hashmap.h
@@ -0,0 +1,47 @@
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+// pid_t is used throughout this header; include its definition here so the header
+// works no matter what the including file pulled in before it
+#include <sys/types.h>
+
+#include "strings.h"
+
+// number of slots a map starts out with
+#define INITIAL_SIZE 1024
+
+// tracks whether the process is entering or exiting the current syscall
+typedef enum {
+    ENTRY = 0,
+    EXIT = 1,
+} Status;
+
+// result codes returned by the map functions
+typedef enum {
+    OK = 0,
+    KEY_NOT_FOUND,
+    NULL_PTR_IN_MAP,
+} HashError;
+
+// Open-addressing map from pid to a Status and a name, stored as three parallel arrays
+// indexed by slot.
+struct HashMap {
+    size_t size;          // number of slots in each array
+    size_t used;          // number of slots currently holding a key
+    pid_t *keys;          // stored pids; 0 marks an unused slot
+    Status *status;       // status value of the pid in the same slot
+    struct String *names; // name of the pid in the same slot; data is NULL if never set
+};
+// NOTE: HashMap is a pointer type, so functions taking a HashMap modify the caller's map
+typedef struct HashMap* HashMap;
+
+void init_hashmap(HashMap map);
+void resize_hashmap(HashMap map);
+
+HashError destroy(HashMap map);
+HashError find_index(pid_t key, HashMap map, size_t *result);
+HashError insert(pid_t key, Status status, HashMap map);
+HashError get_status(pid_t key, HashMap map, size_t *result);
+HashError remove_pid(pid_t key, HashMap map);
+HashError increment(pid_t key, HashMap map);
+HashError decrement(pid_t key, HashMap map);
+HashError set_name(pid_t key, char *name, HashMap map);
+HashError get_name(pid_t key, HashMap map, struct String *name);
+
+// probe statistics updated by insert()
+// NOTE(review): this defines an object in a header, so every file that includes it
+// carries its own tentative definition; that fails to link when the compiler runs with
+// -fno-common. Consider declaring it `extern` here and defining it once in hashmap.c.
+struct {
+    int collisions; // insertions whose home slot was already taken
+    int steps;      // total number of probe steps taken by insertions
+} DebugStats;
+
+#endif /* !HASHMAP_H */
diff --git a/src/strings.c b/src/strings.c
new file mode 100644
index 0000000..baa433f
--- /dev/null
+++ b/src/strings.c
@@ -0,0 +1,54 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "strings.h"
+
+// Makes `str` an empty, NUL-terminated string backed by a freshly allocated buffer of
+// `initial_capacity` bytes (at least 1), releasing whatever buffer it held before.
+// A String that has never been initialized must be zeroed before being handed to this
+// function, because its `data` pointer is passed to free().
+// Prints an error and exits the program if malloc() fails.
+void init_string(struct String *str, size_t initial_capacity)
+{
+    // the terminator written below needs one byte of its own
+    if (initial_capacity == 0) {
+        initial_capacity = 1;
+    }
+    free(str->data); // free(NULL) is a no-op for a zeroed String
+    str->data = malloc(initial_capacity);
+    if (str->data == NULL) {
+        perror("malloc() failed");
+        exit(errno);
+    }
+    str->cap = initial_capacity;
+    str->len = 0;
+    str->data[0] = '\0';
+}
+
+// Doubles the capacity of `str`, keeping its contents.
+// Prints an error and exits the program if realloc() fails.
+void resize_string(struct String *str)
+{
+    const size_t doubled = str->cap * 2;
+    char *grown = realloc((void*)str->data, doubled);
+    str->data = grown;
+    if (grown == NULL) {
+        perror("realloc() failed");
+        exit(errno);
+    }
+    str->cap = doubled;
+}
+
+// Appends the first `len` bytes of `input_str` to `str` and re-terminates it, growing
+// the buffer as often as needed.
+//   input_str: bytes to append; at least `len` of them must be readable
+//   len:       number of bytes to append (callers in this project pass strlen(input_str))
+//   str:       destination; must have been set up with init_string()
+void append_str(char *input_str, size_t len, struct String *str)
+{
+    while (str->len + len >= str->cap - 1) {
+        resize_string(str);
+    }
+    // copy exactly the `len` bytes that room was made for; strcpy() would keep going to
+    // the source's terminator and overrun the buffer whenever that lies beyond `len`
+    memcpy(str->data + str->len, input_str, len);
+    str->len += len;
+    str->data[str->len] = '\0';
+}
+
+// Appends the single character `c` to `str` and re-terminates it, doubling the buffer
+// first if the character and the terminator would not fit.
+void append_char(char c, struct String *str)
+{
+    const size_t last_usable = str->cap - 1;
+    if (str->len >= last_usable) {
+        resize_string(str);
+    }
+    // take the address only after a possible resize, which may move the buffer
+    char *tail = str->data + str->len;
+    tail[0] = c;
+    str->len++;
+    tail[1] = '\0';
+}
+
+// Removes the last character of `str`, if it has one, and re-terminates the string.
+// An empty string is left unchanged.
+void delete_char(struct String *str)
+{
+    // len is unsigned: decrementing it at 0 would wrap around and make the write below
+    // land far outside the buffer
+    if (str->len == 0) {
+        return;
+    }
+    str->len -= 1;
+    str->data[str->len] = '\0';
+}
diff --git a/src/strings.h b/src/strings.h
new file mode 100644
index 0000000..05157ec
--- /dev/null
+++ b/src/strings.h
@@ -0,0 +1,17 @@
+#ifndef STRINGS_H
+#define STRINGS_H
+
+#include <stdio.h>
+
+// Growable, NUL-terminated character buffer.
+// A new String must be zero-initialized and then set up with init_string().
+struct String {
+ // number of bytes allocated for data
+ size_t cap;
+ // number of characters stored, not counting the terminating NUL
+ size_t len;
+ // heap buffer holding the characters; released by the owner with free()
+ char* data;
+};
+
+// Gives str a fresh, empty buffer of initial_capacity bytes, freeing any previous one.
+void init_string(struct String *str, size_t initial_capacity);
+// Appends input_str (len characters long) to str, growing the buffer as needed.
+void append_str(char *input_str, size_t len, struct String *str);
+// Appends the single character c to str, growing the buffer as needed.
+void append_char(char c, struct String *str);
+// Removes the last character of str.
+void delete_char(struct String *str);
+
+#endif /* !STRINGS_H */
diff --git a/src/utilities.c b/src/utilities.c
new file mode 100644
index 0000000..c1a04ed
--- /dev/null
+++ b/src/utilities.c
@@ -0,0 +1,230 @@
+#include <assert.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+
+#include "string.h"
+#include "whatfiles.h"
+
+// option string handed to getopt() in parse_flags(): -a, -d and -s are plain switches,
+// while -o and -p each take an argument (marked by the trailing colon)
+char *FLAGS = "ado:p:s";
+
+// Appends one log line describing a file access to `result`, in the form
+//   mode: <mode>, file: <file>, syscall: <name>, PID: <pid>, process: <process>\n
+//   mode:         detected access mode; if it is an empty string, `reg` is printed in hex instead
+//   syscall_name: name of the syscall to show
+//   reg:          raw register value shown when no mode was detected
+//   pid:          task that made the syscall; also used to look up the process name in `map`
+//   filename:     path the syscall refers to
+//   result:       initialized String the line is appended to
+//   map:          source of the process name; "[unknown]" is printed when it has none
+void build_output(
+    char *mode,
+    char *syscall_name,
+    unsigned long long reg,
+    pid_t pid,
+    struct String *filename,
+    struct String *result,
+    HashMap map
+)
+{
+    size_t index;
+    HashError err = find_index(pid, map, &index); // find_index() returns OK = 0 or NOT_FOUND = 1
+    struct String *proc_string = err ? NULL : &map->names[index];
+
+    // "0x" plus up to 16 hex digits and the terminator need 19 bytes, which MODE_LEN on
+    // its own is not known to provide; snprintf() bounds the write in either case
+    char mode_str[MODE_LEN + 20] = {0};
+    // grab detected mode or the raw number
+    if (*mode) {
+        snprintf(mode_str, sizeof mode_str, "%5s", mode);
+    } else {
+        snprintf(mode_str, sizeof mode_str, "0x%llX", reg);
+    }
+    append_str("mode: ", strlen("mode: "), result);
+    append_str(mode_str, strlen(mode_str), result);
+
+    append_str(", file: ", strlen(", file: "), result);
+    append_str(filename->data, strlen(filename->data), result);
+
+    append_str(", syscall: ", strlen(", syscall: "), result);
+    append_str(syscall_name, strlen(syscall_name), result);
+
+    // ", PID: " plus a signed 32-bit number and the terminator fit comfortably in 32 bytes
+    char pid_str[32] = {0};
+    snprintf(pid_str, sizeof pid_str, ", PID: %d", pid);
+    append_str(pid_str, strlen(pid_str), result);
+
+    // make sure proc_string points to a `struct String`, that the String has been initialized, and that the first char isn't just a null byte
+    char *proc_name = proc_string && proc_string->data && *proc_string->data
+        ? proc_string->data
+        : "[unknown]";
+    append_str(", process: ", strlen(", process: "), result);
+    append_str(proc_name, strlen(proc_name), result);
+    append_str("\n", strlen("\n"), result);
+}
+
+// Translates open(2)-style flags into a short label written to `mode`:
+// "create" if O_CREAT is set, otherwise "read", "write" or "rd/wr" according to the
+// access mode. `mode` is left untouched if the access-mode bits hold none of the three
+// defined values and O_CREAT is not set.
+//   m:    flags argument of the syscall
+//   mode: destination buffer of at least MODE_LEN bytes
+void get_mode(unsigned long long m, char *mode)
+{
+    const char *label = NULL;
+
+    // the access mode is a small number stored in the low bits, not a set of independent
+    // flags, and O_RDONLY is 0; it therefore has to be masked out and compared, since
+    // testing `m & O_RDONLY` can never be true
+    switch (m & O_ACCMODE) {
+        case O_RDONLY:
+            label = "read";
+            break;
+        case O_WRONLY:
+            label = "write";
+            break;
+        case O_RDWR:
+            label = "rd/wr";
+            break;
+        default:
+            break;
+    }
+    // creation takes precedence over the plain access mode
+    if (m & O_CREAT) {
+        label = "create";
+    }
+
+    if (label) {
+        assert(strlen(label) < MODE_LEN);
+        strcpy(mode, label);
+    }
+}
+
+// Reads /proc/[current_pid]/cmdline into the caller's buffer.
+//   current_pid: process whose command line is wanted
+//   command:     destination buffer; receives at most len - 1 bytes plus a terminating NUL
+//   len:         size of `command` in bytes
+// The buffer is left untouched if the file cannot be opened or if `command` is NULL or
+// `len` is 0. The file separates arguments with NUL bytes, so read as a C string the
+// result ends after the first argument.
+void get_command(pid_t current_pid, char *command, size_t len)
+{
+    if (!command || len == 0) {
+        return;
+    }
+
+    char proc_str[64] = {0};
+    snprintf(proc_str, sizeof proc_str, "/proc/%d/cmdline", current_pid);
+    FILE *proc_file = fopen(proc_str, "r");
+    if (!proc_file) {
+        return; // nothing was opened, so there is nothing to close either
+    }
+
+    // read straight into the caller's buffer; getline() would be free to realloc() it,
+    // which is only valid for malloc()ed memory and would not reach the caller anyway
+    size_t got = fread(command, 1, len - 1, proc_file);
+    command[got] = '\0';
+    fclose(proc_file);
+}
+
+// Copies a NUL-terminated string out of the traced task `pid`, starting at the address
+// held in `p_reg`, and appends it to `str` one byte at a time.
+// Returns 1 once the terminating NUL has been reached, or 0 as soon as a
+// PTRACE_PEEKDATA call yields -1 (bytes appended up to that point stay in `str`).
+bool peek_filename(pid_t pid, unsigned long long p_reg, struct String *str)
+{
+    // the tracee's memory is read one long-sized word at a time
+    for (long *cursor = (long*)p_reg; ; cursor++) {
+        long word = ptrace(PTRACE_PEEKDATA, pid, (void*)cursor, 0);
+        if (word == -1) return 0;
+
+        // take the word apart from its lowest byte upwards
+        for (int i = 0; i < sizeof(word); i++) {
+            char current_byte = (char)(word >> (8*i) & 0xFF);
+            if (!current_byte) {
+                return 1; // end of the string
+            }
+            append_char(current_byte, str);
+        }
+    }
+}
+
+// void toggle_status(pid_t current_pid, HashMap map)
+// {
+// size_t index;
+// HashError err = find_index(current_pid, map, &index);
+// HASH_ERR_CHECK(err, "index not found in map when trying to change syscall status");
+// // if (map->status[index]) decrement(current_pid, map);
+// // else increment(current_pid, map);
+// if (map->status[index] == ENTRY) err = insert(current_pid, EXIT, map);
+// else if (map->status[index] == EXIT) err = insert(current_pid, ENTRY, map);
+// else SYS_ERR("syscall status not 0 or 1");
+// }
+
+// Tells whether the current ptrace stop is the exit from a syscall rather than the entry
+// into one, judged by whether the same PID reported the same syscall at the previous stop.
+// Can return false positives, if multiple threads of a single process/PID enter the same
+// syscall before either exits; can also return false negatives, if multiple threads of a
+// single process/PID enter different syscalls before either exits.
+bool is_exiting(pid_t pid, unsigned long long syscall)
+{
+    if (pid != LastSyscall.pid) {
+        return 0;
+    }
+    return syscall == LastSyscall.syscall;
+}
+
+// Returns the index within argv of the first argument that belongs neither to whatfiles'
+// own options nor to the value of a -o or -p option, i.e. where the user's command
+// begins. When attaching to a process there is no command, and every argument is an
+// option or an option value; argc is returned in that case.
+int discover_flags(int argc, char *argv[])
+{
+    int i;
+    for (i = 1; i < argc; i++) {
+        const char *current_arg = argv[i];
+        const char *last_arg = argv[i-1];
+        // an argument can be the empty string, which has no last character to look at
+        size_t last_len = strlen(last_arg);
+        char last_char = last_len ? last_arg[last_len-1] : '\0';
+        if (*current_arg == '-') {
+            continue; // in an option
+        }
+        if (*last_arg == '-' && (last_char == 'o' || last_char == 'p')) {
+            continue; // not in an option, but in argument to option
+        }
+        return i; // if still here, we're at the user's command
+    }
+    return i;
+}
+
+// Parses whatfiles' own command-line options with getopt().
+//   pid:             receives the PID given with -p
+//   stdout_override: set to true by -s
+//   attach:          set to true by -p
+// -d switches the global Debug flag on, and -a calls about().
+// Returns the argument of -o, or NULL if -o was not given.
+// Exits the program on a PID that is not a positive integer; unknown options and
+// missing option arguments end in usage().
+char *parse_flags(int argc, char *argv[], pid_t *pid, bool *stdout_override, bool *attach)
+{
+    int c;
+    char *filename = NULL;
+    while ((c = getopt(argc, argv, FLAGS)) != -1) {
+        switch(c)
+        {
+        case 'a':
+            about();
+            break;
+        case 'd':
+            Debug = 1;
+            break;
+        case 'o':
+            filename = optarg;
+            break;
+        case 'p': {
+            *attach = true;
+            // strtol() reports where parsing stopped, so input such as "12abc" can be
+            // rejected instead of being read as 12
+            char *end = NULL;
+            long value = strtol(optarg, &end, 10);
+            // reject empty input, trailing characters, non-positive numbers and numbers
+            // that do not survive the conversion to pid_t
+            if (end == optarg || *end != '\0' || value < 1 || (long)(pid_t)value != value) {
+                fprintf(stderr, "Bad PID %s given, must be integer.\n", optarg);
+                exit(1);
+            }
+            *pid = (pid_t)value;
+            break;
+        }
+        case 's':
+            *stdout_override = true;
+            break;
+        case '?':
+            if (optopt == 'o') {
+                fprintf(stderr, "Option -o requires the desired location of the output file as argument.\n");
+            } else if (optopt == 'p') {
+                fprintf(stderr, "Option -p requires the PID of the process to be tracked as argument.\n");
+            } else if (isprint(optopt)) {
+                fprintf(stderr, "Unknown option `-%c'.\n", optopt);
+            } else {
+                fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
+            }
+            usage();
+            break;
+        default:
+            usage();
+            break;
+        }
+    }
+    return filename;
+}
+
+// Prints the usage text to stderr and exits with status 1.
+void usage()
+{
+    static const char *const lines[] = {
+        "\n ======== Usage ========\n",
+        "Whatfiles can be used to log what files a process accesses, and in what mode.\n",
+        "To track the entire lifetime of a program, use it (and whatever arguments) after whatfiles flags.\n",
+        "You can also attach to a currently-running program, though this requires root privileges.\n",
+        "\n ======== Flags ========\n",
+        " -o ./output.log : specify log file location\n",
+        " -p [PID] : attach to currently running process (requires sudo)\n",
+        " -s : output to stdout rather than log file\n",
+        " -d : include debug output\n",
+        " -a : print about/license\n",
+        "\n ======== Examples ========\n",
+        "Basic use, write what files the calendar uses to log:\n",
+        " $ whatfiles cal\n",
+        "Run `ls`, include debug output, and log to stdout:\n",
+        " $ whatfiles -ds ls -lah /var/log\n",
+        "Attach to currently open process with PID 1234:\n",
+        " $ sudo whatfiles -p 1234\n",
+        "Watch what files an installation creates and name the log:\n",
+        " $ sudo whatfiles -o ./firefox.log apt install firefox\n",
+    };
+    // none of the lines contains a conversion specification, so they can be written as-is
+    for (size_t i = 0; i < sizeof lines / sizeof lines[0]; i++) {
+        fputs(lines[i], stderr);
+    }
+    exit(1);
+}
+
+// Prints the project URL, copyright line and license notice to stdout, followed by a
+// newline, and exits with status 0.
+void about()
+{
+    static const char about_message[] =
+"https://github.com/spieglt/whatfiles\n"
+"Copyright (C) 2020 Theron Spiegl. All rights reserved.\n\n"
+
+"Whatfiles is a Linux utility used to log what files another program accesses and in what mode, "
+"as well as that program's child processes and threads.\n\n"
+
+" This program is free software: you can redistribute it and/or modify\n"
+" it under the terms of the GNU General Public License as published by\n"
+" the Free Software Foundation, either version 3 of the License, or\n"
+" (at your option) any later version.\n\n"
+" This program is distributed in the hope that it will be useful,\n"
+" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
+" GNU General Public License for more details.\n\n"
+" You should have received a copy of the GNU General Public License\n"
+" along with this program. If not, see <https://www.gnu.org/licenses/>.\n";
+    // puts() writes the text followed by a single newline
+    puts(about_message);
+    exit(0);
+}
diff --git a/src/whatfiles.c b/src/whatfiles.c
new file mode 100644
index 0000000..bc05408
--- /dev/null
+++ b/src/whatfiles.c
@@ -0,0 +1,342 @@
+#include <dirent.h>
+#include <signal.h>
+#include <stddef.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+
+#include "whatfiles.h"
+#include "hashmap.h"
+#include "strings.h"
+
+// stream handle, NULL until something assigns it
+// NOTE(review): presumably the log file written through OUTPUT() - that macro is not visible here, confirm
+FILE *Handle = (FILE*)NULL;
+// debug switch, off by default; parse_flags() sets it to 1 for the -d option
+int Debug = 0;
+
+// Looks at the syscall the task is stopped in and logs it if it is one we are interested in.
+//   current_pid: task that is stopped at the syscall
+//   regs:        its registers; orig_rax holds the syscall number and the arguments are
+//                in rdi, rsi, rdx and r10, in that order
+//   map:         known tasks; execve updates the task's name here, and build_output()
+//                reads the name from it
+// creat/open/openat and unlink/unlinkat produce one output line each; execve, fork and
+// clone only produce debug output.
+void check_syscall(pid_t current_pid, struct user_regs_struct regs, HashMap map)
+{
+    struct String filename = {0};
+    struct String output = {0};
+    init_string(&filename, 64);
+    init_string(&output, 64);
+    char mode[MODE_LEN] = {0};
+
+    pid_t parent_tid, child_tid;
+    unsigned long flags;
+    unsigned long newsp;
+
+    size_t index;
+    HashError err = find_index(current_pid, map, &index);
+    if (err) DEBUG("unknown pid %d, syscall %lld\n", current_pid, regs.orig_rax);
+
+    switch (regs.orig_rax)
+    {
+    case SYS_execve:
+        DEBUG("PID %d exec'd. orig_rax: %lld, rax: %lld\n", current_pid, regs.orig_rax, regs.rax);
+        if (peek_filename(current_pid, regs.rdi, &filename)) {
+            DEBUG("associated process %d with name \"%s\"\n", current_pid, filename.data);
+            set_name(current_pid, filename.data, map);
+        }
+        break;
+    case SYS_fork:
+        DEBUG("PID %d forked. orig_rax: %lld, rax: %lld\n", current_pid, regs.orig_rax, regs.rax);
+        break;
+    case SYS_clone:
+        flags = regs.rdi;
+        newsp = regs.rsi;
+        parent_tid = ptrace(PTRACE_PEEKDATA, current_pid, (void*)regs.rdx, 0);
+        child_tid = ptrace(PTRACE_PEEKDATA, current_pid, (void*)regs.r10, 0);
+        // both values carry a 0x prefix, so print them in hex
+        DEBUG("PID %d cloned. orig_rax: %lld, rax: %lld, flags: 0x%lx, newsp: 0x%lx, parent pid: %d, child pid: %d\n",
+            current_pid, regs.orig_rax, regs.rax, flags, newsp, parent_tid, child_tid);
+        break;
+    case SYS_creat:
+        // creat(path, mode) has no flags argument: it always creates, and its second
+        // argument holds permission bits that must not be decoded as open flags
+        peek_filename(current_pid, regs.rdi, &filename);
+        build_output("create", "creat()", regs.rsi, current_pid, &filename, &output, map);
+        OUTPUT("%s", output.data);
+        break;
+    case SYS_open:
+        // open(path, flags, mode): the flags are the second argument, in rsi
+        peek_filename(current_pid, regs.rdi, &filename);
+        get_mode(regs.rsi, mode);
+        build_output(mode, "open()", regs.rsi, current_pid, &filename, &output, map);
+        OUTPUT("%s", output.data);
+        break;
+    case SYS_openat:
+        // openat(dirfd, path, flags, mode): the flags are the third argument, in rdx
+        peek_filename(current_pid, regs.rsi, &filename);
+        get_mode(regs.rdx, mode);
+        build_output(mode, "openat()", regs.rdx, current_pid, &filename, &output, map);
+        OUTPUT("%s", output.data);
+        break;
+    case SYS_unlink:
+        peek_filename(current_pid, regs.rdi, &filename);
+        build_output("delete", "unlink()", 0, current_pid, &filename, &output, map);
+        OUTPUT("%s", output.data);
+        break;
+    case SYS_unlinkat:
+        peek_filename(current_pid, regs.rsi, &filename);
+        build_output("delete", "unlinkat()", 0, current_pid, &filename, &output, map);
+        OUTPUT("%s", output.data);
+        break;
+    default:
+        break;
+    }
+    free(filename.data);
+    free(output.data);
+}
+
+// Notices new processes and threads created by forks, clones or vforks and inserts them,
+// together with their names, into the hashmap; also re-registers a task after an exec.
+//   current_pid: task that reported the stop
+//   proc_status: wait status of that stop; its upper bits identify the ptrace event
+//   map:         map of known tasks
+void check_ptrace_event(pid_t current_pid, int proc_status, HashMap map)
+{
+    struct String new_proc = {0};
+    init_string(&new_proc, 128);
+
+    // for fork, clone and vfork events the event message is the ID of the new task
+    unsigned long ptrace_event;
+    long res = ptrace(PTRACE_GETEVENTMSG, current_pid, (char*)0, &ptrace_event);
+    if (res == -1L) SYS_ERR("ptrace() failed to get event msg");
+
+    const int event = proc_status >> 8;
+    int new_task = 0;
+
+    if (event == (SIGTRAP | (PTRACE_EVENT_FORK << 8))) {
+        DEBUG("caught PTRACE_EVENT_FORK from pid %d. new pid: %ld\n", current_pid, ptrace_event);
+        new_task = 1;
+    } else if (event == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
+        DEBUG("caught PTRACE_EVENT_CLONE from pid %d. new pid: %ld\n", current_pid, ptrace_event);
+        new_task = 1;
+    } else if (event == (SIGTRAP | (PTRACE_EVENT_VFORK << 8))) {
+        DEBUG("caught PTRACE_EVENT_VFORK from pid %d. new pid: %ld\n", current_pid, ptrace_event);
+        new_task = 1;
+    } else if (event == (SIGTRAP | (PTRACE_EVENT_EXEC << 8))) {
+        DEBUG("caught PTRACE_EVENT_EXEC from pid %d. former pid: %ld\n", current_pid, ptrace_event);
+        // Per the ptrace man page ("execve(2) under ptrace"): when a thread of a
+        // multithreaded process calls execve(2), the kernel destroys the other threads
+        // and resets the thread ID of the execing thread to the process ID before the
+        // PTRACE_EVENT_EXEC stop is reported. By the time the tracer sees this event the
+        // ID has already changed, so the task is recorded under current_pid rather than
+        // under the former ID carried in the event message.
+        insert(current_pid, ENTRY, map);
+    }
+
+    // the three creation events are handled alike: record the new task and its name
+    if (new_task) {
+        const pid_t created = (pid_t)ptrace_event;
+        insert(created, ENTRY, map);
+        read_task(created, &new_proc);
+        set_name(created, new_proc.data, map);
+    }
+
+    free(new_proc.data);
+}
+
+void step_syscall(pid_t current_pid, int proc_status, HashMap map)
+{
+ long res;
+ struct user_regs_struct regs;
+
+ // get current register values
+ do {
+ res = ptrace(PTRACE_GETREGS, current_pid, &regs, &regs);
+ } while (res == -1L && errno == ESRCH);
+ if (res == -1L && errno != ESRCH) SYS_ERR("ptrace() failed to get registers");
+
+ // If it's the same PID performing the same syscall (has same orig_rax) as last time, we don't care. Just means it's exiting the syscall.
+ // Might want to keep for debug mode? This might result in missing some output, in the case where two threads of the same process enter the same syscall before either exits,
+ // because they will both return the same PID to wait() when given SIGTRAP as part of the syscall-enter-exit loop. Might also result in double-printing,
+ // because if two threads (that report the same PID) enter two different syscalls before either exits, the "last" syscall for the PID won't be the entry by that thread.
+ if (!is_exiting(current_pid, regs.orig_rax) /*|| Debug*/) {
+ check_syscall(current_pid, regs, map);
+ }
+ LastSyscall.pid = current_pid;
+ LastSyscall.syscall = regs.orig_rax;
+ check_ptrace_event(current_pid, proc_status, map);
+ // continue, catching