diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-11-30 23:18:58 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-30 23:18:58 +0200 |
commit | 0705251a8a204d565017a2f29f157ffae39a5932 (patch) | |
tree | c26b173cb1565df10042fdcb212c5888f613314e | |
parent | 3362c4e405c6a170b88c1e2040690278f4cfe58a (diff) |
log2journal improvements 4 (#16510)
* fix codeql warnings
* work to support multiple types of value rewrites
* support looking up variable names in all specified values
* added more unit tests for the current functionality
* satisfy coverity
* fixed quotes
* test nginx-combined.yaml too
* removed deletions - injections can now do the same
* remove empty line
25 files changed, 1524 insertions, 1302 deletions
diff --git a/Makefile.am b/Makefile.am index aff19318ae..398c6fb20c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -357,7 +357,6 @@ LOG2JOURNAL_FILES = \ collectors/log2journal/log2journal-logfmt.c \ collectors/log2journal/log2journal-pcre2.c \ collectors/log2journal/log2journal-params.c \ - collectors/log2journal/log2journal-duplicate.c \ collectors/log2journal/log2journal-inject.c \ collectors/log2journal/log2journal-pattern.c \ collectors/log2journal/log2journal-replace.c \ diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md index 22b87f2a49..2747142d68 100644 --- a/collectors/log2journal/README.md +++ b/collectors/log2journal/README.md @@ -140,10 +140,10 @@ Avoid setting priority to 0 (`LOG_EMERG`), because these will be on your termina To set the PRIORITY field in the output, we can use `NGINX_STATUS` fields. We need a copy of it, which we will alter later. -We can instruct `log2journal` to duplicate `NGINX_STATUS`, like this: `log2journal --duplicate=PRIORITY=NGINX_STATUS`. Let's try it: +We can instruct `log2journal` to duplicate `NGINX_STATUS`, like this: `log2journal --inject 'PRIORITY=${NGINX_STATUS}'`. 
Let's try it: ```bash -# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=PRIORITY=NGINX_STATUS +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' MESSAGE=GET /index.html HTTP/1.1 NGINX_BODY_BYTES_SENT=4172 NGINX_HTTP_REFERER=- @@ -161,10 +161,10 @@ NGINX_URL=/index.html ``` -Now that we have the `PRIORITY` field equal to the `NGINX_STATUS`, we can use instruct `log2journal` to change it to a valid priority, by appending: `--rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6`. These rewrite commands say to match everything that starts with `5` and replace it with priority `3` (error) and everything else with priority `6` (info). Let's see it: +Now that we have the `PRIORITY` field equal to the `NGINX_STATUS`, we can instruct `log2journal` to change it to a valid priority, by appending: `--rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6'`. These rewrite commands say to match everything that starts with `5` and replace it with priority `3` (error) and everything else with priority `6` (info). 
Let's see it: ```bash -# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6 +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6' MESSAGE=GET /index.html HTTP/1.1 NGINX_BODY_BYTES_SENT=4172 NGINX_HTTP_REFERER=- @@ -182,12 +182,12 @@ NGINX_URL=/index.html ``` -Similarly, we could duplicate `NGINX_URL` to `NGINX_ENDPOINT` and then process it with sed to remove any query string, or replace IDs in the URL path with constant names, thus giving us uniform endpoints independently of the parameters. +Similarly, we could duplicate `${NGINX_URL}` to `NGINX_ENDPOINT` and then process it to remove any query string, or replace IDs in the URL path with constant names, thus giving us uniform endpoints independently of the parameters. 
-To complete the example, we can also inject a `SYSLOG_IDENTIFIER` with `log2journal`, using `--inject=SYSLOG_IDENTIFIER=nginx-log`, like this: +To complete the example, we can also inject a `SYSLOG_IDENTIFIER` with `log2journal`, using `--inject SYSLOG_IDENTIFIER=nginx-log`, like this: ```bash -# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --inject=SYSLOG_IDENTIFIER=nginx -rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6 +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --inject 'SYSLOG_IDENTIFIER=nginx' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6' MESSAGE=GET /index.html HTTP/1.1 NGINX_BODY_BYTES_SENT=4172 NGINX_HTTP_REFERER=- @@ -210,7 +210,7 @@ Now the message is ready to be sent to a systemd-journal. 
For this we use `syste ```bash -# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --inject=SYSLOG_IDENTIFIER=nginx -rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6 | systemd-cat-native +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --inject 'SYSLOG_IDENTIFIER=nginx' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6' | systemd-cat-native # no output # let's find the message @@ -296,24 +296,24 @@ pattern='(?x) # Enable PCRE2 extended mode tail -n $last -F /var/log/nginx/*access.log \ | log2journal "${pattern}" \ - --filename-key=NGINX_LOG_FILE \ - --duplicate=PRIORITY=NGINX_STATUS \ - --duplicate=NGINX_STATUS_FAMILY=NGINX_STATUS \ - --inject=SYSLOG_IDENTIFIER=nginx-log \ - --unmatched-key=MESSAGE \ - --inject-unmatched=PRIORITY=1 \ - --rewrite='PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6' \ - --rewrite='NGINX_STATUS_FAMILY=/^(?<first_digit>[0-9]).*$/${first_digit}xx' \ - --rewrite='NGINX_STATUS_FAMILY=/^.*$/UNKNOWN' \ + --filename-key 'NGINX_LOG_FILE' \ + --unmatched-key 'MESSAGE' \ + --inject-unmatched 'PRIORITY=1' 
\ + --inject 'PRIORITY=${NGINX_STATUS}' \ + --rewrite 'PRIORITY=/^5/3' \ + --rewrite 'PRIORITY=/.*/6' \ + --inject 'NGINX_STATUS_FAMILY=${NGINX_STATUS}' \ + --rewrite 'NGINX_STATUS_FAMILY=/^(?<first_digit>[0-9]).*$/${first_digit}xx' \ + --rewrite 'NGINX_STATUS_FAMILY=/^.*$/UNKNOWN' \ + --inject 'SYSLOG_IDENTIFIER=nginx-log' \ | $send_or_show ``` - ## `log2journal` options ``` -Netdata log2journal v1.43.0-306-g929866ad3 +Netdata log2journal v1.43.0-313-gd79fbac6a Convert logs to systemd Journal Export Format. @@ -351,14 +351,6 @@ Options: unmatched entry will appear as the log message in the journals. Use --inject-unmatched to inject additional fields to unmatched lines. - --duplicate TARGET=KEY1[,KEY2[,KEY3[,...]] - Create a new key called TARGET, duplicating the values of the keys - given. Useful for further processing. When multiple keys are given, - their values are separated by comma. - - Up to 512 duplications can be given on the command line, and up to - 20 keys per duplication command are allowed. - --inject LINE Inject constant fields to the output (both matched and unmatched logs). 
--inject entries are added to unmatched lines too, when their key is @@ -455,40 +447,40 @@ This is a simple diagram of the pipeline taking place: | INPUT | | read one log line at a time | +---------------------------------------------------+ - v v - +---------------------------------+ | - | EXTRACT FIELDS AND VALUES | | - | JSON, logfmt, or pattern based | | - | (apply optional PREFIX) | | - +---------------------------------+ | - v v | - +---------------+ +--------------+ | - | DUPLICATE | | FILTER | | - | | | filter keys | | - | create new | +--------------+ | - | fields by | v | - | duplicating | +--------------+ | - | other fields | | RENAME | | - | and their | | change | | - | values | | field names | | - +---------------+ +--------------+ | - v v v - +---------------------------------+ +--------------+ - | REWRITE PIPELINES | | INJECT | - | altering the values of fields | | constants | - +---------------------------------+ +--------------+ - v v + v v v v v v + +---------------------------------------------------+ + | EXTRACT FIELDS AND VALUES | + | JSON, logfmt, or pattern based | + | (apply optional PREFIX) | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | RENAME FIELDS | + | change the names of the fields | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | INJECT NEW FIELDS | + | constants, or other field values as variables | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | REWRITE FIELD VALUES | + | pipeline multiple rewriting rules to alter | + | the values of the fields | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | FILTER FIELDS | + | use include and exclude patterns on the field | + | names, to select which fields are sent to 
journal | + +---------------------------------------------------+ + v v v v v v +---------------------------------------------------+ | OUTPUT | | generate Journal Export Format | +---------------------------------------------------+ -IMPORTANT: - - Extraction of keys includes formatting them according to journal rules. - - Duplication rules use the original extracted field names, after they have - been prefixed (when a PREFIX is set) and before they are renamed. - - Rewriting is always the last stage, so the final field names are matched. - -------------------------------------------------------------------------------- JOURNAL FIELDS RULES (enforced by systemd-journald) diff --git a/collectors/log2journal/log2journal-duplicate.c b/collectors/log2journal/log2journal-duplicate.c deleted file mode 100644 index af0be843fd..0000000000 --- a/collectors/log2journal/log2journal-duplicate.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "log2journal.h" - -void duplication_cleanup(DUPLICATION *dp) { - hashed_key_cleanup(&dp->target); - - for(size_t j = 0; j < dp->used ; j++) { - hashed_key_cleanup(&dp->keys[j]); - txt_cleanup(&dp->values[j]); - } -} - -DUPLICATION *log_job_duplication_add(LOG_JOB *jb, const char *target, size_t target_len) { - if (jb->dups.used >= MAX_KEY_DUPS) { - log2stderr("ERROR: Too many duplicates defined. Maximum allowed is %d.", MAX_KEY_DUPS); - return NULL; - } - - if(target_len > JOURNAL_MAX_KEY_LEN) { - log2stderr("WARNING: key of duplicate '%.*s' is too long for journals. 
Will be truncated.", (int)target_len, target); - target_len = JOURNAL_MAX_KEY_LEN; - } - - DUPLICATION *kd = &jb->dups.array[jb->dups.used++]; - hashed_key_len_set(&kd->target, target, target_len); - kd->used = 0; - kd->exposed = false; - - // Initialize values array - for (size_t i = 0; i < MAX_KEY_DUPS_KEYS; i++) { - kd->values[i].txt = NULL; - kd->values[i].size = 0; - } - - return kd; -} - -bool log_job_duplication_key_add(DUPLICATION *kd, const char *key, size_t key_len) { - if (kd->used >= MAX_KEY_DUPS_KEYS) { - log2stderr("ERROR: Too many keys in duplication of target '%s'.", kd->target.key); - return false; - } - - hashed_key_len_set(&kd->keys[kd->used++], key, key_len); - - return true; -} - diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c index 47bcdb5289..67af516df3 100644 --- a/collectors/log2journal/log2journal-help.c +++ b/collectors/log2journal/log2journal-help.c @@ -88,14 +88,6 @@ void log_job_command_line_help(const char *name) { printf(" unmatched entry will appear as the log message in the journals.\n"); printf(" Use --inject-unmatched to inject additional fields to unmatched lines.\n"); printf("\n"); - printf(" --duplicate TARGET=KEY1[,KEY2[,KEY3[,...]]\n"); - printf(" Create a new key called TARGET, duplicating the values of the keys\n"); - printf(" given. Useful for further processing. 
When multiple keys are given,\n"); - printf(" their values are separated by comma.\n"); - printf("\n"); - printf(" Up to %d duplications can be given on the command line, and up to\n", MAX_KEY_DUPS); - printf(" %d keys per duplication command are allowed.\n", MAX_KEY_DUPS_KEYS); - printf("\n"); printf(" --inject LINE\n"); printf(" Inject constant fields to the output (both matched and unmatched logs).\n"); printf(" --inject entries are added to unmatched lines too, when their key is\n"); @@ -192,40 +184,40 @@ void log_job_command_line_help(const char *name) { printf(" | INPUT | \n"); printf(" | read one log line at a time | \n"); printf(" +---------------------------------------------------+ \n"); - printf(" v v \n"); - printf(" +---------------------------------+ | \n"); - printf(" | EXTRACT FIELDS AND VALUES | | \n"); - printf(" | JSON, logfmt, or pattern based | | \n"); - printf(" | (apply optional PREFIX) | | \n"); - printf(" +---------------------------------+ | \n"); - printf(" v v | \n"); - printf(" +---------------+ +--------------+ | \n"); - printf(" | DUPLICATE | | FILTER | | \n"); - printf(" | | | filter keys | | \n"); - printf(" | create new | +--------------+ | \n"); - printf(" | fields by | v | \n"); - printf(" | duplicating | +--------------+ | \n"); - printf(" | other fields | | RENAME | | \n"); - printf(" | and their | | change | | \n"); - printf(" | values | | field names | | \n"); - printf(" +---------------+ +--------------+ | \n"); - printf(" v v v \n"); - printf(" +---------------------------------+ +--------------+ \n"); - printf(" | REWRITE PIPELINES | | INJECT | \n"); - printf(" | altering the values of fields | | constants | \n"); - printf(" +---------------------------------+ +--------------+ \n"); - printf(" v v \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | EXTRACT FIELDS AND VALUES | \n"); + printf(" | JSON, logfmt, or pattern based | \n"); + printf(" | (apply 
optional PREFIX) | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | RENAME FIELDS | \n"); + printf(" | change the names of the fields | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | INJECT NEW FIELDS | \n"); + printf(" | constants, or other field values as variables | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | REWRITE FIELD VALUES | \n"); + printf(" | pipeline multiple rewriting rules to alter | \n"); + printf(" | the values of the fields | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | FILTER FIELDS | \n"); + printf(" | use include and exclude patterns on the field | \n"); + printf(" | names, to select which fields are sent to journal | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); printf(" +---------------------------------------------------+ \n"); printf(" | OUTPUT | \n"); printf(" | generate Journal Export Format | \n"); printf(" +---------------------------------------------------+ \n"); printf(" \n"); - printf("IMPORTANT:\n"); - printf(" - Extraction of keys includes formatting them according to journal rules.\n"); - printf(" - Duplication rules use the original extracted field names, after they have\n"); - printf(" been prefixed (when a PREFIX is set) and before they are renamed.\n"); - printf(" - Rewriting is always the last stage, so the final field names are matched.\n"); - printf("\n"); 
printf("--------------------------------------------------------------------------------\n"); printf("JOURNAL FIELDS RULES (enforced by systemd-journald)\n"); printf("\n"); diff --git a/collectors/log2journal/log2journal-inject.c b/collectors/log2journal/log2journal-inject.c index 000ad7a748..45158066bf 100644 --- a/collectors/log2journal/log2journal-inject.c +++ b/collectors/log2journal/log2journal-inject.c @@ -4,10 +4,10 @@ void injection_cleanup(INJECTION *inj) { hashed_key_cleanup(&inj->key); - txt_cleanup(&inj->value); + replace_pattern_cleanup(&inj->value); } -static inline void log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) { +static inline bool log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) { if(key_len > JOURNAL_MAX_KEY_LEN) log2stderr("WARNING: injection key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key); @@ -15,7 +15,11 @@ static inline void log_job_injection_replace(INJECTION *inj, const char *key, si log2stderr("WARNING: injection value of key '%.*s' is too long for journal. 
Will be truncated.", (int)key_len, key); hashed_key_len_set(&inj->key, key, key_len); - txt_replace(&inj->value, value, value_len); + char *v = strndupz(value, value_len); + bool ret = replace_pattern_set(&inj->value, v); + freez(v); + + return ret; } bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched) { @@ -32,13 +36,14 @@ bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const c } } + bool ret; if (unmatched) { - log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++], - key, key_len, value, value_len); + ret = log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++], + key, key_len, value, value_len); } else { - log_job_injection_replace(&jb->injections.keys[jb->injections.used++], - key, key_len, value, value_len); + ret = log_job_injection_replace(&jb->injections.keys[jb->injections.used++], + key, key_len, value, value_len); } - return true; + return ret; } diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c index 60d22f901d..ca4e2f5860 100644 --- a/collectors/log2journal/log2journal-params.c +++ b/collectors/log2journal/log2journal-params.c @@ -10,7 +10,7 @@ void log_job_init(LOG_JOB *jb) { } static void simple_hashtable_cleanup_allocated(SIMPLE_HASHTABLE *ht) { - for(size_t i = 0; i < ht->used ;i++) { + for(size_t i = 0; i < ht->size ;i++) { HASHED_KEY *k = ht->hashtable[i].data; if(k && k->flags & HK_HASHTABLE_ALLOCATED) { hashed_key_cleanup(k); @@ -41,12 +41,12 @@ void log_job_cleanup(LOG_JOB *jb) { for(size_t i = 0; i < jb->renames.used ;i++) rename_cleanup(&jb->renames.array[i]); - for(size_t i = 0; i < jb->dups.used ;i++) - duplication_cleanup(&jb->dups.array[i]); - for(size_t i = 0; i < jb->rewrites.used; i++) rewrite_cleanup(&jb->rewrites.array[i]); + txt_cleanup(&jb->rewrites.tmp); + txt_cleanup(&jb->filename.current); + 
simple_hashtable_cleanup_allocated(&jb->hashtable); simple_hashtable_free(&jb->hashtable); @@ -146,6 +146,79 @@ static bool is_symbol(char c) { return !isalpha(c) && !isdigit(c) && !iscntrl(c); } +struct { + const char *keyword; + int action; + RW_FLAGS flag; +} rewrite_flags[] = { + {"match", 1, RW_MATCH_PCRE2}, + {"match", 0, RW_MATCH_NON_EMPTY}, + + {"regex", 1, RW_MATCH_PCRE2}, + {"regex", 0, RW_MATCH_NON_EMPTY}, + + {"pcre2", 1, RW_MATCH_PCRE2}, + {"pcre2", 0, RW_MATCH_NON_EMPTY}, + + {"non_empty", 1, RW_MATCH_NON_EMPTY}, + {"non_empty", 0, RW_MATCH_PCRE2}, + + {"non-empty", 1, RW_MATCH_NON_EMPTY}, + {"non-empty", 0, RW_MATCH_PCRE2}, + + {"not_empty", 1, RW_MATCH_NON_EMPTY}, + {"not_empty", 0, RW_MATCH_PCRE2}, + + {"not-empty", 1, RW_MATCH_NON_EMPTY}, + {"not-empty", 0, RW_MATCH_PCRE2}, + + {"stop", 0, RW_DONT_STOP}, + {"no-stop", 1, RW_DONT_STOP}, + {"no_stop", 1, RW_DONT_STOP}, + {"dont-stop", 1, RW_DONT_STOP}, + {"dont_stop", 1, RW_DONT_STOP}, + {"continue", 1, RW_DONT_STOP}, + {"inject", 1, RW_INJECT}, + {"existing", 0, RW_INJECT}, +}; + +RW_FLAGS parse_rewrite_flags(const char *options) { + RW_FLAGS flags = RW_MATCH_PCRE2; // Default option + + // Tokenize the input options using "," + char *token; + char *optionsCopy = strdup(options); // Make a copy to avoid modifying the original + token = strtok(optionsCopy, ","); + + while (token != NULL) { + // Find the keyword-action mapping + bool found = false; + + for (size_t i = 0; i < sizeof(rewrite_flags) / sizeof(rewrite_flags[0]); i++) { + if (strcmp(token, rewrite_flags[i].keyword) == 0) { + if (rewrite_flags[i].action == 1) { + flags |= rewrite_flags[i].flag; // Set the flag + } else { + flags &= ~rewrite_flags[i].flag; // Unset the flag + } + + found = true; + } + } + + if(!found) + log2stderr("Warning: rewrite options '%s' is not understood.", token); + + // Get the next token + token = strtok(NULL, ","); + } + + free(optionsCopy); // Free the copied string + + return flags; +} + + static bool 
parse_rewrite(LOG_JOB *jb, const char *param) { // Search for '=' in param const char *equal_sign = strchr(param, '='); @@ -180,18 +253,20 @@ static bool parse_rewrite(LOG_JOB *jb, const char *param) { return false; } - // Reserve a slot in rewrites - if (jb->rewrites.used >= MAX_REWRITES) { - log2stderr("Error: Exceeded maximum number of rewrite rules, while processing: %s", param); - return false; - } + RW_FLAGS flags = RW_MATCH_PCRE2; + const char *third_separator = strchr(second_separator + 1, separator); + if(third_separator) + flags = parse_rewrite_flags(third_separator + 1); // Extract key, search pattern, and replacement pattern char *key = strndupz(param, equal_sign - param); char *search_pattern = strndupz(equal_sign + 2, second_separator - (equal_sign + 2)); - char *replace_pattern = strdupz(second_separator + 1); + char *replace_pattern = third_separator ? strndup(second_separator + 1, third_separator - (second_separator + 1)) : strdupz(second_separator + 1); - bool ret = log_job_rewrite_add(jb, key, search_pattern, replace_pattern); + if(!*search_pattern) + flags &= ~RW_MATCH_PCRE2; + + bool ret = log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern); freez(key); freez(search_pattern); @@ -214,41 +289,6 @@ static bool parse_inject(LOG_JOB *jb, const char *value, bool unmatched) { return true; } -static bool parse_duplicate(LOG_JOB *jb, const char *value) { - const char *target = value; - const char *equal_sign = strchr(value, '='); - if (!equal_sign || equal_sign == target) { - log2stderr("Error: Invalid duplicate format, '=' not found or at the start in %s", value); - return false; - } - - size_t target_len = equal_sign - target; - DUPLICATION *kd = log_job_duplication_add(jb, target, target_len); - if(!kd) return false; - - const char *key = equal_sign + 1; - while (key) { - if (kd->used >= MAX_KEY_DUPS_KEYS) { - log2stderr("Error: too many keys in duplication of target '%s'.", kd->target.key); - return false; - } - - const char *comma 
= strchr(key, ','); - size_t key_len; - if (comma) { |