summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-11-30 23:18:58 +0200
committerGitHub <noreply@github.com>2023-11-30 23:18:58 +0200
commit0705251a8a204d565017a2f29f157ffae39a5932 (patch)
treec26b173cb1565df10042fdcb212c5888f613314e
parent3362c4e405c6a170b88c1e2040690278f4cfe58a (diff)
log2journal improvements 4 (#16510)
* fix codeql warnings * work to support multiple types of value rewrites * support looking up variables names in all values specified * added more unit tests for the current functionality * satisfy coverity * fixed quotes * test nginx-combined.yaml too * removed deletions - injections can now do the same * remove empty line
-rw-r--r--Makefile.am1
-rw-r--r--collectors/log2journal/README.md104
-rw-r--r--collectors/log2journal/log2journal-duplicate.c49
-rw-r--r--collectors/log2journal/log2journal-help.c66
-rw-r--r--collectors/log2journal/log2journal-inject.c21
-rw-r--r--collectors/log2journal/log2journal-params.c136
-rw-r--r--collectors/log2journal/log2journal-replace.c7
-rw-r--r--collectors/log2journal/log2journal-rewrite.c36
-rw-r--r--collectors/log2journal/log2journal-yaml.c249
-rw-r--r--collectors/log2journal/log2journal.c477
-rw-r--r--collectors/log2journal/log2journal.d/nginx-combined.yaml48
-rw-r--r--collectors/log2journal/log2journal.d/nginx-json.yaml93
-rw-r--r--collectors/log2journal/log2journal.h111
-rw-r--r--collectors/log2journal/tests.d/full.output47
-rw-r--r--collectors/log2journal/tests.d/full.yaml47
-rw-r--r--collectors/log2journal/tests.d/json-exclude.output196
-rw-r--r--collectors/log2journal/tests.d/json-include.output54
-rw-r--r--collectors/log2journal/tests.d/json.output418
-rw-r--r--collectors/log2journal/tests.d/logfmt.log5
-rw-r--r--collectors/log2journal/tests.d/logfmt.output37
-rw-r--r--collectors/log2journal/tests.d/logfmt.yaml34
-rw-r--r--collectors/log2journal/tests.d/nginx-combined.log14
-rw-r--r--collectors/log2journal/tests.d/nginx-combined.output210
-rw-r--r--collectors/log2journal/tests.d/nginx-json.output346
-rwxr-xr-xcollectors/log2journal/tests.sh20
25 files changed, 1524 insertions, 1302 deletions
diff --git a/Makefile.am b/Makefile.am
index aff19318ae..398c6fb20c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -357,7 +357,6 @@ LOG2JOURNAL_FILES = \
collectors/log2journal/log2journal-logfmt.c \
collectors/log2journal/log2journal-pcre2.c \
collectors/log2journal/log2journal-params.c \
- collectors/log2journal/log2journal-duplicate.c \
collectors/log2journal/log2journal-inject.c \
collectors/log2journal/log2journal-pattern.c \
collectors/log2journal/log2journal-replace.c \
diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md
index 22b87f2a49..2747142d68 100644
--- a/collectors/log2journal/README.md
+++ b/collectors/log2journal/README.md
@@ -140,10 +140,10 @@ Avoid setting priority to 0 (`LOG_EMERG`), because these will be on your termina
To set the PRIORITY field in the output, we can use `NGINX_STATUS` fields. We need a copy of it, which we will alter later.
-We can instruct `log2journal` to duplicate `NGINX_STATUS`, like this: `log2journal --duplicate=PRIORITY=NGINX_STATUS`. Let's try it:
+We can instruct `log2journal` to duplicate `NGINX_STATUS`, like this: `log2journal --inject 'PRIORITY=${NGINX_STATUS}'`. Let's try it:
```bash
-# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=PRIORITY=NGINX_STATUS
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}'
MESSAGE=GET /index.html HTTP/1.1
NGINX_BODY_BYTES_SENT=4172
NGINX_HTTP_REFERER=-
@@ -161,10 +161,10 @@ NGINX_URL=/index.html
```
-Now that we have the `PRIORITY` field equal to the `NGINX_STATUS`, we can use instruct `log2journal` to change it to a valid priority, by appending: `--rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6`. These rewrite commands say to match everything that starts with `5` and replace it with priority `3` (error) and everything else with priority `6` (info). Let's see it:
+Now that we have the `PRIORITY` field equal to the `NGINX_STATUS`, we can instruct `log2journal` to change it to a valid priority, by appending: `--rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6'`. These rewrite commands say to match everything that starts with `5` and replace it with priority `3` (error) and everything else with priority `6` (info). Let's see it:
```bash
-# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6'
MESSAGE=GET /index.html HTTP/1.1
NGINX_BODY_BYTES_SENT=4172
NGINX_HTTP_REFERER=-
@@ -182,12 +182,12 @@ NGINX_URL=/index.html
```
-Similarly, we could duplicate `NGINX_URL` to `NGINX_ENDPOINT` and then process it with sed to remove any query string, or replace IDs in the URL path with constant names, thus giving us uniform endpoints independently of the parameters.
+Similarly, we could duplicate `${NGINX_URL}` to `NGINX_ENDPOINT` and then process it to remove any query string, or replace IDs in the URL path with constant names, thus giving us uniform endpoints independently of the parameters.
-To complete the example, we can also inject a `SYSLOG_IDENTIFIER` with `log2journal`, using `--inject=SYSLOG_IDENTIFIER=nginx-log`, like this:
+To complete the example, we can also inject a `SYSLOG_IDENTIFIER` with `log2journal`, using `--inject 'SYSLOG_IDENTIFIER=nginx'`, like this:
```bash
-# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --inject=SYSLOG_IDENTIFIER=nginx -rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --inject 'SYSLOG_IDENTIFIER=nginx' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6'
MESSAGE=GET /index.html HTTP/1.1
NGINX_BODY_BYTES_SENT=4172
NGINX_HTTP_REFERER=-
@@ -210,7 +210,7 @@ Now the message is ready to be sent to a systemd-journal. For this we use `syste
```bash
-# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --duplicate=STATUS2PRIORITY=NGINX_STATUS --inject=SYSLOG_IDENTIFIER=nginx -rewrite=PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6 | systemd-cat-native
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal '^(?<NGINX_REMOTE_ADDR>[^ ]+) - (?<NGINX_REMOTE_USER>[^ ]+) \[(?<NGINX_TIME_LOCAL>[^\]]+)\] "(?<MESSAGE>(?<NGINX_METHOD>[A-Z]+) (?<NGINX_URL>[^ ]+) HTTP/(?<NGINX_HTTP_VERSION>[^"]+))" (?<NGINX_STATUS>\d+) (?<NGINX_BODY_BYTES_SENT>\d+) (?<NGINX_REQUEST_LENGTH>\d+) (?<NGINX_REQUEST_TIME>[\d.]+) "(?<NGINX_HTTP_REFERER>[^"]*)" "(?<NGINX_HTTP_USER_AGENT>[^"]*)"' --inject 'PRIORITY=${NGINX_STATUS}' --inject 'SYSLOG_IDENTIFIER=nginx' --rewrite 'PRIORITY=/^5/3' --rewrite 'PRIORITY=/.*/6' | systemd-cat-native
# no output
# let's find the message
@@ -296,24 +296,24 @@ pattern='(?x) # Enable PCRE2 extended mode
tail -n $last -F /var/log/nginx/*access.log \
| log2journal "${pattern}" \
- --filename-key=NGINX_LOG_FILE \
- --duplicate=PRIORITY=NGINX_STATUS \
- --duplicate=NGINX_STATUS_FAMILY=NGINX_STATUS \
- --inject=SYSLOG_IDENTIFIER=nginx-log \
- --unmatched-key=MESSAGE \
- --inject-unmatched=PRIORITY=1 \
- --rewrite='PRIORITY=/^5/3 --rewrite=PRIORITY=/.*/6' \
- --rewrite='NGINX_STATUS_FAMILY=/^(?<first_digit>[0-9]).*$/${first_digit}xx' \
- --rewrite='NGINX_STATUS_FAMILY=/^.*$/UNKNOWN' \
+ --filename-key 'NGINX_LOG_FILE' \
+ --unmatched-key 'MESSAGE' \
+ --inject-unmatched 'PRIORITY=1' \
+ --inject 'PRIORITY=${NGINX_STATUS}' \
+ --rewrite 'PRIORITY=/^5/3' \
+ --rewrite 'PRIORITY=/.*/6' \
+ --inject 'NGINX_STATUS_FAMILY=${NGINX_STATUS}' \
+ --rewrite 'NGINX_STATUS_FAMILY=/^(?<first_digit>[0-9]).*$/${first_digit}xx' \
+ --rewrite 'NGINX_STATUS_FAMILY=/^.*$/UNKNOWN' \
+ --inject 'SYSLOG_IDENTIFIER=nginx-log' \
| $send_or_show
```
-
## `log2journal` options
```
-Netdata log2journal v1.43.0-306-g929866ad3
+Netdata log2journal v1.43.0-313-gd79fbac6a
Convert logs to systemd Journal Export Format.
@@ -351,14 +351,6 @@ Options:
unmatched entry will appear as the log message in the journals.
Use --inject-unmatched to inject additional fields to unmatched lines.
- --duplicate TARGET=KEY1[,KEY2[,KEY3[,...]]
- Create a new key called TARGET, duplicating the values of the keys
- given. Useful for further processing. When multiple keys are given,
- their values are separated by comma.
-
- Up to 512 duplications can be given on the command line, and up to
- 20 keys per duplication command are allowed.
-
--inject LINE
Inject constant fields to the output (both matched and unmatched logs).
--inject entries are added to unmatched lines too, when their key is
@@ -455,40 +447,40 @@ This is a simple diagram of the pipeline taking place:
| INPUT |
| read one log line at a time |
+---------------------------------------------------+
- v v
- +---------------------------------+ |
- | EXTRACT FIELDS AND VALUES | |
- | JSON, logfmt, or pattern based | |
- | (apply optional PREFIX) | |
- +---------------------------------+ |
- v v |
- +---------------+ +--------------+ |
- | DUPLICATE | | FILTER | |
- | | | filter keys | |
- | create new | +--------------+ |
- | fields by | v |
- | duplicating | +--------------+ |
- | other fields | | RENAME | |
- | and their | | change | |
- | values | | field names | |
- +---------------+ +--------------+ |
- v v v
- +---------------------------------+ +--------------+
- | REWRITE PIPELINES | | INJECT |
- | altering the values of fields | | constants |
- +---------------------------------+ +--------------+
- v v
+ v v v v v v
+ +---------------------------------------------------+
+ | EXTRACT FIELDS AND VALUES |
+ | JSON, logfmt, or pattern based |
+ | (apply optional PREFIX) |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | RENAME FIELDS |
+ | change the names of the fields |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | INJECT NEW FIELDS |
+ | constants, or other field values as variables |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | REWRITE FIELD VALUES |
+ | pipeline multiple rewriting rules to alter |
+ | the values of the fields |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | FILTER FIELDS |
+ | use include and exclude patterns on the field |
+ | names, to select which fields are sent to journal |
+ +---------------------------------------------------+
+ v v v v v v
+---------------------------------------------------+
| OUTPUT |
| generate Journal Export Format |
+---------------------------------------------------+
-IMPORTANT:
- - Extraction of keys includes formatting them according to journal rules.
- - Duplication rules use the original extracted field names, after they have
- been prefixed (when a PREFIX is set) and before they are renamed.
- - Rewriting is always the last stage, so the final field names are matched.
-
--------------------------------------------------------------------------------
JOURNAL FIELDS RULES (enforced by systemd-journald)
diff --git a/collectors/log2journal/log2journal-duplicate.c b/collectors/log2journal/log2journal-duplicate.c
deleted file mode 100644
index af0be843fd..0000000000
--- a/collectors/log2journal/log2journal-duplicate.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "log2journal.h"
-
-void duplication_cleanup(DUPLICATION *dp) {
- hashed_key_cleanup(&dp->target);
-
- for(size_t j = 0; j < dp->used ; j++) {
- hashed_key_cleanup(&dp->keys[j]);
- txt_cleanup(&dp->values[j]);
- }
-}
-
-DUPLICATION *log_job_duplication_add(LOG_JOB *jb, const char *target, size_t target_len) {
- if (jb->dups.used >= MAX_KEY_DUPS) {
- log2stderr("ERROR: Too many duplicates defined. Maximum allowed is %d.", MAX_KEY_DUPS);
- return NULL;
- }
-
- if(target_len > JOURNAL_MAX_KEY_LEN) {
- log2stderr("WARNING: key of duplicate '%.*s' is too long for journals. Will be truncated.", (int)target_len, target);
- target_len = JOURNAL_MAX_KEY_LEN;
- }
-
- DUPLICATION *kd = &jb->dups.array[jb->dups.used++];
- hashed_key_len_set(&kd->target, target, target_len);
- kd->used = 0;
- kd->exposed = false;
-
- // Initialize values array
- for (size_t i = 0; i < MAX_KEY_DUPS_KEYS; i++) {
- kd->values[i].txt = NULL;
- kd->values[i].size = 0;
- }
-
- return kd;
-}
-
-bool log_job_duplication_key_add(DUPLICATION *kd, const char *key, size_t key_len) {
- if (kd->used >= MAX_KEY_DUPS_KEYS) {
- log2stderr("ERROR: Too many keys in duplication of target '%s'.", kd->target.key);
- return false;
- }
-
- hashed_key_len_set(&kd->keys[kd->used++], key, key_len);
-
- return true;
-}
-
diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c
index 47bcdb5289..67af516df3 100644
--- a/collectors/log2journal/log2journal-help.c
+++ b/collectors/log2journal/log2journal-help.c
@@ -88,14 +88,6 @@ void log_job_command_line_help(const char *name) {
printf(" unmatched entry will appear as the log message in the journals.\n");
printf(" Use --inject-unmatched to inject additional fields to unmatched lines.\n");
printf("\n");
- printf(" --duplicate TARGET=KEY1[,KEY2[,KEY3[,...]]\n");
- printf(" Create a new key called TARGET, duplicating the values of the keys\n");
- printf(" given. Useful for further processing. When multiple keys are given,\n");
- printf(" their values are separated by comma.\n");
- printf("\n");
- printf(" Up to %d duplications can be given on the command line, and up to\n", MAX_KEY_DUPS);
- printf(" %d keys per duplication command are allowed.\n", MAX_KEY_DUPS_KEYS);
- printf("\n");
printf(" --inject LINE\n");
printf(" Inject constant fields to the output (both matched and unmatched logs).\n");
printf(" --inject entries are added to unmatched lines too, when their key is\n");
@@ -192,40 +184,40 @@ void log_job_command_line_help(const char *name) {
printf(" | INPUT | \n");
printf(" | read one log line at a time | \n");
printf(" +---------------------------------------------------+ \n");
- printf(" v v \n");
- printf(" +---------------------------------+ | \n");
- printf(" | EXTRACT FIELDS AND VALUES | | \n");
- printf(" | JSON, logfmt, or pattern based | | \n");
- printf(" | (apply optional PREFIX) | | \n");
- printf(" +---------------------------------+ | \n");
- printf(" v v | \n");
- printf(" +---------------+ +--------------+ | \n");
- printf(" | DUPLICATE | | FILTER | | \n");
- printf(" | | | filter keys | | \n");
- printf(" | create new | +--------------+ | \n");
- printf(" | fields by | v | \n");
- printf(" | duplicating | +--------------+ | \n");
- printf(" | other fields | | RENAME | | \n");
- printf(" | and their | | change | | \n");
- printf(" | values | | field names | | \n");
- printf(" +---------------+ +--------------+ | \n");
- printf(" v v v \n");
- printf(" +---------------------------------+ +--------------+ \n");
- printf(" | REWRITE PIPELINES | | INJECT | \n");
- printf(" | altering the values of fields | | constants | \n");
- printf(" +---------------------------------+ +--------------+ \n");
- printf(" v v \n");
+ printf(" v v v v v v \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" | EXTRACT FIELDS AND VALUES | \n");
+ printf(" | JSON, logfmt, or pattern based | \n");
+ printf(" | (apply optional PREFIX) | \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" v v v v v v \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" | RENAME FIELDS | \n");
+ printf(" | change the names of the fields | \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" v v v v v v \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" | INJECT NEW FIELDS | \n");
+ printf(" | constants, or other field values as variables | \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" v v v v v v \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" | REWRITE FIELD VALUES | \n");
+ printf(" | pipeline multiple rewriting rules to alter | \n");
+ printf(" | the values of the fields | \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" v v v v v v \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" | FILTER FIELDS | \n");
+ printf(" | use include and exclude patterns on the field | \n");
+ printf(" | names, to select which fields are sent to journal | \n");
+ printf(" +---------------------------------------------------+ \n");
+ printf(" v v v v v v \n");
printf(" +---------------------------------------------------+ \n");
printf(" | OUTPUT | \n");
printf(" | generate Journal Export Format | \n");
printf(" +---------------------------------------------------+ \n");
printf(" \n");
- printf("IMPORTANT:\n");
- printf(" - Extraction of keys includes formatting them according to journal rules.\n");
- printf(" - Duplication rules use the original extracted field names, after they have\n");
- printf(" been prefixed (when a PREFIX is set) and before they are renamed.\n");
- printf(" - Rewriting is always the last stage, so the final field names are matched.\n");
- printf("\n");
printf("--------------------------------------------------------------------------------\n");
printf("JOURNAL FIELDS RULES (enforced by systemd-journald)\n");
printf("\n");
diff --git a/collectors/log2journal/log2journal-inject.c b/collectors/log2journal/log2journal-inject.c
index 000ad7a748..45158066bf 100644
--- a/collectors/log2journal/log2journal-inject.c
+++ b/collectors/log2journal/log2journal-inject.c
@@ -4,10 +4,10 @@
void injection_cleanup(INJECTION *inj) {
hashed_key_cleanup(&inj->key);
- txt_cleanup(&inj->value);
+ replace_pattern_cleanup(&inj->value);
}
-static inline void log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) {
+static inline bool log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) {
if(key_len > JOURNAL_MAX_KEY_LEN)
log2stderr("WARNING: injection key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key);
@@ -15,7 +15,11 @@ static inline void log_job_injection_replace(INJECTION *inj, const char *key, si
log2stderr("WARNING: injection value of key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key);
hashed_key_len_set(&inj->key, key, key_len);
- txt_replace(&inj->value, value, value_len);
+ char *v = strndupz(value, value_len);
+ bool ret = replace_pattern_set(&inj->value, v);
+ freez(v);
+
+ return ret;
}
bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched) {
@@ -32,13 +36,14 @@ bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const c
}
}
+ bool ret;
if (unmatched) {
- log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++],
- key, key_len, value, value_len);
+ ret = log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++],
+ key, key_len, value, value_len);
} else {
- log_job_injection_replace(&jb->injections.keys[jb->injections.used++],
- key, key_len, value, value_len);
+ ret = log_job_injection_replace(&jb->injections.keys[jb->injections.used++],
+ key, key_len, value, value_len);
}
- return true;
+ return ret;
}
diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c
index 60d22f901d..ca4e2f5860 100644
--- a/collectors/log2journal/log2journal-params.c
+++ b/collectors/log2journal/log2journal-params.c
@@ -10,7 +10,7 @@ void log_job_init(LOG_JOB *jb) {
}
static void simple_hashtable_cleanup_allocated(SIMPLE_HASHTABLE *ht) {
- for(size_t i = 0; i < ht->used ;i++) {
+ for(size_t i = 0; i < ht->size ;i++) {
HASHED_KEY *k = ht->hashtable[i].data;
if(k && k->flags & HK_HASHTABLE_ALLOCATED) {
hashed_key_cleanup(k);
@@ -41,12 +41,12 @@ void log_job_cleanup(LOG_JOB *jb) {
for(size_t i = 0; i < jb->renames.used ;i++)
rename_cleanup(&jb->renames.array[i]);
- for(size_t i = 0; i < jb->dups.used ;i++)
- duplication_cleanup(&jb->dups.array[i]);
-
for(size_t i = 0; i < jb->rewrites.used; i++)
rewrite_cleanup(&jb->rewrites.array[i]);
+ txt_cleanup(&jb->rewrites.tmp);
+ txt_cleanup(&jb->filename.current);
+
simple_hashtable_cleanup_allocated(&jb->hashtable);
simple_hashtable_free(&jb->hashtable);
@@ -146,6 +146,79 @@ static bool is_symbol(char c) {
return !isalpha(c) && !isdigit(c) && !iscntrl(c);
}
+// Keyword table consulted by parse_rewrite_flags(): when an option token
+// equals `keyword`, `flag` is set if `action` is 1 and cleared if it is 0.
+// A keyword may own two rows, so that selecting one match mode also clears
+// the other one. Read-only and private to this file, hence static const.
+static const struct {
+    const char *keyword;
+    int action;
+    RW_FLAGS flag;
+} rewrite_flags[] = {
+    {"match",     1, RW_MATCH_PCRE2},
+    {"match",     0, RW_MATCH_NON_EMPTY},
+    {"regex",     1, RW_MATCH_PCRE2},
+    {"regex",     0, RW_MATCH_NON_EMPTY},
+    {"pcre2",     1, RW_MATCH_PCRE2},
+    {"pcre2",     0, RW_MATCH_NON_EMPTY},
+
+    {"non_empty", 1, RW_MATCH_NON_EMPTY},
+    {"non_empty", 0, RW_MATCH_PCRE2},
+    {"non-empty", 1, RW_MATCH_NON_EMPTY},
+    {"non-empty", 0, RW_MATCH_PCRE2},
+    {"not_empty", 1, RW_MATCH_NON_EMPTY},
+    {"not_empty", 0, RW_MATCH_PCRE2},
+    {"not-empty", 1, RW_MATCH_NON_EMPTY},
+    {"not-empty", 0, RW_MATCH_PCRE2},
+
+    {"stop",      0, RW_DONT_STOP},
+    {"no-stop",   1, RW_DONT_STOP},
+    {"no_stop",   1, RW_DONT_STOP},
+    {"dont-stop", 1, RW_DONT_STOP},
+    {"dont_stop", 1, RW_DONT_STOP},
+    {"continue",  1, RW_DONT_STOP},
+
+    {"inject",    1, RW_INJECT},
+    {"existing",  0, RW_INJECT},
+};
+
+// Parse a comma-separated list of rewrite option keywords into RW_FLAGS.
+// Starts from RW_MATCH_PCRE2 and applies every rewrite_flags[] row whose
+// keyword equals a token (action 1 sets the row's flag, otherwise clears it).
+// Unknown tokens are reported with log2stderr() and otherwise ignored.
+RW_FLAGS parse_rewrite_flags(const char *options) {
+    RW_FLAGS flags = RW_MATCH_PCRE2; // Default option
+
+    // Use the same strdupz()/freez() wrappers parse_rewrite() uses instead of
+    // raw strdup()/free(); strtok_r() keeps the tokenizer state local.
+    char *saveptr = NULL;
+    char *optionsCopy = strdupz(options); // tokenizing writes NULs, so use a copy
+
+    for (char *token = strtok_r(optionsCopy, ",", &saveptr); token != NULL;
+         token = strtok_r(NULL, ",", &saveptr)) {
+        bool found = false;
+        // No break: a keyword may appear in several rows (e.g. "match" sets
+        // RW_MATCH_PCRE2 and clears RW_MATCH_NON_EMPTY).
+        for (size_t i = 0; i < sizeof(rewrite_flags) / sizeof(rewrite_flags[0]); i++) {
+            if (strcmp(token, rewrite_flags[i].keyword) != 0)
+                continue;
+            if (rewrite_flags[i].action == 1)
+                flags |= rewrite_flags[i].flag;  // Set the flag
+            else
+                flags &= ~rewrite_flags[i].flag; // Unset the flag
+
+            found = true;
+        }
+
+        if(!found)
+            log2stderr("Warning: rewrite options '%s' is not understood.", token);
+    }
+
+    freez(optionsCopy);
+    return flags;
+}
+
+
static bool parse_rewrite(LOG_JOB *jb, const char *param) {
// Search for '=' in param
const char *equal_sign = strchr(param, '=');
@@ -180,18 +253,20 @@ static bool parse_rewrite(LOG_JOB *jb, const char *param) {
return false;
}
- // Reserve a slot in rewrites
- if (jb->rewrites.used >= MAX_REWRITES) {
- log2stderr("Error: Exceeded maximum number of rewrite rules, while processing: %s", param);
- return false;
- }
+ RW_FLAGS flags = RW_MATCH_PCRE2;
+ const char *third_separator = strchr(second_separator + 1, separator);
+ if(third_separator)
+ flags = parse_rewrite_flags(third_separator + 1);
// Extract key, search pattern, and replacement pattern
char *key = strndupz(param, equal_sign - param);
char *search_pattern = strndupz(equal_sign + 2, second_separator - (equal_sign + 2));
- char *replace_pattern = strdupz(second_separator + 1);
+ char *replace_pattern = third_separator ? strndup(second_separator + 1, third_separator - (second_separator + 1)) : strdupz(second_separator + 1);
- bool ret = log_job_rewrite_add(jb, key, search_pattern, replace_pattern);
+ if(!*search_pattern)
+ flags &= ~RW_MATCH_PCRE2;
+
+ bool ret = log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern);
freez(key);
freez(search_pattern);
@@ -214,41 +289,6 @@ static bool parse_inject(LOG_JOB *jb, const char *value, bool unmatched) {
return true;
}
-static bool parse_duplicate(LOG_JOB *jb, const char *value) {
- const char *target = value;
- const char *equal_sign = strchr(value, '=');
- if (!equal_sign || equal_sign == target) {
- log2stderr("Error: Invalid duplicate format, '=' not found or at the start in %s", value);
- return false;
- }
-
- size_t target_len = equal_sign - target;
- DUPLICATION *kd = log_job_duplication_add(jb, target, target_len);
- if(!kd) return false;
-
- const char *key = equal_sign + 1;
- while (key) {
- if (kd->used >= MAX_KEY_DUPS_KEYS) {
- log2stderr("Error: too many keys in duplication of target '%s'.", kd->target.key);
- return false;
- }
-
- const char *comma = strchr(key, ',');
- size_t key_len;
- if (comma) {