summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-11-29 19:42:10 +0200
committerGitHub <noreply@github.com>2023-11-29 19:42:10 +0200
commit36714f9633f984600b53e85ff16b02fac3fe7e3d (patch)
tree1b88dc19b5a7aa2e789f1faeecd4c7769accb5a1
parent3e94c0a8ef5ba820c30dd45665217454cceff61f (diff)
Log2journal improvements part2 (#16494)
* code cleanup; isolation of pcre2 parsing * reorg fields for speed * renames and comments * prefixing now works on pcre2 mode too * move yaml configurations to log2journal.d directory in /usr/lib/netdata/conf.d * unify the transliteration * cleanup and fix ups * fix compiler warning * started writing a unit test for log2journal * fix codeql warnings * code re-organization and cleanup * code re-organization and cleanup; added include and exclude filters * added callocz and fixed yaml parsing * added hashtable to log2journal * more tests * added nginx-json config and unit test * stop parsing yaml when errors have been encountered * fix codeql warnings * fix docs * exit when a test fails
-rw-r--r--.gitignore1
-rw-r--r--Makefile.am8
-rw-r--r--collectors/log2journal/Makefile.am6
-rw-r--r--collectors/log2journal/README.md244
-rw-r--r--collectors/log2journal/log2journal-duplicate.c49
-rw-r--r--collectors/log2journal/log2journal-help.c130
-rw-r--r--collectors/log2journal/log2journal-inject.c44
-rw-r--r--collectors/log2journal/log2journal-json.c136
-rw-r--r--collectors/log2journal/log2journal-logfmt.c118
-rw-r--r--collectors/log2journal/log2journal-params.c301
-rw-r--r--collectors/log2journal/log2journal-pattern.c54
-rw-r--r--collectors/log2journal/log2journal-pcre2.c139
-rw-r--r--collectors/log2journal/log2journal-rename.c21
-rw-r--r--collectors/log2journal/log2journal-replace.c104
-rw-r--r--collectors/log2journal/log2journal-rewrite.c28
-rw-r--r--collectors/log2journal/log2journal-yaml.c243
-rw-r--r--collectors/log2journal/log2journal.c421
-rw-r--r--collectors/log2journal/log2journal.d/nginx-combined.yaml12
-rw-r--r--collectors/log2journal/log2journal.d/nginx-json.yaml169
-rw-r--r--collectors/log2journal/log2journal.h375
-rw-r--r--collectors/log2journal/tests.d/full.output78
-rw-r--r--collectors/log2journal/tests.d/full.yaml77
-rw-r--r--collectors/log2journal/tests.d/json-exclude.output153
-rw-r--r--collectors/log2journal/tests.d/json-include.output54
-rw-r--r--collectors/log2journal/tests.d/json.log3
-rw-r--r--collectors/log2journal/tests.d/json.output294
-rw-r--r--collectors/log2journal/tests.d/nginx-json.log9
-rw-r--r--collectors/log2journal/tests.d/nginx-json.output296
-rwxr-xr-xcollectors/log2journal/tests.sh142
-rw-r--r--libnetdata/facets/facets.c90
-rw-r--r--libnetdata/simple_hashtable.h92
31 files changed, 2845 insertions, 1046 deletions
diff --git a/.gitignore b/.gitignore
index 3463617c6c..a53025c3be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ Makefile.in
.*.swp
*.old
*.log
+!collectors/log2journal/tests.d/*.log
*.pyc
Makefile
diff --git a/Makefile.am b/Makefile.am
index addb9c9f3e..547b5933e0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -225,6 +225,7 @@ LIBNETDATA_FILES = \
libnetdata/http/http_defs.h \
libnetdata/dyn_conf/dyn_conf.c \
libnetdata/dyn_conf/dyn_conf.h \
+ libnetdata/simple_hashtable.h \
$(NULL)
if ENABLE_PLUGIN_EBPF
@@ -354,7 +355,14 @@ LOG2JOURNAL_FILES = \
collectors/log2journal/log2journal-yaml.c \
collectors/log2journal/log2journal-json.c \
collectors/log2journal/log2journal-logfmt.c \
+ collectors/log2journal/log2journal-pcre2.c \
collectors/log2journal/log2journal-params.c \
+ collectors/log2journal/log2journal-duplicate.c \
+ collectors/log2journal/log2journal-inject.c \
+ collectors/log2journal/log2journal-pattern.c \
+ collectors/log2journal/log2journal-replace.c \
+ collectors/log2journal/log2journal-rename.c \
+ collectors/log2journal/log2journal-rewrite.c \
$(NULL)
diff --git a/collectors/log2journal/Makefile.am b/collectors/log2journal/Makefile.am
index e8a08be08c..578757fc38 100644
--- a/collectors/log2journal/Makefile.am
+++ b/collectors/log2journal/Makefile.am
@@ -4,9 +4,13 @@ AUTOMAKE_OPTIONS = subdir-objects
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
dist_noinst_DATA = \
+ tests.sh \
README.md \
+ tests.d/* \
$(NULL)
-dist_libconfig_DATA = \
+log2journalconfigdir=$(libconfigdir)/log2journal.d
+dist_log2journalconfig_DATA = \
log2journal.d/nginx-combined.yaml \
+ log2journal.d/nginx-json.yaml \
$(NULL)
diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md
index 5c1e7de7b5..22b87f2a49 100644
--- a/collectors/log2journal/README.md
+++ b/collectors/log2journal/README.md
@@ -313,22 +313,29 @@ tail -n $last -F /var/log/nginx/*access.log \
```
-Netdata log2journal v1.43.0-276-gfff8d1181
+Netdata log2journal v1.43.0-306-g929866ad3
-Convert structured log input to systemd Journal Export Format.
+Convert logs to systemd Journal Export Format.
-Using PCRE2 patterns, extract the fields from structured logs on the standard
-input, and generate output according to systemd Journal Export Format.
+ - JSON logs: extracts all JSON fields.
+ - logfmt logs: extracts all logfmt fields.
+ - free-form logs: uses PCRE2 patterns to extract fields.
-Usage: ./log2journal [OPTIONS] PATTERN
+Usage: ./log2journal [OPTIONS] PATTERN|json
Options:
- --file /path/to/file.yaml
+ --file /path/to/file.yaml or -f /path/to/file.yaml
Read yaml configuration file for instructions.
+ --config CONFIG_NAME
+ Run with the internal configuration named CONFIG_NAME.
+ Available internal configs:
+
+ nginx-combined nginx-json
+
--show-config
- Show the configuration in yaml format before starting the job.
+ Show the configuration in YAML format before starting the job.
This is also an easy way to convert command line parameters to yaml.
--filename-key KEY
@@ -348,6 +355,7 @@ Options:
Create a new key called TARGET, duplicating the values of the keys
given. Useful for further processing. When multiple keys are given,
their values are separated by comma.
+
Up to 512 duplications can be given on the command line, and up to
20 keys per duplication command are allowed.
@@ -355,12 +363,14 @@ Options:
Inject constant fields to the output (both matched and unmatched logs).
--inject entries are added to unmatched lines too, when their key is
not used in --inject-unmatched (--inject-unmatched override --inject).
+
Up to 512 fields can be injected.
--inject-unmatched LINE
Inject lines into the output for each unmatched log entry.
Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched
lines as errors, so that they can easily be spotted in the journals.
+
Up to 512 such lines can be injected.
--rewrite KEY=/SearchPattern/ReplacePattern
@@ -369,6 +379,7 @@ Options:
be used between the search pattern and the replacement pattern.
The search pattern is a PCRE2 regular expression, and the replacement
pattern supports literals and named capture groups from the search pattern.
+
Example:
--rewrite DATE=/^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/
${day}/${month}/${year}
@@ -376,57 +387,109 @@ Options:
Only one rewrite rule is applied per key; the sequence of rewrites stops
for the key once a rule matches it. This allows providing a sequence of
- independent rewriting rules for the same key, matching the different values
- the key may get, and also provide a catch-all rewrite rule at the end of the
- sequence for setting the key value if no other rule matched it.
+ independent rewriting rules for the same key, matching the different
+ values the key may get, and also provide a catch-all rewrite rule at the
+ end, for setting the key value if no other rule matched it.
- The combination of duplicating keys with the values of multiple other keys
- combined with multiple rewrite rules, allows creating complex rules for
- rewriting key values.
+ Duplication of keys with the values of multiple other keys, combined with
+ multiple value rewriting rules, allows creating complex rules for adding
+ new keys, based on the values of existing keys.
Up to 512 rewriting rules are allowed.
- -h, --help
+ --include PATTERN
+ Include only keys matching the PCRE2 PATTERN.
+ Useful when parsing JSON or logfmt logs, to include only the keys given.
+ The keys are matched after the PREFIX has been added to them.
+
+ --exclude PATTERN
+ Exclude the keys matching the PCRE2 PATTERN.
+ Useful when parsing JSON or logfmt logs, to exclude some of the keys given.
+ The keys are matched after the PREFIX has been added to them.
+
+ When both include and exclude patterns are set and both match a key,
+ exclude wins and the key will not be added, like a pipeline, we first
+ include it and then exclude it.
+
+ --prefix PREFIX
+ Prefix all fields with PREFIX. The PREFIX is added before processing
+ duplications, renames and rewrites, so that the extracted keys have to
+ be matched with the PREFIX in them.
+ PREFIX is assumed to be systemd-journal friendly.
+
+ --rename NEW=OLD
+ Rename fields, before rewriting their values.
+
+ Up to 512 renaming rules are allowed.
+
+ -h, or --help
Display this help and exit.
PATTERN
PATTERN should be a valid PCRE2 regular expression.
RE2 regular expressions (like the ones usually used in Go applications),
are usually valid PCRE2 patterns too.
- Regular expressions without named groups are ignored.
+ Sub-expressions without named groups are evaluated, but their matches are
+ not added to the output.
+
+ JSON mode
+ JSON mode is enabled when the pattern is set to: json
+ Field names are extracted from the JSON logs and are converted to the
+ format expected by Journal Export Format (all caps, only _ is allowed).
+
+ logfmt mode
+ logfmt mode is enabled when the pattern is set to: logfmt
+ Field names are extracted from the logfmt logs and are converted to the
+ format expected by Journal Export Format (all caps, only _ is allowed).
+
The program accepts all parameters as both --option=value and --option value.
-The maximum line length accepted is 1048576 characters.
-The maximum number of fields in the PCRE2 pattern is 1024.
+The maximum log line length accepted is 1048576 characters.
PIPELINE AND SEQUENCE OF PROCESSING
This is a simple diagram of the pipeline taking place:
+
+ +---------------------------------------------------+
+ | INPUT |
+ | read one log line at a time |
+ +---------------------------------------------------+
+ v v
+ +---------------------------------+ |
+ | EXTRACT FIELDS AND VALUES | |
+ | JSON, logfmt, or pattern based | |
+ | (apply optional PREFIX) | |
+ +---------------------------------+ |
+ v v |
+ +---------------+ +--------------+ |
+ | DUPLICATE | | FILTER | |
+ | | | filter keys | |
+ | create new | +--------------+ |
+ | fields by | v |
+ | duplicating | +--------------+ |
+ | other fields | | RENAME | |
+ | and their | | change | |
+ | values | | field names | |
+ +---------------+ +--------------+ |
+ v v v
+ +---------------------------------+ +--------------+
+ | REWRITE PIPELINES | | INJECT |
+ | altering the values of fields | | constants |
+ +---------------------------------+ +--------------+
+ v v
+ +---------------------------------------------------+
+ | OUTPUT |
+ | generate Journal Export Format |
+ +---------------------------------------------------+
+
+IMPORTANT:
+ - Extraction of keys includes formatting them according to journal rules.
+ - Duplication rules use the original extracted field names, after they have
+ been prefixed (when a PREFIX is set) and before they are renamed.
+ - Rewriting is always the last stage, so the final field names are matched.
- +---------------------------------------------------+
- | INPUT |
- +---------------------------------------------------+
- v v
- +---------------------------------+ |
- | EXTRACT FIELDS AND VALUES | |
- +---------------------------------+ |
- v v |
- +---------------+ | |
- | DUPLICATE | | |
- | create fields | | |
- | with values | | |
- +---------------+ | |
- v v v
- +---------------------------------+ +--------------+
- | REWRITE PIPELINES | | INJECT |
- | altering the values | | constants |
- +---------------------------------+ +--------------+
- v v
- +---------------------------------------------------+
- | OUTPUT |
- +---------------------------------------------------+
-
+--------------------------------------------------------------------------------
JOURNAL FIELDS RULES (enforced by systemd-journald)
- field names can be up to 64 characters
@@ -458,107 +521,10 @@ JOURNAL FIELDS RULES (enforced by systemd-journald)
You can find the most common fields at 'man systemd.journal-fields'.
-Example YAML file:
-
---------------------------------------------------------------------------------
-# Netdata log2journal Configuration Template
-# The following parses nginx log files using the combined format.
-
-# The PCRE2 pattern to match log entries and give names to the fields.
-# The journal will have these names, so follow their rules. You can
-# initiate an extended PCRE2 pattern by starting the pattern with (?x)
-pattern: |
- (?x) # Enable PCRE2 extended mode
- ^
- (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR
- (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER
- \[
- (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL
- \]
- \s+ "
- (?<MESSAGE>
- (?<NGINX_METHOD>[A-Z]+) \s+ # NGINX_METHOD
- (?<NGINX_URL>[^ ]+) \s+
- HTTP/(?<NGINX_HTTP_VERSION>[^"]+)
- )
- " \s+
- (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS
- (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT
- "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER
- "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT
-
-# When log2journal can detect the filename of each log entry (tail gives it
-# only when it tails multiple files), this key will be used to send the
-# filename to the journals.
-filename:
- key: NGINX_LOG_FILENAME
-
-# Duplicate fields under a different name. You can duplicate multiple fields
-# to a new one and then use rewrite rules to change its value.
-duplicate:
-
- # we insert the field PRIORITY as a copy of NGINX_STATUS.
- - key: PRIORITY
- values_of:
- - NGINX_STATUS
-
- # we inject the field NGINX_STATUS_FAMILY as a copy of NGINX_STATUS.
- - key: NGINX_STATUS_FAMILY
- values_of:
- - NGINX_STATUS
-
-# Inject constant fields into the journal logs.
-inject:
- - key: SYSLOG_IDENTIFIER
- value: "nginx-log"
-
-# Rewrite the value of fields (including the duplicated ones).
-# The search pattern can have named groups, and the replace pattern can use
-# them as ${name}.
-rewrite:
- # PRIORTY is a duplicate of NGINX_STATUS
- # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
- - key: "PRIORITY"
- search: "^[123]"
- replace: 6
-
- - key: "PRIORITY"
- search: "^4"
- replace: 5
-
- - key: "PRIORITY"
- search: "^5"
- replace: 3
-
- - key: "PRIORITY"
- search: ".*"
- replace: 4
-
- # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
- - key: "NGINX_STATUS_FAMILY"
- search: "^(?<first_digit>[1-5])"
- replace: "${first_digit}xx"
-
- - key: "NGINX_STATUS_FAMILY"
- search: ".*"
- replace: "UNKNOWN"
-
-# Control what to do when input logs do not match the main PCRE2 pattern.
-unmatched:
- # The journal key to log the PCRE2 error message to.
- # Set this to MESSAGE, so you to see the error in the log.
- key: MESSAGE
-
- # Inject static fields to the unmatched entries.
- # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs.
- inject:
- - key: PRIORITY
- value: 1
-
---------------------------------------------------------------------------------
-
```
+`log2journal` supports YAML configuration files, like the ones found [in this directory](log2journal.d/).
+
## `systemd-cat-native` options
```
diff --git a/collectors/log2journal/log2journal-duplicate.c b/collectors/log2journal/log2journal-duplicate.c
new file mode 100644
index 0000000000..af0be843fd
--- /dev/null
+++ b/collectors/log2journal/log2journal-duplicate.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+void duplication_cleanup(DUPLICATION *dp) {
+ hashed_key_cleanup(&dp->target);
+
+ for(size_t j = 0; j < dp->used ; j++) {
+ hashed_key_cleanup(&dp->keys[j]);
+ txt_cleanup(&dp->values[j]);
+ }
+}
+
+DUPLICATION *log_job_duplication_add(LOG_JOB *jb, const char *target, size_t target_len) {
+ if (jb->dups.used >= MAX_KEY_DUPS) {
+ log2stderr("ERROR: Too many duplicates defined. Maximum allowed is %d.", MAX_KEY_DUPS);
+ return NULL;
+ }
+
+ if(target_len > JOURNAL_MAX_KEY_LEN) {
+ log2stderr("WARNING: key of duplicate '%.*s' is too long for journals. Will be truncated.", (int)target_len, target);
+ target_len = JOURNAL_MAX_KEY_LEN;
+ }
+
+ DUPLICATION *kd = &jb->dups.array[jb->dups.used++];
+ hashed_key_len_set(&kd->target, target, target_len);
+ kd->used = 0;
+ kd->exposed = false;
+
+ // Initialize values array
+ for (size_t i = 0; i < MAX_KEY_DUPS_KEYS; i++) {
+ kd->values[i].txt = NULL;
+ kd->values[i].size = 0;
+ }
+
+ return kd;
+}
+
+bool log_job_duplication_key_add(DUPLICATION *kd, const char *key, size_t key_len) {
+ if (kd->used >= MAX_KEY_DUPS_KEYS) {
+ log2stderr("ERROR: Too many keys in duplication of target '%s'.", kd->target.key);
+ return false;
+ }
+
+ hashed_key_len_set(&kd->keys[kd->used++], key, key_len);
+
+ return true;
+}
+
diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c
index e340db452e..47bcdb5289 100644
--- a/collectors/log2journal/log2journal-help.c
+++ b/collectors/log2journal/log2journal-help.c
@@ -10,12 +10,12 @@ static void config_dir_print_available(void) {
dir = opendir(path);
if (dir == NULL) {
- log2stderr(" >>> Cannot open directory '%s'", path);
+ log2stderr(" >>> Cannot open directory:\n %s", path);
return;
}
size_t column_width = 80;
- size_t current_columns = 0;
+ size_t current_columns = 7; // Start with 7 spaces for the first line
while ((entry = readdir(dir))) {
if (entry->d_type == DT_REG) { // Check if it's a regular file
@@ -24,10 +24,13 @@ static void config_dir_print_available(void) {
if (len >= 5 && strcmp(file_name + len - 5, ".yaml") == 0) {
// Remove the ".yaml" extension
len -= 5;
+ if (current_columns == 7) {
+ printf(" "); // Print 7 spaces at the beginning of a new line
+ }
if (current_columns + len + 1 > column_width) {
// Start a new line if the current line is full
- printf("\n ");
- current_columns = 0;
+ printf("\n "); // Print newline and 7 spaces
+ current_columns = 7;
}
printf("%.*s ", (int)len, file_name); // Print the filename without extension
current_columns += len + 1; // Add filename length and a space
@@ -39,7 +42,7 @@ static void config_dir_print_available(void) {
printf("\n"); // Add a newline at the end
}
-void log2journal_command_line_help(const char *name) {
+void log_job_command_line_help(const char *name) {
printf("\n");
printf("Netdata log2journal " PACKAGE_VERSION "\n");
printf("\n");
@@ -54,11 +57,11 @@ void log2journal_command_line_help(const char *name) {
printf("Options:\n");
printf("\n");
#ifdef HAVE_LIBYAML
- printf(" --file /path/to/file.yaml\n");
+ printf(" --file /path/to/file.yaml or -f /path/to/file.yaml\n");
printf(" Read yaml configuration file for instructions.\n");
printf("\n");
printf(" --config CONFIG_NAME\n");
- printf(" Run with the internal configuration named CONFIG_NAME\n");
+ printf(" Run with the internal configuration named CONFIG_NAME.\n");
printf(" Available internal configs:\n");
printf("\n");
config_dir_print_available();
@@ -89,6 +92,7 @@ void log2journal_command_line_help(const char *name) {
printf(" Create a new key called TARGET, duplicating the values of the keys\n");
printf(" given. Useful for further processing. When multiple keys are given,\n");
printf(" their values are separated by comma.\n");
+ printf("\n");
printf(" Up to %d duplications can be given on the command line, and up to\n", MAX_KEY_DUPS);
printf(" %d keys per duplication command are allowed.\n", MAX_KEY_DUPS_KEYS);
printf("\n");
@@ -96,12 +100,14 @@ void log2journal_command_line_help(const char *name) {
printf(" Inject constant fields to the output (both matched and unmatched logs).\n");
printf(" --inject entries are added to unmatched lines too, when their key is\n");
printf(" not used in --inject-unmatched (--inject-unmatched override --inject).\n");
+ printf("\n");
printf(" Up to %d fields can be injected.\n", MAX_INJECTIONS);
printf("\n");
printf(" --inject-unmatched LINE\n");
printf(" Inject lines into the output for each unmatched log entry.\n");
printf(" Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched\n");
printf(" lines as errors, so that they can easily be spotted in the journals.\n");
+ printf("\n");
printf(" Up to %d such lines can be injected.\n", MAX_INJECTIONS);
printf("\n");
printf(" --rewrite KEY=/SearchPattern/ReplacePattern\n");
@@ -110,6 +116,7 @@ void log2journal_command_line_help(const char *name) {
printf(" be used between the search pattern and the replacement pattern.\n");
printf(" The search pattern is a PCRE2 regular expression, and the replacement\n");
printf(" pattern supports literals and named capture groups from the search pattern.\n");
+ printf("\n");
printf(" Example:\n");
printf(" --rewrite DATE=/^(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})$/\n");
printf(" ${day}/${month}/${year}\n");
@@ -117,76 +124,109 @@ void log2journal_command_line_help(const char *name) {
printf("\n");
printf(" Only one rewrite rule is applied per key; the sequence of rewrites stops\n");
printf(" for the key once a rule matches it. This allows providing a sequence of\n");
- printf(" independent rewriting rules for the same key, matching the different values\n");
- printf(" the key may get, and also provide a catch-all rewrite rule at the end of the\n");
- printf(" sequence for setting the key value if no other rule matched it.\n");
+ printf(" independent rewriting rules for the same key, matching the different\n");
+ printf(" values the key may get, and also provide a catch-all rewrite rule at the\n");
+ printf(" end, for setting the key value if no other rule matched it.\n");
+ printf("\n");
+ printf(" Duplication of keys with the values of multiple other keys, combined with\n");
+ printf(" multiple value rewriting rules, allows creating complex rules for adding\n");
+ printf(" new keys, based on the values of existing keys.\n");
printf("\n");
- printf(" The combination of duplicating keys with the values of multiple other keys\n");
- printf(" combined with multiple rewrite rules, allows creating complex rules for\n");
- printf(" rewriting key values.\n");
printf(" Up to %d rewriting rules are allowed.\n", MAX_REWRITES);
printf("\n");
+ printf(" --include PATTERN\n");
+ printf(" Include only keys matching the PCRE2 PATTERN.\n");
+ printf(" Useful when parsing JSON of logfmt logs, to include only the keys given.\n");
+ printf(" The keys are matched after the PREFIX has been added to them.\n");
+ printf("\n");
+ printf(" --exclude PATTERN\n");
+ printf(" Exclude the keys matching the PCRE2 PATTERN.\n");
+ printf(" Useful when parsing JSON of logfmt logs, to exclude some of the keys given.\n");
+ printf(" The keys are matched after the PREFIX has been added to them.\n");
+ printf("\n");
+ printf(" When both include and exclude patterns are set and both match a key,\n");
+ printf(" exclude wins and the key will not be added, like a pipeline, we first\n");
+ printf(" include it and then exclude it.\n");
+ printf("\n");
printf(" --prefix PREFIX\n");
- printf(" Prefix all JSON or logfmt fields with PREFIX.\n");
+ printf(" Prefix all fields with PREFIX. The PREFIX is added before processing\n");
+ printf(" duplications, renames and rewrites, so that the extracted keys have to\n");
+ printf(" be matched with the PREFIX in them.\n");
+ printf(" PREFIX is assumed to be systemd-journal friendly.\n");
printf("\n");
printf(" --rename NEW=OLD\n");
printf(" Rename fields, before rewriting their values.\n");
+ printf("\n");
printf(" Up to %d renaming rules are allowed.\n", MAX_RENAMES);
printf("\n");
- printf(" -h, --help\n");
+ printf(" -h, or --help\n");
printf(" Display this help and exit.\n");
printf("\n");
printf(" PATTERN\n");
printf(" PATTERN should be a valid PCRE2 regular expression.\n");
printf(" RE2 regular expressions (like the ones usually used in Go applications),\n");
printf(" are usually valid PCRE2 patterns too.\n");
- printf(" Regular expressions without named groups are evaluated but their matches\n");
- printf(" are not added to the output.\n");
+ printf(" Sub-expressions without named groups are evaluated, but their matches are\n");
+ printf(" not added to the output.\n");
printf("\n");
printf(" JSON mode\n");
printf(" JSON mode is enabled when the pattern is set to: json\n");
printf(" Field names are extracted from the JSON logs and are converted to the\n");
printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n");
- printf(" Prefixing is enabled in this mode.\n");
+ printf("\n");
printf(" logfmt mode\n");
printf(" logfmt mode is enabled when the pattern is set to: logfmt\n");
printf(" Field names are extracted from the logfmt logs and are converted to the\n");
printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n");
- printf(" Prefixing is enabled in this mode.\n");
printf("\n");
printf("\n");
printf("The program accepts all parameters as both --option=value and --option value.\n");
printf("\n");
- printf("The maximum line length accepted is %d characters.\n", MAX_LINE_LENGTH);
- printf("The maximum number of fields in the PCRE2 pattern is %d.\n", OVECCOUNT / 3);
+ printf("The maximum log line length accepted is %d characters.\n", MAX_LINE_LENGTH);
printf("\n");
printf("PIPELINE AND SEQUENCE OF PROCESSING\n");
printf("\n");
printf("This is a simple diagram of the pipeline taking place:\n");
- printf("\n");
- printf(" +---------------------------------------------------+\n");
- printf(" | INPUT |\n");
- printf(" +---------------------------------------------------+\n");
- printf(" v v\n");