author    Costa Tsaousis <costa@netdata.cloud>  2023-12-03 17:00:51 +0200
committer GitHub <noreply@github.com>           2023-12-03 15:00:51 +0000
commit    fc9b5170c6301366bd66d9b0766850a0ed8dc807 (patch)
tree      7b2596498725a161f5ae9ed7943dc16de799a4f1
parent    48b2d1609ce4266f11d84b4a3ae269aad07650b9 (diff)
log2journal improvements 5 (#16519)
* added ${LINE} variable; added default config
* prefer single quotes in yaml to avoid interference from yaml escaping
* simple_hashtable now supports deletions
* simple hashtable now supports setting entries with NULL values
* hashtable implementation now has sorting option to maintain a sorted list of the items
* multiple hashtables with type checking
* added comments
* still incomplete yaml parser
* fixes and cleanup
-rw-r--r--  collectors/log2journal/Makefile.am                          1
-rw-r--r--  collectors/log2journal/README.md                            7
-rw-r--r--  collectors/log2journal/log2journal-help.c                   2
-rw-r--r--  collectors/log2journal/log2journal-json.c                  75
-rw-r--r--  collectors/log2journal/log2journal-params.c                27
-rw-r--r--  collectors/log2journal/log2journal-yaml.c                   2
-rw-r--r--  collectors/log2journal/log2journal.c                      107
-rw-r--r--  collectors/log2journal/log2journal.d/default.yaml          15
-rw-r--r--  collectors/log2journal/log2journal.d/nginx-combined.yaml   38
-rw-r--r--  collectors/log2journal/log2journal.d/nginx-json.yaml       78
-rw-r--r--  collectors/log2journal/log2journal.h                       66
-rw-r--r--  collectors/log2journal/tests.d/default.output              20
-rw-r--r--  collectors/log2journal/tests.d/full.output                 24
-rw-r--r--  collectors/log2journal/tests.d/full.yaml                   12
-rwxr-xr-x  collectors/log2journal/tests.sh                            22
-rw-r--r--  libnetdata/facets/facets.c                                 69
-rw-r--r--  libnetdata/simple_hashtable.h                             372
17 files changed, 684 insertions, 253 deletions
diff --git a/collectors/log2journal/Makefile.am b/collectors/log2journal/Makefile.am
index 578757fc38..b13d2160b3 100644
--- a/collectors/log2journal/Makefile.am
+++ b/collectors/log2journal/Makefile.am
@@ -13,4 +13,5 @@ log2journalconfigdir=$(libconfigdir)/log2journal.d
dist_log2journalconfig_DATA = \
log2journal.d/nginx-combined.yaml \
log2journal.d/nginx-json.yaml \
+ log2journal.d/default.yaml \
$(NULL)
diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md
index 2747142d68..bb48378803 100644
--- a/collectors/log2journal/README.md
+++ b/collectors/log2journal/README.md
@@ -2,7 +2,7 @@
`log2journal` and `systemd-cat-native` can be used to convert a structured log file, such as the ones generated by web servers, into `systemd-journal` entries.
-By combining these tools, together with the usual UNIX shell tools you can create advanced log processing pipelines sending any kind of structured text logs to systemd-journald. This is a simple, but powerful and efficient way to handle log processing.
+By combining these tools you can create advanced log processing pipelines sending any kind of structured text logs to systemd-journald. This is a simple, but powerful and efficient way to handle log processing.
The process involves the usual piping of shell commands to get and process the log files in real time.
@@ -27,6 +27,11 @@ Let's see the steps:
```
3. `systemd-cat-native` is a Netdata program. It can send the logs to a local `systemd-journald` (journal namespaces supported), or to a remote `systemd-journal-remote`.
+
+## YAML configuration
+
+
+
## Real-life example
We have an nginx server logging in this format:
diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c
index 67af516df3..a20615c3c2 100644
--- a/collectors/log2journal/log2journal-help.c
+++ b/collectors/log2journal/log2journal-help.c
@@ -60,7 +60,7 @@ void log_job_command_line_help(const char *name) {
printf(" --file /path/to/file.yaml or -f /path/to/file.yaml\n");
printf(" Read yaml configuration file for instructions.\n");
printf("\n");
- printf(" --config CONFIG_NAME\n");
+ printf(" --config CONFIG_NAME or -c CONFIG_NAME\n");
printf(" Run with the internal configuration named CONFIG_NAME.\n");
printf(" Available internal configs:\n");
printf("\n");
diff --git a/collectors/log2journal/log2journal-json.c b/collectors/log2journal/log2journal-json.c
index 41f893abc5..2ca294e4db 100644
--- a/collectors/log2journal/log2journal-json.c
+++ b/collectors/log2journal/log2journal-json.c
@@ -167,7 +167,7 @@ static inline bool json_parse_number(LOG_JSON_STATE *js) {
}
}
-static bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) {
+static inline bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) {
if (codepoint <= 0x7F) {
// 1-byte sequence
if (*remaining < 2) return false; // +1 for the null
@@ -205,6 +205,56 @@ static bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) {
return true;
}
+size_t parse_surrogate(const char *s, char *d, size_t *remaining) {
+ if (s[0] != '\\' || (s[1] != 'u' && s[1] != 'U')) {
+ return 0; // Not a valid Unicode escape sequence
+ }
+
+ char hex[9] = {0}; // Buffer for the hexadecimal value
+ unsigned codepoint;
+
+ if (s[1] == 'u') {
+ // Handle \uXXXX
+ if (!isxdigit(s[2]) || !isxdigit(s[3]) || !isxdigit(s[4]) || !isxdigit(s[5])) {
+ return 0; // Not a valid \uXXXX sequence
+ }
+
+ hex[0] = s[2];
+ hex[1] = s[3];
+ hex[2] = s[4];
+ hex[3] = s[5];
+ codepoint = (unsigned)strtoul(hex, NULL, 16);
+
+ if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
+ // Possible start of surrogate pair
+ if (s[6] == '\\' && s[7] == 'u' && isxdigit(s[8]) && isxdigit(s[9]) &&
+ isxdigit(s[10]) && isxdigit(s[11])) {
+ // Valid low surrogate
+ unsigned low_surrogate = strtoul(&s[8], NULL, 16);
+ if (low_surrogate < 0xDC00 || low_surrogate > 0xDFFF) {
+ return 0; // Invalid low surrogate
+ }
+ codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low_surrogate - 0xDC00);
+ return encode_utf8(codepoint, &d, remaining) ? 12 : 0; // \uXXXX\uXXXX
+ }
+ }
+
+ // Single \uXXXX
+ return encode_utf8(codepoint, &d, remaining) ? 6 : 0;
+ }
+ else {
+ // Handle \UXXXXXXXX
+ for (int i = 2; i < 10; i++) {
+ if (!isxdigit(s[i])) {
+ return 0; // Not a valid \UXXXXXXXX sequence
+ }
+ hex[i - 2] = s[i];
+ }
+ codepoint = (unsigned)strtoul(hex, NULL, 16);
+ return encode_utf8(codepoint, &d, remaining) ? 10 : 0; // \UXXXXXXXX
+ }
+}
+
static inline void copy_newline(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) {
if(*remaining > 3) {
*(*d)++ = '\\';
@@ -258,18 +308,12 @@ static inline bool json_parse_string(LOG_JSON_STATE *js) {
s++;
break;
- case 'u':
- if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4])) {
- char b[5] = {
- [0] = s[1],
- [1] = s[2],
- [2] = s[3],
- [3] = s[4],
- [4] = '\0',
- };
- unsigned codepoint = strtoul(b, NULL, 16);
- if(encode_utf8(codepoint, &d, &remaining)) {
- s += 5;
+ case 'u': {
+ size_t old_remaining = remaining;
+ size_t consumed = parse_surrogate(s - 1, d, &remaining);
+ if (consumed > 0) {
+ s += consumed - 1; // -1 because we already incremented s after '\\'
+ d += old_remaining - remaining;
continue;
}
else {
@@ -278,11 +322,6 @@ static inline bool json_parse_string(LOG_JSON_STATE *js) {
c = *s++;
}
}
- else {
- *d++ = '\\';
- remaining--;
- c = *s++;
- }
break;
default:
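For reference, the surrogate-pair arithmetic used by `parse_surrogate()` above maps a high surrogate (`0xD800`–`0xDBFF`) plus a low surrogate (`0xDC00`–`0xDFFF`) to a single code point above `U+FFFF`. A minimal standalone sketch (not part of the commit) reproducing the same formula:

```c
#include <stdio.h>

// Combine a UTF-16 surrogate pair into a single Unicode code point,
// using the same formula as parse_surrogate() above.
static unsigned combine_surrogates(unsigned high, unsigned low) {
    return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
}

int main(void) {
    // The JSON escape \uD83D\uDE00 encodes U+1F600 (a 4-byte UTF-8 emoji).
    printf("U+%X\n", combine_surrogates(0xD83D, 0xDE00)); // prints U+1F600
    return 0;
}
```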
diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c
index ca4e2f5860..a7bb3e263c 100644
--- a/collectors/log2journal/log2journal-params.c
+++ b/collectors/log2journal/log2journal-params.c
@@ -6,22 +6,25 @@
void log_job_init(LOG_JOB *jb) {
memset(jb, 0, sizeof(*jb));
- simple_hashtable_init(&jb->hashtable, 32);
+ simple_hashtable_init_KEY(&jb->hashtable, 32);
+ hashed_key_set(&jb->line.key, "LINE");
}
-static void simple_hashtable_cleanup_allocated(SIMPLE_HASHTABLE *ht) {
- for(size_t i = 0; i < ht->size ;i++) {
- HASHED_KEY *k = ht->hashtable[i].data;
+static void simple_hashtable_cleanup_allocated_keys(SIMPLE_HASHTABLE_KEY *ht) {
+ SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, sl, _KEY) {
+ HASHED_KEY *k = SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(sl);
if(k && k->flags & HK_HASHTABLE_ALLOCATED) {
- hashed_key_cleanup(k);
- freez(k);
- ht->hashtable[i].data = NULL;
- ht->hashtable[i].hash = 0;
+ // the order of these statements is important!
+ simple_hashtable_del_slot_KEY(ht, sl); // remove any references to k
+ hashed_key_cleanup(k); // cleanup the internals of k
+ freez(k); // free k
}
}
}
void log_job_cleanup(LOG_JOB *jb) {
+ hashed_key_cleanup(&jb->line.key);
+
if(jb->prefix) {
freez((void *) jb->prefix);
jb->prefix = NULL;
@@ -47,8 +50,8 @@ void log_job_cleanup(LOG_JOB *jb) {
txt_cleanup(&jb->rewrites.tmp);
txt_cleanup(&jb->filename.current);
- simple_hashtable_cleanup_allocated(&jb->hashtable);
- simple_hashtable_free(&jb->hashtable);
+ simple_hashtable_cleanup_allocated_keys(&jb->hashtable);
+ simple_hashtable_destroy_KEY(&jb->hashtable);
// remove references to everything else, to reveal them in valgrind
memset(jb, 0, sizeof(*jb));
@@ -346,7 +349,7 @@ bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) {
if (!yaml_parse_file(value, jb))
return false;
}
- else if (strcmp(param, "--config") == 0) {
+ else if (strcmp(param, "-c") == 0 || strcmp(param, "--config") == 0) {
if (!yaml_parse_config(value, jb))
return false;
}
@@ -392,7 +395,7 @@ bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) {
// Check if a pattern is set and exactly one pattern is specified
if (!jb->pattern) {
- log2stderr("Error: Pattern not specified.");
+ log2stderr("Warning: pattern not specified. Try the default config with: -c default");
log_job_command_line_help(argv[0]);
return false;
}
diff --git a/collectors/log2journal/log2journal-yaml.c b/collectors/log2journal/log2journal-yaml.c
index 1b9e823cb7..862e7bf4b7 100644
--- a/collectors/log2journal/log2journal-yaml.c
+++ b/collectors/log2journal/log2journal-yaml.c
@@ -852,7 +852,7 @@ static bool needs_quotes_in_yaml(const char *str) {
static void yaml_print_node(const char *key, const char *value, size_t depth, bool dash) {
if(depth > 10) depth = 10;
- const char *quote = "\"";
+ const char *quote = "'";
const char *second_line = NULL;
if(value && strchr(value, '\n')) {
diff --git a/collectors/log2journal/log2journal.c b/collectors/log2journal/log2journal.c
index 5dd98d6837..c3204939cd 100644
--- a/collectors/log2journal/log2journal.c
+++ b/collectors/log2journal/log2journal.c
@@ -61,37 +61,14 @@ const char journal_key_characters_map[256] = {
// ----------------------------------------------------------------------------
-// Function to insert a key into the sorted.keys array while keeping it sorted
-void log_job_add_key_sorted(LOG_JOB *jb, HASHED_KEY *newKey) {
- size_t i, j;
-
- // Find the position to insert the new key based on lexicographic order
- for (i = 0; i < jb->sorted.used; i++) {
- if (strcmp(newKey->key, jb->sorted.keys[i]->key) < 0) {
- break;
- }
- }
-
- // Shift elements to the right to make space for the new key
- for (j = jb->sorted.used; j > i; j--) {
- jb->sorted.keys[j] = jb->sorted.keys[j - 1];
- }
-
- // Insert the new key at the correct position
- jb->sorted.keys[i] = newKey;
- jb->sorted.used++;
-}
-
static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) {
if(k->flags & HK_HASHTABLE_ALLOCATED)
return k;
if(!k->hashtable_ptr) {
HASHED_KEY *ht_key;
- SIMPLE_HASHTABLE_SLOT *slot = simple_hashtable_get_slot(&jb->hashtable, k->hash, true);
- if(slot->data) {
- ht_key = slot->data;
-
+ SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&jb->hashtable, k->hash, true);
+ if((ht_key = SIMPLE_HASHTABLE_SLOT_DATA(slot))) {
if(!(ht_key->flags & HK_COLLISION_CHECKED)) {
ht_key->flags |= HK_COLLISION_CHECKED;
@@ -109,11 +86,7 @@ static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) {
ht_key->hash = k->hash;
ht_key->flags = HK_HASHTABLE_ALLOCATED;
- slot->hash = ht_key->hash;
- slot->data = ht_key;
- jb->hashtable.used++;
-
- log_job_add_key_sorted(jb, ht_key);
+ simple_hashtable_set_slot_KEY(&jb->hashtable, slot, ht_key->hash, ht_key);
}
k->hashtable_ptr = ht_key;
@@ -158,18 +131,25 @@ static inline void validate_key(LOG_JOB *jb __maybe_unused, HASHED_KEY *k) {
// ----------------------------------------------------------------------------
-static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp, char *dst, size_t dst_size) {
+static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k __maybe_unused, REPLACE_PATTERN *rp, char *dst, size_t dst_size) {
size_t remaining = dst_size;
char *copy_to = dst;
for(REPLACE_NODE *node = rp->nodes; node != NULL && remaining > 1; node = node->next) {
if(node->is_variable) {
- HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
- if(ktmp->value.len) {
- size_t copied = copy_to_buffer(copy_to, remaining, ktmp->value.txt, ktmp->value.len);
+ if(hashed_keys_match(&node->name, &jb->line.key)) {
+ size_t copied = copy_to_buffer(copy_to, remaining, jb->line.trimmed, jb->line.trimmed_len);
copy_to += copied;
remaining -= copied;
}
+ else {
+ HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+ if(ktmp->value.len) {
+ size_t copied = copy_to_buffer(copy_to, remaining, ktmp->value.txt, ktmp->value.len);
+ copy_to += copied;
+ remaining -= copied;
+ }
+ }
}
else {
size_t copied = copy_to_buffer(copy_to, remaining, node->name.key, node->name.len);
@@ -189,9 +169,14 @@ static inline void replace_evaluate(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN
for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) {
if(node->is_variable) {
- HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
- if(ktmp->value.len)
- txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len);
+ if(hashed_keys_match(&node->name, &jb->line.key))
+ txt_expand_and_append(&ht_key->value, jb->line.trimmed, jb->line.trimmed_len);
+
+ else {
+ HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+ if(ktmp->value.len)
+ txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len);
+ }
}
else
txt_expand_and_append(&ht_key->value, node->name.key, node->name.len);
@@ -220,9 +205,14 @@ static inline void replace_evaluate_from_pcre2(LOG_JOB *jb, HASHED_KEY *k, REPLA
txt_expand_and_append(&jb->rewrites.tmp, k->value.txt + start_offset, length);
}
else {
- HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
- if(ktmp->value.len)
- txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len);
+ if(hashed_keys_match(&node->name, &jb->line.key))
+ txt_expand_and_append(&jb->rewrites.tmp, jb->line.trimmed, jb->line.trimmed_len);
+
+ else {
+ HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+ if(ktmp->value.len)
+ txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len);
+ }
}
}
else {
@@ -299,15 +289,6 @@ static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char
printf("\n");
}
-static inline void send_key_value_and_rewrite(LOG_JOB *jb, HASHED_KEY *key, const char *value, size_t len) {
- HASHED_KEY *ht_key = get_key_from_hashtable(jb, key);
-
- txt_replace(&ht_key->value, value, len);
- ht_key->flags |= HK_VALUE_FROM_LOG;
-
-// fprintf(stderr, "SET %s=%.*s\n", ht_key->key, (int)ht_key->value.len, ht_key->value.txt);
-}
-
inline void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len) {
HASHED_KEY *ht_key = get_key_from_hashtable_with_char_ptr(jb, key);
HASHED_KEY *nk = rename_key(jb, ht_key);
@@ -341,8 +322,8 @@ static inline void log_job_process_rewrites(LOG_JOB *jb) {
}
static inline void send_all_fields(LOG_JOB *jb) {
- for(size_t i = 0; i < jb->sorted.used ;i++) {
- HASHED_KEY *k = jb->sorted.keys[i];
+ SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(&jb->hashtable, kptr, HASHED_KEY, _KEY) {
+ HASHED_KEY *k = SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(kptr);
if(k->value.len) {
// the key exists and has some value
@@ -496,11 +477,13 @@ int log_job_run(LOG_JOB *jb) {
if(strcmp(jb->pattern, "json") == 0) {
json = json_parser_create(jb);
+ // never fails
}
else if(strcmp(jb->pattern, "logfmt") == 0) {
logfmt = logfmt_parser_create(jb);
+ // never fails
}
- else {
+ else if(strcmp(jb->pattern, "none") != 0) {
pcre2 = pcre2_parser_create(jb);
if(pcre2_has_error(pcre2)) {
log2stderr("%s", pcre2_parser_error(pcre2));
@@ -509,21 +492,25 @@ int log_job_run(LOG_JOB *jb) {
}
}
- char buffer[MAX_LINE_LENGTH];
- char *line;
- size_t len;
+ jb->line.buffer = mallocz(MAX_LINE_LENGTH + 1);
+ jb->line.size = MAX_LINE_LENGTH + 1;
+ jb->line.trimmed_len = 0;
+ jb->line.trimmed = jb->line.buffer;
+
+ while ((jb->line.trimmed = get_next_line(jb, (char *)jb->line.buffer, jb->line.size, &jb->line.trimmed_len))) {
+ const char *line = jb->line.trimmed;
+ size_t len = jb->line.trimmed_len;
- while ((line = get_next_line(jb, buffer, sizeof(buffer), &len))) {
if(jb_switched_filename(jb, line, len))
continue;
- bool line_is_matched;
+ bool line_is_matched = true;
if(json)
line_is_matched = json_parse_document(json, line);
else if(logfmt)
line_is_matched = logfmt_parse_document(logfmt, line);
- else
+ else if(pcre2)
line_is_matched = pcre2_parse_document(pcre2, line, len);
if(!line_is_matched) {
@@ -531,7 +518,7 @@ int log_job_run(LOG_JOB *jb) {
log2stderr("%s", json_parser_error(json));
else if(logfmt)
log2stderr("%s", logfmt_parser_error(logfmt));
- else
+ else if(pcre2)
log2stderr("%s", pcre2_parser_error(pcre2));
if(!jb_send_unmatched_line(jb, line))
@@ -557,6 +544,8 @@ int log_job_run(LOG_JOB *jb) {
else if(pcre2)
pcre2_parser_destroy(pcre2);
+ freez((void *)jb->line.buffer);
+
return 0;
}
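All three replace evaluators changed above (`replace_evaluate_to_buffer()`, `replace_evaluate()`, `replace_evaluate_from_pcre2()`) now special-case the new `${LINE}` variable before falling back to a hashtable lookup. A condensed, illustrative sketch of the rule as the TEXT-based evaluators apply it (this helper does not exist in the commit; the calls it uses do):

```c
// Variable resolution order introduced by this commit:
//   ${LINE}  -> the whole trimmed input line (jb->line.trimmed)
//   ${OTHER} -> hashtable lookup; appended only when it has a value
static inline void append_variable(LOG_JOB *jb, TEXT *out, REPLACE_NODE *node) {
    if(hashed_keys_match(&node->name, &jb->line.key))
        txt_expand_and_append(out, jb->line.trimmed, jb->line.trimmed_len);
    else {
        HASHED_KEY *k = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
        if(k->value.len)
            txt_expand_and_append(out, k->value.txt, k->value.len);
    }
}
```

Extracting a helper like this would de-duplicate the branch that is currently repeated in each evaluator.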
diff --git a/collectors/log2journal/log2journal.d/default.yaml b/collectors/log2journal/log2journal.d/default.yaml
new file mode 100644
index 0000000000..d41efc4abb
--- /dev/null
+++ b/collectors/log2journal/log2journal.d/default.yaml
@@ -0,0 +1,15 @@
+pattern: none
+
+filename:
+ key: LOG_FILENAME
+
+inject:
+ - key: MESSAGE
+ value: '${LINE}' # a special variable that resolves to the whole line read from the log
+
+ - key: PRIORITY
+ value: 6 # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
+
+ - key: SYSLOG_IDENTIFIER
+ value: log2journal # the name of the application sending the logs
+
diff --git a/collectors/log2journal/log2journal.d/nginx-combined.yaml b/collectors/log2journal/log2journal.d/nginx-combined.yaml
index 00610f4b7c..003c774d7b 100644
--- a/collectors/log2journal/log2journal.d/nginx-combined.yaml
+++ b/collectors/log2journal/log2journal.d/nginx-combined.yaml
@@ -37,15 +37,15 @@ rename:
# Inject constant fields into the journal logs.
inject:
- key: SYSLOG_IDENTIFIER
- value: "nginx-log"
+ value: nginx-log
# inject PRIORITY is a duplicate of NGINX_STATUS
- - key: "PRIORITY"
- value: "${NGINX_STATUS}"
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
# Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
- - key: "NGINX_STATUS_FAMILY"
- value: "${NGINX_STATUS}"
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}'
# Rewrite the value of fields (including the duplicated ones).
# The search pattern can have named groups, and the replace pattern can use
@@ -53,30 +53,30 @@ inject:
rewrite:
# PRIORITY is a duplicate of NGINX_STATUS
# Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
- - key: "PRIORITY"
- match: "^[123]"
+ - key: PRIORITY
+ match: '^[123]'
value: 6
- - key: "PRIORITY"
- match: "^4"
+ - key: PRIORITY
+ match: '^4'
value: 5
- - key: "PRIORITY"
- match: "^5"
+ - key: PRIORITY
+ match: '^5'
value: 3
- - key: "PRIORITY"
- match: ".*"
+ - key: PRIORITY
+ match: '.*'
value: 4
# NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
- - key: "NGINX_STATUS_FAMILY"
- match: "^(?<first_digit>[1-5])"
- value: "${first_digit}xx"
+ - key: NGINX_STATUS_FAMILY
+ match: '^(?<first_digit>[1-5])'
+ value: '${first_digit}xx'
- - key: "NGINX_STATUS_FAMILY"
- match: ".*"
- value: "UNKNOWN"
+ - key: NGINX_STATUS_FAMILY
+ match: '.*'
+ value: 'UNKNOWN'
# Control what to do when input logs do not match the main PCRE2 pattern.
unmatched:
diff --git a/collectors/log2journal/log2journal.d/nginx-json.yaml b/collectors/log2journal/log2journal.d/nginx-json.yaml
index 1ad702da7c..7fdc4be584 100644
--- a/collectors/log2journal/log2journal.d/nginx-json.yaml
+++ b/collectors/log2journal/log2journal.d/nginx-json.yaml
@@ -12,7 +12,7 @@ filename:
key: NGINX_LOG_FILENAME
filter:
- exclude: "NGINX_BINARY_REMOTE_ADDR"
+ exclude: '^(NGINX_BINARY_REMOTE_ADDR)$'
rename:
- new_key: MESSAGE
@@ -69,15 +69,15 @@ rename:
# Inject constant fields into the journal logs.
inject:
- key: SYSLOG_IDENTIFIER
- value: "nginx-log"
+ value: nginx-log
# inject PRIORITY is a duplicate of NGINX_STATUS
- - key: "PRIORITY"
- value: "${NGINX_STATUS}"
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
# Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
- - key: "NGINX_STATUS_FAMILY"
- value: "${NGINX_STATUS}"
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}'
# Rewrite the value of fields (including the duplicated ones).
@@ -87,69 +87,69 @@ rewrite:
# a ? means it has query string, everything else means it does not
- key: NGINX_HAS_QUERY_STRING
match: '^\?$'
- value: "yes"
+ value: yes
- key: NGINX_HAS_QUERY_STRING
- match: ".*"
- value: "no"
+ match: '.*'
+ value: no
# 'on' means it was HTTPS, everything else means it was not
- key: NGINX_HTTPS
- match: "^on$"
- value: "yes"
+ match: '^on$'
+ value: yes
- key: NGINX_HTTPS
- match: ".*"
- value: "no"
+ match: '.*'
+ value: no
# 'p' means it was pipelined, everything else means it was not
- key: NGINX_PIPELINED
- match: "^p$"
- value: "yes"
+ match: '^p$'
+ value: yes
- key: NGINX_PIPELINED
- match: ".*"
- value: "no"
+ match: '.*'
+ value: no
# zero means client sent a certificate and it was verified, non-zero means otherwise
- key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY
- match: "^0$"
- value: "yes"
+ match: '^0$'
+ value: yes
- key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY
- match: ".*"
- value: "no"
+ match: '.*'
+ value: no
# 'OK' means request completed, everything else means it didn't
- key: NGINX_REQUEST_COMPLETION
- match: "^OK$"
- value: "completed"
+ match: '^OK$'
+ value: 'completed'
- key: NGINX_REQUEST_COMPLETION
- match: ".*"
- value: "not completed"
+ match: '.*'
+ value: 'not completed'
# PRIORITY is a duplicate of NGINX_STATUS
# Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
- - key: "PRIORITY"
- match: "^[123]"
+ - key: PRIORITY
+ match: '^[123]'
value: 6
- - key: "PRIORITY"
- match: "^4"
+ - key: PRIORITY
+ match: '^4'
value: 5
- - key: "PRIORITY"
- match: "^5"
+ - key: PRIORITY
+ match: '^5'
value: 3
- - key: "PRIORITY"
- match: ".*"
+ - key: PRIORITY
+ match: '.*'
value: 4
# NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
- - key: "NGINX_STATUS_FAMILY"
- match: "^(?<first_digit>[1-5])"
- value: "${first_digit}xx"
+ - key: NGINX_STATUS_FAMILY
+ match: '^(?<first_digit>[1-5])'
+ value: '${first_digit}xx'
- - key: "NGINX_STATUS_FAMILY"
- match: ".*"
- value: "UNKNOWN"
+ - key: NGINX_STATUS_FAMILY
+ match: '.*'
+ value: 'UNKNOWN'
# Control what to do when input logs do not match the main PCRE2 pattern.
unmatched:
diff --git a/collectors/log2journal/log2journal.h b/collectors/log2journal/log2journal.h
index f34d3db177..834a5b135d 100644
--- a/collectors/log2journal/log2journal.h
+++ b/collectors/log2journal/log2journal.h
@@ -13,6 +13,7 @@
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
+#include <math.h>
#include <stdarg.h>
#include <assert.h>
@@ -42,11 +43,23 @@ static inline void *mallocz(size_t size) {
}
static inline void *callocz(size_t elements, size_t size) {
- void *ptr = mallocz(elements * size);
- memset(ptr, 0, elements * size);
+ void *ptr = calloc(elements, size);
+ if (!ptr) {
+ log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size);
+ exit(EXIT_FAILURE);
+ }
return ptr;
}
+static inline void *reallocz(void *ptr, size_t size) {
+ void *new_ptr = realloc(ptr, size);
+ if (!new_ptr) {
+ log2stderr("Fatal Error: Memory reallocation failed. Requested size: %zu bytes.", size);
+ exit(EXIT_FAILURE);
+ }
+ return new_ptr;
+}
+
static inline char *strdupz(const char *s) {
char *ptr = strdup(s);
if (!ptr) {
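A side benefit of routing `callocz()` through `calloc()` instead of `mallocz(elements * size)`: `calloc` checks the `elements * size` multiplication for overflow, whereas the manual product wraps silently. A standalone illustration (not from the commit):

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void) {
    size_t elements = SIZE_MAX / 2 + 1, size = 4;
    // calloc() reports failure here (the multiplication overflows)...
    void *p = calloc(elements, size);
    printf("calloc -> %p\n", p); // NULL
    // ...whereas the naive product wraps around to 0, which malloc()
    // would happily "satisfy" with a zero-sized allocation.
    printf("elements * size wraps to %zu\n", elements * size);
    free(p);
    return 0;
}
```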
@@ -74,7 +87,6 @@ static inline void freez(void *ptr) {
#define XXH_INLINE_ALL
#include "../../libnetdata/xxhash.h"
-#include "../../libnetdata/simple_hashtable.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
@@ -83,13 +95,29 @@ static inline void freez(void *ptr) {
#include <yaml.h>
#endif
+// ----------------------------------------------------------------------------
+// hashtable for HASHED_KEY
+
+// cleanup hashtable defines
+#undef SIMPLE_HASHTABLE_SORT_FUNCTION
+#undef SIMPLE_HASHTABLE_VALUE_TYPE
+#undef SIMPLE_HASHTABLE_NAME
+#undef NETDATA_SIMPLE_HASHTABLE_H
+
+struct hashed_key;
+static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2);
+#define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys
+#define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key
+#define SIMPLE_HASHTABLE_NAME _KEY
+#include "../../libnetdata/simple_hashtable.h"
+
+// ----------------------------------------------------------------------------
+
#define MAX_OUTPUT_KEYS 1024
#define MAX_LINE_LENGTH (1024 * 1024)
-#define MAX_KEY_DUPS (MAX_OUTPUT_KEYS / 2)
#define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2)
#define MAX_REWRITES (MAX_OUTPUT_KEYS / 2)
#define MAX_RENAMES (MAX_OUTPUT_KEYS / 2)
-#define MAX_KEY_DUPS_KEYS 20
#define JOURNAL_MAX_KEY_LEN 64 // according to systemd-journald
#define JOURNAL_MAX_VALUE_LEN (48 * 1024) // according to systemd-journald
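The block above is the instantiation pattern behind "multiple hashtables with type checking": each inclusion of `simple_hashtable.h` with a distinct `SIMPLE_HASHTABLE_NAME` generates a separate, suffix-named, type-checked API. A hedged sketch of how a second table could be instantiated (the `_ITEM` names here are hypothetical; the generated-function pattern is inferred from the `_KEY` calls used throughout this commit):

```c
// Undefine the previous instantiation's parameters first, as done above.
#undef SIMPLE_HASHTABLE_SORT_FUNCTION
#undef SIMPLE_HASHTABLE_VALUE_TYPE
#undef SIMPLE_HASHTABLE_NAME
#undef NETDATA_SIMPLE_HASHTABLE_H

// Hypothetical second table holding `struct my_item` values; this would
// generate SIMPLE_HASHTABLE_ITEM, SIMPLE_HASHTABLE_SLOT_ITEM,
// simple_hashtable_init_ITEM(), simple_hashtable_get_slot_ITEM(),
// simple_hashtable_set_slot_ITEM(), simple_hashtable_del_slot_ITEM()
// and simple_hashtable_destroy_ITEM().
#define SIMPLE_HASHTABLE_VALUE_TYPE struct my_item
#define SIMPLE_HASHTABLE_NAME _ITEM
#include "../../libnetdata/simple_hashtable.h"
```

Because the value type is baked into the generated functions, passing a `struct my_item` to the `_KEY` table (or vice versa) fails at compile time.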
@@ -178,13 +206,7 @@ static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) {
if(new_size < t->size * 2)
new_size = t->size * 2;
- char *b = mallocz(new_size);
- if(t->txt) {
- memcpy(b, t->txt, t->len);
- freez(t->txt);
- }
-
- t->txt = b;
+ t->txt = reallocz(t->txt, new_size);
t->size = new_size;
}
@@ -213,9 +235,6 @@ typedef enum __attribute__((__packed__)) {
HK_RENAMES_CHECKED = (1 << 4), // we checked once if there are renames on this key
HK_HAS_RENAMES = (1 << 5), // and we found there is a rename rule related to it
- HK_DUPS_CHECKED = (1 << 6), // we checked once if there are duplications for this key
- HK_HAS_DUPS = (1 << 7), // and we found there are duplication related to it
-
// ephemeral flags - they are unset at the end of each log line
HK_VALUE_FROM_LOG = (1 << 14), // the value of this key has been read from the log (or from injection, duplication)
@@ -268,6 +287