diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-12-03 17:00:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-03 15:00:51 +0000 |
commit | fc9b5170c6301366bd66d9b0766850a0ed8dc807 (patch) | |
tree | 7b2596498725a161f5ae9ed7943dc16de799a4f1 | |
parent | 48b2d1609ce4266f11d84b4a3ae269aad07650b9 (diff) |
log2journal improvements 5 (#16519)
* added ${LINE} variable; added default config
* prefer single quotes in yaml to avoid interference from yaml escaping
* simple_hashtable now supports deletions
* simple_hashtable now supports setting entries with NULL values
* hashtable implementation now has a sorting option to maintain a sorted list of the items
* multiple hashtables with type checking
* added comments
* still incomplete yaml parser
* fixes and cleanup
-rw-r--r-- | collectors/log2journal/Makefile.am | 1 | ||||
-rw-r--r-- | collectors/log2journal/README.md | 7 | ||||
-rw-r--r-- | collectors/log2journal/log2journal-help.c | 2 | ||||
-rw-r--r-- | collectors/log2journal/log2journal-json.c | 75 | ||||
-rw-r--r-- | collectors/log2journal/log2journal-params.c | 27 | ||||
-rw-r--r-- | collectors/log2journal/log2journal-yaml.c | 2 | ||||
-rw-r--r-- | collectors/log2journal/log2journal.c | 107 | ||||
-rw-r--r-- | collectors/log2journal/log2journal.d/default.yaml | 15 | ||||
-rw-r--r-- | collectors/log2journal/log2journal.d/nginx-combined.yaml | 38 | ||||
-rw-r--r-- | collectors/log2journal/log2journal.d/nginx-json.yaml | 78 | ||||
-rw-r--r-- | collectors/log2journal/log2journal.h | 66 | ||||
-rw-r--r-- | collectors/log2journal/tests.d/default.output | 20 | ||||
-rw-r--r-- | collectors/log2journal/tests.d/full.output | 24 | ||||
-rw-r--r-- | collectors/log2journal/tests.d/full.yaml | 12 | ||||
-rwxr-xr-x | collectors/log2journal/tests.sh | 22 | ||||
-rw-r--r-- | libnetdata/facets/facets.c | 69 | ||||
-rw-r--r-- | libnetdata/simple_hashtable.h | 372 |
17 files changed, 684 insertions, 253 deletions
diff --git a/collectors/log2journal/Makefile.am b/collectors/log2journal/Makefile.am index 578757fc38..b13d2160b3 100644 --- a/collectors/log2journal/Makefile.am +++ b/collectors/log2journal/Makefile.am @@ -13,4 +13,5 @@ log2journalconfigdir=$(libconfigdir)/log2journal.d dist_log2journalconfig_DATA = \ log2journal.d/nginx-combined.yaml \ log2journal.d/nginx-json.yaml \ + log2journal.d/default.yaml \ $(NULL) diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md index 2747142d68..bb48378803 100644 --- a/collectors/log2journal/README.md +++ b/collectors/log2journal/README.md @@ -2,7 +2,7 @@ `log2journal` and `systemd-cat-native` can be used to convert a structured log file, such as the ones generated by web servers, into `systemd-journal` entries. -By combining these tools, together with the usual UNIX shell tools you can create advanced log processing pipelines sending any kind of structured text logs to systemd-journald. This is a simple, but powerful and efficient way to handle log processing. +By combining these tools you can create advanced log processing pipelines sending any kind of structured text logs to systemd-journald. This is a simple, but powerful and efficient way to handle log processing. The process involves the usual piping of shell commands, to get and process the log files in realtime. @@ -27,6 +27,11 @@ Let's see the steps: ``` 3. `systemd-cat-native` is a Netdata program. I can send the logs to a local `systemd-journald` (journal namespaces supported), or to a remote `systemd-journal-remote`. 
+ +## YAML configuration + + + ## Real-life example We have an nginx server logging in this format: diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c index 67af516df3..a20615c3c2 100644 --- a/collectors/log2journal/log2journal-help.c +++ b/collectors/log2journal/log2journal-help.c @@ -60,7 +60,7 @@ void log_job_command_line_help(const char *name) { printf(" --file /path/to/file.yaml or -f /path/to/file.yaml\n"); printf(" Read yaml configuration file for instructions.\n"); printf("\n"); - printf(" --config CONFIG_NAME\n"); + printf(" --config CONFIG_NAME or -c CONFIG_NAME\n"); printf(" Run with the internal configuration named CONFIG_NAME.\n"); printf(" Available internal configs:\n"); printf("\n"); diff --git a/collectors/log2journal/log2journal-json.c b/collectors/log2journal/log2journal-json.c index 41f893abc5..2ca294e4db 100644 --- a/collectors/log2journal/log2journal-json.c +++ b/collectors/log2journal/log2journal-json.c @@ -167,7 +167,7 @@ static inline bool json_parse_number(LOG_JSON_STATE *js) { } } -static bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) { +static inline bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) { if (codepoint <= 0x7F) { // 1-byte sequence if (*remaining < 2) return false; // +1 for the null @@ -205,6 +205,56 @@ static bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) { return true; } +size_t parse_surrogate(const char *s, char *d, size_t *remaining) { + if (s[0] != '\\' || (s[1] != 'u' && s[1] != 'U')) { + return 0; // Not a valid Unicode escape sequence + } + + char hex[9] = {0}; // Buffer for the hexadecimal value + unsigned codepoint; + + if (s[1] == 'u') { + // Handle \uXXXX + if (!isxdigit(s[2]) || !isxdigit(s[3]) || !isxdigit(s[4]) || !isxdigit(s[5])) { + return 0; // Not a valid \uXXXX sequence + } + + hex[0] = s[2]; + hex[1] = s[3]; + hex[2] = s[4]; + hex[3] = s[5]; + codepoint = (unsigned)strtoul(hex, NULL, 16); + + if 
(codepoint >= 0xD800 && codepoint <= 0xDBFF) { + // Possible start of surrogate pair + if (s[6] == '\\' && s[7] == 'u' && isxdigit(s[8]) && isxdigit(s[9]) && + isxdigit(s[10]) && isxdigit(s[11])) { + // Valid low surrogate + unsigned low_surrogate = strtoul(&s[8], NULL, 16); + if (low_surrogate < 0xDC00 || low_surrogate > 0xDFFF) { + return 0; // Invalid low surrogate + } + codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low_surrogate - 0xDC00); + return encode_utf8(codepoint, &d, remaining) ? 12 : 0; // \uXXXX\uXXXX + } + } + + // Single \uXXXX + return encode_utf8(codepoint, &d, remaining) ? 6 : 0; + } + else { + // Handle \UXXXXXXXX + for (int i = 2; i < 10; i++) { + if (!isxdigit(s[i])) { + return 0; // Not a valid \UXXXXXXXX sequence + } + hex[i - 2] = s[i]; + } + codepoint = (unsigned)strtoul(hex, NULL, 16); + return encode_utf8(codepoint, &d, remaining) ? 10 : 0; // \UXXXXXXXX + } +} + static inline void copy_newline(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { if(*remaining > 3) { *(*d)++ = '\\'; @@ -258,18 +308,12 @@ static inline bool json_parse_string(LOG_JSON_STATE *js) { s++; break; - case 'u': - if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4])) { - char b[5] = { - [0] = s[1], - [1] = s[2], - [2] = s[3], - [3] = s[4], - [4] = '\0', - }; - unsigned codepoint = strtoul(b, NULL, 16); - if(encode_utf8(codepoint, &d, &remaining)) { - s += 5; + case 'u': { + size_t old_remaining = remaining; + size_t consumed = parse_surrogate(s - 1, d, &remaining); + if (consumed > 0) { + s += consumed - 1; // -1 because we already incremented s after '\\' + d += old_remaining - remaining; continue; } else { @@ -278,11 +322,6 @@ static inline bool json_parse_string(LOG_JSON_STATE *js) { c = *s++; } } - else { - *d++ = '\\'; - remaining--; - c = *s++; - } break; default: diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c index ca4e2f5860..a7bb3e263c 100644 --- 
a/collectors/log2journal/log2journal-params.c +++ b/collectors/log2journal/log2journal-params.c @@ -6,22 +6,25 @@ void log_job_init(LOG_JOB *jb) { memset(jb, 0, sizeof(*jb)); - simple_hashtable_init(&jb->hashtable, 32); + simple_hashtable_init_KEY(&jb->hashtable, 32); + hashed_key_set(&jb->line.key, "LINE"); } -static void simple_hashtable_cleanup_allocated(SIMPLE_HASHTABLE *ht) { - for(size_t i = 0; i < ht->size ;i++) { - HASHED_KEY *k = ht->hashtable[i].data; +static void simple_hashtable_cleanup_allocated_keys(SIMPLE_HASHTABLE_KEY *ht) { + SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, sl, _KEY) { + HASHED_KEY *k = SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(sl); if(k && k->flags & HK_HASHTABLE_ALLOCATED) { - hashed_key_cleanup(k); - freez(k); - ht->hashtable[i].data = NULL; - ht->hashtable[i].hash = 0; + // the order of these statements is important! + simple_hashtable_del_slot_KEY(ht, sl); // remove any references to n + hashed_key_cleanup(k); // cleanup the internals of n + freez(k); // free n } } } void log_job_cleanup(LOG_JOB *jb) { + hashed_key_cleanup(&jb->line.key); + if(jb->prefix) { freez((void *) jb->prefix); jb->prefix = NULL; @@ -47,8 +50,8 @@ void log_job_cleanup(LOG_JOB *jb) { txt_cleanup(&jb->rewrites.tmp); txt_cleanup(&jb->filename.current); - simple_hashtable_cleanup_allocated(&jb->hashtable); - simple_hashtable_free(&jb->hashtable); + simple_hashtable_cleanup_allocated_keys(&jb->hashtable); + simple_hashtable_destroy_KEY(&jb->hashtable); // remove references to everything else, to reveal them in valgrind memset(jb, 0, sizeof(*jb)); @@ -346,7 +349,7 @@ bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) { if (!yaml_parse_file(value, jb)) return false; } - else if (strcmp(param, "--config") == 0) { + else if (strcmp(param, "-c") == 0 || strcmp(param, "--config") == 0) { if (!yaml_parse_config(value, jb)) return false; } @@ -392,7 +395,7 @@ bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) { // Check 
if a pattern is set and exactly one pattern is specified if (!jb->pattern) { - log2stderr("Error: Pattern not specified."); + log2stderr("Warning: pattern not specified. Try the default config with: -c default"); log_job_command_line_help(argv[0]); return false; } diff --git a/collectors/log2journal/log2journal-yaml.c b/collectors/log2journal/log2journal-yaml.c index 1b9e823cb7..862e7bf4b7 100644 --- a/collectors/log2journal/log2journal-yaml.c +++ b/collectors/log2journal/log2journal-yaml.c @@ -852,7 +852,7 @@ static bool needs_quotes_in_yaml(const char *str) { static void yaml_print_node(const char *key, const char *value, size_t depth, bool dash) { if(depth > 10) depth = 10; - const char *quote = "\""; + const char *quote = "'"; const char *second_line = NULL; if(value && strchr(value, '\n')) { diff --git a/collectors/log2journal/log2journal.c b/collectors/log2journal/log2journal.c index 5dd98d6837..c3204939cd 100644 --- a/collectors/log2journal/log2journal.c +++ b/collectors/log2journal/log2journal.c @@ -61,37 +61,14 @@ const char journal_key_characters_map[256] = { // ---------------------------------------------------------------------------- -// Function to insert a key into the sorted.keys array while keeping it sorted -void log_job_add_key_sorted(LOG_JOB *jb, HASHED_KEY *newKey) { - size_t i, j; - - // Find the position to insert the new key based on lexicographic order - for (i = 0; i < jb->sorted.used; i++) { - if (strcmp(newKey->key, jb->sorted.keys[i]->key) < 0) { - break; - } - } - - // Shift elements to the right to make space for the new key - for (j = jb->sorted.used; j > i; j--) { - jb->sorted.keys[j] = jb->sorted.keys[j - 1]; - } - - // Insert the new key at the correct position - jb->sorted.keys[i] = newKey; - jb->sorted.used++; -} - static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) { if(k->flags & HK_HASHTABLE_ALLOCATED) return k; if(!k->hashtable_ptr) { HASHED_KEY *ht_key; - SIMPLE_HASHTABLE_SLOT *slot = 
simple_hashtable_get_slot(&jb->hashtable, k->hash, true); - if(slot->data) { - ht_key = slot->data; - + SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&jb->hashtable, k->hash, true); + if((ht_key = SIMPLE_HASHTABLE_SLOT_DATA(slot))) { if(!(ht_key->flags & HK_COLLISION_CHECKED)) { ht_key->flags |= HK_COLLISION_CHECKED; @@ -109,11 +86,7 @@ static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) { ht_key->hash = k->hash; ht_key->flags = HK_HASHTABLE_ALLOCATED; - slot->hash = ht_key->hash; - slot->data = ht_key; - jb->hashtable.used++; - - log_job_add_key_sorted(jb, ht_key); + simple_hashtable_set_slot_KEY(&jb->hashtable, slot, ht_key->hash, ht_key); } k->hashtable_ptr = ht_key; @@ -158,18 +131,25 @@ static inline void validate_key(LOG_JOB *jb __maybe_unused, HASHED_KEY *k) { // ---------------------------------------------------------------------------- -static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp, char *dst, size_t dst_size) { +static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k __maybe_unused, REPLACE_PATTERN *rp, char *dst, size_t dst_size) { size_t remaining = dst_size; char *copy_to = dst; for(REPLACE_NODE *node = rp->nodes; node != NULL && remaining > 1; node = node->next) { if(node->is_variable) { - HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); - if(ktmp->value.len) { - size_t copied = copy_to_buffer(copy_to, remaining, ktmp->value.txt, ktmp->value.len); + if(hashed_keys_match(&node->name, &jb->line.key)) { + size_t copied = copy_to_buffer(copy_to, remaining, jb->line.trimmed, jb->line.trimmed_len); copy_to += copied; remaining -= copied; } + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) { + size_t copied = copy_to_buffer(copy_to, remaining, ktmp->value.txt, ktmp->value.len); + copy_to += copied; + remaining -= copied; + } + } } else { size_t copied = 
copy_to_buffer(copy_to, remaining, node->name.key, node->name.len); @@ -189,9 +169,14 @@ static inline void replace_evaluate(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) { if(node->is_variable) { - HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); - if(ktmp->value.len) - txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len); + if(hashed_keys_match(&node->name, &jb->line.key)) + txt_expand_and_append(&ht_key->value, jb->line.trimmed, jb->line.trimmed_len); + + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) + txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len); + } } else txt_expand_and_append(&ht_key->value, node->name.key, node->name.len); @@ -220,9 +205,14 @@ static inline void replace_evaluate_from_pcre2(LOG_JOB *jb, HASHED_KEY *k, REPLA txt_expand_and_append(&jb->rewrites.tmp, k->value.txt + start_offset, length); } else { - HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); - if(ktmp->value.len) - txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len); + if(hashed_keys_match(&node->name, &jb->line.key)) + txt_expand_and_append(&jb->rewrites.tmp, jb->line.trimmed, jb->line.trimmed_len); + + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) + txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len); + } } } else { @@ -299,15 +289,6 @@ static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char printf("\n"); } -static inline void send_key_value_and_rewrite(LOG_JOB *jb, HASHED_KEY *key, const char *value, size_t len) { - HASHED_KEY *ht_key = get_key_from_hashtable(jb, key); - - txt_replace(&ht_key->value, value, len); - ht_key->flags |= HK_VALUE_FROM_LOG; - -// fprintf(stderr, "SET %s=%.*s\n", ht_key->key, (int)ht_key->value.len, 
ht_key->value.txt); -} - inline void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len) { HASHED_KEY *ht_key = get_key_from_hashtable_with_char_ptr(jb, key); HASHED_KEY *nk = rename_key(jb, ht_key); @@ -341,8 +322,8 @@ static inline void log_job_process_rewrites(LOG_JOB *jb) { } static inline void send_all_fields(LOG_JOB *jb) { - for(size_t i = 0; i < jb->sorted.used ;i++) { - HASHED_KEY *k = jb->sorted.keys[i]; + SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(&jb->hashtable, kptr, HASHED_KEY, _KEY) { + HASHED_KEY *k = SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(kptr); if(k->value.len) { // the key exists and has some value @@ -496,11 +477,13 @@ int log_job_run(LOG_JOB *jb) { if(strcmp(jb->pattern, "json") == 0) { json = json_parser_create(jb); + // never fails } else if(strcmp(jb->pattern, "logfmt") == 0) { logfmt = logfmt_parser_create(jb); + // never fails } - else { + else if(strcmp(jb->pattern, "none") != 0) { pcre2 = pcre2_parser_create(jb); if(pcre2_has_error(pcre2)) { log2stderr("%s", pcre2_parser_error(pcre2)); @@ -509,21 +492,25 @@ int log_job_run(LOG_JOB *jb) { } } - char buffer[MAX_LINE_LENGTH]; - char *line; - size_t len; + jb->line.buffer = mallocz(MAX_LINE_LENGTH + 1); + jb->line.size = MAX_LINE_LENGTH + 1; + jb->line.trimmed_len = 0; + jb->line.trimmed = jb->line.buffer; + + while ((jb->line.trimmed = get_next_line(jb, (char *)jb->line.buffer, jb->line.size, &jb->line.trimmed_len))) { + const char *line = jb->line.trimmed; + size_t len = jb->line.trimmed_len; - while ((line = get_next_line(jb, buffer, sizeof(buffer), &len))) { if(jb_switched_filename(jb, line, len)) continue; - bool line_is_matched; + bool line_is_matched = true; if(json) line_is_matched = json_parse_document(json, line); else if(logfmt) line_is_matched = logfmt_parse_document(logfmt, line); - else + else if(pcre2) line_is_matched = pcre2_parse_document(pcre2, line, len); if(!line_is_matched) { @@ -531,7 +518,7 @@ int log_job_run(LOG_JOB 
*jb) { log2stderr("%s", json_parser_error(json)); else if(logfmt) log2stderr("%s", logfmt_parser_error(logfmt)); - else + else if(pcre2) log2stderr("%s", pcre2_parser_error(pcre2)); if(!jb_send_unmatched_line(jb, line)) @@ -557,6 +544,8 @@ int log_job_run(LOG_JOB *jb) { else if(pcre2) pcre2_parser_destroy(pcre2); + freez((void *)jb->line.buffer); + return 0; } diff --git a/collectors/log2journal/log2journal.d/default.yaml b/collectors/log2journal/log2journal.d/default.yaml new file mode 100644 index 0000000000..d41efc4abb --- /dev/null +++ b/collectors/log2journal/log2journal.d/default.yaml @@ -0,0 +1,15 @@ +pattern: none + +filename: + key: LOG_FILENAME + +inject: + - key: MESSAGE + value: '${LINE}' # a special variable that resolves to the whole line read from the log + + - key: PRIORITY + value: 6 # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug + + - key: SYSLOG_IDENTIFIER + value: log2journal # the name of the application sending the logs + diff --git a/collectors/log2journal/log2journal.d/nginx-combined.yaml b/collectors/log2journal/log2journal.d/nginx-combined.yaml index 00610f4b7c..003c774d7b 100644 --- a/collectors/log2journal/log2journal.d/nginx-combined.yaml +++ b/collectors/log2journal/log2journal.d/nginx-combined.yaml @@ -37,15 +37,15 @@ rename: # Inject constant fields into the journal logs. inject: - key: SYSLOG_IDENTIFIER - value: "nginx-log" + value: nginx-log # inject PRIORITY is a duplicate of NGINX_STATUS - - key: "PRIORITY" - value: "${NGINX_STATUS}" + - key: PRIORITY + value: '${NGINX_STATUS}' # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS - - key: "NGINX_STATUS_FAMILY" - value: "${NGINX_STATUS}" + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}' # Rewrite the value of fields (including the duplicated ones). 
# The search pattern can have named groups, and the replace pattern can use @@ -53,30 +53,30 @@ inject: rewrite: # PRIORITY is a duplicate of NGINX_STATUS # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug - - key: "PRIORITY" - match: "^[123]" + - key: PRIORITY + match: '^[123]' value: 6 - - key: "PRIORITY" - match: "^4" + - key: PRIORITY + match: '^4' value: 5 - - key: "PRIORITY" - match: "^5" + - key: PRIORITY + match: '^5' value: 3 - - key: "PRIORITY" - match: ".*" + - key: PRIORITY + match: '.*' value: 4 # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS - - key: "NGINX_STATUS_FAMILY" - match: "^(?<first_digit>[1-5])" - value: "${first_digit}xx" + - key: NGINX_STATUS_FAMILY + match: '^(?<first_digit>[1-5])' + value: '${first_digit}xx' - - key: "NGINX_STATUS_FAMILY" - match: ".*" - value: "UNKNOWN" + - key: NGINX_STATUS_FAMILY + match: '.*' + value: 'UNKNOWN' # Control what to do when input logs do not match the main PCRE2 pattern. unmatched: diff --git a/collectors/log2journal/log2journal.d/nginx-json.yaml b/collectors/log2journal/log2journal.d/nginx-json.yaml index 1ad702da7c..7fdc4be584 100644 --- a/collectors/log2journal/log2journal.d/nginx-json.yaml +++ b/collectors/log2journal/log2journal.d/nginx-json.yaml @@ -12,7 +12,7 @@ filename: key: NGINX_LOG_FILENAME filter: - exclude: "NGINX_BINARY_REMOTE_ADDR" + exclude: '^(NGINX_BINARY_REMOTE_ADDR)$' rename: - new_key: MESSAGE @@ -69,15 +69,15 @@ rename: # Inject constant fields into the journal logs. inject: - key: SYSLOG_IDENTIFIER - value: "nginx-log" + value: nginx-log # inject PRIORITY is a duplicate of NGINX_STATUS - - key: "PRIORITY" - value: "${NGINX_STATUS}" + - key: PRIORITY + value: '${NGINX_STATUS}' # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS - - key: "NGINX_STATUS_FAMILY" - value: "${NGINX_STATUS}" + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}' # Rewrite the value of fields (including the duplicated ones). 
@@ -87,69 +87,69 @@ rewrite: # a ? means it has query string, everything else means it does not - key: NGINX_HAS_QUERY_STRING match: '^\?$' - value: "yes" + value: yes - key: NGINX_HAS_QUERY_STRING - match: ".*" - value: "no" + match: '.*' + value: no # 'on' means it was HTTPS, everything else means it was not - key: NGINX_HTTPS - match: "^on$" - value: "yes" + match: '^on$' + value: yes - key: NGINX_HTTPS - match: ".*" - value: "no" + match: '.*' + value: no # 'p' means it was pipelined, everything else means it was not - key: NGINX_PIPELINED - match: "^p$" - value: "yes" + match: '^p$' + value: yes - key: NGINX_PIPELINED - match: ".*" - value: "no" + match: '.*' + value: no # zero means client sent a certificate and it was verified, non-zero means otherwise - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY - match: "^0$" - value: "yes" + match: '^0$' + value: yes - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY - match: ".*" - value: "no" + match: '.*' + value: no # 'OK' means request completed, everything else means it didn't - key: NGINX_REQUEST_COMPLETION - match: "^OK$" - value: "completed" + match: '^OK$' + value: 'completed' - key: NGINX_REQUEST_COMPLETION - match: ".*" - value: "not completed" + match: '.*' + value: 'not completed' # PRIORTY is a duplicate of NGINX_STATUS # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug - - key: "PRIORITY" - match: "^[123]" + - key: PRIORITY + match: '^[123]' value: 6 - - key: "PRIORITY" - match: "^4" + - key: PRIORITY + match: '^4' value: 5 - - key: "PRIORITY" - match: "^5" + - key: PRIORITY + match: '^5' value: 3 - - key: "PRIORITY" - match: ".*" + - key: PRIORITY + match: '.*' value: 4 # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS - - key: "NGINX_STATUS_FAMILY" - match: "^(?<first_digit>[1-5])" - value: "${first_digit}xx" + - key: NGINX_STATUS_FAMILY + match: '^(?<first_digit>[1-5])' + value: '${first_digit}xx' - - key: "NGINX_STATUS_FAMILY" - match: ".*" - value: "UNKNOWN" + - key: 
NGINX_STATUS_FAMILY + match: '.*' + value: 'UNKNOWN' # Control what to do when input logs do not match the main PCRE2 pattern. unmatched: diff --git a/collectors/log2journal/log2journal.h b/collectors/log2journal/log2journal.h index f34d3db177..834a5b135d 100644 --- a/collectors/log2journal/log2journal.h +++ b/collectors/log2journal/log2journal.h @@ -13,6 +13,7 @@ #include <stdbool.h> #include <string.h> #include <ctype.h> +#include <math.h> #include <stdarg.h> #include <assert.h> @@ -42,11 +43,23 @@ static inline void *mallocz(size_t size) { } static inline void *callocz(size_t elements, size_t size) { - void *ptr = mallocz(elements * size); - memset(ptr, 0, elements * size); + void *ptr = calloc(elements, size); + if (!ptr) { + log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size); + exit(EXIT_FAILURE); + } return ptr; } +static inline void *reallocz(void *ptr, size_t size) { + void *new_ptr = realloc(ptr, size); + if (!new_ptr) { + log2stderr("Fatal Error: Memory reallocation failed. 
Requested size: %zu bytes.", size); + exit(EXIT_FAILURE); + } + return new_ptr; +} + static inline char *strdupz(const char *s) { char *ptr = strdup(s); if (!ptr) { @@ -74,7 +87,6 @@ static inline void freez(void *ptr) { #define XXH_INLINE_ALL #include "../../libnetdata/xxhash.h" -#include "../../libnetdata/simple_hashtable.h" #define PCRE2_CODE_UNIT_WIDTH 8 #include <pcre2.h> @@ -83,13 +95,29 @@ static inline void freez(void *ptr) { #include <yaml.h> #endif +// ---------------------------------------------------------------------------- +// hashtable for HASHED_KEY + +// cleanup hashtable defines +#undef SIMPLE_HASHTABLE_SORT_FUNCTION +#undef SIMPLE_HASHTABLE_VALUE_TYPE +#undef SIMPLE_HASHTABLE_NAME +#undef NETDATA_SIMPLE_HASHTABLE_H + +struct hashed_key; +static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2); +#define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys +#define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key +#define SIMPLE_HASHTABLE_NAME _KEY +#include "../../libnetdata/simple_hashtable.h" + +// ---------------------------------------------------------------------------- + #define MAX_OUTPUT_KEYS 1024 #define MAX_LINE_LENGTH (1024 * 1024) -#define MAX_KEY_DUPS (MAX_OUTPUT_KEYS / 2) #define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2) #define MAX_REWRITES (MAX_OUTPUT_KEYS / 2) #define MAX_RENAMES (MAX_OUTPUT_KEYS / 2) -#define MAX_KEY_DUPS_KEYS 20 #define JOURNAL_MAX_KEY_LEN 64 // according to systemd-journald #define JOURNAL_MAX_VALUE_LEN (48 * 1024) // according to systemd-journald @@ -178,13 +206,7 @@ static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) { if(new_size < t->size * 2) new_size = t->size * 2; - char *b = mallocz(new_size); - if(t->txt) { - memcpy(b, t->txt, t->len); - freez(t->txt); - } - - t->txt = b; + t->txt = reallocz(t->txt, new_size); t->size = new_size; } @@ -213,9 +235,6 @@ typedef enum __attribute__((__packed__)) { HK_RENAMES_CHECKED = (1 << 4), // we checked once if there are renames on this 
key HK_HAS_RENAMES = (1 << 5), // and we found there is a rename rule related to it - HK_DUPS_CHECKED = (1 << 6), // we checked once if there are duplications for this key - HK_HAS_DUPS = (1 << 7), // and we found there are duplication related to it - // ephemeral flags - they are unset at the end of each log line HK_VALUE_FROM_LOG = (1 << 14), // the value of this key has been read from the log (or from injection, duplication) @@ -268,6 +287 |