summaryrefslogtreecommitdiffstats
path: root/libnetdata
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-11-28 02:05:34 +0200
committerGitHub <noreply@github.com>2023-11-28 00:05:34 +0000
commita64c8cdb43bd98ac9c8165e14a621881c730dde5 (patch)
tree0504bc2d5698dc9c6a1b6222491f60110542d7f1 /libnetdata
parentc8c25a35cd1b668ac8fb6b654bd370c1aef17477 (diff)
log2journal moved to collectors (#16481)
* log2journal moved to collectors * split log2journal into multiple files * update the path xxh headers * json support for log2journal * logfmt support * fix warning * fix logfmt prefix * added support for UTF-8 escape sequences in json values
Diffstat (limited to 'libnetdata')
-rw-r--r--libnetdata/log/Makefile.am1
-rw-r--r--libnetdata/log/log2journal.c2135
-rw-r--r--libnetdata/log/log2journal.md670
3 files changed, 0 insertions, 2806 deletions
diff --git a/libnetdata/log/Makefile.am b/libnetdata/log/Makefile.am
index 8a178bfed2..161784b8f6 100644
--- a/libnetdata/log/Makefile.am
+++ b/libnetdata/log/Makefile.am
@@ -5,5 +5,4 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
dist_noinst_DATA = \
README.md \
- log2journal.md \
$(NULL)
diff --git a/libnetdata/log/log2journal.c b/libnetdata/log/log2journal.c
deleted file mode 100644
index 173a7af5bd..0000000000
--- a/libnetdata/log/log2journal.c
+++ /dev/null
@@ -1,2135 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-// only for PACKAGE_VERSION
-#include "config.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdarg.h>
-
-#define XXH_INLINE_ALL
-#include "../xxhash.h"
-
-#define PCRE2_CODE_UNIT_WIDTH 8
-#include <pcre2.h>
-
-#ifdef HAVE_LIBYAML
-#include <yaml.h>
-#endif
-
-// Compile-time limits. Several of them are printed by display_help() and
-// size the fixed arrays inside struct log_job.
-#define MAX_OUTPUT_KEYS 1024
-#define OVECCOUNT (MAX_OUTPUT_KEYS * 3) // should be a multiple of 3
-#define MAX_LINE_LENGTH (1024 * 1024)
-#define MAX_KEY_DUPS (MAX_OUTPUT_KEYS / 2)
-#define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2)
-#define MAX_REWRITES (MAX_OUTPUT_KEYS / 2)
-#define MAX_KEY_DUPS_KEYS 20
-
-#define MAX_KEY_LEN 64 // according to systemd-journald
-#define MAX_VALUE_LEN (48 * 1024) // according to systemd-journald
-
-// Forward declarations: both functions are used by log_job_add_rewrite()
-// before their definitions appear in this file.
-struct key_rewrite;
-static pcre2_code *jb_compile_pcre2_pattern(const char *pattern);
-static bool parse_replacement_pattern(struct key_rewrite *rw);
-
-// Built-in YAML configuration template for nginx "combined" access logs,
-// kept as one C string literal. display_help() prints it as the example
-// YAML file.
-// NOTE(review): presumably this is also what '--config nginx-combined'
-// loads - confirm against the option parser.
-#define YAML_CONFIG_NGINX_COMBINED \
- "# Netdata log2journal Configuration Template\n" \
- "# The following parses nginx log files using the combined format.\n" \
- "\n" \
- "# The PCRE2 pattern to match log entries and give names to the fields.\n" \
- "# The journal will have these names, so follow their rules. You can\n" \
- "# initiate an extended PCRE2 pattern by starting the pattern with (?x)\n" \
- "pattern: |\n" \
- " (?x) # Enable PCRE2 extended mode\n" \
- " ^\n" \
- " (?<NGINX_REMOTE_ADDR>[^ ]+) \\s - \\s # NGINX_REMOTE_ADDR\n" \
- " (?<NGINX_REMOTE_USER>[^ ]+) \\s # NGINX_REMOTE_USER\n" \
- " \\[\n" \
- " (?<NGINX_TIME_LOCAL>[^\\]]+) # NGINX_TIME_LOCAL\n" \
- " \\]\n" \
- " \\s+ \"\n" \
- " (?<MESSAGE>\n" \
- " (?<NGINX_METHOD>[A-Z]+) \\s+ # NGINX_METHOD\n" \
- " (?<NGINX_URL>[^ ]+) \\s+\n" \
- " HTTP/(?<NGINX_HTTP_VERSION>[^\"]+)\n" \
- " )\n" \
- " \" \\s+\n" \
- " (?<NGINX_STATUS>\\d+) \\s+ # NGINX_STATUS\n" \
- " (?<NGINX_BODY_BYTES_SENT>\\d+) \\s+ # NGINX_BODY_BYTES_SENT\n" \
- " \"(?<NGINX_HTTP_REFERER>[^\"]*)\" \\s+ # NGINX_HTTP_REFERER\n" \
- " \"(?<NGINX_HTTP_USER_AGENT>[^\"]*)\" # NGINX_HTTP_USER_AGENT\n" \
- "\n" \
- "# When log2journal can detect the filename of each log entry (tail gives it\n" \
- "# only when it tails multiple files), this key will be used to send the\n" \
- "# filename to the journals.\n" \
- "filename:\n" \
- " key: NGINX_LOG_FILENAME\n" \
- "\n" \
- "# Duplicate fields under a different name. You can duplicate multiple fields\n" \
- "# to a new one and then use rewrite rules to change its value.\n" \
- "duplicate:\n" \
- "\n" \
- " # we insert the field PRIORITY as a copy of NGINX_STATUS.\n" \
- " - key: PRIORITY\n" \
- " values_of:\n" \
- " - NGINX_STATUS\n" \
- "\n" \
- " # we inject the field NGINX_STATUS_FAMILY as a copy of NGINX_STATUS.\n" \
- " - key: NGINX_STATUS_FAMILY\n" \
- " values_of: \n" \
- " - NGINX_STATUS\n" \
- "\n" \
- "# Inject constant fields into the journal logs.\n" \
- "inject:\n" \
- " - key: SYSLOG_IDENTIFIER\n" \
- " value: \"nginx-log\"\n" \
- "\n" \
- "# Rewrite the value of fields (including the duplicated ones).\n" \
- "# The search pattern can have named groups, and the replace pattern can use\n" \
- "# them as ${name}.\n" \
- "rewrite:\n" \
- " # PRIORTY is a duplicate of NGINX_STATUS\n" \
- " # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug\n" \
- " - key: \"PRIORITY\"\n" \
- " search: \"^[123]\"\n" \
- " replace: 6\n" \
- "\n" \
- " - key: \"PRIORITY\"\n" \
- " search: \"^4\"\n" \
- " replace: 5\n" \
- "\n" \
- " - key: \"PRIORITY\"\n" \
- " search: \"^5\"\n" \
- " replace: 3\n" \
- "\n" \
- " - key: \"PRIORITY\"\n" \
- " search: \".*\"\n" \
- " replace: 4\n" \
- " \n" \
- " # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS\n" \
- " - key: \"NGINX_STATUS_FAMILY\"\n" \
- " search: \"^(?<first_digit>[1-5])\"\n" \
- " replace: \"${first_digit}xx\"\n" \
- "\n" \
- " - key: \"NGINX_STATUS_FAMILY\"\n" \
- " search: \".*\"\n" \
- " replace: \"UNKNOWN\"\n" \
- "\n" \
- "# Control what to do when input logs do not match the main PCRE2 pattern.\n" \
- "unmatched:\n" \
- " # The journal key to log the PCRE2 error message to.\n" \
- " # Set this to MESSAGE, so you to see the error in the log.\n" \
- " key: MESSAGE\n" \
- " \n" \
- " # Inject static fields to the unmatched entries.\n" \
- " # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs.\n" \
- " inject:\n" \
- " - key: PRIORITY\n" \
- " value: 1\n" \
- "\n"
-
-// Print the complete help text to stdout: usage line, every command line
-// option, the compile-time limits, a diagram of the processing pipeline,
-// the systemd journal field rules and, last, the built-in nginx YAML
-// configuration as an example.
-//
-// name: the program name shown in the "Usage:" line.
-void display_help(const char *name) {
- printf("\n");
- printf("Netdata log2journal " PACKAGE_VERSION "\n");
- printf("\n");
- printf("Convert structured log input to systemd Journal Export Format.\n");
- printf("\n");
- printf("Using PCRE2 patterns, extract the fields from structured logs on the standard\n");
- printf("input, and generate output according to systemd Journal Export Format.\n");
- printf("\n");
- printf("Usage: %s [OPTIONS] PATTERN\n", name);
- printf("\n");
- printf("Options:\n");
- printf("\n");
- printf(" --file /path/to/file.yaml\n");
- printf(" Read yaml configuration file for instructions.\n");
- printf("\n");
- printf(" --config CONFIG_NAME\n");
- printf(" Run with the internal configuration named CONFIG_NAME\n");
- printf(" Available internal configs: nginx-combined\n");
- printf("\n");
- printf(" --show-config\n");
- printf(" Show the configuration in yaml format before starting the job.\n");
- printf(" This is also an easy way to convert command line parameters to yaml.\n");
- printf("\n");
- printf(" --filename-key KEY\n");
- printf(" Add a field with KEY as the key and the current filename as value.\n");
- printf(" Automatically detects filenames when piped after 'tail -F',\n");
- printf(" and tail matches multiple filenames.\n");
- printf(" To inject the filename when tailing a single file, use --inject.\n");
- printf("\n");
- printf(" --unmatched-key KEY\n");
- printf(" Include unmatched log entries in the output with KEY as the field name.\n");
- printf(" Use this to include unmatched entries to the output stream.\n");
- printf(" Usually it should be set to --unmatched-key=MESSAGE so that the\n");
- printf(" unmatched entry will appear as the log message in the journals.\n");
- printf(" Use --inject-unmatched to inject additional fields to unmatched lines.\n");
- printf("\n");
- printf(" --duplicate TARGET=KEY1[,KEY2[,KEY3[,...]]\n");
- printf(" Create a new key called TARGET, duplicating the values of the keys\n");
- printf(" given. Useful for further processing. When multiple keys are given,\n");
- printf(" their values are separated by comma.\n");
- printf(" Up to %d duplications can be given on the command line, and up to\n", MAX_KEY_DUPS);
- printf(" %d keys per duplication command are allowed.\n", MAX_KEY_DUPS_KEYS);
- printf("\n");
- printf(" --inject LINE\n");
- printf(" Inject constant fields to the output (both matched and unmatched logs).\n");
- printf(" --inject entries are added to unmatched lines too, when their key is\n");
- printf(" not used in --inject-unmatched (--inject-unmatched override --inject).\n");
- printf(" Up to %d fields can be injected.\n", MAX_INJECTIONS);
- printf("\n");
- printf(" --inject-unmatched LINE\n");
- printf(" Inject lines into the output for each unmatched log entry.\n");
- printf(" Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched\n");
- printf(" lines as errors, so that they can easily be spotted in the journals.\n");
- printf(" Up to %d such lines can be injected.\n", MAX_INJECTIONS);
- printf("\n");
- printf(" --rewrite KEY=/SearchPattern/ReplacePattern\n");
- printf(" Apply a rewrite rule to the values of a specific key.\n");
- printf(" The first character after KEY= is the separator, which should also\n");
- printf(" be used between the search pattern and the replacement pattern.\n");
- printf(" The search pattern is a PCRE2 regular expression, and the replacement\n");
- printf(" pattern supports literals and named capture groups from the search pattern.\n");
- printf(" Example:\n");
- printf(" --rewrite DATE=/^(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})$/\n");
- printf(" ${day}/${month}/${year}\n");
- printf(" This will rewrite dates in the format YYYY-MM-DD to DD/MM/YYYY.\n");
- printf("\n");
- printf(" Only one rewrite rule is applied per key; the sequence of rewrites stops\n");
- printf(" for the key once a rule matches it. This allows providing a sequence of\n");
- printf(" independent rewriting rules for the same key, matching the different values\n");
- printf(" the key may get, and also provide a catch-all rewrite rule at the end of the\n");
- printf(" sequence for setting the key value if no other rule matched it.\n");
- printf("\n");
- printf(" The combination of duplicating keys with the values of multiple other keys\n");
- printf(" combined with multiple rewrite rules, allows creating complex rules for\n");
- printf(" rewriting key values.\n");
- printf("\n");
- printf(" Up to %d rewriting rules are allowed.\n", MAX_REWRITES);
- printf("\n");
- printf(" -h, --help\n");
- printf(" Display this help and exit.\n");
- printf("\n");
- printf(" PATTERN\n");
- printf(" PATTERN should be a valid PCRE2 regular expression.\n");
- printf(" RE2 regular expressions (like the ones usually used in Go applications),\n");
- printf(" are usually valid PCRE2 patterns too.\n");
- printf(" Regular expressions without named groups are ignored.\n");
- printf("\n");
- printf("The program accepts all parameters as both --option=value and --option value.\n");
- printf("\n");
- printf("The maximum line length accepted is %d characters.\n", MAX_LINE_LENGTH);
- printf("The maximum number of fields in the PCRE2 pattern is %d.\n", OVECCOUNT / 3);
- printf("\n");
- printf("PIPELINE AND SEQUENCE OF PROCESSING\n");
- printf("\n");
- printf("This is a simple diagram of the pipeline taking place:\n");
- printf("\n");
- printf(" +---------------------------------------------------+\n");
- printf(" | INPUT |\n");
- printf(" +---------------------------------------------------+\n");
- printf(" v v\n");
- printf(" +---------------------------------+ |\n");
- printf(" | EXTRACT FIELDS AND VALUES | |\n");
- printf(" +---------------------------------+ |\n");
- printf(" v v |\n");
- printf(" +---------------+ | |\n");
- printf(" | DUPLICATE | | |\n");
- printf(" | create fields | | |\n");
- printf(" | with values | | |\n");
- printf(" +---------------+ | |\n");
- printf(" v v v\n");
- printf(" +---------------------------------+ +--------------+\n");
- printf(" | REWRITE PIPELINES | | INJECT |\n");
- printf(" | altering the values | | constants |\n");
- printf(" +---------------------------------+ +--------------+\n");
- printf(" v v\n");
- printf(" +---------------------------------------------------+\n");
- printf(" | OUTPUT |\n");
- printf(" +---------------------------------------------------+\n");
- printf("\n");
- printf("JOURNAL FIELDS RULES (enforced by systemd-journald)\n");
- printf("\n");
- printf(" - field names can be up to 64 characters\n");
- printf(" - the only allowed field characters are A-Z, 0-9 and underscore\n");
- printf(" - the first character of fields cannot be a digit\n");
- printf(" - protected journal fields start with underscore:\n");
- printf(" * they are accepted by systemd-journal-remote\n");
- printf(" * they are NOT accepted by a local systemd-journald\n");
- printf("\n");
- printf(" For best results, always include these fields:\n");
- printf("\n");
- printf(" MESSAGE=TEXT\n");
- printf(" The MESSAGE is the body of the log entry.\n");
- printf(" This field is what we usually see in our logs.\n");
- printf("\n");
- printf(" PRIORITY=NUMBER\n");
- printf(" PRIORITY sets the severity of the log entry.\n");
- printf(" 0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug\n");
- printf(" - Emergency events (0) are usually broadcast to all terminals.\n");
- printf(" - Emergency, alert, critical, and error (0-3) are usually colored red.\n");
- printf(" - Warning (4) entries are usually colored yellow.\n");
- printf(" - Notice (5) entries are usually bold or have a brighter white color.\n");
- printf(" - Info (6) entries are the default.\n");
- printf(" - Debug (7) entries are usually grayed or dimmed.\n");
- printf("\n");
- printf(" SYSLOG_IDENTIFIER=NAME\n");
- printf(" SYSLOG_IDENTIFIER sets the name of application.\n");
- printf(" Use something descriptive, like: SYSLOG_IDENTIFIER=nginx-logs\n");
- printf("\n");
- printf("You can find the most common fields at 'man systemd.journal-fields'.\n");
- printf("\n");
- printf("Example YAML file:\n\n"
- "--------------------------------------------------------------------------------\n"
- "%s"
- "--------------------------------------------------------------------------------\n"
- "\n",
- YAML_CONFIG_NGINX_COMBINED);
-}
-
-// ----------------------------------------------------------------------------
-// logging
-
-// The format attribute lets the compiler validate printf-style arguments
-// at every log2stderr() call site.
-static void log2stderr(const char *format, ...) __attribute__ ((format(__printf__, 1, 2)));
-
-// Write a printf-style formatted message to stderr, followed by a newline.
-static void log2stderr(const char *format, ...) {
-    va_list ap;
-
-    va_start(ap, format);
-    vfprintf(stderr, format, ap);
-    va_end(ap);
-
-    fputc('\n', stderr);
-}
-
-// ----------------------------------------------------------------------------
-// allocation functions abstraction
-
-// malloc() that never hands NULL back to the caller: when malloc() returns
-// NULL the requested size is logged and the process exits with EXIT_FAILURE.
-void *mallocz(size_t size) {
-    void *mem = malloc(size);
-
-    if (mem)
-        return mem;
-
-    log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", size);
-    exit(EXIT_FAILURE);
-}
-
-// strdup() that terminates the process (EXIT_FAILURE) instead of returning
-// NULL. The caller owns the returned copy.
-char *strdupz(const char *s) {
-    char *copy = strdup(s);
-
-    if (copy)
-        return copy;
-
-    log2stderr("Fatal Error: Memory allocation failed in strdup.");
-    exit(EXIT_FAILURE);
-}
-
-// strndup() that terminates the process (EXIT_FAILURE) instead of returning
-// NULL. At most n bytes of s are copied; the caller owns the returned copy.
-char *strndupz(const char *s, size_t n) {
-    char *copy = strndup(s, n);
-
-    if (copy)
-        return copy;
-
-    log2stderr("Fatal Error: Memory allocation failed in strndup. Requested size: %zu bytes.", n);
-    exit(EXIT_FAILURE);
-}
-
-// free() counterpart of mallocz()/strdupz()/strndupz().
-// A NULL pointer is accepted and ignored, exactly as free() does.
-void freez(void *ptr) {
-    free(ptr);
-}
-
-// ----------------------------------------------------------------------------
-
-// Bounded copy of src_len bytes from src into dst, whose capacity is
-// dst_size bytes. The copy is truncated to fit and dst is NUL-terminated
-// whenever dst_size is at least 1.
-//
-// Returns the number of bytes copied, not counting the terminator
-// (always 0 when dst_size is 0 or 1).
-size_t copy_to_buffer(char *dst, size_t dst_size, const char *src, size_t src_len) {
-    if(dst_size == 0)
-        return 0;
-
-    if(dst_size == 1) {
-        // room for the terminator only
-        dst[0] = '\0';
-        return 0;
-    }
-
-    size_t capacity = dst_size - 1;
-    size_t n = (src_len > capacity) ? capacity : src_len;
-
-    memcpy(dst, src, n);
-    dst[n] = '\0';
-    return n;
-}
-
-// ----------------------------------------------------------------------------
-
-// Reusable, NUL-terminated text buffer, filled by txt_replace().
-typedef struct txt {
- char *s; // heap buffer holding the text; may be NULL when nothing has been stored yet
- size_t size; // capacity recorded for s, in bytes, terminator included
-} TEXT;
-
-// Store a NUL-terminated copy of the first `len` bytes of `s` in `txt`.
-//
-// A NULL or empty `s`, or len == 0, stores the empty string.
-// The current allocation is reused when it is big enough; otherwise it is
-// released and replaced by a new one of exactly len + 1 bytes.
-static void txt_replace(TEXT *txt, const char *s, size_t len) {
-    if(!s || !*s || len == 0) {
-        s = "";
-        len = 0;
-    }
-
-    if(len + 1 > txt->size) {
-        // no allocation yet, or too small for the new value.
-        // Allocate exactly the capacity we record: strndup() stops at the
-        // first NUL and could return fewer than len + 1 bytes, which a later
-        // in-place reuse (trusting txt->size) would overflow.
-        freez(txt->s);
-        txt->s = mallocz(len + 1);
-        txt->size = len + 1;
-    }
-
-    memcpy(txt->s, s, len);
-    txt->s[len] = '\0';
-}
-
-// ----------------------------------------------------------------------------
-
-// A journal field: fixed-size key plus a dynamically sized value.
-typedef struct key_value {
- char key[MAX_KEY_LEN + 1]; // NUL-terminated field name, truncated to MAX_KEY_LEN by key_value_replace()
- TEXT value; // field value, managed by txt_replace()
- bool on_unmatched; // NOTE(review): not written anywhere in the visible code - confirm its use
-} KEY_VALUE;
-
-// Overwrite both parts of a KEY_VALUE.
-// The key is copied into the fixed kv->key buffer (truncated when it does
-// not fit); the value is stored in kv->value through txt_replace().
-void key_value_replace(KEY_VALUE *kv, const char *key, size_t key_len, const char *value, size_t value_len) {
-    TEXT *stored_value = &kv->value;
-
-    copy_to_buffer(kv->key, sizeof(kv->key), key, key_len);
-    txt_replace(stored_value, value, value_len);
-}
-
-// ----------------------------------------------------------------------------
-
-// One duplication rule: creates the field `target` from the values of up to
-// MAX_KEY_DUPS_KEYS source keys.
-struct key_dup {
- XXH64_hash_t hash; // XXH3 64-bit hash of target (set in add_duplicate_target_to_job())
- char *target; // heap copy of the name of the field to create
- char *keys[MAX_KEY_DUPS_KEYS]; // heap copies of the source key names (add_key_to_duplicate())
- TEXT values[MAX_KEY_DUPS_KEYS]; // one value buffer per source key; start out empty (NULL / 0)
- size_t used; // number of entries in keys[] in use
- bool exposed; // initialised to false; NOTE(review): meaning is defined outside this chunk
-};
-
-// One piece of a parsed replacement pattern, kept as a singly linked list.
-struct replacement_node {
- bool is_variable; // true: s names a capture group of the search pattern; false: s is literal text
- const char *s; // group name or literal text; released with freez() in jb_cleanup()
- size_t len; // length of the literal text in s (used by rewrite_value() for literals)
- struct replacement_node *next; // next piece, NULL at the end of the list
-};
-
-// One rewrite rule: when `key` has a value matching `re`, the value is
-// replaced by the text assembled from `nodes`.
-struct key_rewrite {
- XXH64_hash_t hash; // XXH3 64-bit hash of key, compared before the strcmp() in rewrite_value()
- char *key; // heap copy of the field name this rule applies to
- char *search_pattern; // heap copy of the PCRE2 search pattern text
- char *replace_pattern; // heap copy of the replacement pattern text
- pcre2_code *re; // compiled search_pattern
- pcre2_match_data *match_data; // match data created from re
- struct replacement_node *nodes; // parsed replace_pattern (built by parse_replacement_pattern())
-};
-
-// The complete configuration of a log2journal run.
-// jb_cleanup() releases what the job owns and zeroes the structure.
-struct log_job {
- bool show_config; // NOTE(review): presumably set by --show-config - confirm in the option parser
-
- const char *pattern; // NOTE(review): presumably the main PCRE2 pattern - set outside this chunk
-
- struct {
- const char *key; // heap copy of the filename field name (log_job_add_filename_key())
- char current[FILENAME_MAX + 1];
- bool last_line_was_empty;
- } filename;
-
- // constant fields added by log_job_add_injection(..., unmatched = false)
- struct {
- KEY_VALUE keys[MAX_INJECTIONS];
- size_t used;
- } injections;
-
- struct {
- const char *key;
- // constant fields added by log_job_add_injection(..., unmatched = true)
- struct {
- KEY_VALUE keys[MAX_INJECTIONS];
- size_t used;
- } injections;
- } unmatched;
-
- // duplication rules, appended by add_duplicate_target_to_job()
- struct {
- struct key_dup array[MAX_KEY_DUPS];
- size_t used;
- } dups;
-
- // rewrite rules, appended by log_job_add_rewrite() and tried in order by rewrite_value()
- struct {
- struct key_rewrite array[MAX_REWRITES];
- size_t used;
- } rewrites;
-};
-
-// Set the journal field name used for the filename of each log entry.
-// A heap copy of the first key_len bytes of key is stored in the job,
-// replacing (and freeing) any previous one.
-// Returns false, after logging, when key is NULL or empty.
-static bool log_job_add_filename_key(struct log_job *jb, const char *key, size_t key_len) {
-    bool usable = key && *key;
-    if(!usable) {
-        log2stderr("filename key cannot be empty.");
-        return false;
-    }
-
-    char *previous = (char *)jb->filename.key;
-    if(previous)
-        freez(previous);
-
-    jb->filename.key = strndupz(key, key_len);
-    return true;
-}
-
-// Append a constant key=value field to the job.
-// unmatched selects the list: true adds to the fields injected into
-// unmatched entries, false to the regular injections.
-// Returns false, after logging, when the selected list already holds
-// MAX_INJECTIONS entries.
-static bool log_job_add_injection(struct log_job *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched) {
-    KEY_VALUE *slots = unmatched ? jb->unmatched.injections.keys : jb->injections.keys;
-    size_t *count = unmatched ? &jb->unmatched.injections.used : &jb->injections.used;
-
-    if(*count >= MAX_INJECTIONS) {
-        if(unmatched)
-            log2stderr("Error: too many unmatched injections. You can inject up to %d lines.", MAX_INJECTIONS);
-        else
-            log2stderr("Error: too many injections. You can inject up to %d lines.", MAX_INJECTIONS);
-
-        return false;
-    }
-
-    KEY_VALUE *kv = &slots[*count];
-    (*count)++;
-
-    key_value_replace(kv, key, key_len, value, value_len);
-    return true;
-}
-
-// Append a rewrite rule for `key` to the job.
-//
-// search_pattern is compiled with PCRE2 and replace_pattern is parsed into
-// the rule's list of replacement nodes. The job keeps its own copies of the
-// three strings.
-//
-// Returns false (the job is left without the new rule) when the job already
-// has MAX_REWRITES rules, when the search pattern does not compile, when the
-// match data cannot be allocated, or when the replacement pattern cannot be
-// parsed.
-static bool log_job_add_rewrite(struct log_job *jb, const char *key, const char *search_pattern, const char *replace_pattern) {
-    // refuse to write past the end of the fixed rewrites array
-    if (jb->rewrites.used >= MAX_REWRITES) {
-        log2stderr("Error: too many rewrites. You can add up to %d rewrite rules.", MAX_REWRITES);
-        return false;
-    }
-
-    pcre2_code *re = jb_compile_pcre2_pattern(search_pattern);
-    if (!re)
-        return false;
-
-    pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
-    if (!match_data) {
-        log2stderr("Error: cannot allocate PCRE2 match data for the rewrite of key '%s'.", key);
-        pcre2_code_free(re);
-        return false;
-    }
-
-    // the slot is only counted as used once the rule is complete
-    struct key_rewrite *rw = &jb->rewrites.array[jb->rewrites.used];
-    rw->key = strdupz(key);
-    rw->hash = XXH3_64bits(rw->key, strlen(rw->key));
-    rw->search_pattern = strdupz(search_pattern);
-    rw->replace_pattern = strdupz(replace_pattern);
-    rw->re = re;
-    rw->match_data = match_data;
-
-    // start from an empty list: the slot may carry leftovers of an earlier
-    // attempt that failed
-    rw->nodes = NULL;
-
-    // Parse the replacement pattern and create the linked list
-    if (!parse_replacement_pattern(rw)) {
-        pcre2_match_data_free(rw->match_data);
-        pcre2_code_free(rw->re);
-        freez(rw->key);
-        freez(rw->search_pattern);
-        freez(rw->replace_pattern);
-        return false;
-    }
-
-    jb->rewrites.used++;
-    return true;
-}
-
-// Release what the job owns (filename key, injected values, duplication
-// rules, rewrite rules with their compiled patterns and replacement nodes)
-// and reset the whole structure to zero.
-void jb_cleanup(struct log_job *jb) {
-    // heap copy made by log_job_add_filename_key(); freez() accepts NULL
-    freez((void *)jb->filename.key);
-
-    for(size_t i = 0; i < jb->injections.used ;i++)
-        freez(jb->injections.keys[i].value.s);
-
-    for(size_t i = 0; i < jb->unmatched.injections.used ;i++)
-        freez(jb->unmatched.injections.keys[i].value.s);
-
-    for(size_t i = 0; i < jb->dups.used ;i++) {
-        struct key_dup *kd = &jb->dups.array[i];
-
-        freez(kd->target);
-
-        for(size_t j = 0; j < kd->used ; j++) {
-            freez(kd->keys[j]);
-            freez(kd->values[j].s);
-        }
-    }
-
-    for(size_t i = 0; i < jb->rewrites.used; i++) {
-        struct key_rewrite *rw = &jb->rewrites.array[i];
-
-        freez(rw->key);
-        freez(rw->search_pattern);
-        freez(rw->replace_pattern);
-
-        if(rw->match_data)
-            pcre2_match_data_free(rw->match_data);
-
-        if (rw->re)
-            pcre2_code_free(rw->re);
-
-        // Cleanup for replacement nodes linked list
-        struct replacement_node *current = rw->nodes;
-        while (current != NULL) {
-            struct replacement_node *next = current->next;
-
-            freez((void *)current->s);
-            freez(current);
-            current = next;
-        }
-    }
-
-    // leave no dangling pointers behind
-    memset(jb, 0, sizeof(*jb));
-}
-
-// ----------------------------------------------------------------------------
-// PCRE2
-
-// Compile a NUL-terminated PCRE2 pattern with default options.
-// Returns the compiled pattern, or NULL after logging the PCRE2 error
-// message and the offset at which compilation failed.
-static pcre2_code *jb_compile_pcre2_pattern(const char *pattern) {
-    int errcode;
-    PCRE2_SIZE erroffset;
-
-    pcre2_code *compiled = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, 0, &errcode, &erroffset, NULL);
-    if (compiled != NULL)
-        return compiled;
-
-    PCRE2_UCHAR message[1024];
-    pcre2_get_error_message(errcode, message, sizeof(message));
-    log2stderr("PCRE2 compilation failed at offset %d: %s", (int)erroffset, message);
-    log2stderr("Check for common regex syntax errors or unsupported PCRE2 patterns.");
-    return NULL;
-}
-
-// Match the first `len` bytes of `line` against `re`, leaving the result in
-// `match_data`.
-//
-// Returns true when pcre2_match() returns a non-negative code. On a negative
-// code (which includes "no match") it returns false and, only when `log` is
-// set, logs the PCRE2 error message together with the input.
-static inline bool jb_pcre2_match(pcre2_code *re, pcre2_match_data *match_data, char *line, size_t len, bool log) {
-    int rc = pcre2_match(re, (PCRE2_SPTR)line, len, 0, 0, match_data, NULL);
-    if(rc >= 0)
-        return true;
-
-    if(log) {
-        // the message is only built when it is going to be printed:
-        // rewrite_value() calls this with log = false for every rule tried
-        PCRE2_UCHAR errbuf[1024];
-        pcre2_get_error_message(rc, errbuf, sizeof(errbuf));
-
-        // print exactly the bytes that were matched against, not whatever
-        // follows them in memory up to the next NUL
-        log2stderr("PCRE2 error %d: %s on: %.*s", rc, errbuf, (int)len, line);
-    }
-
-    return false;
-}
-
-// ----------------------------------------------------------------------------
-
-// Apply the first rewrite rule of `key` whose search pattern matches `value`.
-//
-// Rules are tried in the order they were added; a rule is considered only
-// when both its hash and its key name match. For the first matching rule the
-// replacement is assembled from the rule's nodes: literal nodes are copied
-// as-is, variable nodes are replaced by the text captured by the named group.
-// Variable nodes naming an unknown group, or a group that did not take part
-// in the match, contribute nothing.
-//
-// Returns a pointer to a thread-local static buffer (overwritten by the next
-// call on the same thread, truncated to MAX_VALUE_LEN bytes), or NULL when no
-// rule of this key matched.
-static char *rewrite_value(struct log_job *jb, const char *key, XXH64_hash_t hash, const char *value, size_t value_len) {
-    static __thread char rewritten_value[MAX_VALUE_LEN + 1];
-
-    for (size_t i = 0; i < jb->rewrites.used; i++) {
-        struct key_rewrite *rw = &jb->rewrites.array[i];
-
-        if (rw->hash != hash || strcmp(rw->key, key) != 0)
-            continue; // rule for another key
-
-        if (!jb_pcre2_match(rw->re, rw->match_data, (char *)value, value_len, false))
-            continue; // No match found, skip to next rewrite rule
-
-        PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(rw->match_data);
-        uint32_t ovector_pairs = pcre2_get_ovector_count(rw->match_data);
-
-        char *buffer = rewritten_value;
-        size_t buffer_remaining = sizeof(rewritten_value);
-
-        // a rule without nodes must produce an empty value, not the
-        // leftovers of a previous call
-        *buffer = '\0';
-
-        // Iterate through the linked list of replacement nodes
-        for (struct replacement_node *node = rw->nodes; node != NULL; node = node->next) {
-            const char *src;
-            size_t src_len;
-
-            if (node->is_variable) {
-                // negative means the name is not a group of this pattern;
-                // it must not be used as an index
-                int group = pcre2_substring_number_from_name(rw->re, (PCRE2_SPTR)node->s);
-                if (group < 0 || (uint32_t)group >= ovector_pairs)
-                    continue;
-
-                PCRE2_SIZE start_offset = ovector[2 * group];
-                PCRE2_SIZE end_offset = ovector[2 * group + 1];
-
-                // groups that did not participate in the match are unset
-                if (start_offset == PCRE2_UNSET || end_offset == PCRE2_UNSET || end_offset < start_offset)
-                    continue;
-
-                src = value + start_offset;
-                src_len = end_offset - start_offset;
-            }
-            else {
-                src = node->s;
-                src_len = node->len;
-            }
-
-            size_t copied = copy_to_buffer(buffer, buffer_remaining, src, src_len);
-            buffer += copied;
-            buffer_remaining -= copied;
-        }
-
-        return rewritten_value;
-    }
-
-    return NULL;
-}
-
-// ----------------------------------------------------------------------------
-
-static inline void send_key_value_error(const char *key, const char *format, ...) __attribute__ ((format(__printf__, 2, 3)));
-
-// Print "KEY=<formatted message>" and a newline on stdout, where the message
-// is built printf-style from format and the following arguments.
-static inline void send_key_value_error(const char *key, const char *format, ...) {
-    va_list ap;
-
-    printf("%s=", key);
-
-    va_start(ap, format);
-    vfprintf(stdout, format, ap);
-    va_end(ap);
-
-    putchar('\n');
-}
-
-// Print "KEY=VALUE" and a newline on stdout. The value goes through the
-// rewrite rules of the key first; when none of them matches, the first len
-// bytes of the original value are printed unchanged.
-static inline void send_key_value_and_rewrite(struct log_job *jb, const char *key, XXH64_hash_t hash, const char *value, size_t len) {
-    const char *replacement = rewrite_value(jb, key, hash, value, len);
-
-    if(replacement) {
-        printf("%s=%s\n", key, replacement);
-        return;
-    }
-
-    printf("%s=%.*s\n", key, (int)len, value);
-}
-
-// Print "KEY=VALUE" and a newline on stdout, with no rewriting.
-// jb is accepted for symmetry with send_key_value_and_rewrite() but not used.
-static inline void send_key_value_constant(struct log_job *jb, const char *key, const char *value) {
-    (void)jb;
-
-    fprintf(stdout, "%s=%s\n", key, value);
-}
-
-// ----------------------------------------------------------------------------
-
-// Start a new duplication rule that will create the field `target`.
-// The rule is returned with no source keys and with all its value buffers
-// empty; source keys are added with add_key_to_duplicate().
-// Returns NULL, after logging, when the job already has MAX_KEY_DUPS rules.
-static struct key_dup *add_duplicate_target_to_job(struct log_job *jb, const char *target, size_t target_len) {
-    if (jb->dups.used >= MAX_KEY_DUPS) {
-        log2stderr("Error: Too many duplicates defined. Maximum allowed is %d.", MAX_KEY_DUPS);
-        return NULL;
-    }
-
-    size_t slot = jb->dups.used++;
-    struct key_dup *kd = &jb->dups.array[slot];
-
-    kd->target = strndupz(target, target_len);
-    kd->hash = XXH3_64bits(kd->target, target_len);
-    kd->used = 0;
-    kd->exposed = false;
-
-    // every value buffer starts out unallocated
-    for (TEXT *v = kd->values; v < kd->values + MAX_KEY_DUPS_KEYS; v++) {
-        v->s = NULL;
-        v->size = 0;
-    }
-
-    return kd;
-}
-
-// Add one more source key (a heap copy of the first key_len bytes of key)
-// to a duplication rule.
-// Returns false, after logging, when the rule already has
-// MAX_KEY_DUPS_KEYS source keys.
-static bool add_key_to_duplicate(struct key_dup *kd, const char *key, size_t key_len) {
-    if (kd->used < MAX_KEY_DUPS_KEYS) {
-        size_t slot = kd->used++;
-        kd->keys[slot] = strndupz(key, key_len);
-        return true;
-    }
-
-    log2stderr("Error: Too many keys in duplication of target '%s'.", kd->target);
-    return false;
-}
-
-// ----------------------------------------------------------------------------
-// yaml configuration file
-
-#ifdef HAVE_LIBYAML
-
-
-// ----------------------------------------------------------------------------
-// yaml library functions
-
-// Name of a libyaml event type, for diagnostics.
-// Returns "UNKNOWN" for any value that is not one of the listed event types.
-static const char *yaml_event_name(yaml_event_type_t type) {
-    static const char *const names[] = {
-        [YAML_NO_EVENT]             = "YAML_NO_EVENT",
-        [YAML_SCALAR_EVENT]         = "YAML_SCALAR_EVENT",
-        [YAML_ALIAS_EVENT]          = "YAML_ALIAS_EVENT",
-        [YAML_MAPPING_START_EVENT]  = "YAML_MAPPING_START_EVENT",
-        [YAML_MAPPING_END_EVENT]    = "YAML_MAPPING_END_EVENT",
-        [YAML_SEQUENCE_START_EVENT] = "YAML_SEQUENCE_START_EVENT",
-        [YAML_SEQUENCE_END_EVENT]   = "YAML_SEQUENCE_END_EVENT",
-        [YAML_STREAM_START_EVENT]   = "YAML_STREAM_START_EVENT",
-        [YAML_STREAM_END_EVENT]     = "YAML_STREAM_END_EVENT",
-        [YAML_DOCUMENT_START_EVENT] = "YAML_DOCUMENT_START_EVENT",
-        [YAML_DOCUMENT_END_EVENT]   = "YAML_DOCUMENT_END_EVENT",
-    };
-
-    // out-of-range values (including negative ones, which wrap to huge
-    // indexes) and gaps in the table all map to "UNKNOWN"
-    size_t idx = (size_t)type;
-    if (idx < sizeof(names) / sizeof(names[0]) && names[idx] != NULL)
-        return names[idx];
-
-    return "UNKNOWN";
-}
-
-// yaml_error() reports a YAML problem, tagging it with the source location
-// of the call.
-#define yaml_error(parser, event, fmt, args...) yaml_error_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__, fmt, ##args)
-static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) __attribute__ ((format(__printf__, 6, 7)));
-
-// Print a YAML diagnostic on stderr:
-//  - the source location given by line / function / file,
-//  - the parser's current line and column (1-based),
-//  - the event type and, for scalar and alias events, the text near it
-//    (event may be NULL, in which case both are omitted),
-//  - the printf-style message, followed by a newline.
-static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) {
-    char context[1024] = ""; // text of the offending scalar / alias, if any
-    const char *event_name = "";
-
-    if(event) {
-        event_name = yaml_event_name(event->type);
-
-        if(event->type == YAML_SCALAR_EVENT)
-            copy_to_buffer(context, sizeof(context), (char *)event->data.scalar.value, event->data.scalar.length);
-        else if(event->type == YAML_ALIAS_EVENT)
-            snprintf(context, sizeof(context), "%s", event->data.alias.anchor);
-    }
-
-    const char *separator = context[0] ? ", near " : "";
-    int yaml_line = (int)(parser->mark.line + 1);
-    int yaml_column = (int)(parser->mark.column + 1);
-
-    fprintf(stderr, "YAML %zu@%s, %s(): (line %d, column %d, %s%s%s): ",
-            line, file, function,
-            yaml_line, yaml_column,
-            event_name, separator, context);
-
-    va_list ap;
-    va_start(ap, format);
-    vfprintf(stderr, format, ap);
-    va_end(ap);
-
-    fputc('\n', stderr);
-}
-
-// yaml_parse() fetches the next event, tagging failures with the source
-// location of the call.
-#define yaml_parse(parser, event) yaml_parse_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__)
-
-// Fetch the next event from the parser into `event`.
-// Returns true on success; the caller must then release the event with
-// yaml_event_delete(). On failure the parser error code is reported,
-// attributed to the caller's location (line / function / file), and false
-// is returned.
-static bool yaml_parse_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file) {
-    if (yaml_parser_parse(parser, event))
-        return true;
-
-    // report the location we were given, not the location of this helper
-    yaml_error_with_trace(parser, NULL, line, function, file, "YAML parser error %d", (int)parser->error);
-    return false;
-}
-
-// yaml_parse_expect_event() consumes the next event and checks its type,
-// tagging failures with the source location of the call.
-#define yaml_parse_expect_event(parser, type) yaml_parse_expect_event_with_trace(parser, type, __LINE__, __FUNCTION__, __FILE__)
-
-// Consume the next event and verify it is of the given type.
-// Returns false when parsing fails or when the event has another type (the
-// mismatch is reported with the caller's location). The event is always
-// released before returning.
-static bool yaml_parse_expect_event_with_trace(yaml_parser_t *parser, yaml_event_type_t type, size_t line, const char *function, const char *file) {
-    yaml_event_t event;
-
-    if (!yaml_parse(parser, &event))
-        return false;
-
-    bool matched = (event.type == type);
-    if(!matched)
-        yaml_error_with_trace(parser, &event, line, function, file, "unexpected event - expecting: %s", yaml_event_name(type));
-
-    yaml_event_delete(&event);
-    return matched;
-}
-
-// yaml_scalar_matches() checks whether an event is a scalar equal to s.
-#define yaml_scalar_matches(event, s, len) yaml_scalar_matches_with_trace(event, s, len, __LINE__, __FUNCTION__, __FILE__)
-
-// Returns true when `event` is a scalar event whose value is exactly the
-// `len` bytes at `s`.
-// line / function / file are accepted for symmetry with the other *_with_trace
-// helpers and are not used; they are marked with the compiler attribute
-// directly, so the function does not depend on a __maybe_unused macro that
-// none of the headers visible in this file is known to provide.
-static bool yaml_scalar_matches_with_trace(yaml_event_t *event, const char *s, size_t len, size_t line __attribute__((unused)), const char *function __attribute__((unused)), const char *file __attribute__((unused))) {
-    if(event->type != YAML_SCALAR_EVENT)
-        return false;
-
-    if(len != event->data.scalar.length)
-        return false;
-
-    // lengths are equal: compare exactly that many bytes
-    return memcmp(event->data.scalar.value, s, len) == 0;
-}
-
-// ----------------------------------------------------------------------------
-
-static struct key_dup *yaml_parse_duplicate_key(struct log_job *jb, yaml_parser_t *parser) {
- yaml_event_t event;
-
- if (!yaml_parse(parser, &event))
- return false;
-
- struct key_dup *kd = NULL;
- if(event.type == YAML_SCALAR_EVENT) {
- kd = add_duplicate_target_to_job(jb, (char *)event.data.scalar.value, event.data.scalar.length);
- }
- else
- yaml_error(parser, &event, "duplicate key must