// SPDX-License-Identifier: GPL-3.0-or-later
#define NETDATA_RRD_INTERNALS
#include "rrd.h"
// ----------------------------------------------------------------------------
// labels sanitization
/*
* All labels follow these rules:
*
* Character Symbol Values Names
* UTF-8 characters UTF-8 yes -> _
* Lower case letter [a-z] yes yes
* Upper case letter [A-Z] yes -> [a-z]
* Digit [0-9] yes yes
* Underscore _ yes yes
* Minus - yes yes
* Plus + yes -> _
* Colon : yes -> _
* Semicolon ; -> : -> _
* Equal = -> : -> _
* Period . yes yes
* Comma , -> . -> .
* Slash / yes yes
* Backslash \ -> / -> /
* At @ yes -> _
* Space yes -> _
* Opening parenthesis ( yes -> _
* Closing parenthesis ) yes -> _
* anything else -> _ -> _
*
* The above rules should allow users to set in tags (indicative):
*
* 1. hostnames and domain names as-is
* 2. email addresses as-is
* 3. floating point numbers, converted to always use a dot as the decimal point
*
* Leading and trailing spaces and control characters are removed from both label
* names and values.
*
* Runs of consecutive spaces inside the label name or the value are collapsed to a single space.
* In names spaces are also converted to underscores.
*
* Names that are only underscores are rejected (they do not enter the dictionary).
*
* The above rules do not require any conversion to be included in JSON strings.
*
* Label names are truncated to RRDLABELS_MAX_NAME_LENGTH (200) and label values
* to RRDLABELS_MAX_VALUE_LENGTH (800 bytes, up to 200 UTF-8 characters).
*
* When parsing, label key and value are separated by the first colon (:) found.
* So label:value1:value2 is parsed as key = "label", value = "value1:value2"
*
* This means a label key cannot contain a colon (:) - it is converted to
* underscore if it does.
*
*/
// Size limits for label names and values.
// NOTE(review): the unit of the name limit is not stated here (bytes vs characters) - confirm at the use sites.
#define RRDLABELS_MAX_NAME_LENGTH 200
// The value limit is larger than the name limit because it is expressed in bytes.
#define RRDLABELS_MAX_VALUE_LENGTH 800 // 800 in bytes, up to 200 UTF-8 characters
// 256-entry tables, i.e. one slot per possible unsigned char value.
// They have no initializer, so as file-scope statics they start out zero-filled.
// NOTE(review): presumably they are filled in at runtime before first use and are
// indexed by input byte like label_values_char_map - confirm against the init code.
static unsigned char label_spaces_char_map[256];
static unsigned char label_names_char_map[256];
static unsigned char label_values_char_map[256] = {
[0] = '\0', //
[1] = '_', //
[2] = '_', //
[3] = '_', //
[4] = '_', //
[5] = '_', //
[6] = '_', //
[7] = '_', //
[8] = '_', //
[9] = '_', //
[10] = '_', //
[11] = '_', //
[12] = '_', //
[13] = '_', //
[14] = '_', //
[15] = '_', //
[16] = '_', //
[17] = '_', //
[18] = '_', //
[19] = '_', //
[20] = '_', //
[21] = '_', //
[22] = '_', //
[23] = '_', //
[24] = '_', //
[25] = '_', //
[26] = '_', //
[27] = '_', //
[28] = '_', //
[29] = '_', //
[30] = '_', //
[31] = '_', //
[32] = ' ', // SPACE keep
[33] = '_', // !