diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | libnetdata/libnetdata.h | 1 | ||||
-rw-r--r-- | libnetdata/string/utf8.h | 9 | ||||
-rw-r--r-- | libnetdata/url/url.c | 144 | ||||
-rw-r--r-- | web/api/badges/web_buffer_svg.c | 216 | ||||
-rw-r--r-- | web/server/web_client.c | 15 | ||||
-rw-r--r-- | web/server/web_client.h | 3 |
8 files changed, 226 insertions, 164 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bfab928ff..4d631f2fe7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -320,6 +320,7 @@ set(LIBNETDATA_FILES libnetdata/json/jsmn.h libnetdata/health/health.c libnetdata/health/health.h + libnetdata/string/utf8.h libnetdata/socket/security.c libnetdata/socket/security.h) diff --git a/Makefile.am b/Makefile.am index bc928bba87..de161c849d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -167,6 +167,7 @@ LIBNETDATA_FILES = \ libnetdata/json/jsmn.h \ libnetdata/health/health.c \ libnetdata/health/health.h \ + libnetdata/string/utf8.h \ $(NULL) APPS_PLUGIN_FILES = \ diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h index 1672ae3004..ef883300b8 100644 --- a/libnetdata/libnetdata.h +++ b/libnetdata/libnetdata.h @@ -313,5 +313,6 @@ extern char *netdata_configured_host_prefix; #include "url/url.h" #include "json/json.h" #include "health/health.h" +#include "string/utf8.h" #endif // NETDATA_LIB_H diff --git a/libnetdata/string/utf8.h b/libnetdata/string/utf8.h new file mode 100644 index 0000000000..133ec710b6 --- /dev/null +++ b/libnetdata/string/utf8.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STRING_UTF8_H +#define NETDATA_STRING_UTF8_H 1 + +#define IS_UTF8_BYTE(x) (x & 0x80) +#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x)&&(x & 0x40)) + +#endif /* NETDATA_STRING_UTF8_H */ diff --git a/libnetdata/url/url.c b/libnetdata/url/url.c index 07a9f8069e..1929d6686f 100644 --- a/libnetdata/url/url.c +++ b/libnetdata/url/url.c @@ -52,6 +52,125 @@ char *url_decode(char *str) { return url_decode_r(buf, str, size); } +//decode %XX character or return 0 if cannot +char url_percent_escape_decode(char *s) { + if(likely(s[1] && s[2])) + return from_hex(s[1]) << 4 | from_hex(s[2]); + return 0; +} + +//this (utf8 string related) should be moved in separate file in future +char url_utf8_get_byte_length(char c) { + if(!IS_UTF8_BYTE(c)) + return 1; + + char length = 0; + while(likely(c & 0x80)) { + length++; + c <<= 1; + } + //4 byte is max size for UTF-8 char + //10XX XXXX is not valid character -> check length == 1 + if(length > 4 || length == 1) + return -1; + + return length; +} + +//decode % encoded UTF-8 characters and copy them to *d +//return count of bytes written to *d +char url_decode_multibyte_utf8(char *s, char *d, char *d_end) { + char first_byte = url_percent_escape_decode(s); + + if(unlikely(!first_byte || !IS_UTF8_STARTBYTE(first_byte))) + return 0; + + char byte_length = url_utf8_get_byte_length(first_byte); + + if(unlikely(byte_length <= 0 || d+byte_length >= d_end)) + return 0; + + char to_read = byte_length; + while(to_read > 0) { + char c = url_percent_escape_decode(s); + + if(unlikely( !IS_UTF8_BYTE(c) )) + return 0; + if((to_read != byte_length) && IS_UTF8_STARTBYTE(c)) + return 0; + + *d++ = c; + s+=3; + to_read--; + } + + return byte_length; +} + +/* + * The utf8_check() function scans the '\0'-terminated string starting + * at s. It returns a pointer to the first byte of the first malformed + * or overlong UTF-8 sequence found, or NULL if the string contains + * only correct UTF-8. It also spots UTF-8 sequences that could cause + * trouble if converted to UTF-16, namely surrogate characters + * (U+D800..U+DFFF) and non-Unicode positions (U+FFFE..U+FFFF). This + * routine is very likely to find a malformed sequence if the input + * uses any other encoding than UTF-8. It therefore can be used as a + * very effective heuristic for distinguishing between UTF-8 and other + * encodings. + * + * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30 + * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html + */ + +unsigned char *utf8_check(unsigned char *s) +{ + while (*s) + { + if (*s < 0x80) + /* 0xxxxxxx */ + s++; + else if ((s[0] & 0xe0) == 0xc0) + { + /* 110XXXXx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[0] & 0xfe) == 0xc0) /* overlong? */ + return s; + else + s += 2; + } + else if ((s[0] & 0xf0) == 0xe0) + { + /* 1110XXXX 10Xxxxxx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */ + (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */ + (s[0] == 0xef && s[1] == 0xbf && + (s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */ + return s; + else + s += 3; + } + else if ((s[0] & 0xf8) == 0xf0) + { + /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[3] & 0xc0) != 0x80 || + (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */ + (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */ + return s; + else + s += 4; + } + else + return s; + } + + return NULL; +} + char *url_decode_r(char *to, char *url, size_t size) { char *s = url, // source *d = to, // destination @@ -59,12 +178,24 @@ char *url_decode_r(char *to, char *url, size_t size) { while(*s && d < e) { if(unlikely(*s == '%')) { - if(likely(s[1] && s[2])) { - char t = from_hex(s[1]) << 4 | from_hex(s[2]); + char t = url_percent_escape_decode(s); + if(IS_UTF8_BYTE(t)) { + char bytes_written = url_decode_multibyte_utf8(s, d, e); + if(likely(bytes_written)){ + d += bytes_written; + s += (bytes_written * 3)-1; + } + else { + goto fail_cleanup; + } + } + else if(likely(t) && isprint(t)) { // avoid HTTP header injection - *d++ = (char)((isprint(t))? t : ' '); + *d++ = t; s += 2; } + else + goto fail_cleanup; } else if(unlikely(*s == '+')) *d++ = ' '; @@ -77,5 +208,12 @@ char *url_decode_r(char *to, char *url, size_t size) { *d = '\0'; + if(unlikely( utf8_check(to) )) //NULL means sucess here + return NULL; + return to; + +fail_cleanup: + *d = '\0'; + return NULL; } diff --git a/web/api/badges/web_buffer_svg.c b/web/api/badges/web_buffer_svg.c index b24fddedf5..4f9826fb20 100644 --- a/web/api/badges/web_buffer_svg.c +++ b/web/api/badges/web_buffer_svg.c @@ -11,7 +11,7 @@ * https://github.com/badges/shields/blob/master/measure-text.js */ -static double verdana11_widths[256] = { +static double verdana11_widths[128] = { [0] = 0.0, [1] = 0.0, [2] = 0.0, @@ -139,157 +139,36 @@ static double verdana11_widths[256] = { [124] = 4.9951171875, // | [125] = 6.982421875, // } [126] = 9.001953125, // ~ - [127] = 0.0, - [128] = 0.0, - [129] = 0.0, - [130] = 0.0, - [131] = 0.0, - [132] = 0.0, - [133] = 0.0, - [134] = 0.0, - [135] = 0.0, - [136] = 0.0, - [137] = 0.0, - [138] = 0.0, - [139] = 0.0, - [140] = 0.0, - [141] = 0.0, - [142] = 0.0, - [143] = 0.0, - [144] = 0.0, - [145] = 0.0, - [146] = 0.0, - [147] = 0.0, - [148] = 0.0, - [149] = 0.0, - [150] = 0.0, - [151] = 0.0, - [152] = 0.0, - [153] = 0.0, - [154] = 0.0, - [155] = 0.0, - [156] = 0.0, - [157] = 0.0, - [158] = 0.0, - [159] = 0.0, - [160] = 0.0, - [161] = 0.0, - [162] = 0.0, - [163] = 0.0, - [164] = 0.0, - [165] = 0.0, - [166] = 0.0, - [167] = 0.0, - [168] = 0.0, - [169] = 0.0, - [170] = 0.0, - [171] = 0.0, - [172] = 0.0, - [173] = 0.0, - [174] = 0.0, - [175] = 0.0, - [176] = 0.0, - [177] = 0.0, - [178] = 0.0, - [179] = 0.0, - [180] = 0.0, - [181] = 0.0, - [182] = 0.0, - [183] = 0.0, - [184] = 0.0, - [185] = 0.0, - [186] = 0.0, - [187] = 0.0, - [188] = 0.0, - [189] = 0.0, - [190] = 0.0, - [191] = 0.0, - [192] = 0.0, - [193] = 0.0, - [194] = 0.0, - [195] = 0.0, - [196] = 0.0, - [197] = 0.0, - [198] = 0.0, - [199] = 0.0, - [200] = 0.0, - [201] = 0.0, - [202] = 0.0, - [203] = 0.0, - [204] = 0.0, - [205] = 0.0, - [206] = 0.0, - [207] = 0.0, - [208] = 0.0, - [209] = 0.0, - [210] = 0.0, - [211] = 0.0, - [212] = 0.0, - [213] = 0.0, - [214] = 0.0, - [215] = 0.0, - [216] = 0.0, - [217] = 0.0, - [218] = 0.0, - [219] = 0.0, - [220] = 0.0, - [221] = 0.0, - [222] = 0.0, - [223] = 0.0, - [224] = 0.0, - [225] = 0.0, - [226] = 0.0, - [227] = 0.0, - [228] = 0.0, - [229] = 0.0, - [230] = 0.0, - [231] = 0.0, - [232] = 0.0, - [233] = 0.0, - [234] = 0.0, - [235] = 0.0, - [236] = 0.0, - [237] = 0.0, - [238] = 0.0, - [239] = 0.0, - [240] = 0.0, - [241] = 0.0, - [242] = 0.0, - [243] = 0.0, - [244] = 0.0, - [245] = 0.0, - [246] = 0.0, - [247] = 0.0, - [248] = 0.0, - [249] = 0.0, - [250] = 0.0, - [251] = 0.0, - [252] = 0.0, - [253] = 0.0, - [254] = 0.0, - [255] = 0.0 + [127] = 0.0 }; // find the width of the string using the verdana 11points font -// re-write the string in place, skiping zero-length characters -static inline double verdana11_width(char *s) { +static inline double verdana11_width(const char *s, float em_size) { double w = 0.0; - char *d = s; while(*s) { - double t = verdana11_widths[(unsigned char)*s]; - if(t == 0.0) + // if UTF8 multibyte char found and guess it's width equal 1em + // as label width will be updated with JavaScript this is not so important + + // TODO: maybe move UTF8 functions from url.c to separate util in libnetdata + // then use url_utf8_get_byte_length etc. + if(IS_UTF8_STARTBYTE(*s)) { s++; + while(IS_UTF8_BYTE(*s) && !IS_UTF8_STARTBYTE(*s)){ + s++; + } + w += em_size; + } else { - w += t + VERDANA_KERNING; - if(d != s) - *d++ = *s++; - else - d = ++s; + if(likely(!(*s & 0x80))){ // Byte 1XXX XXXX is not valid in UTF8 + double t = verdana11_widths[(unsigned char)*s]; + if(t != 0.0) + w += t + VERDANA_KERNING; + } + s++; } } - *d = '\0'; w -= VERDANA_KERNING; w += VERDANA_PADDING; return w; @@ -810,8 +689,7 @@ static inline void calc_colorz(const char *color, char *final, size_t len, calcu #define COLOR_STRING_SIZE 100 void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const char *units, const char *label_color, const char *value_color, int precision, int scale, uint32_t options) { - char label_buffer[LABEL_STRING_SIZE + 1] - , value_color_buffer[COLOR_STRING_SIZE + 1] + char value_color_buffer[COLOR_STRING_SIZE + 1] , value_string[VALUE_STRING_SIZE + 1] , label_escaped[LABEL_STRING_SIZE + 1] , value_escaped[VALUE_STRING_SIZE + 1] @@ -831,14 +709,11 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch calc_colorz(value_color, value_color_buffer, COLOR_STRING_SIZE, value); format_value_and_unit(value_string, VALUE_STRING_SIZE, (options & RRDR_OPTION_DISPLAY_ABS)?calculated_number_fabs(value):value, units, precision); - // we need to copy the label, since verdana11_width may write to it - strncpyz(label_buffer, label, LABEL_STRING_SIZE); - - label_width = verdana11_width(label_buffer) + (BADGE_HORIZONTAL_PADDING * 2); - value_width = verdana11_width(value_string) + (BADGE_HORIZONTAL_PADDING * 2); + label_width = verdana11_width(label, font_size) + (BADGE_HORIZONTAL_PADDING * 2); + value_width = verdana11_width(value_string, font_size) + (BADGE_HORIZONTAL_PADDING * 2); total_width = label_width + value_width; - escape_xmlz(label_escaped, label_buffer, LABEL_STRING_SIZE); + escape_xmlz(label_escaped, label, LABEL_STRING_SIZE); escape_xmlz(value_escaped, value_string, VALUE_STRING_SIZE); escape_xmlz(label_color_escaped, color_map(label_color), COLOR_STRING_SIZE); escape_xmlz(value_color_escaped, color_map(value_color_buffer), COLOR_STRING_SIZE); @@ -862,19 +737,43 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch "<stop offset=\"1\" stop-opacity=\".1\"/>" "</linearGradient>" "<mask id=\"round\">" - "<rect width=\"%0.2f\" height=\"%0.2f\" rx=\"%0.2f\" fill=\"#fff\"/>" + "<rect class=\"bdge-ttl-width\" width=\"%0.2f\" height=\"%0.2f\" rx=\"%0.2f\" fill=\"#fff\"/>" "</mask>" "<g mask=\"url(#round)\">" - "<rect width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" - "<rect x=\"%0.2f\" width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" - "<rect width=\"%0.2f\" height=\"%0.2f\" fill=\"url(#smooth)\"/>" + "<rect class=\"bdge-rect-lbl\" width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" + "<rect class=\"bdge-rect-val\" x=\"%0.2f\" width=\"%0.2f\" height=\"%0.2f\" fill=\"%s\"/>" + "<rect class=\"bdge-ttl-width\" width=\"%0.2f\" height=\"%0.2f\" fill=\"url(#smooth)\"/>" "</g>" "<g fill=\"#fff\" text-anchor=\"middle\" font-family=\"DejaVu Sans,Verdana,Geneva,sans-serif\" font-size=\"%0.2f\">" - "<text x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" - "<text x=\"%0.2f\" y=\"%0.0f\">%s</text>" - "<text x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" - "<text x=\"%0.2f\" y=\"%0.0f\">%s</text>" + "<text class=\"bdge-lbl-lbl\" x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" + "<text class=\"bdge-lbl-lbl\" x=\"%0.2f\" y=\"%0.0f\">%s</text>" + "<text class=\"bdge-lbl-val\" x=\"%0.2f\" y=\"%0.0f\" fill=\"#010101\" fill-opacity=\".3\">%s</text>" + "<text class=\"bdge-lbl-val\" x=\"%0.2f\" y=\"%0.0f\">%s</text>" "</g>" + "<script type=\"text/javascript\">" + "var bdg_horiz_padding = %d;" + "function netdata_bdge_each(list, attr, value){" + "Array.prototype.forEach.call(list, function(el){" + "el.setAttribute(attr, value);" + "});" + "};" + "var this_svg = document.currentScript.closest(\"svg\");" + "var elem_lbl = this_svg.getElementsByClassName(\"bdge-lbl-lbl\");" + "var elem_val = this_svg.getElementsByClassName(\"bdge-lbl-val\");" + "var lbl_size = elem_lbl[0].getBBox();" + "var val_size = elem_val[0].getBBox();" + "var width_total = lbl_size.width + bdg_horiz_padding*2;" + "this_svg.getElementsByClassName(\"bdge-rect-lbl\")[0].setAttribute(\"width\", width_total);" + "netdata_bdge_each(elem_lbl, \"x\", (lbl_size.width / 2) + bdg_horiz_padding);" + "netdata_bdge_each(elem_val, \"x\", width_total + (val_size.width / 2) + bdg_horiz_padding);" + "var val_rect = this_svg.getElementsByClassName(\"bdge-rect-val\")[0];" + "val_rect.setAttribute(\"width\", val_size.width + bdg_horiz_padding*2);" + "val_rect.setAttribute(\"x\", width_total);" + "width_total += val_size.width + bdg_horiz_padding*2;" + "var width_update_elems = this_svg.getElementsByClassName(\"bdge-ttl-width\");" + "netdata_bdge_each(width_update_elems, \"width\", width_total);" + "this_svg.setAttribute(\"width\", width_total);" + "</script>" "</svg>", total_width, height, total_width, height, round_corner, @@ -885,7 +784,8 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch label_width / 2, ceil(height - text_offset), label_escaped, label_width / 2, ceil(height - text_offset - 1.0), label_escaped, label_width + value_width / 2 -1, ceil(height - text_offset), value_escaped, - label_width + value_width / 2 -1, ceil(height - text_offset - 1.0), value_escaped); + label_width + value_width / 2 -1, ceil(height - text_offset - 1.0), value_escaped, + BADGE_HORIZONTAL_PADDING ); } int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *url) { diff --git a/web/server/web_client.c b/web/server/web_client.c index bd275f5e50..20745d8dd4 100644 --- a/web/server/web_client.c +++ b/web/server/web_client.c @@ -821,6 +821,7 @@ static inline char *http_header_parse(struct web_client *w, char *s, int parse_u typedef enum { HTTP_VALIDATION_OK, HTTP_VALIDATION_NOT_SUPPORTED, + HTTP_VALIDATION_MALFORMED_URL, #ifdef ENABLE_HTTPS HTTP_VALIDATION_INCOMPLETE, HTTP_VALIDATION_REDIRECT @@ -941,7 +942,8 @@ static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { // a valid complete HTTP request found *ue = '\0'; - url_decode_r(w->decoded_url, encoded_url, NETDATA_WEB_REQUEST_URL_SIZE + 1); + if(!url_decode_r(w->decoded_url, encoded_url, NETDATA_WEB_REQUEST_URL_SIZE + 1)) + return HTTP_VALIDATION_MALFORMED_URL; *ue = ' '; // copy the URL - we are going to overwrite parts of it @@ -1424,7 +1426,7 @@ void web_client_process_request(struct web_client *w) { buffer_flush(w->response.data); buffer_sprintf(w->response.data, "Received request is too big (%zu bytes).\r\n", w->response.data->len); - w->response.code = 400; + w->response.code = HTTP_RESPONSE_BAD_REQUEST; } else { // wait for more data @@ -1441,12 +1443,19 @@ void web_client_process_request(struct web_client *w) { break; } #endif + case HTTP_VALIDATION_MALFORMED_URL: + debug(D_WEB_CLIENT_ACCESS, "%llu: URL parsing failed (malformed URL). Cannot understand '%s'.", w->id, w->response.data->buffer); + + buffer_flush(w->response.data); + buffer_strcat(w->response.data, "URL not valid. I don't understand you...\r\n"); + w->response.code = HTTP_RESPONSE_BAD_REQUEST; + break; case HTTP_VALIDATION_NOT_SUPPORTED: debug(D_WEB_CLIENT_ACCESS, "%llu: Cannot understand '%s'.", w->id, w->response.data->buffer); buffer_flush(w->response.data); buffer_strcat(w->response.data, "I don't understand you...\r\n"); - w->response.code = 400; + w->response.code = HTTP_RESPONSE_BAD_REQUEST; break; } diff --git a/web/server/web_client.h b/web/server/web_client.h index 0a57e8d8e7..c6de66979e 100644 --- a/web/server/web_client.h +++ b/web/server/web_client.h @@ -11,6 +11,9 @@ extern int web_enable_gzip, web_gzip_strategy; #endif /* NETDATA_WITH_ZLIB */ +//HTTP_CODES 4XX +#define HTTP_RESPONSE_BAD_REQUEST 400 + extern int respect_web_browser_do_not_track_policy; extern char *web_x_frame_options; |