diff options
author | thiagoftsm <thiagoftsm@gmail.com> | 2019-07-25 12:30:00 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-25 12:30:00 +0000 |
commit | 3076cfe5d455b8007e4f90776e9ea3d05faf1a7e (patch) | |
tree | ccf4590bcbc52f4011560daca4e77214e5d6e077 | |
parent | b74cc9af0707957c9f7d252eae8fd20c9b091aff (diff) |
Url parser refactoring (#6247)
* URL_parser_review comments 1
* URL_parser_review restoring web_client.c
* URL_parser_review restoring url.h
* URL_parser_review restoring web_client.h
* URL_parser_review restoring inlined.h
* URL_parser_review restoring various
* URL_parser_review commenting!
* URL_parser_review last checks!
* URL_parser_review registry!
* URL_parser_review codacy errors!
* URL_parser_review codacy errors 2!
* URL_parser_review end of request!
* URL_parser_review
* URL_parser_review format fix
* URL_parser_review restoring
* URL_parser_review stopped at 5!
* URL_parser_review formatting!
* URL_parser_review:
Started the map of the query string when it is necessary
* URL_parser_review:
With these adjustments in the URL library we are now able to parse all the escape characters!
* URL_parser_review: code review
Fixes problems and formatting issues raised by coworkers!
* URL_parser_review: adjust script
The script was not 100% according to the shellcheck specifications; no less important,
it was a direct script instead of a .in file
* sslstream: Rebase 2
It was necessary to change a function due to the UTF-8 handling
* sslstream: Fixing 6426
We had a cast error introduced by another PR, so I am fixing it here
* URL_parser_review
Change .gitignore to avoid considering a script file.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | libnetdata/url/url.c | 202 | ||||
-rw-r--r-- | libnetdata/url/url.h | 7 | ||||
-rw-r--r-- | tests/Makefile.am | 3 | ||||
-rw-r--r-- | tests/urls/request.sh.in | 303 | ||||
-rw-r--r-- | web/api/health/health_cmdapi.c | 1 | ||||
-rw-r--r-- | web/api/web_api_v1.c | 14 | ||||
-rw-r--r-- | web/server/web_client.c | 242 | ||||
-rw-r--r-- | web/server/web_client.h | 17 |
9 files changed, 711 insertions, 79 deletions
diff --git a/.gitignore b/.gitignore index 6cd8a33057..c48d87e6c6 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,7 @@ callgrind.out.* gmon.out gmon.txt sitespeed-result/ +tests/urls/request.sh # tests and temp files python.d/python-modules-installer.sh diff --git a/libnetdata/url/url.c b/libnetdata/url/url.c index 1929d6686f..7df9faaf02 100644 --- a/libnetdata/url/url.c +++ b/libnetdata/url/url.c @@ -43,8 +43,16 @@ char *url_encode(char *str) { return pbuf; } -/* Returns a url-decoded version of str */ -/* IMPORTANT: be sure to free() the returned string after use */ +/** + * URL Decode + * + * Returns a url-decoded version of str + * IMPORTANT: be sure to free() the returned string after use + * + * @param str the string that will be decode + * + * @return a pointer for the url decoded. + */ char *url_decode(char *str) { size_t size = strlen(str) + 1; @@ -52,14 +60,30 @@ char *url_decode(char *str) { return url_decode_r(buf, str, size); } -//decode %XX character or return 0 if cannot +/** + * Percentage escape decode + * + * Decode %XX character or return 0 if cannot + * + * @param s the string to decode + * + * @return The character decoded on success and 0 otherwise + */ char url_percent_escape_decode(char *s) { if(likely(s[1] && s[2])) return from_hex(s[1]) << 4 | from_hex(s[2]); return 0; } -//this (utf8 string related) should be moved in separate file in future +/** + * Get byte length + * + * This (utf8 string related) should be moved in separate file in future + * + * @param c is the utf8 character + * * + * @return It reurns the length of the specific character. 
+ */ char url_utf8_get_byte_length(char c) { if(!IS_UTF8_BYTE(c)) return 1; @@ -77,8 +101,17 @@ char url_utf8_get_byte_length(char c) { return length; } -//decode % encoded UTF-8 characters and copy them to *d -//return count of bytes written to *d +/** + * Decode Multibyte UTF8 + * + * Decode % encoded UTF-8 characters and copy them to *d + * + * @param s first address + * @param d + * @param d_end last address + * + * @return count of bytes written to *d + */ char url_decode_multibyte_utf8(char *s, char *d, char *d_end) { char first_byte = url_percent_escape_decode(s); @@ -122,7 +155,6 @@ char url_decode_multibyte_utf8(char *s, char *d, char *d_end) { * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30 * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html */ - unsigned char *utf8_check(unsigned char *s) { while (*s) @@ -208,7 +240,7 @@ char *url_decode_r(char *to, char *url, size_t size) { *d = '\0'; - if(unlikely( utf8_check(to) )) //NULL means sucess here + if(unlikely( utf8_check((unsigned char *)to) )) //NULL means sucess here return NULL; return to; @@ -217,3 +249,157 @@ fail_cleanup: *d = '\0'; return NULL; } + +/** + * Is request complete? + * + * Check whether the request is complete. + * This function cannot check all the requests METHODS, for example, case you are working with POST, it will fail. + * + * @param begin is the first character of the sequence to analyse. + * @param end is the last character of the sequence + * @param length is the length of the total of bytes read, it is not the difference between end and begin. + * + * @return It returns 1 when the request is complete and 0 otherwise. 
+ */ +inline int url_is_request_complete(char *begin, char *end, size_t length) { + + if ( begin == end) { + //Message cannot be complete when first and last address are the same + return 0; + } + + //This math to verify the last is valid, because we are discarding the POST + if (length > 4) { + begin = end - 4; + } + + return (strstr(begin, "\r\n\r\n"))?1:0; +} + +/** + * Find protocol + * + * Search for the string ' HTTP/' in the message given. + * + * @param s is the start of the user request. + * @return + */ +inline char *url_find_protocol(char *s) { + while(*s) { + // find the next space + while (*s && *s != ' ') s++; + + // is it SPACE + "HTTP/" ? + if(*s && !strncmp(s, " HTTP/", 6)) break; + else s++; + } + + return s; +} + +/** + * Map query string + * + * Map the query string fields that will be decoded. + * This functions must be called after to check the presence of query strings, + * here we are assuming that you already tested this. + * + * @param out the pointer to pointers that will be used to map + * @param url the input url that we are decoding. + * + * @return It returns the number of total variables in the query string. + */ +int url_map_query_string(char **out, char *url) { + (void)out; + (void)url; + int count = 0; + + //First we try to parse considering that there was not URL encode process + char *moveme = url; + char *ptr; + + //We always we have at least one here, so I can set this. + out[count++] = moveme; + while(moveme) { + ptr = strchr((moveme+1), '&'); + if(ptr) { + out[count++] = ptr; + } + + moveme = ptr; + } + + //I could not find any '&', so I am assuming now it is like '%26' + if (count == 1) { + moveme = url; + while(moveme) { + ptr = strchr((moveme+1), '%'); + if(ptr) { + char *test = (ptr+1); + if (!strncmp(test, "3f", 2) || !strncmp(test, "3F", 2)) { + out[count++] = ptr; + } + } + moveme = ptr; + } + } + + return count; +} + +/** + * Parse query string + * + * Parse the query string mapped and store it inside output. 
+ * + * @param output is a vector where I will store the string. + * @param max is the maximum length of the output + * @param map the map done by the function url_map_query_string. + * @param total the total number of variables inside map + * + * @return It returns 0 on success and -1 otherwise + */ +int url_parse_query_string(char *output, size_t max, char **map, int total) { + if(!total) { + return 0; + } + + int counter, next; + size_t length; + char *end; + char *begin = map[0]; + char save; + size_t copied = 0; + for(counter = 0, next=1 ; next <= total ; ++counter, ++next) { + if (next != total) { + end = map[next]; + length = (size_t) (end - begin); + save = *end; + *end = 0x00; + } else { + length = strlen(begin); + end = NULL; + } + length++; + + if (length > (max - copied)) { + error("Parsing query string: we cannot parse a query string so big"); + break; + } + + if(!url_decode_r(output, begin, length)) { + return -1; + } + length = strlen(output); + copied += length; + output += length; + + begin = end; + if (begin) { + *begin = save; + } + } + + return 0; +} diff --git a/libnetdata/url/url.h b/libnetdata/url/url.h index 6cef6d7a84..10f3fe1763 100644 --- a/libnetdata/url/url.h +++ b/libnetdata/url/url.h @@ -25,4 +25,11 @@ extern char *url_decode(char *str); extern char *url_decode_r(char *to, char *url, size_t size); +#define WEB_FIELDS_MAX 400 +extern int url_map_query_string(char **out, char *url); +extern int url_parse_query_string(char *output, size_t max, char **map, int total); + +extern int url_is_request_complete(char *begin,char *end,size_t length); +extern char *url_find_protocol(char *s); + #endif /* NETDATA_URL_H */ diff --git a/tests/Makefile.am b/tests/Makefile.am index b0f65456e2..5488752dc7 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,6 +5,7 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in CLEANFILES = \ health_mgmtapi/health-cmdapi-test.sh \ + urls/request.sh \ $(NULL) include $(top_srcdir)/build/subst.inc @@ -22,10 +23,12 
@@ dist_noinst_DATA = \ node.d/fronius.process.spec.js \ node.d/fronius.validation.spec.js \ health_mgmtapi/health-cmdapi-test.sh.in \ + urls/request.sh.in \ $(NULL) dist_plugins_SCRIPTS = \ health_mgmtapi/health-cmdapi-test.sh \ + urls/request.sh \ $(NULL) dist_noinst_SCRIPTS = \ diff --git a/tests/urls/request.sh.in b/tests/urls/request.sh.in new file mode 100644 index 0000000000..fac00bc4ee --- /dev/null +++ b/tests/urls/request.sh.in @@ -0,0 +1,303 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later + +################################################################################################ +#### #### +#### GLOBAL VARIABLES #### +#### #### +################################################################################################ + +# The current time +CT=$(date +'%s') + +# The previous time +PT=$((CT - 30)) + +# The output directory where we will store the results and error +OUTDIR="tests" +OUTEDIR="encoded_tests" +OUTOPTDIR="options" +ERRDIR="etests" + +################################################################################################ +#### #### +#### FUNCTIONS #### +#### #### +################################################################################################ + +# Print error message and close script +netdata_print_error(){ + echo "Closing due error \"$1\" code \"$2\"" + exit 1 +} + +# Print the header message of the function +netdata_print_header() { + echo "$1" +} + +# Create the main directory where the results will be stored +netdata_create_directory() { + netdata_print_header "Creating directory $1" + if [ ! -d "$1" ]; then + mkdir "$1" + TEST=$? + if [ $TEST -ne 0 ]; then + netdata_print_error "Cannot create directory $?" + fi + else + echo "Working with directory $OUTDIR" + fi +} + +#Check whether download did not have problem +netdata_test_download(){ + grep "HTTP/1.1 200 OK" "$1" 2>/dev/null 1>/dev/null + TEST=$? + if [ $TEST -ne 0 ]; then + netdata_print_error "Cannot do download of the page $2" $? 
+ exit 1 + fi +} + +#Check whether download had a problem +netdata_error_test(){ + grep "HTTP/1.1 200 OK" "$1" 2>/dev/null 1>/dev/null + TEST=$? + if [ $TEST -eq 0 ]; then + netdata_print_error "The page $2 did not answer with an error" $? + exit 1 + fi +} + + +# Download information from Netdata +netdata_download_various() { + netdata_print_header "Getting $2" + curl -v -k --create-dirs -o "$OUTDIR/$3.out" "$1/$2" 2> "$OUTDIR/$3.err" + netdata_test_download "$OUTDIR/$3.err" "$1/$2" +} + +netdata_download_various_with_options() { + netdata_print_header "Getting options for $2" + curl -X OPTIONS -v -k --create-dirs -o "$OUTOPTDIR/$3.out" "$1/$2" 2> "$OUTOPTDIR/$3.err" + netdata_test_download "$OUTOPTDIR/$3.err" "$1/$2" +} + +# Download information from Netdata +netdata_wrong_request_various() { + netdata_print_header "Getting $2" + curl -v -k --create-dirs -o "$ERRDIR/$3.out" "$1/$2" 2> "$ERRDIR/$3.err" + netdata_error_test "$ERRDIR/$3.err" "$1/$2" +} + +# Download charts from Netdata +netdata_download_charts() { + curl -v -k --create-dirs -o "$OUTDIR/charts.out" "$1/$2" 2> "$OUTDIR/charts.err" + netdata_test_download "$OUTDIR/charts.err" "$1/$2" + + #Rewrite the next + grep -w "id" tests/charts.out| cut -d: -f2 | grep "\"," | sed s/,//g | sort +} + +#Test options for a specific chart +netdata_download_chart() { + SEPARATOR="&" + EQUAL="=" + OUTD=$OUTDIR + ENCODED=" " + for I in $(seq 0 1); do + if [ "$I" -eq "1" ] ; then + SEPARATOR="%26" + EQUAL="%3D" + OUTD=$OUTEDIR + ENCODED="encoded" + fi + + NAME=${3//\"/} + netdata_print_header "Getting data for $NAME using $4 $ENCODED" + + LDIR=$OUTD"/"$4 + + LURL="$1/$2$EQUAL$NAME" + + NAME=$NAME"_$4" + + curl -v -k --create-dirs -o "$LDIR/$NAME.out" "$LURL" 2> "$LDIR/$NAME.err" + netdata_test_download "$LDIR/$NAME.err" "$LURL" + + UFILES=( "points" "before" "after" ) + COUNTER=0 + for OPT in "points=100" "before=$PT" "after=$CT" ; + do + LURL="$LURL$SEPARATOR$OPT" + LFILE=$NAME"_${UFILES[$COUNTER]}"; + + curl -v -k 
--create-dirs -o "$LDIR/$LFILE.out" "$LURL" 2> "$LDIR/$LFILE.err" + netdata_test_download "$LDIR/$LFILE.err" "$LURL" + + COUNTER=$((COUNTER + 1)) + done + + LURL="$LURL&group$EQUAL" + for OPT in "min" "max" "sum" "median" "stddev" "cv" "ses" "des" "incremental_sum" "average"; + do + TURL=$LURL$OPT + TFILE=$NAME"_$OPT"; + curl -v -k --create-dirs -o "$LDIR/$TFILE.out" "$TURL" 2> "$LDIR/$TFILE.err" + netdata_test_download "$LDIR/$TFILE.err" "$TURL" + for MORE in "jsonp" "json" "ssv" "csv" "datatable" "datasource" "tsv" "ssvcomma" "html" "array"; + do + TURL=$TURL"&format="$MORE + TFILE=$NAME"_$OPT""_$MORE"; + curl -v -k --create-dirs -o "$LDIR/$TFILE.out" "$TURL" 2> "$LDIR/$TFILE.err" + netdata_test_download "$LDIR/$TFILE.err" "$TURL" + done + done + + LURL="$LURL$OPT>ime=60" + NFILE=$NAME"_gtime" + curl -v -k --create-dirs -o "$LDIR/$NFILE.out" "$TURL" 2> "$LDIR/$NFILE.err" + netdata_test_download "$LDIR/$NFILE.err" "$LURL" + + LURL="$LURL$OPT&options=percentage" + NFILE=$NAME"_percentage" + curl -v -k --create-dirs -o "$LDIR/$NFILE.out" "$TURL" 2> "$LDIR/$NFILE.err" + netdata_test_download "$LDIR/$NFILE.err" "$LURL" + + LURL="$LURL$OPT&dimensions=system%7Cnice" + NFILE=$NAME"_dimension" + curl -v -k --create-dirs -o "$LDIR/$NFILE.out" "$TURL" 2> "$LDIR/$NFILE.err" + netdata_test_download "$LDIR/$NFILE.err" "$LURL" + + LURL="$LURL$OPT&label=testing" + NFILE=$NAME"_label" + curl -v -k --create-dirs -o "$LDIR/$NFILE.out" "$TURL" 2> "$LDIR/$NFILE.err" + netdata_test_download "$LDIR/$NFILE.err" "$LURL" + done +} + +# Download information from Netdata +netdata_download_allmetrics() { + netdata_print_header "Getting All metrics" + LURL="$1/api/v1/allmetrics?format=" + for FMT in "shell" "prometheus" "prometheus_all_hosts" "json" ; + do + TURL=$LURL$FMT + for OPT in "yes" "no"; + do + if [ "$FMT" == "prometheus" ]; then + TURL="$TURL&help=$OPT&types=$OPT×tamps=$OPT" + fi + TURL="$TURL&names=$OPT&oldunits=$OPT&hideunits=$OPT&prefix=ND" + + NAME="allmetrics_$FMT" + echo 
"$OUTDIR/$2/$NAME.out" + curl -v -k --create-dirs -o "$OUTDIR/$2/$NAME.out" "$TURL" 2> "$OUTDIR/$2/$NAME.err" + netdata_test_download "$OUTDIR/$2/$NAME.err" "$TURL" + done + done +} + + +################################################################################################ +#### #### +#### MAIN ROUTINE #### +#### #### +################################################################################################ +MURL="http://127.0.0.1:19999" + +netdata_create_directory $OUTDIR +netdata_create_directory $OUTEDIR +netdata_create_directory $OUTOPTDIR +netdata_create_directory $ERRDIR + +wget --execute="robots = off" --mirror --convert-links --no-parent http://127.0.0.1:19999 +TEST=$? +if [ $TEST -ne "0" ] ; then + echo "Cannot connect to Netdata" + exit 1 +fi + +netdata_download_various $MURL "netdata.conf" "netdata.conf" + +netdata_download_various_with_options $MURL "netdata.conf" "netdata.conf" + +netdata_wrong_request_various $MURL "api/v15/info?this%20could%20not%20be%20here" "err_version" + +netdata_wrong_request_various $MURL "api/v1/\(*@&$\!$%%5E\)\!$*%&\)\!$*%%5E*\!%5E%\!%5E$%\!%5E%\(\!*%5E*%5E%\(*@&$%5E%\(\!%5E#*&\!^#$*&\!^%\)@\($%^\)\!*&^\(\!*&^#$&#$\)\!$%^\)\!$*%&\)#$\!^#*$^\!\(*#^#\)\!%^\!\)$*%&\!\(*&$\!^#$*&^\!*#^$\!*^\)%\(\!*&$%\)\(\!&#$\!^*#&$^\!*^%\)\!$%\)\!\(&#$\!^#*&^$" "err_version2" + +netdata_download_various $MURL "api/v1/info" "info" +netdata_download_various_with_options $MURL "api/v1/info" "info" +netdata_download_various $MURL "api/v1/info?this%20could%20not%20be%20here" "err_info" + +netdata_print_header "Getting all the netdata charts" +CHARTS=$( netdata_download_charts "http://127.0.0.1:19999" "api/v1/charts" ) +WCHARTS=$( netdata_download_charts "http://127.0.0.1:19999" "api/v1/charts?this%20could%20not%20be%20here" ) +WCHARTS2=$( netdata_download_charts "http://127.0.0.1:19999" "api/v1/charts%3fthis%20could%20not%20be%20here" ) + +if [ ${#CHARTS[@]} -ne ${#WCHARTS[@]} ]; then + echo "The number of charts does not match with 
division not encoded."; + exit 2; +elif [ ${#CHARTS[@]} -ne ${#WCHARTS2[@]} ]; then + echo "The number of charts does not match when everything is encoded"; + exit 3; +fi + +netdata_wrong_request_various $MURL "api/v1/chart" "err_chart_without_chart" +netdata_wrong_request_various $MURL "api/v1/chart?_=234231424242" "err_chart_arg" + +netdata_download_various $MURL "api/v1/chart?chart=cpu.cpu0_interrupts&_=234231424242" "chart_cpu_with_more_args" +netdata_download_various_with_options $MURL "api/v1/chart?chart=cpu.cpu0_interrupts&_=234231424242" "chart_cpu_with_more_args" + +netdata_download_various $MURL "api/v1/chart%3Fchart=cpu.cpu0_interrupts&_=234231424242" "chart_cpu_with_more_args_encoded" +netdata_download_various_with_options $MURL "api/v1/chart%3Fchart=cpu.cpu0_interrupts&_=234231424242" "chart_cpu_with_more_args_encoded" +netdata_download_various $MURL "api/v1/chart%3Fchart=cpu.cpu0_interrupts%26_=234231424242" "chart_cpu_with_more_args_encoded2" +netdata_download_various $MURL "api/v1/chart%3Fchart%3Dcpu.cpu0_interrupts%26_%3D234231424242" "chart_cpu_with_more_args_encoded3" + +netdata_create_directory "$OUTDIR/chart" +for I in $CHARTS ; do + NAME=${I//\"/} + netdata_download_various $MURL "api/v1/chart?chart=$NAME" "chart/$NAME" +done + +netdata_wrong_request_various $MURL "api/v1/alarm_variables" "err_alarm_variables_without_chart" +netdata_wrong_request_various $MURL "api/v1/alarm_variables?_=234231424242" "err_alarm_variables_arg" +netdata_download_various $MURL "api/v1/alarm_variables?chart=cpu.cpu0_interrupts&_=234231424242" "alarm_cpu_with_more_args" + +netdata_create_directory "$OUTDIR/alarm_variables" +for I in $CHARTS ; do + NAME=${I//\"/} + netdata_download_various $MURL "api/v1/alarm_variables?chart=$NAME" "alarm_variables/$NAME" +done + +netdata_create_directory "$OUTDIR/badge" +netdata_create_directory "$OUTEDIR/badge" +for I in $CHARTS ; do + netdata_download_chart $MURL "api/v1/badge.svg?chart" "$I" "badge" +done + 
+netdata_create_directory "$OUTDIR/allmetrics" +netdata_download_allmetrics $MURL "allmetrics" + +netdata_download_various $MURL "api/v1/alarms?all" "alarms_all" +netdata_download_various $MURL "api/v1/alarms?active" "alarms_active" +netdata_download_various $MURL "api/v1/alarms" "alarms_nothing" + +netdata_download_various $MURL "api/v1/alarm_log?after" "alarm_without" +netdata_download_various $MURL "api/v1/alarm_log" "alarm_nothing" +netdata_download_various $MURL "api/v1/alarm_log?after&_=$PT" "alarm_log" + +netdata_create_directory "$OUTDIR/data" +netdata_create_directory "$OUTEDIR/data" +for I in $CHARTS ; do + netdata_download_chart $MURL "api/v1/data?chart" "$I" "data" + break; +done + +#http://arch-esxi:19999/api/v1/(*@&$!$%%5E)!$*%&)!$*%%5E*!%5E%!%5E$%!%5E%(!*%5E*%5E%(*@&$%5E%(!%5E#*&!^#$*&!^%)@($%^)!*&^(!*&^#$&#$)!$%^)!$*%&)#$!^#*$^!(*#^#)!%^!)$*%&!(*&$!^#$*&^!*#^$!*^)%(!*&$%)(!&#$!^*#&$^!*^%)!$%)!(&#$!^#*&^$ + +WHITE='\033[0;37m' +echo -e "${WHITE}ALL the URLS got 200 as answer!" 
+ +exit 0 diff --git a/web/api/health/health_cmdapi.c b/web/api/health/health_cmdapi.c index 468054c67f..94293dbe68 100644 --- a/web/api/health/health_cmdapi.c +++ b/web/api/health/health_cmdapi.c @@ -179,6 +179,7 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c silencer = health_silencers_addparam(silencer, key, value); } } + if (likely(silencer)) { health_silencers_add(silencer); buffer_strcat(wb, HEALTH_CMDAPI_MSG_ADDED); diff --git a/web/api/web_api_v1.c b/web/api/web_api_v1.c index 7c0d728bf8..2273224bb1 100644 --- a/web/api/web_api_v1.c +++ b/web/api/web_api_v1.c @@ -797,23 +797,23 @@ inline int web_client_api_request_v1(RRDHOST *host, struct web_client *w, char * } // get the command - char *tok = mystrsep(&url, "?"); - if(tok && *tok) { - debug(D_WEB_CLIENT, "%llu: Searching for API v1 command '%s'.", w->id, tok); - uint32_t hash = simple_hash(tok); + if(url) { + debug(D_WEB_CLIENT, "%llu: Searching for API v1 command '%s'.", w->id, url); + uint32_t hash = simple_hash(url); for(i = 0; api_commands[i].command ;i++) { - if(unlikely(hash == api_commands[i].hash && !strcmp(tok, api_commands[i].command))) { + if(unlikely(hash == api_commands[i].hash && !strcmp(url, api_commands[i].command))) { if(unlikely(api_commands[i].acl != WEB_CLIENT_ACL_NOCHECK) && !(w->acl & api_commands[i].acl)) return web_client_permission_denied(w); - return api_commands[i].callback(host, w, url); + //return api_commands[i].callback(host, w, url); + return api_commands[i].callback(host, w, (w->decoded_query_string + 1)); } } buffer_flush(w->response.data); buffer_strcat(w->response.data, "Unsupported v1 API command: "); - buffer_strcat_htmlescape(w->response.data, tok); + buffer_strcat_htmlescape(w->response.data, url); return 404; } else { diff --git a/web/server/web_client.c b/web/server/web_client.c index 20745d8dd4..e8566ce991 100644 --- a/web/server/web_client.c +++ b/web/server/web_client.c @@ -772,7 +772,6 @@ static inline char 
*http_header_parse(struct web_client *w, char *s, int parse_u // terminate the value *ve = '\0'; - // fprintf(stderr, "HEADER: '%s' = '%s'\n", s, v); uint32_t hash = simple_uhash(s); if(hash == hash_origin && !strcasecmp(s, "Origin")) @@ -812,66 +811,31 @@ static inline char *http_header_parse(struct web_client *w, char *s, int parse_u return ve; } -// http_request_validate() -// returns: -// = 0 : all good, process the request -// > 0 : request is not supported -// < 0 : request is incomplete - wait for more data - -typedef enum { - HTTP_VALIDATION_OK, - HTTP_VALIDATION_NOT_SUPPORTED, - HTTP_VALIDATION_MALFORMED_URL, -#ifdef ENABLE_HTTPS - HTTP_VALIDATION_INCOMPLETE, - HTTP_VALIDATION_REDIRECT -#else - HTTP_VALIDATION_INCOMPLETE -#endif -} HTTP_VALIDATION; - -static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { - char *s = (char *)buffer_tostring(w->response.data), *encoded_url = NULL; - - size_t last_pos = w->header_parse_last_size; - if(last_pos > 4) last_pos -= 4; // allow searching for \r\n\r\n - else last_pos = 0; - - w->header_parse_tries++; - w->header_parse_last_size = buffer_strlen(w->response.data); - - if(w->header_parse_tries > 1) { - if(w->header_parse_last_size < last_pos) - last_pos = 0; - - if(strstr(&s[last_pos], "\r\n\r\n") == NULL) { - if(w->header_parse_tries > 10) { - info("Disabling slow client after %zu attempts to read the request (%zu bytes received)", w->header_parse_tries, buffer_strlen(w->response.data)); - w->header_parse_tries = 0; - w->header_parse_last_size = 0; - web_client_disable_wait_receive(w); - return HTTP_VALIDATION_NOT_SUPPORTED; - } - - return HTTP_VALIDATION_INCOMPLETE; - } - } - +/** + * Valid Method + * + * Netdata accepts only three methods, including one of these three(STREAM) is an internal method. + * + * @param w is the structure with the client request + * @param s is the start string to parse + * + * @return it returns the next address to parse case the method is valid and NULL otherwise. 
+ */ +static inline char *web_client_valid_method(struct web_client *w, char *s) { // is is a valid request? if(!strncmp(s, "GET ", 4)) { - encoded_url = s = &s[4]; + s = &s[4]; w->mode = WEB_CLIENT_MODE_NORMAL; } else if(!strncmp(s, "OPTIONS ", 8)) { - encoded_url = s = &s[8]; + s = &s[8]; w->mode = WEB_CLIENT_MODE_OPTIONS; } else if(!strncmp(s, "STREAM ", 7)) { + s = &s[7]; + #ifdef ENABLE_HTTPS if ( (w->ssl.flags) && (netdata_use_ssl_on_stream & NETDATA_SSL_FORCE)){ - w->header_parse_tries = 0; - w->header_parse_last_size = 0; - web_client_disable_wait_receive(w); char hostname[256]; char *copyme = strstr(s,"hostname="); if ( copyme ){ @@ -892,29 +856,150 @@ static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { hostname[13] = 0x00; } error("The server is configured to always use encrypt connection, please enable the SSL on slave with hostname '%s'.",hostname); - return HTTP_VALIDATION_NOT_SUPPORTED; + s = NULL; } #endif - encoded_url = s = &s[7]; w->mode = WEB_CLIENT_MODE_STREAM; } else { + s = NULL; + } + + return s; +} + +/** + * Set Path Query + * + * Set the pointers to the path and query string according to the input. + * + * @param w is the structure with the client request + * @param s is the first address of the string. + * @param ptr is the address of the separator. + */ +static void web_client_set_path_query(struct web_client *w, char *s, char *ptr) { + w->url_path_length = (size_t)(ptr -s); + + w->url_search_path = ptr; +} + +/** + * Split path query + * + * Do the separation between path and query string + * + * @param w is the structure with the client request + * @param s is the string to parse + */ +void web_client_split_path_query(struct web_client *w, char *s) { + //I am assuming here that the separator character(?) 
is not encoded + char *ptr = strchr(s, '?'); + if(ptr) { + w->separator = '?'; + web_client_set_path_query(w, s, ptr); + return; + } + + //Here I test the second possibility, the URL is completely encoded by the user. + //I am not using the strcasestr, because it is fastest to check %3f and compare + //the next character. + //We executed some tests with "encodeURI(uri);" described in https://www.w3schools.com/jsref/jsref_encodeuri.asp + //on July 1st, 2019, that show us that URLs won't have '?','=' and '&' encoded, but we decided to move in front + //with the next part, because users can develop their own encoded that won't follow this rule. + char *moveme = s; + while (moveme) { + ptr = strchr(moveme, '%'); + if(ptr) { + char *test = (ptr+1); + if (!strncmp(test, "3f", 2) || !strncmp(test, "3F", 2)) { + w->separator = *ptr; + web_client_set_path_query(w, s, ptr); + return; + } + ptr++; + } + + moveme = ptr; + } + + w->separator = 0x00; + w->url_path_length = strlen(s); + w->url_search_path = NULL; +} + +/** + * Request validate + * + * @param w is the structure with the client request + * + * @return It returns HTTP_VALIDATION_OK on success and another code present + * in the enum HTTP_VALIDATION otherwise. 
+ */ +static inline HTTP_VALIDATION http_request_validate(struct web_client *w) { + char *s = (char *)buffer_tostring(w->response.data), *encoded_url = NULL; + + size_t last_pos = w->header_parse_last_size; + + w->header_parse_tries++; + w->header_parse_last_size = buffer_strlen(w->response.data); + + int is_it_valid; + if(w->header_parse_tries > 1) { + if(last_pos > 4) last_pos -= 4; // allow searching for \r\n\r\n + else last_pos = 0; + + if(w->header_parse_last_size < last_pos) + last_pos = 0; + + is_it_valid = url_is_request_complete(s, &s[last_pos], w->header_parse_last_size); + if(!is_it_valid) { + if(w->header_parse_tries > 10) { + info("Disabling slow client after %zu attempts to read the request (%zu bytes received)", w->header_parse_tries, buffer_strlen(w->response.data)); + w->header_parse_tries = 0; + w->header_parse_last_size = 0; + web_client_disable_wait_receive(w); + return HTTP_VALIDATION_NOT_SUPPORTED; + } + + return HTTP_VALIDATION_INCOMPLETE; + } + + is_it_valid = 1; + } else { + last_pos = w->header_parse_last_size; + is_it_valid = url_is_request_complete(s, &s[last_pos], w->header_parse_last_size); + } + + s = web_client_valid_method(w, s); + if (!s) { w->header_parse_tries = 0; w->header_parse_last_size = 0; web_client_disable_wait_receive(w); + return HTTP_VALIDATION_NOT_SUPPORTED; + } else if (!is_it_valid) { + //Invalid request, we have more data after the end of message + char *check = strstr((char *)buffer_tostring(w->response.data), "\r\n\r\n"); + if(check) { + check += 4; + if (*check) { + w->header_parse_tries = 0; + w->header_parse_last_size = 0; + web_client_disable_wait_receive(w); + return HTTP_VALIDATION_NOT_SUPPORTED; + } + } + + web_client_enable_wait_receive(w); + return HTTP_VALIDATION_INCOMPLETE; } - // find the SPACE + "HTTP/" - while(*s) { < |