Url parser refactoring (#6247)

* URL_parser_review comments 1 * URL_parser_review restoring web_client.c * URL_parser_review restoring url.h * URL_parser_review restoring web_client.h * URL_parser_review restoring inlined.h * URL_parser_review restoring various * URL_parser_review commenting! * URL_parser_review last checks! * URL_parser_review registry! * URL_parser_review codacy errors! * URL_parser_review codacy errors 2! * URL_parser_review end of request! * URL_parser_review * URL_parser_review format fix * URL_parser_review restoring * URL_parser_review stopped at 5! * URL_parser_review formatting! * URL_parser_review: Started the map of the query string when it is necessary * URL_parser_review: With these adjusts in the URL library we are now able to parser all the escape characters! * URL_parser_review: code review Fixes problems and format asked by coworkers! * URL_parser_review: adjust script The script was not 100% according the shellcheck specifications, no less important it was a direct script instead a .in file * sslstream: Rebase 2 It was necessary to change a function due the UTF-8 * sslstream: Fixing 6426 We had a cast error introduced by other PR, so I am fixing here * URL_parser_review Change .gitignore to avoid considering a script file.
author: thiagoftsm <thiagoftsm@gmail.com> 2019-07-25 12:30:00 +0000
committer: GitHub <noreply@github.com> 2019-07-25 12:30:00 +0000
commit: 3076cfe5d455b8007e4f90776e9ea3d05faf1a7e (patch)
tree: ccf4590bcbc52f4011560daca4e77214e5d6e077 /libnetdata
parent: b74cc9af0707957c9f7d252eae8fd20c9b091aff (diff)
2 files changed, 201 insertions, 8 deletions
diff --git a/libnetdata/url/url.c b/libnetdata/url/url.c
index 1929d6686f..7df9faaf02 100644
--- a/libnetdata/url/url.c
+++ b/libnetdata/url/url.c
@@ -43,8 +43,16 @@ char *url_encode(char *str) {
     return pbuf;
 }
 
-/* Returns a url-decoded version of str */
-/* IMPORTANT: be sure to free() the returned string after use */
+/**
+ * URL Decode
+ *
+ * Returns a url-decoded version of str
+ * IMPORTANT: be sure to free() the returned string after use
+ *
+ * @param str the string that will be decoded
+ *
+ * @return a pointer to the url-decoded string.
+ */
 char *url_decode(char *str) {
     size_t size = strlen(str) + 1;
 
@@ -52,14 +60,30 @@ char *url_decode(char *str) {
     return url_decode_r(buf, str, size);
 }
 
-//decode %XX character or return 0 if cannot
+/**
+ *  Percentage escape decode
+ *
+ *  Decode a %XX sequence: s[1] and s[2] are taken as the two hex digits.
+ *
+ *  @param s the sequence to decode, expected to start at the '%' (s[0] is not inspected)
+ *
+ *  @return The character decoded, or 0 when s[1] or s[2] is the string terminator
+ */
 char url_percent_escape_decode(char *s) {
     if(likely(s[1] && s[2]))
         return from_hex(s[1]) << 4 | from_hex(s[2]);
     return 0;
 }
 
-//this (utf8 string related) should be moved in separate file in future
+/**
+ * Get byte length
+ *
+ * This (utf8 string related) should be moved in separate file in future
+ *
+ * @param c is the utf8 character
+ *
+ * @return It returns the length of the specific character.
+ */
 char url_utf8_get_byte_length(char c) {
     if(!IS_UTF8_BYTE(c))
         return 1;
@@ -77,8 +101,17 @@ char url_utf8_get_byte_length(char c) {
     return length;
 }
 
-//decode % encoded UTF-8 characters and copy them to *d
-//return count of bytes written to *d
+/**
+ * Decode Multibyte UTF8
+ *
+ * Decode % encoded UTF-8 characters and copy them to *d
+ *
+ * @param s first address
+ * @param d the destination where the decoded bytes are copied
+ * @param d_end last address
+ *
+ * @return count of bytes written to *d
+ */
 char url_decode_multibyte_utf8(char *s, char *d, char *d_end) {
     char first_byte = url_percent_escape_decode(s);
 
@@ -122,7 +155,6 @@ char url_decode_multibyte_utf8(char *s, char *d, char *d_end) {
  * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30
  * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
  */
-
 unsigned char *utf8_check(unsigned char *s)
 {
     while (*s)
@@ -208,7 +240,7 @@ char *url_decode_r(char *to, char *url, size_t size) {
 
     *d = '\0';
 
-    if(unlikely( utf8_check(to) )) //NULL means sucess here
+    if(unlikely( utf8_check((unsigned  char *)to) )) //NULL means sucess here
         return NULL;
 
     return to;
@@ -217,3 +249,157 @@ fail_cleanup:
     *d = '\0';
     return NULL;
 }
+
+/**
+ * Is request complete?
+ *
+ * Tell whether the bytes received so far already hold the empty line
+ * ("\r\n\r\n") that closes the header of a request.
+ * Only the last bytes are inspected once more than 4 bytes were read.
+ * This function cannot check all the request METHODS; with POST, for example, it will fail.
+ *
+ * @param begin is the first character of the sequence to analyse, it is read as a C string.
+ * @param end is the last character of the sequence
+ * @param length is the total of bytes read, it is not the difference between end and begin.
+ *
+ * @return It returns 1 when the request is complete and 0 otherwise.
+ */
+inline int url_is_request_complete(char *begin, char *end, size_t length) {
+    //Nothing was received when the first and the last address are the same
+    if (begin == end) {
+        return 0;
+    }
+
+    //When more than 4 bytes were read the search starts at (end - 4);
+    //shorter inputs are searched from begin.
+    char *from = (length > 4) ? (end - 4) : begin;
+
+    return strstr(from, "\r\n\r\n") != NULL;
+}
+
+/**
+ * Find protocol
+ *
+ * Search for the string ' HTTP/' in the message given.
+ *
+ * @param s is the start of the user request.
+ * @return the address of the space in ' HTTP/', or of the string terminator when it is absent.
+ */
+inline char *url_find_protocol(char *s) {
+    while(*s) {
+        // find the next space
+        while (*s && *s != ' ') s++;
+
+        // stop at the terminator (never step past it) or at SPACE + "HTTP/"
+        if(!*s || !strncmp(s, " HTTP/", 6)) break;
+        s++;
+    }
+
+    return s;
+}
+
+/**
+ * Map query string
+ *
+ * Map the query string fields that will be decoded.
+ * The first entry is the url itself; every other entry points at the separator
+ * that starts the next field.
+ * This function must be called after checking the presence of a query string.
+ * An empty url is reported as a single field.
+ *
+ * @param out array that receives the start of each field; at most WEB_FIELDS_MAX
+ *            entries are written (assumes the caller's array is that large - confirm).
+ * @param url the input url that we are decoding.
+ *
+ * @return It returns the number of total variables in the query string.
+ */
+int url_map_query_string(char **out, char *url) {
+    int count = 0;
+    char *ptr;
+
+    //We always have at least one field here, so I can set this.
+    out[count++] = url;
+
+    //An empty url has nothing else to scan, and (url + 1) would be past its end.
+    if (!*url) {
+        return count;
+    }
+
+    //First we try to parse considering that there was not URL encode process
+    //Never write more than WEB_FIELDS_MAX entries into out.
+    ptr = strchr(url + 1, '&');
+    while (ptr && count < WEB_FIELDS_MAX) {
+        out[count++] = ptr;
+        ptr = strchr(ptr + 1, '&');
+    }
+
+    //I could not find any '&', so I am assuming now it is percent encoded.
+    //NOTE(review): "3f" is '?', while '&' is "26" - confirm which one is intended.
+    if (count == 1) {
+        ptr = strchr(url + 1, '%');
+        while (ptr && count < WEB_FIELDS_MAX) {
+            if (!strncmp(ptr + 1, "3f", 2) || !strncmp(ptr + 1, "3F", 2)) {
+                out[count++] = ptr;
+            }
+            ptr = strchr(ptr + 1, '%');
+        }
+    }
+
+    return count;
+}
+
+/**
+ * Parse query string
+ *
+ * Decode every field mapped by url_map_query_string and append it to output.
+ * Fields are cut in place while they are decoded; the url is restored before returning.
+ *
+ * @param output is a vector where I will store the string.
+ * @param max is the maximum length of the output
+ * @param map the map done by the function url_map_query_string.
+ * @param total the total number of variables inside map
+ *
+ * @return It returns -1 when a field cannot be decoded and 0 otherwise
+ *         (also when the output is too small and the remaining fields are skipped).
+ */
+int url_parse_query_string(char *output, size_t max, char **map, int total) {
+    if(!total) {
+        return 0;
+    }
+
+    int next, done = 0, ret = 0;
+    size_t length, copied = 0;
+    char *begin = map[0];
+
+    for(next = 1 ; next <= total && !done ; ++next) {
+        //Every field but the last one ends where the next one starts.
+        char *end = (next != total) ? map[next] : NULL;
+        char save = 0;
+        if (end) {
+            length = (size_t) (end - begin);
+            save = *end;
+            *end = 0x00;
+        } else {
+            length = strlen(begin);
+        }
+        length++;
+
+        if (length > (max - copied)) {
+            error("Parsing query string: we cannot parse a query string so big");
+            done = 1;
+        } else if(!url_decode_r(output, begin, length)) {
+            ret = -1;
+            done = 1;
+        } else {
+            length = strlen(output);
+            copied += length;
+            output += length;
+        }
+
+        //Give the borrowed byte back on every path, including the early exits.
+        if (end) *end = save;
+        begin = end;
+    }
+
+    return ret;
+}
diff --git a/libnetdata/url/url.h b/libnetdata/url/url.h
index 6cef6d7a84..10f3fe1763 100644
--- a/libnetdata/url/url.h
+++ b/libnetdata/url/url.h
@@ -25,4 +25,11 @@ extern char *url_decode(char *str);
 
 extern char *url_decode_r(char *to, char *url, size_t size);
 
+#define WEB_FIELDS_MAX 400 /* NOTE(review): presumably the capacity of the query string map - confirm */
+extern int url_map_query_string(char **out, char *url);
+extern int url_parse_query_string(char *output, size_t max, char **map, int total);
+
+extern int url_is_request_complete(char *begin,char *end,size_t length);
+extern char *url_find_protocol(char *s);
+
 #endif /* NETDATA_URL_H */
author	thiagoftsm <thiagoftsm@gmail.com>	2019-07-25 12:30:00 +0000
committer	GitHub <noreply@github.com>	2019-07-25 12:30:00 +0000
commit	3076cfe5d455b8007e4f90776e9ea3d05faf1a7e (patch)
tree	ccf4590bcbc52f4011560daca4e77214e5d6e077 /libnetdata
parent	b74cc9af0707957c9f7d252eae8fd20c9b091aff (diff)