patch 8.2.4695: JSON encoding could be fasterv8.2.4695

Problem: JSON encoding could be faster. Solution: Optimize encoding JSON strings. (closes #10086)
author: LemonBoy <thatlemon@gmail.com> 2022-04-05 15:07:32 +0100
committer: Bram Moolenaar <Bram@vim.org> 2022-04-05 15:07:32 +0100
commit: beb0ef1ab2dbd9760345e3e03647b93914591d56 (patch)
tree: 6c210b9fc90f48c2b92a65ac5b0e4d98bbd007a7 /src/json.c
parent: 02560424bf838cadc8c19294af6b6b6c383ab291 (diff)
1 files changed, 88 insertions, 35 deletions
diff --git a/src/json.c b/src/json.c
index 5be8f7fe66..47bf9904a3 100644
--- a/src/json.c
+++ b/src/json.c
@@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val)
 }
 #endif
 
+/*
+ * Lookup table to quickly know if the given ASCII character must be escaped.
+ */
+static const char ascii_needs_escape[128] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0.
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1.
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+};
+
+/*
+ * Encode the utf-8 encoded string "str" into "gap".
+ */
     static void
 write_string(garray_T *gap, char_u *str)
 {
     char_u	*res = str;
     char_u	numbuf[NUMBUFLEN];
+    char_u	*from;
+#if defined(USE_ICONV)
+    vimconv_T   conv;
+    char_u	*converted = NULL;
+#endif
+    int		c;
 
     if (res == NULL)
-	ga_concat(gap, (char_u *)"\"\"");
-    else
     {
-#if defined(USE_ICONV)
-	vimconv_T   conv;
-	char_u	    *converted = NULL;
+	ga_concat(gap, (char_u *)"\"\"");
+	return;
+    }
 
-	if (!enc_utf8)
-	{
-	    // Convert the text from 'encoding' to utf-8, the JSON string is
-	    // always utf-8.
-	    conv.vc_type = CONV_NONE;
-	    convert_setup(&conv, p_enc, (char_u*)"utf-8");
-	    if (conv.vc_type != CONV_NONE)
-		converted = res = string_convert(&conv, res, NULL);
-	    convert_setup(&conv, NULL, NULL);
-	}
+#if defined(USE_ICONV)
+    if (!enc_utf8)
+    {
+	// Convert the text from 'encoding' to utf-8, because a JSON string is
+	// always utf-8.
+	conv.vc_type = CONV_NONE;
+	convert_setup(&conv, p_enc, (char_u*)"utf-8");
+	if (conv.vc_type != CONV_NONE)
+	    converted = res = string_convert(&conv, res, NULL);
+	convert_setup(&conv, NULL, NULL);
+    }
 #endif
-	ga_append(gap, '"');
-	while (*res != NUL)
+    ga_append(gap, '"');
+    // `from` is the beginning of a sequence of bytes we can directly copy from
+    // the input string, avoiding the overhead associated to decoding/encoding
+    // them.
+    from = res;
+    while ((c = *res) != NUL)
+    {
+	// always use utf-8 encoding, ignore 'encoding'
+	if (c < 0x80)
 	{
-	    int c;
-	    // always use utf-8 encoding, ignore 'encoding'
-	    c = utf_ptr2char(res);
+	    if (!ascii_needs_escape[c])
+	    {
+		res += 1;
+		continue;
+	    }
+
+	    if (res != from)
+		ga_concat_len(gap, from, res - from);
+	    from = res + 1;
 
 	    switch (c)
 	    {
@@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str)
 		    ga_append(gap, c);
 		    break;
 		default:
-		    if (c >= 0x20)
-		    {
-			numbuf[utf_char2bytes(c, numbuf)] = NUL;
-			ga_concat(gap, numbuf);
-		    }
-		    else
-		    {
-			vim_snprintf((char *)numbuf, NUMBUFLEN,
-							 "\\u%04lx", (long)c);
-			ga_concat(gap, numbuf);
-		    }
+		    vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
+								      (long)c);
+		    ga_concat(gap, numbuf);
+	    }
+
+	    res += 1;
+	}
+	else
+	{
+	    int l = utf_ptr2len(res);
+
+	    if (l > 1)
+	    {
+		res += l;
+		continue;
 	    }
-	    res += utf_ptr2len(res);
+
+	    // Invalid utf-8 sequence, replace it with the Unicode replacement
+	    // character U+FFFD.
+	    if (res != from)
+		ga_concat_len(gap, from, res - from);
+	    from = res + 1;
+
+	    numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
+	    ga_concat(gap, numbuf);
+
+	    res += l;
 	}
-	ga_append(gap, '"');
+    }
+
+    if (res != from)
+	ga_concat_len(gap, from, res - from);
+
+    ga_append(gap, '"');
 #if defined(USE_ICONV)
-	vim_free(converted);
+    vim_free(converted);
 #endif
-    }
 }
 
 /*
author	LemonBoy <thatlemon@gmail.com>	2022-04-05 15:07:32 +0100
committer	Bram Moolenaar <Bram@vim.org>	2022-04-05 15:07:32 +0100
commit	beb0ef1ab2dbd9760345e3e03647b93914591d56 (patch)
tree	6c210b9fc90f48c2b92a65ac5b0e4d98bbd007a7 /src/json.c
parent	02560424bf838cadc8c19294af6b6b6c383ab291 (diff)