summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author LemonBoy <thatlemon@gmail.com> 2022-04-05 15:07:32 +0100
committer Bram Moolenaar <Bram@vim.org> 2022-04-05 15:07:32 +0100
commit beb0ef1ab2dbd9760345e3e03647b93914591d56 (patch)
tree 6c210b9fc90f48c2b92a65ac5b0e4d98bbd007a7
parent 02560424bf838cadc8c19294af6b6b6c383ab291 (diff)
patch 8.2.4695: JSON encoding could be faster (tag: v8.2.4695)
Problem: JSON encoding could be faster. Solution: Optimize encoding JSON strings. (closes #10086)
-rw-r--r-- src/json.c | 123
-rw-r--r-- src/testdir/test_json.vim | 3
-rw-r--r-- src/version.c | 2
3 files changed, 93 insertions, 35 deletions
diff --git a/src/json.c b/src/json.c
index 5be8f7fe66..47bf9904a3 100644
--- a/src/json.c
+++ b/src/json.c
@@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val)
}
#endif
+/*
+ * Lookup table indexed by ASCII byte: non-zero means it must be escaped in JSON.
+ */
+static const char ascii_needs_escape[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. (control characters)
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. (control characters)
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. (0x22 is the double quote)
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. (0x5c is the backslash)
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+};
+
+/*
+ * Encode the utf-8 encoded string "str" into "gap".
+ */
static void
write_string(garray_T *gap, char_u *str)
{
char_u *res = str;
char_u numbuf[NUMBUFLEN];
+ char_u *from;
+#if defined(USE_ICONV)
+ vimconv_T conv;
+ char_u *converted = NULL;
+#endif
+ int c;
if (res == NULL)
- ga_concat(gap, (char_u *)"\"\"");
- else
{
-#if defined(USE_ICONV)
- vimconv_T conv;
- char_u *converted = NULL;
+ ga_concat(gap, (char_u *)"\"\"");
+ return;
+ }
- if (!enc_utf8)
- {
- // Convert the text from 'encoding' to utf-8, the JSON string is
- // always utf-8.
- conv.vc_type = CONV_NONE;
- convert_setup(&conv, p_enc, (char_u*)"utf-8");
- if (conv.vc_type != CONV_NONE)
- converted = res = string_convert(&conv, res, NULL);
- convert_setup(&conv, NULL, NULL);
- }
+#if defined(USE_ICONV)
+ if (!enc_utf8)
+ {
+ // Convert the text from 'encoding' to utf-8, because a JSON string is
+ // always utf-8.
+ conv.vc_type = CONV_NONE;
+ convert_setup(&conv, p_enc, (char_u*)"utf-8");
+ if (conv.vc_type != CONV_NONE)
+ converted = res = string_convert(&conv, res, NULL);
+ convert_setup(&conv, NULL, NULL);
+ }
#endif
- ga_append(gap, '"');
- while (*res != NUL)
+ ga_append(gap, '"');
+ // `from` is the beginning of a sequence of bytes we can directly copy from
+ // the input string, avoiding the overhead associated to decoding/encoding
+ // them.
+ from = res;
+ while ((c = *res) != NUL)
+ {
+ // always use utf-8 encoding, ignore 'encoding'
+ if (c < 0x80)
{
- int c;
- // always use utf-8 encoding, ignore 'encoding'
- c = utf_ptr2char(res);
+ if (!ascii_needs_escape[c])
+ {
+ res += 1;
+ continue;
+ }
+
+ if (res != from)
+ ga_concat_len(gap, from, res - from);
+ from = res + 1;
switch (c)
{
@@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str)
ga_append(gap, c);
break;
default:
- if (c >= 0x20)
- {
- numbuf[utf_char2bytes(c, numbuf)] = NUL;
- ga_concat(gap, numbuf);
- }
- else
- {
- vim_snprintf((char *)numbuf, NUMBUFLEN,
- "\\u%04lx", (long)c);
- ga_concat(gap, numbuf);
- }
+ vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
+ (long)c);
+ ga_concat(gap, numbuf);
+ }
+
+ res += 1;
+ }
+ else
+ {
+ int l = utf_ptr2len(res);
+
+ if (l > 1)
+ {
+ res += l;
+ continue;
}
- res += utf_ptr2len(res);
+
+ // Invalid utf-8 sequence, replace it with the Unicode replacement
+ // character U+FFFD.
+ if (res != from)
+ ga_concat_len(gap, from, res - from);
+ from = res + 1;
+
+ numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
+ ga_concat(gap, numbuf);
+
+ res += l;
}
- ga_append(gap, '"');
+ }
+
+ if (res != from)
+ ga_concat_len(gap, from, res - from);
+
+ ga_append(gap, '"');
#if defined(USE_ICONV)
- vim_free(converted);
+ vim_free(converted);
#endif
- }
}
/*
diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim
index 0248aa9ecc..3ee7837033 100644
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -107,6 +107,9 @@ func Test_json_encode()
call assert_equal('"café"', json_encode("caf\xe9"))
let &encoding = save_encoding
+ " Invalid utf-8 sequences are replaced with U+FFFD (replacement character)
+ call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB"))
+
call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func')
call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func')
diff --git a/src/version.c b/src/version.c
index 3e220f581f..f4d300b39e 100644
--- a/src/version.c
+++ b/src/version.c
@@ -747,6 +747,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 4695,
+/**/
4694,
/**/
4693,