diff options
author | LemonBoy <thatlemon@gmail.com> | 2022-04-05 15:07:32 +0100 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2022-04-05 15:07:32 +0100 |
commit | beb0ef1ab2dbd9760345e3e03647b93914591d56 (patch) | |
tree | 6c210b9fc90f48c2b92a65ac5b0e4d98bbd007a7 /src/json.c | |
parent | 02560424bf838cadc8c19294af6b6b6c383ab291 (diff) |
patch 8.2.4695: JSON encoding could be faster (v8.2.4695)
Problem: JSON encoding could be faster.
Solution: Optimize encoding JSON strings. (closes #10086)
Diffstat (limited to 'src/json.c')
-rw-r--r-- | src/json.c | 123 |
1 files changed, 88 insertions, 35 deletions
diff --git a/src/json.c b/src/json.c index 5be8f7fe66..47bf9904a3 100644 --- a/src/json.c +++ b/src/json.c @@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val) } #endif +/* + * Lookup table to quickly know if the given ASCII character must be escaped. + */ +static const char ascii_needs_escape[128] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. +}; + +/* + * Encode the utf-8 encoded string "str" into "gap". + */ static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; + char_u *from; +#if defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; +#endif + int c; if (res == NULL) - ga_concat(gap, (char_u *)"\"\""); - else { -#if defined(USE_ICONV) - vimconv_T conv; - char_u *converted = NULL; + ga_concat(gap, (char_u *)"\"\""); + return; + } - if (!enc_utf8) - { - // Convert the text from 'encoding' to utf-8, the JSON string is - // always utf-8. - conv.vc_type = CONV_NONE; - convert_setup(&conv, p_enc, (char_u*)"utf-8"); - if (conv.vc_type != CONV_NONE) - converted = res = string_convert(&conv, res, NULL); - convert_setup(&conv, NULL, NULL); - } +#if defined(USE_ICONV) + if (!enc_utf8) + { + // Convert the text from 'encoding' to utf-8, because a JSON string is + // always utf-8. 
+ conv.vc_type = CONV_NONE; + convert_setup(&conv, p_enc, (char_u*)"utf-8"); + if (conv.vc_type != CONV_NONE) + converted = res = string_convert(&conv, res, NULL); + convert_setup(&conv, NULL, NULL); + } #endif - ga_append(gap, '"'); - while (*res != NUL) + ga_append(gap, '"'); + // `from` is the beginning of a sequence of bytes we can directly copy from + // the input string, avoiding the overhead associated to decoding/encoding + // them. + from = res; + while ((c = *res) != NUL) + { + // always use utf-8 encoding, ignore 'encoding' + if (c < 0x80) { - int c; - // always use utf-8 encoding, ignore 'encoding' - c = utf_ptr2char(res); + if (!ascii_needs_escape[c]) + { + res += 1; + continue; + } + + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; switch (c) { @@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str) ga_append(gap, c); break; default: - if (c >= 0x20) - { - numbuf[utf_char2bytes(c, numbuf)] = NUL; - ga_concat(gap, numbuf); - } - else - { - vim_snprintf((char *)numbuf, NUMBUFLEN, - "\\u%04lx", (long)c); - ga_concat(gap, numbuf); - } + vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", + (long)c); + ga_concat(gap, numbuf); + } + + res += 1; + } + else + { + int l = utf_ptr2len(res); + + if (l > 1) + { + res += l; + continue; } - res += utf_ptr2len(res); + + // Invalid utf-8 sequence, replace it with the Unicode replacement + // character U+FFFD. + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; + + numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL; + ga_concat(gap, numbuf); + + res += l; } - ga_append(gap, '"'); + } + + if (res != from) + ga_concat_len(gap, from, res - from); + + ga_append(gap, '"'); #if defined(USE_ICONV) - vim_free(converted); + vim_free(converted); #endif - } } /* |