From 95707037afa1aeae4f3494dc623a721ceed7fc4e Mon Sep 17 00:00:00 2001 From: Yegappan Lakshmanan Date: Wed, 14 Jun 2023 13:10:15 +0100 Subject: patch 9.0.1629: having utf16idx() rounding up is inconvenient Problem: Having utf16idx() rounding up is inconvenient. Solution: Make utf16idx() round down. (Yegappan Lakshmanan, closes #12523) --- runtime/doc/builtin.txt | 4 ++-- src/strings.c | 8 ++++++-- src/testdir/test_functions.vim | 20 ++++++++++---------- src/version.c | 2 ++ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index b14851ca89..1b7c6dcc6e 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -10127,8 +10127,8 @@ utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) When {charidx} is present and TRUE, {idx} is used as the character index in the String {string} instead of as the byte index. - An {idx} in the middle of a UTF-8 sequence is rounded upwards - to the end of that sequence. + An {idx} in the middle of a UTF-8 sequence is rounded + downwards to the beginning of that sequence. Returns -1 if the arguments are invalid or if there are less than {idx} bytes in {string}. If there are exactly {idx} bytes diff --git a/src/strings.c b/src/strings.c index 9e2c331bf4..e3be3354d3 100644 --- a/src/strings.c +++ b/src/strings.c @@ -1743,8 +1743,10 @@ f_strtrans(typval_T *argvars, typval_T *rettv) /* - * * "utf16idx()" function + * + * Converts a byte or character offset in a string to the corresponding UTF-16 + * code unit offset. */ void f_utf16idx(typval_T *argvars, typval_T *rettv) @@ -1780,6 +1782,7 @@ f_utf16idx(typval_T *argvars, typval_T *rettv) char_u *p; int len; + int utf16idx = 0; for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) { if (*p == NUL) @@ -1791,6 +1794,7 @@ f_utf16idx(typval_T *argvars, typval_T *rettv) rettv->vval.v_number = len; return; } + utf16idx = len; int clen = ptr2len(p); int c = (clen > 1) ? utf_ptr2char(p) : *p; if (c > 0xFFFF) @@ -1800,7 +1804,7 @@ f_utf16idx(typval_T *argvars, typval_T *rettv) idx--; } - rettv->vval.v_number = len > 0 ? len - 1 : 0; + rettv->vval.v_number = utf16idx; } /* diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim index d7e7f923ba..a17c19e179 100644 --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -1518,14 +1518,14 @@ func Test_utf16idx_from_byteidx() " UTF-16 index of a string with four byte characters let str = 'a😊😊b' call assert_equal(0, utf16idx(str, 0)) - call assert_equal(2, utf16idx(str, 1)) - call assert_equal(2, utf16idx(str, 2)) - call assert_equal(2, utf16idx(str, 3)) - call assert_equal(2, utf16idx(str, 4)) - call assert_equal(4, utf16idx(str, 5)) - call assert_equal(4, utf16idx(str, 6)) - call assert_equal(4, utf16idx(str, 7)) - call assert_equal(4, utf16idx(str, 8)) + call assert_equal(1, utf16idx(str, 1)) + call assert_equal(1, utf16idx(str, 2)) + call assert_equal(1, utf16idx(str, 3)) + call assert_equal(1, utf16idx(str, 4)) + call assert_equal(3, utf16idx(str, 5)) + call assert_equal(3, utf16idx(str, 6)) + call assert_equal(3, utf16idx(str, 7)) + call assert_equal(3, utf16idx(str, 8)) call assert_equal(5, utf16idx(str, 9)) call assert_equal(6, utf16idx(str, 10)) call assert_equal(-1, utf16idx(str, 11)) @@ -1621,8 +1621,8 @@ func Test_utf16idx_from_charidx() " UTF-16 index of a string with four byte characters let str = "a😊😊b" call assert_equal(0, utf16idx(str, 0, v:false, v:true)) - call assert_equal(2, utf16idx(str, 1, v:false, v:true)) - call assert_equal(4, utf16idx(str, 2, v:false, v:true)) + call assert_equal(1, utf16idx(str, 1, v:false, v:true)) + call assert_equal(3, utf16idx(str, 2, v:false, v:true)) call assert_equal(5, utf16idx(str, 3, v:false, v:true)) call assert_equal(6, utf16idx(str, 4, v:false, v:true)) call assert_equal(-1, utf16idx(str, 5, v:false, v:true)) diff --git a/src/version.c b/src/version.c index 1235ac9439..c753acd379 100644 --- a/src/version.c +++ b/src/version.c @@ -695,6 +695,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1629, /**/ 1628, /**/ -- cgit v1.2.3