summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bram Moolenaar <Bram@vim.org> 2020-12-28 12:56:58 +0100
committer Bram Moolenaar <Bram@vim.org> 2020-12-28 12:56:58 +0100
commit 17793ef23aae0bc94539390ccfe5e63b0ad39ff2 (patch)
tree 710a6d39741c88899dd9e06309e1a1cb5ec1385d
parent 9618a25b9c054f0ee4e267d2db96b6e7c113ed7a (diff)
patch 8.2.2233: cannot convert a byte index into a character index (tag: v8.2.2233)
Problem: Cannot convert a byte index into a character index. Solution: Add charidx(). (Yegappan Lakshmanan, closes #7561)
-rw-r--r-- runtime/doc/eval.txt 27
-rw-r--r-- runtime/doc/usr_41.txt 1
-rw-r--r-- src/evalfunc.c 54
-rw-r--r-- src/testdir/test_functions.vim 25
-rw-r--r-- src/version.c 2
5 files changed, 109 insertions, 0 deletions
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt
index 4c6636c614..8b85dad9cf 100644
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -2475,6 +2475,8 @@ ch_status({handle} [, {options}])
changenr() Number current change number
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
charclass({string}) Number character class of {string}
+charidx({string}, {idx} [, {countcc}])
+ Number char index of byte {idx} in {string}
chdir({dir}) String change current working directory
cindent({lnum}) Number C indent for line {lnum}
clearmatches([{win}]) none clear all matches
@@ -3588,6 +3590,31 @@ charclass({string}) *charclass()*
other specific Unicode class
The class is used in patterns and word motions.
+ *charidx()*
+charidx({string}, {idx} [, {countcc}])
+ Return the character index of the byte at {idx} in {string}.
+ The index of the first character is zero.
+ If there are no multibyte characters the returned value is
+ equal to {idx}.
+ When {countcc} is omitted or zero, then composing characters
+ are not counted separately, their byte length is added to the
+ preceding base character.
+ When {countcc} is set to 1, then composing characters are
+ counted as separate characters.
+ Returns -1 if the arguments are invalid or if {idx} is greater
+ than the index of the last byte in {string}. An error is
+ given if the first argument is not a string, the second
+ argument is not a number or when the third argument is present
+ and is not zero or one.
+ See |byteidx()| and |byteidxcomp()| for getting the byte index
+ from the character index.
+ Examples: >
+ echo charidx('áb́ć', 3) returns 1
+ echo charidx('áb́ć', 6, 1) returns 4
+ echo charidx('áb́ć', 16) returns -1
+<
+ Can also be used as a |method|: >
+ GetName()->charidx(idx)
chdir({dir}) *chdir()*
Change the current working directory to {dir}. The scope of
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index a19d005631..a035038e47 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -625,6 +625,7 @@ String manipulation: *string-functions*
iconv() convert text from one encoding to another
byteidx() byte index of a character in a string
byteidxcomp() like byteidx() but count composing characters
+ charidx() character index of a byte in a string
repeat() repeat a string multiple times
eval() evaluate a string expression
execute() execute an Ex command and get the output
diff --git a/src/evalfunc.c b/src/evalfunc.c
index cf9c2c45ca..9b3b5beb66 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -47,6 +47,7 @@ static void f_ceil(typval_T *argvars, typval_T *rettv);
#endif
static void f_changenr(typval_T *argvars, typval_T *rettv);
static void f_char2nr(typval_T *argvars, typval_T *rettv);
+static void f_charidx(typval_T *argvars, typval_T *rettv);
static void f_col(typval_T *argvars, typval_T *rettv);
static void f_confirm(typval_T *argvars, typval_T *rettv);
static void f_copy(typval_T *argvars, typval_T *rettv);
@@ -789,6 +790,8 @@ static funcentry_T global_functions[] =
ret_number, f_char2nr},
{"charclass", 1, 1, FEARG_1, NULL,
ret_number, f_charclass},
+ {"charidx", 2, 3, FEARG_1, NULL,
+ ret_number, f_charidx},
{"chdir", 1, 1, FEARG_1, NULL,
ret_string, f_chdir},
{"cindent", 1, 1, FEARG_1, NULL,
@@ -2420,6 +2423,57 @@ f_char2nr(typval_T *argvars, typval_T *rettv)
rettv->vval.v_number = tv_get_string(&argvars[0])[0];
}
+/*
+ * "charidx()" function
+ *
+ * charidx({string}, {idx} [, {countcc}])
+ *
+ * Stores in "rettv" the zero-based character index of the byte at offset
+ * {idx} in {string}.  When {countcc} is given and is 1, composing characters
+ * are counted as separate characters (only makes a difference when
+ * "enc_utf8" is set); otherwise they are part of the preceding base
+ * character.
+ *
+ * The result is -1 when {idx} is negative or lies beyond the last byte of
+ * {string}, and also whenever an error message is given:
+ * - e_invarg when {string} is not a String, {idx} is not a Number or
+ *   {countcc} is present and not a Number;
+ * - e_using_number_as_bool_nr when {countcc} is not 0 or 1.
+ */
+    static void
+f_charidx(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *str;
+    varnumber_T idx;
+    int         countcc = FALSE;
+    char_u      *p;
+    int         len;
+    int         (*ptr2len)(char_u *);
+
+    // Result for every early return below.
+    rettv->vval.v_number = -1;
+
+    if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
+            || (argvars[2].v_type != VAR_UNKNOWN
+                && argvars[2].v_type != VAR_NUMBER))
+    {
+        emsg(_(e_invarg));
+        return;
+    }
+
+    str = tv_get_string_chk(&argvars[0]);
+    idx = tv_get_number_chk(&argvars[1], NULL);
+    if (str == NULL || idx < 0)
+        return;
+
+    if (argvars[2].v_type != VAR_UNKNOWN)
+    {
+        countcc = (int)tv_get_bool(&argvars[2]);
+        // Only 0 and 1 are accepted for {countcc}.
+        if (countcc < 0 || countcc > 1)
+        {
+            semsg(_(e_using_number_as_bool_nr), countcc);
+            return;
+        }
+    }
+
+    // Pick the function that gives the byte length of one counted unit:
+    // a single UTF-8 character when composing characters are counted
+    // separately, otherwise a character as measured by mb_ptr2len().
+    if (enc_utf8 && countcc)
+        ptr2len = utf_ptr2len;
+    else
+        ptr2len = mb_ptr2len;
+
+    // Walk the string one character at a time until the character that
+    // contains byte {idx} has been stepped over.
+    // Compare byte offsets instead of forming "str + idx": {idx} comes from
+    // the script and may be far larger than the string, in which case the
+    // pointer sum is undefined and may wrap around, skipping the loop and
+    // yielding 0 instead of -1.
+    for (p = str, len = 0; (varnumber_T)(p - str) <= idx; len++)
+    {
+        if (*p == NUL)
+            return;         // {idx} is beyond the last byte
+        p += ptr2len(p);
+    }
+
+    // idx >= 0, thus the loop body ran at least once and len >= 1; the last
+    // character stepped over is the one holding byte {idx}.
+    rettv->vval.v_number = len - 1;
+}
+
+
win_T *
get_optional_window(typval_T *argvars, int idx)
{
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index dd4429e709..89db161802 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -1132,6 +1132,31 @@ func Test_byteidx()
call assert_fails("call byteidxcomp([], 0)", 'E730:')
endfunc
+" Test for charidx()
+func Test_charidx()
+ " Going by the expected values below, the test string is 8 bytes long:
+ " 'x' at byte 0, 'a' plus a composing character at bytes 1-3, 'b' plus a
+ " composing character at bytes 4-6 and 'y' at byte 7.
+ let a = 'xáb́y'
+ " Without {countcc} a composing character belongs to its base character,
+ " so the string has 4 characters.  Byte 8 is past the end and an empty
+ " string has no byte 0: both give -1.
+ call assert_equal(0, charidx(a, 0))
+ call assert_equal(1, charidx(a, 3))
+ call assert_equal(2, charidx(a, 4))
+ call assert_equal(3, charidx(a, 7))
+ call assert_equal(-1, charidx(a, 8))
+ call assert_equal(-1, charidx('', 0))
+
+ " count composing characters
+ " With {countcc} set to 1 each composing character is a character of its
+ " own, so the string has 6 characters (indexes 0 to 5).
+ call assert_equal(0, charidx(a, 0, 1))
+ call assert_equal(2, charidx(a, 2, 1))
+ call assert_equal(3, charidx(a, 4, 1))
+ call assert_equal(5, charidx(a, 7, 1))
+ call assert_equal(-1, charidx(a, 8, 1))
+ call assert_equal(-1, charidx('', 0, 1))
+
+ " An argument of the wrong type gives E474; a {countcc} Number other than
+ " 0 or 1 gives E1023.
+ call assert_fails('let x = charidx([], 1)', 'E474:')
+ call assert_fails('let x = charidx("abc", [])', 'E474:')
+ call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
+ call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
+ call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
+endfunc
+
func Test_count()
let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a'))
diff --git a/src/version.c b/src/version.c
index 35f1defe31..71bc47d043 100644
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 2233,
+/**/
2232,
/**/
2231,