summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2020-09-22 20:33:50 +0200
committerBram Moolenaar <Bram@vim.org>2020-09-22 20:33:50 +0200
commit4f73b8e9cc83f647b34002554a8bdf9abec0a82f (patch)
treeaae61e658dd6f1074eb7f9b6cceea94f4ca7d1b0
parent44aaf5416e0121500dd52b7cab306d7618b4fe53 (diff)
patch 8.2.1726: fuzzy matching only works on stringsv8.2.1726
Problem: Fuzzy matching only works on strings. Solution: Support passing a dict. Add matchfuzzypos() to also get the match positions. (Yegappan Lakshmanan, closes #6947)
-rw-r--r--runtime/doc/eval.txt55
-rw-r--r--runtime/doc/usr_41.txt1
-rw-r--r--src/evalfunc.c3
-rw-r--r--src/proto/search.pro1
-rw-r--r--src/search.c520
-rw-r--r--src/testdir/Make_all.mak2
-rw-r--r--src/testdir/test_functions.vim24
-rw-r--r--src/testdir/test_matchfuzzy.vim188
-rw-r--r--src/version.c2
9 files changed, 629 insertions, 167 deletions
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt
index 82b81fbb32..f0271c01dc 100644
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -2641,7 +2641,10 @@ matcharg({nr}) List arguments of |:match|
matchdelete({id} [, {win}]) Number delete match identified by {id}
matchend({expr}, {pat} [, {start} [, {count}]])
Number position where {pat} ends in {expr}
-matchfuzzy({list}, {str}) List fuzzy match {str} in {list}
+matchfuzzy({list}, {str} [, {dict}])
+ List fuzzy match {str} in {list}
+matchfuzzypos({list}, {str} [, {dict}])
+ List fuzzy match {str} in {list}
matchlist({expr}, {pat} [, {start} [, {count}]])
List match and submatches of {pat} in {expr}
matchstr({expr}, {pat} [, {start} [, {count}]])
@@ -7311,12 +7314,25 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()*
GetText()->matchend('word')
-matchfuzzy({list}, {str}) *matchfuzzy()*
- Returns a list with all the strings in {list} that fuzzy
- match {str}. The strings in the returned list are sorted
- based on the matching score. {str} is treated as a literal
- string and regular expression matching is NOT supported.
- The maximum supported {str} length is 256.
+matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()*
+ If {list} is a list of strings, then returns a list with all
+ the strings in {list} that fuzzy match {str}. The strings in
+ the returned list are sorted based on the matching score.
+
+ If {list} is a list of dictionaries, then the optional {dict}
+ argument supports the following items:
+ key key of the item which is fuzzy matched against
+ {str}. The value of this item should be a
+ string.
+ text_cb |Funcref| that will be called for every item
+ in {list} to get the text for fuzzy matching.
+ This should accept a dictionary item as the
+ argument and return the text for that item to
+ use for fuzzy matching.
+
+ {str} is treated as a literal string and regular expression
+ matching is NOT supported. The maximum supported {str} length
+ is 256.
If there are no matching strings or there is an error, then an
empty list is returned. If length of {str} is greater than
@@ -7327,11 +7343,36 @@ matchfuzzy({list}, {str}) *matchfuzzy()*
< results in ["clay"]. >
:echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl")
< results in a list of buffer names fuzzy matching "ndl". >
+ :echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'})
+< results in a list of buffer information dicts with buffer
+ names fuzzy matching "ndl". >
+ :echo getbufinfo()->matchfuzzy("spl",
+ \ {'text_cb' : {v -> v.name}})
+< results in a list of buffer information dicts with buffer
+ names fuzzy matching "spl". >
:echo v:oldfiles->matchfuzzy("test")
< results in a list of file names fuzzy matching "test". >
:let l = readfile("buffer.c")->matchfuzzy("str")
< results in a list of lines in "buffer.c" fuzzy matching "str".
+matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()*
+ Same as |matchfuzzy()|, but returns the list of matched
+ strings and the list of character positions where characters
+ in {str} matches.
+
+ If {str} matches multiple times in a string, then only the
+ positions for the best match is returned.
+
+ If there are no matching strings or there is an error, then a
+ list with two empty list items is returned.
+
+ Example: >
+ :echo matchfuzzypos(['testing'], 'tsg')
+< results in [['testing'], [[0, 2, 6]]] >
+ :echo matchfuzzypos(['clay', 'lacy'], 'la')
+< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] >
+ :echo [{'text': 'hello', 'id' : 10}]->matchfuzzypos('ll', {'key' : 'text'})
+< results in [{'id': 10, 'text': 'hello'}] [[2, 3]]
matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()*
Same as |match()|, but return a |List|. The first item in the
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 1e5915f41b..387232b52e 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -604,6 +604,7 @@ String manipulation: *string-functions*
match() position where a pattern matches in a string
matchend() position where a pattern match ends in a string
matchfuzzy() fuzzy matches a string in a list of strings
+ matchfuzzypos() fuzzy matches a string in a list of strings
matchstr() match of a pattern in a string
matchstrpos() match and positions of a pattern in a string
matchlist() like matchstr() and also return submatches
diff --git a/src/evalfunc.c b/src/evalfunc.c
index f96a4c44d1..de723aaef7 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -753,7 +753,8 @@ static funcentry_T global_functions[] =
{"matcharg", 1, 1, FEARG_1, ret_list_string, f_matcharg},
{"matchdelete", 1, 2, FEARG_1, ret_number, f_matchdelete},
{"matchend", 2, 4, FEARG_1, ret_number, f_matchend},
- {"matchfuzzy", 2, 2, FEARG_1, ret_list_string, f_matchfuzzy},
+ {"matchfuzzy", 2, 3, FEARG_1, ret_list_string, f_matchfuzzy},
+ {"matchfuzzypos", 2, 3, FEARG_1, ret_list_any, f_matchfuzzypos},
{"matchlist", 2, 4, FEARG_1, ret_list_string, f_matchlist},
{"matchstr", 2, 4, FEARG_1, ret_string, f_matchstr},
{"matchstrpos", 2, 4, FEARG_1, ret_list_any, f_matchstrpos},
diff --git a/src/proto/search.pro b/src/proto/search.pro
index 1b62254199..14047908c1 100644
--- a/src/proto/search.pro
+++ b/src/proto/search.pro
@@ -37,4 +37,5 @@ spat_T *get_spat(int idx);
int get_spat_last_idx(void);
void f_searchcount(typval_T *argvars, typval_T *rettv);
void f_matchfuzzy(typval_T *argvars, typval_T *rettv);
+void f_matchfuzzypos(typval_T *argvars, typval_T *rettv);
/* vim: set ft=c : */
diff --git a/src/search.c b/src/search.c
index 9572785b7e..59b46e54d8 100644
--- a/src/search.c
+++ b/src/search.c
@@ -4216,33 +4216,144 @@ the_end:
*/
typedef struct
{
- listitem_T *item;
+ listitem_T *item;
int score;
+ list_T *lmatchpos;
} fuzzyItem_T;
+// bonus for adjacent matches
+#define SEQUENTIAL_BONUS 15
+// bonus if match occurs after a separator
+#define SEPARATOR_BONUS 30
+// bonus if match is uppercase and prev is lower
+#define CAMEL_BONUS 30
+// bonus if the first letter is matched
+#define FIRST_LETTER_BONUS 15
+// penalty applied for every letter in str before the first match
+#define LEADING_LETTER_PENALTY -5
+// maximum penalty for leading letters
+#define MAX_LEADING_LETTER_PENALTY -15
+// penalty for every letter that doesn't match
+#define UNMATCHED_LETTER_PENALTY -1
+// Score for a string that doesn't fuzzy match the pattern
+#define SCORE_NONE -9999
+
+#define FUZZY_MATCH_RECURSION_LIMIT 10
+// Maximum number of characters that can be fuzzy matched
+#define MAXMATCHES 256
+
+typedef int_u matchidx_T;
+
+/*
+ * Compute a score for a fuzzy matched string. The matching character locations
+ * are in 'matches'.
+ */
+ static int
+fuzzy_match_compute_score(
+ char_u *str,
+ int strSz,
+ matchidx_T *matches,
+ int numMatches)
+{
+ int score;
+ int penalty;
+ int unmatched;
+ int i;
+ char_u *p = str;
+ matchidx_T sidx = 0;
+
+ // Initialize score
+ score = 100;
+
+ // Apply leading letter penalty
+ penalty = LEADING_LETTER_PENALTY * matches[0];
+ if (penalty < MAX_LEADING_LETTER_PENALTY)
+ penalty = MAX_LEADING_LETTER_PENALTY;
+ score += penalty;
+
+ // Apply unmatched penalty
+ unmatched = strSz - numMatches;
+ score += UNMATCHED_LETTER_PENALTY * unmatched;
+
+ // Apply ordering bonuses
+ for (i = 0; i < numMatches; ++i)
+ {
+ matchidx_T currIdx = matches[i];
+
+ if (i > 0)
+ {
+ matchidx_T prevIdx = matches[i - 1];
+
+ // Sequential
+ if (currIdx == (prevIdx + 1))
+ score += SEQUENTIAL_BONUS;
+ }
+
+ // Check for bonuses based on neighbor character value
+ if (currIdx > 0)
+ {
+ // Camel case
+ int neighbor;
+ int curr;
+ int neighborSeparator;
+
+ if (has_mbyte)
+ {
+ while (sidx < currIdx)
+ {
+ neighbor = (*mb_ptr2char)(p);
+ (void)mb_ptr2char_adv(&p);
+ sidx++;
+ }
+ curr = (*mb_ptr2char)(p);
+ }
+ else
+ {
+ neighbor = str[currIdx - 1];
+ curr = str[currIdx];
+ }
+
+ if (vim_islower(neighbor) && vim_isupper(curr))
+ score += CAMEL_BONUS;
+
+ // Separator
+ neighborSeparator = neighbor == '_' || neighbor == ' ';
+ if (neighborSeparator)
+ score += SEPARATOR_BONUS;
+ }
+ else
+ {
+ // First letter
+ score += FIRST_LETTER_BONUS;
+ }
+ }
+ return score;
+}
+
static int
fuzzy_match_recursive(
- char_u *fuzpat,
- char_u *str,
- int *outScore,
- char_u *strBegin,
- char_u *srcMatches,
- char_u *matches,
- int maxMatches,
- int nextMatch,
- int *recursionCount,
- int recursionLimit)
+ char_u *fuzpat,
+ char_u *str,
+ matchidx_T strIdx,
+ int *outScore,
+ char_u *strBegin,
+ int strLen,
+ matchidx_T *srcMatches,
+ matchidx_T *matches,
+ int maxMatches,
+ int nextMatch,
+ int *recursionCount)
{
// Recursion params
int recursiveMatch = FALSE;
- char_u bestRecursiveMatches[256];
+ matchidx_T bestRecursiveMatches[MAXMATCHES];
int bestRecursiveScore = 0;
int first_match;
int matched;
// Count recursions
++*recursionCount;
- if (*recursionCount >= recursionLimit)
+ if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT)
return FALSE;
// Detect end of strings
@@ -4251,13 +4362,20 @@ fuzzy_match_recursive(
// Loop through fuzpat and str looking for a match
first_match = TRUE;
- while (*fuzpat != '\0' && *str != '\0')
+ while (*fuzpat != NUL && *str != NUL)
{
+ int c1;
+ int c2;
+
+ c1 = PTR2CHAR(fuzpat);
+ c2 = PTR2CHAR(str);
+
// Found match
- if (vim_tolower(*fuzpat) == vim_tolower(*str))
+ if (vim_tolower(c1) == vim_tolower(c2))
{
- char_u recursiveMatches[256];
+ matchidx_T recursiveMatches[MAXMATCHES];
int recursiveScore = 0;
+ char_u *next_char;
// Supplied matches buffer was too short
if (nextMatch >= maxMatches)
@@ -4266,116 +4384,58 @@ fuzzy_match_recursive(
// "Copy-on-Write" srcMatches into matches
if (first_match && srcMatches)
{
- memcpy(matches, srcMatches, nextMatch);
+ memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0]));
first_match = FALSE;
}
// Recursive call that "skips" this match
- if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore,
- strBegin, matches, recursiveMatches,
- sizeof(recursiveMatches), nextMatch, recursionCount,
- recursionLimit))
+ if (has_mbyte)
+ next_char = str + (*mb_ptr2len)(str);
+ else
+ next_char = str + 1;
+ if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1,
+ &recursiveScore, strBegin, strLen, matches,
+ recursiveMatches,
+ sizeof(recursiveMatches)/sizeof(recursiveMatches[0]),
+ nextMatch, recursionCount))
{
// Pick best recursive score
if (!recursiveMatch || recursiveScore > bestRecursiveScore)
{
- memcpy(bestRecursiveMatches, recursiveMatches, 256);
+ memcpy(bestRecursiveMatches, recursiveMatches,
+ MAXMATCHES * sizeof(recursiveMatches[0]));
bestRecursiveScore = recursiveScore;
}
recursiveMatch = TRUE;
}
// Advance
- matches[nextMatch++] = (char_u)(str - strBegin);
- ++fuzpat;
+ matches[nextMatch++] = strIdx;
+ if (has_mbyte)
+ (void)mb_ptr2char_adv(&fuzpat);
+ else
+ ++fuzpat;
}
- ++str;
+ if (has_mbyte)
+ (void)mb_ptr2char_adv(&str);
+ else
+ ++str;
+ strIdx++;
}
// Determine if full fuzpat was matched
- matched = *fuzpat == '\0' ? TRUE : FALSE;
+ matched = *fuzpat == NUL ? TRUE : FALSE;
// Calculate score
if (matched)
- {
- // bonus for adjacent matches
- int sequential_bonus = 15;
- // bonus if match occurs after a separator
- int separator_bonus = 30;
- // bonus if match is uppercase and prev is lower
- int camel_bonus = 30;
- // bonus if the first letter is matched
- int first_letter_bonus = 15;
- // penalty applied for every letter in str before the first match
- int leading_letter_penalty = -5;
- // maximum penalty for leading letters
- int max_leading_letter_penalty = -15;
- // penalty for every letter that doesn't matter
- int unmatched_letter_penalty = -1;
- int penalty;
- int unmatched;
- int i;
-
- // Iterate str to end
- while (*str != '\0')
- ++str;
-
- // Initialize score
- *outScore = 100;
-
- // Apply leading letter penalty
- penalty = leading_letter_penalty * matches[0];
- if (penalty < max_leading_letter_penalty)
- penalty = max_leading_letter_penalty;
- *outScore += penalty;
-
- // Apply unmatched penalty
- unmatched = (int)(str - strBegin) - nextMatch;
- *outScore += unmatched_letter_penalty * unmatched;
-
- // Apply ordering bonuses
- for (i = 0; i < nextMatch; ++i)
- {
- char_u currIdx = matches[i];
-
- if (i > 0)
- {
- char_u prevIdx = matches[i - 1];
-
- // Sequential
- if (currIdx == (prevIdx + 1))
- *outScore += sequential_bonus;
- }
-
- // Check for bonuses based on neighbor character value
- if (currIdx > 0)
- {
- // Camel case
- char_u neighbor = strBegin[currIdx - 1];
- char_u curr = strBegin[currIdx];
- int neighborSeparator;
-
- if (islower(neighbor) && isupper(curr))
- *outScore += camel_bonus;
-
- // Separator
- neighborSeparator = neighbor == '_' || neighbor == ' ';
- if (neighborSeparator)
- *outScore += separator_bonus;
- }
- else
- {
- // First letter
- *outScore += first_letter_bonus;
- }
- }
- }
+ *outScore = fuzzy_match_compute_score(strBegin, strLen, matches,
+ nextMatch);
// Return best result
if (recursiveMatch && (!matched || bestRecursiveScore > *outScore))
{
// Recursive score is better than "this"
- memcpy(matches, bestRecursiveMatches, maxMatches);
+ memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0]));
*outScore = bestRecursiveScore;
return TRUE;
}
@@ -4394,22 +4454,27 @@ fuzzy_match_recursive(
* normalized and varies with pattern.
* Recursion is limited internally (default=10) to prevent degenerate cases
* (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
- * Uses char_u for match indices. Therefore patterns are limited to 256
+ * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES
* characters.
*
- * Returns TRUE if fuzpat is found AND calculates a score.
+ * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in
+ * 'outScore' and the matching character positions in 'matches'.
*/
static int
-fuzzy_match(char_u *str, char_u *fuzpat, int *outScore)
+fuzzy_match(
+ char_u *str,
+ char_u *fuzpat,
+ int *outScore,
+ matchidx_T *matches,
+ int maxMatches)
{
- char_u matches[256];
int recursionCount = 0;
- int recursionLimit = 10;
+ int len = MB_CHARLEN(str);
*outScore = 0;
- return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches,
- sizeof(matches), 0, &recursionCount, recursionLimit);
+ return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL,
+ matches, maxMatches, 0, &recursionCount);
}
/*
@@ -4425,84 +4490,269 @@ fuzzy_item_compare(const void *s1, const void *s2)
}
/*
- * Fuzzy search the string 'str' in 'strlist' and return the matching strings
- * in 'fmatchlist'.
+ * Fuzzy search the string 'str' in a list of 'items' and return the matching
+ * strings in 'fmatchlist'.
+ * If 'items' is a list of strings, then search for 'str' in the list.
+ * If 'items' is a list of dicts, then either use 'key' to lookup the string
+ * for each item or use 'item_cb' Funcref function to get the string.
+ * If 'retmatchpos' is TRUE, then return a list of positions where 'str'
+ * matches for each item.
*/
static void
-match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist)
+match_fuzzy(
+ list_T *items,
+ char_u *str,
+ char_u *key,
+ callback_T *item_cb,
+ int retmatchpos,
+ list_T *fmatchlist)
{
long len;
fuzzyItem_T *ptrs;
listitem_T *li;
long i = 0;
int found_match = FALSE;
+ matchidx_T matches[MAXMATCHES];
- len = list_len(strlist);
+ len = list_len(items);
if (len == 0)
return;
- ptrs = ALLOC_MULT(fuzzyItem_T, len);
+ ptrs = ALLOC_CLEAR_MULT(fuzzyItem_T, len);
if (ptrs == NULL)
return;
- // For all the string items in strlist, get the fuzzy matching score
- FOR_ALL_LIST_ITEMS(strlist, li)
+ // For all the string items in items, get the fuzzy matching score
+ FOR_ALL_LIST_ITEMS(items, li)
{
- int score;
+ int score;
+ char_u *itemstr;
+ typval_T rettv;
ptrs[i].item = li;
- ptrs[i].score = -9999;
- // ignore non-string items in the list
- if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL)
- if (fuzzy_match(li->li_tv.vval.v_string, str, &score))
+ ptrs[i].score = SCORE_NONE;
+ itemstr = NULL;
+ rettv.v_type = VAR_UNKNOWN;
+ if (li->li_tv.v_type == VAR_STRING) // list of strings
+ itemstr = li->li_tv.vval.v_string;
+ else if (li->li_tv.v_type == VAR_DICT &&
+ (key != NULL || item_cb->cb_name != NULL))
+ {
+ // For a dict, either use the specified key to lookup the string or
+ // use the specified callback function to get the string.
+ if (key != NULL)
+ itemstr = dict_get_string(li->li_tv.vval.v_dict, key, FALSE);
+ else
+ {
+ typval_T argv[2];
+
+ // Invoke the supplied callback (if any) to get the dict item
+ li->li_tv.vval.v_dict->dv_refcount++;
+ argv[0].v_type = VAR_DICT;
+ argv[0].vval.v_dict = li->li_tv.vval.v_dict;
+ argv[1].v_type = VAR_UNKNOWN;
+ if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL)
+ {
+ if (rettv.v_type == VAR_STRING)
+ itemstr = rettv.vval.v_string;
+ }
+ dict_unref(li->li_tv.vval.v_dict);
+ }
+ }
+
+ if (itemstr != NULL
+ && fuzzy_match(itemstr, str, &score, matches,
+ sizeof(matches) / sizeof(matches[0])))
+ {
+ // Copy the list of matching positions in itemstr to a list, if
+ // 'retmatchpos' is set.
+ if (retmatchpos)
{
- ptrs[i].score = score;
- found_match = TRUE;
+ int j;
+ int strsz;
+
+ ptrs[i].lmatchpos = list_alloc();
+ if (ptrs[i].lmatchpos == NULL)
+ goto done;
+ strsz = MB_CHARLEN(str);
+ for (j = 0; j < strsz; j++)
+ {
+ if (list_append_number(ptrs[i].lmatchpos,
+ matches[j]) == FAIL)
+ goto done;
+ }
}
+ ptrs[i].score = score;
+ found_match = TRUE;
+ }
++i;
+ clear_tv(&rettv);
}
if (found_match)
{
+ list_T *l;
+
// Sort the list by the descending order of the match score
qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T),
fuzzy_item_compare);
- // Copy the matching strings with 'score != -9999' to the return list
+ // For matchfuzzy(), return a list of matched strings.
+ // ['str1', 'str2', 'str3']
+ // For matchfuzzypos(), return a list with two items.
+ // The first item is a list of matched strings. The second item
+ // is a list of lists where each list item is a list of matched
+ // character positions.
+ // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
+ if (retmatchpos)
+ {
+ li = list_find(fmatchlist, 0);
+ if (li == NULL || li->li_tv.vval.v_list == NULL)
+ goto done;
+ l = li->li_tv.vval.v_list;
+ }
+ else
+ l = fmatchlist;
+
+ // Copy the matching strings with a valid score to the return list
for (i = 0; i < len; i++)
{
- if (ptrs[i].score == -9999)
+ if (ptrs[i].score == SCORE_NONE)
break;
- list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string,
- -1);
+ list_append_tv(l, &ptrs[i].item->li_tv);
+ }
+
+ // next copy the list of matching positions
+ if (retmatchpos)
+ {
+ li = list_find(fmatchlist, -1);
+ if (li == NULL || li->li_tv.vval.v_list == NULL)
+ goto done;
+ l = li->li_tv.vval.v_list;
+
+ for (i = 0; i < len; i++)
+ {
+ if (ptrs[i].score == SCORE_NONE)
+ break;
+ if (ptrs[i].lmatchpos != NULL &&
+ list_append_list(l, ptrs[i].lmatchpos) == FAIL)
+ goto done;
+ }
}
}
+done:
vim_free(ptrs);
}
/*
- * "matchfuzzy()" function
+ * Do fuzzy matching. Returns the list of matched strings in 'rettv'.
+ * If 'retmatchpos' is TRUE, also returns the matching character positions.
*/
- void
-f_matchfuzzy(typval_T *argvars, typval_T *rettv)
+ static void
+do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos)
{
- if (argvars[0].v_type != VAR_LIST)
+ callback_T cb;
+ char_u *key = NULL;
+ int ret;
+
+ CLEAR_POINTER(&cb);
+
+ // validate and get the arguments
+ if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL)
{
- emsg(_(e_listreq));
+ semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
return;
}
- if (argvars[0].vval.v_list == NULL)
- return;
if (argvars[1].v_type != VAR_STRING
|| argvars[1].vval.v_string == NULL)
{
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
return;
}
- if (rettv_list_alloc(rettv) == OK)
- match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]),
- rettv->vval.v_list);
+
+ if (argvars[2].v_type != VAR_UNKNOWN)
+ {
+ dict_T *d;
+ dictitem_T *di;
+
+ if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL)
+ {
+ emsg(_(e_dictreq));
+ return;
+ }
+
+ // To search a dict, either a callback function or a key can be
+ // specified.
+ d = argvars[2].vval.v_dict;
+ if ((di = dict_find(d, (char_u *)"key", -1)) != NULL)
+ {
+ if (di->di_tv.v_type != VAR_STRING
+ || di->di_tv.vval.v_string == NULL
+ || *di->di_tv.vval.v_string == NUL)
+ {
+ semsg(_(e_invarg2), tv_get_string(&di->di_tv));
+ return;
+ }
+ key = tv_get_string(&di->di_tv);
+ }
+ else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL)
+ {
+ cb = get_callback(&di->di_tv);
+ if (cb.cb_name == NULL)
+ {
+ semsg(_(e_invargval), "text_cb");
+ return;
+ }
+ }
+ }
+
+ // get the fuzzy matches
+ ret = rettv_list_alloc(rettv);
+ if (ret != OK)
+ goto done;
+ if (retmatchpos)
+ {
+ list_T *l;
+
+ // For matchfuzzypos(), a list with two items are returned. First item
+ // is a list of matching strings and the second item is a list of
+ // lists with matching positions within each string.
+ l = list_alloc();
+ if (l == NULL)
+ goto done;
+ if (list_append_list(rettv->vval.v_list, l) == FAIL)
+ goto done;
+ l = list_alloc();
+ if (l == NULL)
+ goto done;
+ if (list_append_list(rettv->vval.v_list, l) == FAIL)
+ goto done;
+ }
+
+ match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key,
+ &cb, retmatchpos, rettv->vval.v_list);
+
+done:
+ free_callback(&cb);
+}
+
+/*
+ * "matchfuzzy()" function
+ */
+ void
+f_matchfuzzy(typval_T *argvars, typval_T *rettv)
+{
+ do_fuzzymatch(argvars, rettv, FALSE);
+}
+
+/*
+ * "matchfuzzypos()" function
+ */
+ void
+f_matchfuzzypos(typval_T *argvars, typval_T *rettv)
+{
+ do_fuzzymatch(argvars, rettv, TRUE);
}
#endif
diff --git a/src/testdir/Make_all.mak b/src/testdir/Make_all.mak
index 61ad6a1f38..eecc2e7fd3 100644
--- a/src/testdir/Make_all.mak
+++ b/src/testdir/Make_all.mak
@@ -184,6 +184,7 @@ NEW_TESTS = \
test_match \
test_matchadd_conceal \
test_matchadd_conceal_utf8 \
+ test_matchfuzzy \
test_memory_usage \
test_menu \
test_messages \
@@ -420,6 +421,7 @@ NEW_TESTS_RES = \
test_match.res \
test_matchadd_conceal.res \
test_matchadd_conceal_utf8.res \
+ test_matchfuzzy.res \
test_memory_usage.res \
test_menu.res \
test_messages.res \
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index e31b63cc43..7ff9b7a819 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -2554,28 +2554,4 @@ func Test_browsedir()
call assert_fails('call browsedir("open", [])', 'E730:')
endfunc
-" Test for matchfuzzy()
-func Test_matchfuzzy()
- call assert_fails('call matchfuzzy(10, "abc")', 'E714:')
- call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
- call assert_equal([], matchfuzzy([], 'abc'))
- call assert_equal([], matchfuzzy(['abc'], ''))
- call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
- call assert_equal([], matchfuzzy([10, 20], 'ac'))
- call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
- call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
- call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
- call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
- call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
- call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
- call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
- call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
-
- %bw!
- eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
- let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
- call assert_equal(1, len(l))
- call assert_match('needle', l[0])
-endfunc
-
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/testdir/test_matchfuzzy.vim b/src/testdir/test_matchfuzzy.vim
new file mode 100644
index 0000000000..b7ef483584
--- /dev/null
+++ b/src/testdir/test_matchfuzzy.vim
@@ -0,0 +1,188 @@
+" Tests for fuzzy matching
+
+source shared.vim
+source check.vim
+
+" Test for matchfuzzy()
+func Test_matchfuzzy()
+ call assert_fails('call matchfuzzy(10, "abc")', 'E686:')
+ call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
+ call assert_fails("let x = matchfuzzy(test_null_list(), 'foo')", 'E686:')
+ call assert_fails('call matchfuzzy(["abc"], test_null_string())', 'E475:')
+ call assert_equal([], matchfuzzy([], 'abc'))
+ call assert_equal([], matchfuzzy(['abc'], ''))
+ call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
+ call assert_equal([], matchfuzzy([10, 20], 'ac'))
+ call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
+ call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
+ call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
+ call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
+ call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
+ call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
+ call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
+ call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len())
+ call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
+
+ " Tests for match preferences
+ " preference for camel case match
+ call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
+ " preference for match after a separator (_ or space)
+ call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
+ " preference for leading letter match
+ call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo'))
+ " preference for sequential match
+ call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo'))
+ " non-matching leading letter(s) penalty
+ call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo'))
+ " total non-matching letter(s) penalty
+ call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one'))
+
+ %bw!
+ eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
+ let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
+ call assert_equal(1, len(l))
+ call assert_match('needle', l[0])
+
+ let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
+ call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}}))
+ call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'}))
+ call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}}))
+ call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'}))
+ call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:')
+ call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}}))
+ call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}}))
+ call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:')
+ call assert_equal([], matchfuzzy(l, 'cam'))
+ call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:')
+ call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:')
+ call assert_fails("let x = matchfuzzy(l, 'cam', test_null_dict())", 'E715:')
+ call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : test_null_string()})", 'E475:')
+ call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
+
+ let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
+ call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:')
+
+ " Test in latin1 encoding
+ let save_enc = &encoding
+ set encoding=latin1
+ call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
+ let &encoding = save_enc
+endfunc
+
+" Test for the fuzzymatchpos() function
+func Test_matchfuzzypos()
+ call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl'))
+ call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
+ call assert_equal([['hello', 'hello world hello world'],
+ \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]],
+ \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
+ call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa'))
+ call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab'))
+ let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
+ call assert_equal(range(256), x[1][0])
+ call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257)))
+ call assert_equal([[], []], matchfuzzypos([], 'abc'))
+
+ " match in a long string
+ call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]],
+ \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc'))
+
+ " preference for camel case match
+ call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc'))
+ " preference for match after a separator (_ or space)
+ call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab'))
+ " preference for leading letter match
+ call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab'))
+ " preference for sequential match
+ call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one'))
+ " best recursive match
+ call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one'))
+
+ let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
+ call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]],
+ \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
+ call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]],
+ \ matchfuzzypos(l, 'cam', {'key' : 'val'}))
+ call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}}))
+ call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'}))
+ call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:')
+ call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}}))
+ call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}}))
+ call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:')
+ call assert_equal([[], []], matchfuzzypos(l, 'cam'))
+ call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:')
+ call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:')
+ call assert_fails("let x = matchfuzzypos(l, 'cam', test_null_dict())", 'E715:')
+ call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : test_null_string()})", 'E475:')
+ call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
+
+ let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
+ call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
+endfunc
+
+func Test_matchfuzzy_mbyte()
+ CheckFeature multi_lang
+ call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ'))
+ " reverse the order of characters
+ call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ'))
+ call assert_equal(['αβΩxxx', 'xαxβxΩx'],
+ \ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
+ call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
+ \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
+
+ " preference for camel case match
+ call assert_equal(['oneĄwo', 'oneąwo'],
+ \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo'))
+ " preference for match after a separator (_ or space)
+ call assert_equal(['â… â…¡a_bã„Ÿã„ ', 'â… â…¡a bã„Ÿã„ ', 'â… â…¡abã„Ÿã„ '],
+ \ ['â…