summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYegappan Lakshmanan <yegappan@yahoo.com>2022-03-13 19:23:48 +0000
committerBram Moolenaar <Bram@vim.org>2022-03-13 19:23:48 +0000
commitb29b96806f1472371fb3cc01d48394e00b95cfc8 (patch)
tree4911fbe938c3ba3b0904b906e04caf61452df25f
parent424da7aa481032f501158a671a0c1d117507afdd (diff)
patch 8.2.4562: linear tag search is not optimalv8.2.4562
Problem: Linear tag search is not optimal. Solution: Improve linear tag search performance. (Yegappan Lakshmanan, closes #9944)
-rw-r--r--src/tag.c281
-rw-r--r--src/version.c2
2 files changed, 142 insertions, 141 deletions
diff --git a/src/tag.c b/src/tag.c
index 4cf11dc226..b7ac9c150a 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -1642,34 +1642,34 @@ find_tagfunc_tags(
typedef struct
{
tagsearch_state_T state; // tag search state
+ int stop_searching; // stop when match found or error
+ pat_T *orgpat; // holds unconverted pattern info
+ char_u *lbuf; // line buffer
+ int lbuf_size; // length of lbuf
char_u *tag_fname; // name of the tag file
FILE *fp; // current tags file pointer
- pat_T orgpat; // holds unconverted pattern info
int flags; // flags used for tag search
int tag_file_sorted; // !_TAG_FILE_SORTED value
int get_searchpat; // used for 'showfulltag'
int help_only; // only search for help tags
- vimconv_T vimconv;
-#ifdef FEAT_MULTI_LANG
- char_u help_lang[3]; // lang of current tags file
- int help_pri; // help language priority
- char_u *help_lang_find; // lang to be found
- int is_txt; // flag of file extension
-#endif
int did_open; // did open a tag file
int mincount; // MAXCOL: find all matches
// other: minimal number of matches
int linear; // do a linear search
- char_u *lbuf; // line buffer
- int lbuf_size; // length of lbuf
+ vimconv_T vimconv;
#ifdef FEAT_EMACS_TAGS
int is_etag; // current file is emacs style
char_u *ebuf; // additional buffer for etag fname
#endif
+#ifdef FEAT_MULTI_LANG
+ char_u help_lang[3]; // lang of current tags file
+ int help_pri; // help language priority
+ char_u *help_lang_find; // lang to be found
+ int is_txt; // flag of file extension
+#endif
int match_count; // number of matches found
garray_T ga_match[MT_COUNT]; // stores matches in sequence
hashtab_T ht_match[MT_COUNT]; // stores matches by key
- int stop_searching; // stop when match found or error
} findtags_state_T;
/*
@@ -1687,9 +1687,10 @@ findtags_state_init(
st->tag_fname = alloc(MAXPATHL + 1);
st->fp = NULL;
- st->orgpat.pat = pat;
- st->orgpat.len = (int)STRLEN(pat);
- st->orgpat.regmatch.regprog = NULL;
+ st->orgpat = ALLOC_ONE(pat_T);
+ st->orgpat->pat = pat;
+ st->orgpat->len = (int)STRLEN(pat);
+ st->orgpat->regmatch.regprog = NULL;
st->flags = flags;
st->tag_file_sorted = NUL;
st->help_only = (flags & TAG_HELP);
@@ -1736,7 +1737,8 @@ findtags_state_free(findtags_state_T *st)
{
vim_free(st->tag_fname);
vim_free(st->lbuf);
- vim_regfree(st->orgpat.regmatch.regprog);
+ vim_regfree(st->orgpat->regmatch.regprog);
+ vim_free(st->orgpat);
#ifdef FEAT_EMACS_TAGS
vim_free(st->ebuf);
#endif
@@ -1940,6 +1942,7 @@ emacs_tags_file_eof(findtags_state_T *st)
st->fp = incstack[incstack_idx].fp;
STRCPY(st->tag_fname, incstack[incstack_idx].etag_fname);
vim_free(incstack[incstack_idx].etag_fname);
+ st->is_etag = TRUE; // (only etags can include)
return TRUE;
}
@@ -2099,12 +2102,9 @@ findtags_get_next_line(findtags_state_T *st, tagsearch_info_T *sinfo_p)
{
#ifdef FEAT_EMACS_TAGS
if (emacs_tags_file_eof(st) == TRUE)
- {
// an included tags file. Continue processing the parent
// tags file.
- st->is_etag = TRUE; // (only etags can include)
return TAGS_READ_IGNORE;
- }
#endif
return TAGS_READ_EOF;
}
@@ -2193,12 +2193,12 @@ findtags_start_state_handler(
{
st->state = TS_BINARY;
*sortic = TRUE;
- st->orgpat.regmatch.rm_ic = (p_ic || !noic);
+ st->orgpat->regmatch.rm_ic = (p_ic || !noic);
}
else
st->state = TS_LINEAR;
- if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic)
+ if (st->state == TS_BINARY && st->orgpat->regmatch.rm_ic && !*sortic)
{
// Binary search won't work for ignoring case, use linear
// search.
@@ -2241,14 +2241,21 @@ findtags_start_state_handler(
* Returns FAIL if a format error is encountered.
*/
static int
-findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp)
+findtags_parse_line(
+ findtags_state_T *st,
+ tagptrs_T *tagpp,
+ findtags_match_args_T *margs,
+ tagsearch_info_T *sinfo_p)
{
int status;
+ int i;
+ int cmplen;
+ int tagcmp;
// Figure out where the different strings are in this line.
// For "normal" tags: Do a quick check if the tag matches.
// This speeds up tag searching a lot!
- if (st->orgpat.headlen
+ if (st->orgpat->headlen
#ifdef FEAT_EMACS_TAGS
&& !st->is_etag
#endif
@@ -2258,89 +2265,18 @@ findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp)
tagpp->tagname = st->lbuf;
tagpp->tagname_end = vim_strchr(st->lbuf, TAB);
if (tagpp->tagname_end == NULL)
- {
// Corrupted tag line.
- return FAIL;
- }
-
- // Can be a matching tag, isolate the file name and command.
- tagpp->fname = tagpp->tagname_end + 1;
- tagpp->fname_end = vim_strchr(tagpp->fname, TAB);
- if (tagpp->fname_end == NULL)
- status = FAIL;
- else
- {
- tagpp->command = tagpp->fname_end + 1;
- status = OK;
- }
- }
- else
- status = parse_tag_line(st->lbuf,
-#ifdef FEAT_EMACS_TAGS
- st->is_etag,
-#endif
- tagpp);
-
- if (status == FAIL)
- return FAIL;
-
-#ifdef FEAT_EMACS_TAGS
- if (st->is_etag)
- tagpp->fname = st->ebuf;
-#endif
-
- return OK;
-}
-
-/*
- * Initialize the structure used for tag matching.
- */
- static void
-findtags_matchargs_init(findtags_match_args_T *margs, int flags)
-{
- margs->matchoff = 0; // match offset
- margs->match_re = FALSE; // match with regexp
- margs->match_no_ic = FALSE; // matches with case
- margs->has_re = (flags & TAG_REGEXP); // regexp used
- margs->sortic = FALSE; // tag file sorted in nocase
- margs->sort_error = FALSE; // tags file not sorted
-}
-
-/*
- * Compares the tag name in 'tagpp->tagname' with a search pattern in
- * 'st->orgpat.head'.
- * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag
- * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a
- * binary search) and TAG_MATCH_STOP if all the tags are processed without a
- * match. Uses the values in 'margs' for doing the comparison.
- */
- static tagmatch_status_T
-findtags_match_tag(
- findtags_state_T *st,
- tagptrs_T *tagpp,
- findtags_match_args_T *margs,
- tagsearch_info_T *sinfo_p)
-{
- int match = FALSE;
- int cmplen;
- int i;
- int tagcmp;
+ return TAG_MATCH_FAIL;
- // Skip this line if the length of the tag is different and
- // there is no regexp, or the tag is too short.
- if (st->orgpat.headlen
-#ifdef FEAT_EMACS_TAGS
- && !st->is_etag
-#endif
- )
- {
+ // Skip this line if the length of the tag is different and
+ // there is no regexp, or the tag is too short.
cmplen = (int)(tagpp->tagname_end - tagpp->tagname);
if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength'
cmplen = p_tl;
- if (margs->has_re && st->orgpat.headlen < cmplen)
- cmplen = st->orgpat.headlen;
- else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen)
- return TAG_MATCH_FAIL;
+ if ((st->flags & TAG_REGEXP) && st->orgpat->headlen < cmplen)
+ cmplen = st->orgpat->headlen;
+ else if (st->state == TS_LINEAR && st->orgpat->headlen != cmplen)
+ return TAG_MATCH_NEXT;
if (st->state == TS_BINARY)
{
@@ -2353,18 +2289,18 @@ findtags_match_tag(
// Compare the current tag with the searched tag.
if (margs->sortic)
- tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat.head,
+ tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat->head,
(size_t)cmplen);
else
- tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen);
+ tagcmp = STRNCMP(tagpp->tagname, st->orgpat->head, cmplen);
// A match with a shorter tag means to search forward.
// A match with a longer tag means to search backward.
if (tagcmp == 0)
{
- if (cmplen < st->orgpat.headlen)
+ if (cmplen < st->orgpat->headlen)
tagcmp = -1;
- else if (cmplen > st->orgpat.headlen)
+ else if (cmplen > st->orgpat->headlen)
tagcmp = 1;
}
@@ -2404,7 +2340,7 @@ findtags_match_tag(
}
else if (st->state == TS_SKIP_BACK)
{
- if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+ if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0)
st->state = TS_STEP_FORWARD;
else
// Have to skip back more. Restore the curr_offset
@@ -2414,7 +2350,7 @@ findtags_match_tag(
}
else if (st->state == TS_STEP_FORWARD)
{
- if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+ if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0)
{
if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset)
return TAG_MATCH_STOP; // past last match
@@ -2424,9 +2360,68 @@ findtags_match_tag(
}
else
// skip this match if it can't match
- if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
- return TAG_MATCH_FAIL;
+ if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0)
+ return TAG_MATCH_NEXT;
+
+ // Can be a matching tag, isolate the file name and command.
+ tagpp->fname = tagpp->tagname_end + 1;
+ tagpp->fname_end = vim_strchr(tagpp->fname, TAB);
+ if (tagpp->fname_end == NULL)
+ status = FAIL;
+ else
+ {
+ tagpp->command = tagpp->fname_end + 1;
+ status = OK;
+ }
}
+ else
+ status = parse_tag_line(st->lbuf,
+#ifdef FEAT_EMACS_TAGS
+ st->is_etag,
+#endif
+ tagpp);
+
+ if (status == FAIL)
+ return TAG_MATCH_FAIL;
+
+#ifdef FEAT_EMACS_TAGS
+ if (st->is_etag)
+ tagpp->fname = st->ebuf;
+#endif
+
+ return TAG_MATCH_SUCCESS;
+}
+
+/*
+ * Initialize the structure used for tag matching.
+ */
+ static void
+findtags_matchargs_init(findtags_match_args_T *margs, int flags)
+{
+ margs->matchoff = 0; // match offset
+ margs->match_re = FALSE; // match with regexp
+ margs->match_no_ic = FALSE; // matches with case
+ margs->has_re = (flags & TAG_REGEXP); // regexp used
+ margs->sortic = FALSE; // tag file sorted in nocase
+ margs->sort_error = FALSE; // tags file not sorted
+}
+
+/*
+ * Compares the tag name in 'tagpp->tagname' with a search pattern in
+ * 'st->orgpat.head'.
+ * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag
+ * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a
+ * binary search) and TAG_MATCH_STOP if all the tags are processed without a
+ * match. Uses the values in 'margs' for doing the comparison.
+ */
+ static tagmatch_status_T
+findtags_match_tag(
+ findtags_state_T *st,
+ tagptrs_T *tagpp,
+ findtags_match_args_T *margs)
+{
+ int match = FALSE;
+ int cmplen;
// First try matching with the pattern literally (also when it is
// a regexp).
@@ -2434,41 +2429,41 @@ findtags_match_tag(
if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength'
cmplen = p_tl;
// if tag length does not match, don't try comparing
- if (st->orgpat.len != cmplen)
+ if (st->orgpat->len != cmplen)
match = FALSE;
else
{
- if (st->orgpat.regmatch.rm_ic)
+ if (st->orgpat->regmatch.rm_ic)
{
match =
- (MB_STRNICMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+ (MB_STRNICMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0);
if (match)
margs->match_no_ic =
- (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+ (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0);
}
else
- match = (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+ match = (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0);
}
// Has a regexp: Also find tags matching regexp.
margs->match_re = FALSE;
- if (!match && st->orgpat.regmatch.regprog != NULL)
+ if (!match && st->orgpat->regmatch.regprog != NULL)
{
int cc;
cc = *tagpp->tagname_end;
*tagpp->tagname_end = NUL;
- match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0);
+ match = vim_regexec(&st->orgpat->regmatch, tagpp->tagname, (colnr_T)0);
if (match)
{
- margs->matchoff = (int)(st->orgpat.regmatch.startp[0] -
+ margs->matchoff = (int)(st->orgpat->regmatch.startp[0] -
tagpp->tagname);
- if (st->orgpat.regmatch.rm_ic)
+ if (st->orgpat->regmatch.rm_ic)
{
- st->orgpat.regmatch.rm_ic = FALSE;
- margs->match_no_ic = vim_regexec(&st->orgpat.regmatch,
+ st->orgpat->regmatch.rm_ic = FALSE;
+ margs->match_no_ic = vim_regexec(&st->orgpat->regmatch,
tagpp->tagname, (colnr_T)0);
- st->orgpat.regmatch.rm_ic = TRUE;
+ st->orgpat->regmatch.rm_ic = TRUE;
}
}
*tagpp->tagname_end = cc;
@@ -2571,7 +2566,7 @@ findtags_add_match(
else
mtt = MT_GL_OTH;
}
- if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic)
+ if (st->orgpat->regmatch.rm_ic && !margs->match_no_ic)
mtt += MT_IC_OFF;
if (margs->match_re)
mtt += MT_RE_OFF;
@@ -2857,7 +2852,12 @@ line_read_in:
continue;
}
- if (findtags_parse_line(st, &tagp) == FAIL)
+ retval = findtags_parse_line(st, &tagp, margs, &search_info);
+ if (retval == TAG_MATCH_NEXT)
+ continue;
+ if (retval == TAG_MATCH_STOP)
+ break;
+ if (retval == TAG_MATCH_FAIL)
{
semsg(_(e_format_error_in_tags_file_str), st->tag_fname);
#ifdef FEAT_CSCOPE
@@ -2868,7 +2868,7 @@ line_read_in:
return;
}
- retval = findtags_match_tag(st, &tagp, margs, &search_info);
+ retval = findtags_match_tag(st, &tagp, margs);
if (retval == TAG_MATCH_NEXT)
continue;
if (retval == TAG_MATCH_STOP)
@@ -2895,7 +2895,7 @@ findtags_in_file(findtags_state_T *st, char_u *buf_ffname)
{
findtags_match_args_T margs;
#ifdef FEAT_CSCOPE
- int use_cscope = FALSE;
+ int use_cscope = (st->flags & TAG_CSCOPE);
#endif
st->vimconv.vc_type = CONV_NONE;
@@ -2906,7 +2906,6 @@ findtags_in_file(findtags_state_T *st, char_u *buf_ffname)
// A file that doesn't exist is silently ignored. Only when not a
// single file is found, an error message is given (further on).
#ifdef FEAT_CSCOPE
- use_cscope = (st->flags & TAG_CSCOPE);
if (use_cscope)
st->fp = NULL; // avoid GCC warning
else
@@ -3111,28 +3110,28 @@ find_tags(
{
// When "@ab" is specified use only the "ab" language, otherwise
// search all languages.
- if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@'
- && ASCII_ISALPHA(pat[st.orgpat.len - 2])
- && ASCII_ISALPHA(pat[st.orgpat.len - 1]))
+ if (st.orgpat->len > 3 && pat[st.orgpat->len - 3] == '@'
+ && ASCII_ISALPHA(pat[st.orgpat->len - 2])
+ && ASCII_ISALPHA(pat[st.orgpat->len - 1]))
{
- saved_pat = vim_strnsave(pat, st.orgpat.len - 3);
+ saved_pat = vim_strnsave(pat, st.orgpat->len - 3);
if (saved_pat != NULL)
{
- st.help_lang_find = &pat[st.orgpat.len - 2];
- st.orgpat.pat = saved_pat;
- st.orgpat.len -= 3;
+ st.help_lang_find = &pat[st.orgpat->len - 2];
+ st.orgpat->pat = saved_pat;
+ st.orgpat->len -= 3;
}
}
}
#endif
- if (p_tl != 0 && st.orgpat.len > p_tl) // adjust for 'taglength'
- st.orgpat.len = p_tl;
+ if (p_tl != 0 && st.orgpat->len > p_tl) // adjust for 'taglength'
+ st.orgpat->len = p_tl;
save_emsg_off = emsg_off;
emsg_off = TRUE; // don't want error for invalid RE here
- prepare_pats(&st.orgpat, has_re);
+ prepare_pats(st.orgpat, has_re);
emsg_off = save_emsg_off;
- if (has_re && st.orgpat.regmatch.regprog == NULL)
+ if (has_re && st.orgpat->regmatch.regprog == NULL)
goto findtag_end;
#ifdef FEAT_EVAL
@@ -3164,11 +3163,11 @@ find_tags(
* When the tag file is case-fold sorted, it is either one or the other.
* Only ignore case when TAG_NOIC not used or 'ignorecase' set.
*/
- st.orgpat.regmatch.rm_ic = ((p_ic || !noic)
- && (findall || st.orgpat.headlen == 0 || !p_tbs));
+ st.orgpat->regmatch.rm_ic = ((p_ic || !noic)
+ && (findall || st.orgpat->headlen == 0 || !p_tbs));
for (round = 1; round <= 2; ++round)
{
- st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2);
+ st.linear = (st.orgpat->headlen == 0 || !p_tbs || round == 2);
/*
* Try tag file names from tags option one by one.
@@ -3200,7 +3199,7 @@ find_tags(
// stop searching when already did a linear search, or when TAG_NOIC
// used, and 'ignorecase' not set or already did case-ignore search
if (st.stop_searching || st.linear || (!p_ic && noic) ||
- st.orgpat.regmatch.rm_ic)
+ st.orgpat->regmatch.rm_ic)
break;
# ifdef FEAT_CSCOPE
if (use_cscope)
@@ -3208,7 +3207,7 @@ find_tags(
# endif
// try another time while ignoring case
- st.orgpat.regmatch.rm_ic = TRUE;
+ st.orgpat->regmatch.rm_ic = TRUE;
}
if (!st.stop_searching)
diff --git a/src/version.c b/src/version.c
index 2bc2cc48e3..7ac37a41f4 100644
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 4562,
+/**/
4561,
/**/
4560,