patch 8.2.4538: the find_tags_in_file() function is too long (v8.2.4538)

Problem: The find_tags_in_file() function is too long. Solution: Refactor into smaller functions. (Yegappan Lakshmanan, closes #9920)
author: Yegappan Lakshmanan <yegappan@yahoo.com> 2022-03-10 18:36:54 +0000
committer: Bram Moolenaar <Bram@vim.org> 2022-03-10 18:36:54 +0000
commit: bf40e90dfeb1d3d0280077e65782beb3fee31c9f (patch)
tree: 0d25c44b993fb8c6909eab71b8e274143255381d
parent: 48f83c31d14702b40ad0bf3c61091871d9db072d (diff)
3 files changed, 746 insertions, 628 deletions
diff --git a/src/tag.c b/src/tag.c
index 65723ed573..aa0f6e5d7e 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -39,6 +39,68 @@ typedef struct tag_pointers
 } tagptrs_T;
 
 /*
+ * Return values used when reading lines from a tags file.
+ */
+typedef enum
+{
+    TAGS_READ_SUCCESS = 1,	// a tags line was read and should be processed
+    TAGS_READ_EOF,		// the end of the tags file was reached
+    TAGS_READ_IGNORE,		// the current line should be ignored
+} tags_read_status_T;
+
+/*
+ * States used during a tags search
+ */
+typedef enum
+{
+    TS_START,		// at start of file
+    TS_LINEAR,		// linear searching forward, till EOF
+    TS_BINARY,		// binary searching
+    TS_SKIP_BACK,	// skipping backwards
+    TS_STEP_FORWARD	// stepping forwards
+} tagsearch_state_T;	// Current search state
+
+/*
+ * Binary search file offsets in a tags file
+ */
+typedef struct
+{
+    off_T	low_offset;	// offset for first char of first line that
+				// could match
+    off_T	high_offset;	// offset of char after last line that could
+				// match
+    off_T	curr_offset;	// Current file offset in search range
+    off_T	curr_offset_used; // curr_offset used when skipping back
+    off_T	match_offset;	// Where the binary search found a tag
+    int	low_char;		// first char at low_offset
+    int	high_char;		// first char at high_offset
+} tagsearch_info_T;
+
+/*
+ * Return values used when matching tags against a pattern.
+ */
+typedef enum
+{
+    TAG_MATCH_SUCCESS = 1,	// the tag matches the pattern
+    TAG_MATCH_FAIL,		// the tag doesn't match the pattern
+    TAG_MATCH_STOP,		// all tags were processed without a match
+    TAG_MATCH_NEXT		// look for the next matching tag
+} tagmatch_status_T;
+
+/*
+ * Arguments used for matching tags read from a tags file against a pattern.
+ */
+typedef struct
+{
+    int	matchoff;		// tag match offset
+    int	match_re;		// TRUE if the tag matches a regexp
+    int	match_no_ic;		// TRUE if the tag matches with case
+    int	has_re;			// regular expression used
+    int	sortic;			// tags file sorted ignoring case (foldcase)
+    int	sort_error;		// tags file not sorted
+} findtags_match_args_T;
+
+/*
  * The matching tags are first stored in one of the hash tables.  In
  * which one depends on the priority of the match.
  * ht_match[] is used to find duplicates, ga_match[] to keep them in sequence.
@@ -1577,13 +1639,20 @@ find_tagfunc_tags(
 /*
  * State information used during a tag search
  */
-typedef struct {
+typedef struct
+{
+    tagsearch_state_T	state;		// tag search state
     char_u	*tag_fname;		// name of the tag file
+    FILE	*fp;			// current tags file pointer
     pat_T	orgpat;			// holds unconverted pattern info
-    int		name_only;		// get only tag names
-    int		get_searchpat;	// used for 'showfulltag'
+    int		flags;			// flags used for tag search
+    int		tag_file_sorted;	// !_TAG_FILE_SORTED value
+    int		get_searchpat;		// used for 'showfulltag'
     int		help_only;		// only search for help tags
+    vimconv_T	vimconv;
 #ifdef FEAT_MULTI_LANG
+    char_u	help_lang[3];		// lang of current tags file
+    int		help_pri;		// help language priority
     char_u	*help_lang_find;	// lang to be found
     int		is_txt;			// flag of file extension
 #endif
@@ -1594,6 +1663,7 @@ typedef struct {
     char_u     *lbuf;			// line buffer
     int		lbuf_size;		// length of lbuf
 #ifdef FEAT_EMACS_TAGS
+    int		is_etag;		// current file is emacs style
     char_u	*ebuf;			// additional buffer for etag fname
 #endif
     int		match_count;		// number of matches found
@@ -1615,13 +1685,17 @@ findtags_state_init(
     int		mtt;
 
     st->tag_fname = alloc(MAXPATHL + 1);
+    st->fp = NULL;
     st->orgpat.pat = pat;
     st->orgpat.len = (int)STRLEN(pat);
     st->orgpat.regmatch.regprog = NULL;
+    st->flags = flags;
+    st->tag_file_sorted = NUL;
     st->help_only = (flags & TAG_HELP);
-    st->name_only = (flags & TAG_NAMES);
     st->get_searchpat = FALSE;
 #ifdef FEAT_MULTI_LANG
+    st->help_lang[0] = NUL;
+    st->help_pri = 0;
     st->help_lang_find = NULL;
     st->is_txt = FALSE;
 #endif
@@ -1669,66 +1743,63 @@ findtags_state_free(findtags_state_T *st)
 
 #ifdef FEAT_MULTI_LANG
 /*
- * Initialize the state for searching tags in a Vim help file.
+ * Initialize the language and priority used for searching tags in a Vim help
+ * file.
  * Returns TRUE to process the help file and FALSE to skip the file.
  */
     static int
-findtags_in_help_init(
-    findtags_state_T	*st,
-    int			flags,
-    char_u		*help_lang,
-    int			*help_pri)
+findtags_in_help_init(findtags_state_T *st)
 {
     int		i;
     char_u	*s;
 
-    // Keep en if the file extension is .txt
+    // Keep 'en' as the language if the file extension is '.txt'
     if (st->is_txt)
-	STRCPY(help_lang, "en");
+	STRCPY(st->help_lang, "en");
     else
     {
-	// Prefer help tags according to 'helplang'.  Put the
-	// two-letter language name in help_lang[].
+	// Prefer help tags according to 'helplang'.  Put the two-letter
+	// language name in help_lang[].
 	i = (int)STRLEN(st->tag_fname);
 	if (i > 3 && st->tag_fname[i - 3] == '-')
-	    STRCPY(help_lang, st->tag_fname + i - 2);
+	    STRCPY(st->help_lang, st->tag_fname + i - 2);
 	else
-	    STRCPY(help_lang, "en");
+	    STRCPY(st->help_lang, "en");
     }
-    // When searching for a specific language skip tags files
-    // for other languages.
+    // When searching for a specific language skip tags files for other
+    // languages.
     if (st->help_lang_find != NULL
-	    && STRICMP(help_lang, st->help_lang_find) != 0)
+	    && STRICMP(st->help_lang, st->help_lang_find) != 0)
 	return FALSE;
 
-    // For CTRL-] in a help file prefer a match with the same
-    // language.
-    if ((flags & TAG_KEEP_LANG)
+    // For CTRL-] in a help file prefer a match with the same language.
+    if ((st->flags & TAG_KEEP_LANG)
 	    && st->help_lang_find == NULL
 	    && curbuf->b_fname != NULL
 	    && (i = (int)STRLEN(curbuf->b_fname)) > 4
 	    && curbuf->b_fname[i - 1] == 'x'
 	    && curbuf->b_fname[i - 4] == '.'
-	    && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0)
-	*help_pri = 0;
+	    && STRNICMP(curbuf->b_fname + i - 3, st->help_lang, 2) == 0)
+	st->help_pri = 0;
     else
     {
-	*help_pri = 1;
+	// search for the language in 'helplang'
+	st->help_pri = 1;
 	for (s = p_hlg; *s != NUL; ++s)
 	{
-	    if (STRNICMP(s, help_lang, 2) == 0)
+	    if (STRNICMP(s, st->help_lang, 2) == 0)
 		break;
-	    ++*help_pri;
+	    ++st->help_pri;
 	    if ((s = vim_strchr(s, ',')) == NULL)
 		break;
 	}
 	if (s == NULL || *s == NUL)
 	{
-	    // Language not in 'helplang': use last, prefer English,
-	    // unless found already.
-	    ++*help_pri;
-	    if (STRICMP(help_lang, "en") != 0)
-		++*help_pri;
+	    // Language not in 'helplang': use last, prefer English, unless
+	    // found already.
+	    ++st->help_pri;
+	    if (STRICMP(st->help_lang, "en") != 0)
+		++st->help_pri;
 	}
     }
 
@@ -1738,18 +1809,15 @@ findtags_in_help_init(
 
 #ifdef FEAT_EVAL
 /*
- * Use the 'tagfunc' (if configured and enabled) to get the tags.
+ * Use the function set in 'tagfunc' (if configured and enabled) to get the
+ * tags.
  * Return OK if at least 1 tag has been successfully found, NOTDONE if the
  * 'tagfunc' is not used or the 'tagfunc' returns v:null and FAIL otherwise.
  */
     static int
-findtags_apply_tfu(
-    char_u		*pat,
-    findtags_state_T	*st,
-    int			flags,
-    char_u		*buf_ffname)
+findtags_apply_tfu(findtags_state_T *st, char_u *pat, char_u *buf_ffname)
 {
-    int         use_tfu = ((flags & TAG_NO_TAGFUNC) == 0);
+    int         use_tfu = ((st->flags & TAG_NO_TAGFUNC) == 0);
     int		retval;
 
     if (!use_tfu || tfu_in_use || *curbuf->b_p_tfu == NUL)
@@ -1757,7 +1825,7 @@ findtags_apply_tfu(
 
     tfu_in_use = TRUE;
     retval = find_tagfunc_tags(pat, st->ga_match, &st->match_count,
-							flags, buf_ffname);
+						st->flags, buf_ffname);
     tfu_in_use = FALSE;
 
     return retval;
@@ -1799,14 +1867,14 @@ emacs_tags_incstack_free(void)
  * then returns a pointer to the new tags file. The old file pointer is saved
  * in incstack.
  */
-    static FILE *
-emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
+    static void
+emacs_tags_new_filename(findtags_state_T *st)
 {
     char_u	*p;
     char_u	*fullpath_ebuf;
 
-    if (vim_fgets(st->ebuf, LSIZE, fp))
-	return fp;
+    if (vim_fgets(st->ebuf, LSIZE, st->fp))
+	return;
 
     for (p = st->ebuf; *p && *p != ','; p++)
 	;
@@ -1817,41 +1885,41 @@ emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
     // include statement. Skip the included tags file if it exceeds the
     // maximum.
     if (STRNCMP(p + 1, "include", 7) != 0 || incstack_idx >= INCSTACK_SIZE)
-	return fp;
+	return;
 
     // Save current "fp" and "tag_fname" in the stack.
     incstack[incstack_idx].etag_fname = vim_strsave(st->tag_fname);
     if (incstack[incstack_idx].etag_fname == NULL)
-	return fp;
+	return;
 
-    incstack[incstack_idx].fp = fp;
-    fp = NULL;
+    incstack[incstack_idx].fp = st->fp;
+    st->fp = NULL;
 
     // Figure out "tag_fname" and "fp" to use for
     // included file.
     fullpath_ebuf = expand_tag_fname(st->ebuf, st->tag_fname, FALSE);
     if (fullpath_ebuf != NULL)
     {
-	fp = mch_fopen((char *)fullpath_ebuf, "r");
-	if (fp != NULL)
+	st->fp = mch_fopen((char *)fullpath_ebuf, "r");
+	if (st->fp != NULL)
 	{
 	    if (STRLEN(fullpath_ebuf) > LSIZE)
 		semsg(_(e_tag_file_path_truncated_for_str), st->ebuf);
 	    vim_strncpy(st->tag_fname, fullpath_ebuf, MAXPATHL);
 	    ++incstack_idx;
-	    *is_etag = 0; // we can include anything
+	    st->is_etag = FALSE; // we can include anything
 	}
 	vim_free(fullpath_ebuf);
     }
-    if (fp == NULL)
+    if (st->fp == NULL)
     {
 	// Can't open the included file, skip it and
 	// restore old value of "fp".
-	fp = incstack[incstack_idx].fp;
+	st->fp = incstack[incstack_idx].fp;
 	vim_free(incstack[incstack_idx].etag_fname);
     }
 
-    return fp;
+    return;
 }
 
 /*
@@ -1862,15 +1930,15 @@ emacs_tags_new_filename(findtags_state_T *st, FILE *fp, int *is_etag)
  * continue with the parent tags file. Otherwise returns FALSE.
  */
     static int
-emacs_tags_file_eof(findtags_state_T *st, FILE **fp)
+emacs_tags_file_eof(findtags_state_T *st)
 {
     if (!incstack_idx)	// reached end of file. stop processing.
 	return FALSE;
 
     // reached the end of an included tags file. pop it.
     --incstack_idx;
-    fclose(*fp);	// end of this file ...
-    *fp = incstack[incstack_idx].fp;
+    fclose(st->fp);	// end of this file ...
+    st->fp = incstack[incstack_idx].fp;
     STRCPY(st->tag_fname, incstack[incstack_idx].etag_fname);
     vim_free(incstack[incstack_idx].etag_fname);
 
@@ -1943,47 +2011,489 @@ etag_fail:
 #endif
 
 /*
+ * Read the next line from a tags file.
+ * Returns TAGS_READ_SUCCESS if a tags line is successfully read and should be
+ * processed.
+ * Returns TAGS_READ_EOF if the end of file is reached.
+ * Returns TAGS_READ_IGNORE if the current line should be ignored (used when
+ * the end of an emacs included tags file is reached).
+ */
+    static tags_read_status_T
+findtags_get_next_line(findtags_state_T *st, tagsearch_info_T *sinfo_p)
+{
+    int		eof;
+    off_T	offset;
+
+    // For binary search: compute the next offset to use.
+    if (st->state == TS_BINARY)
+    {
+	offset = sinfo_p->low_offset + ((sinfo_p->high_offset
+						- sinfo_p->low_offset) / 2);
+	if (offset == sinfo_p->curr_offset)
+	    return TAGS_READ_EOF; // End the binary search without a match.
+	else
+	    sinfo_p->curr_offset = offset;
+    }
+
+    // Skipping back (after a match during binary search).
+    else if (st->state == TS_SKIP_BACK)
+    {
+	sinfo_p->curr_offset -= st->lbuf_size * 2;
+	if (sinfo_p->curr_offset < 0)
+	{
+	    sinfo_p->curr_offset = 0;
+	    rewind(st->fp);
+	    st->state = TS_STEP_FORWARD;
+	}
+    }
+
+    // When jumping around in the file, first read a line to find the
+    // start of the next line.
+    if (st->state == TS_BINARY || st->state == TS_SKIP_BACK)
+    {
+	// Adjust the search file offset to the correct position
+	sinfo_p->curr_offset_used = sinfo_p->curr_offset;
+	vim_fseek(st->fp, sinfo_p->curr_offset, SEEK_SET);
+	eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+	if (!eof && sinfo_p->curr_offset != 0)
+	{
+	    sinfo_p->curr_offset = vim_ftell(st->fp);
+	    if (sinfo_p->curr_offset == sinfo_p->high_offset)
+	    {
+		// oops, gone a bit too far; try from low offset
+		vim_fseek(st->fp, sinfo_p->low_offset, SEEK_SET);
+		sinfo_p->curr_offset = sinfo_p->low_offset;
+	    }
+	    eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+	}
+	// skip empty and blank lines
+	while (!eof && vim_isblankline(st->lbuf))
+	{
+	    sinfo_p->curr_offset = vim_ftell(st->fp);
+	    eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+	}
+	if (eof)
+	{
+	    // Hit end of file.  Skip backwards.
+	    st->state = TS_SKIP_BACK;
+	    sinfo_p->match_offset = vim_ftell(st->fp);
+	    sinfo_p->curr_offset = sinfo_p->curr_offset_used;
+	    return TAGS_READ_IGNORE;
+	}
+    }
+    // Not jumping around in the file: Read the next line.
+    else
+    {
+	// skip empty and blank lines
+	do
+	{
+#ifdef FEAT_CSCOPE
+	    if (st->flags & TAG_CSCOPE)
+		eof = cs_fgets(st->lbuf, st->lbuf_size);
+	    else
+#endif
+	    {
+		sinfo_p->curr_offset = vim_ftell(st->fp);
+		eof = vim_fgets(st->lbuf, st->lbuf_size, st->fp);
+	    }
+	} while (!eof && vim_isblankline(st->lbuf));
+
+	if (eof)
+	{
+#ifdef FEAT_EMACS_TAGS
+	    if (emacs_tags_file_eof(st) == TRUE)
+	    {
+		// an included tags file. Continue processing the parent
+		// tags file.
+		st->is_etag = TRUE;	// (only etags can include)
+		return TAGS_READ_IGNORE;
+	    }
+#endif
+	    return TAGS_READ_EOF;
+	}
+    }
+
+    return TAGS_READ_SUCCESS;
+}
+
+/*
  * Parse a tags file header line in 'st->lbuf'.
- * Returns TRUE to read the next header line and FALSE to process the line.
+ * Returns TRUE if the current line in st->lbuf is not a tags header line and
+ * should be parsed as a regular tag line. Returns FALSE if the line is a
+ * header line and the next header line should be read.
  */
     static int
-tags_file_hdr_parse(findtags_state_T *st, vimconv_T *vcp, int *sorted_file)
+findtags_hdr_parse(findtags_state_T *st)
 {
     char_u	*p;
 
+    // Header lines in a tags file start with "!_TAG_"
     if (STRNCMP(st->lbuf, "!_TAG_", 6) != 0)
 	// Non-header item before the header, e.g. "!" itself.
-	return FALSE;
+	return TRUE;
 
-    // Read header line.
+    // Process the header line.
     if (STRNCMP(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
-	*sorted_file = st->lbuf[18];
+	st->tag_file_sorted = st->lbuf[18];
     if (STRNCMP(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
     {
-	// Prepare to convert every line from the specified
-	// encoding to 'encoding'.
+	// Prepare to convert every line from the specified encoding to
+	// 'encoding'.
 	for (p = st->lbuf + 20; *p > ' ' && *p < 127; ++p)
 	    ;
 	*p = NUL;
-	convert_setup(vcp, st->lbuf + 20, p_enc);
+	convert_setup(&st->vimconv, st->lbuf + 20, p_enc);
     }
 
     // Read the next line.  Unrecognized flags are ignored.
+    return FALSE;
+}
+
+/*
+ * Handler to initialize the state when starting to process a new tags file.
+ * Called in the TS_START state when finding tags from a tags file.
+ * Returns TRUE if the line read from the tags file should be parsed and
+ * FALSE if the line should be ignored.
+ */
+    static int
+findtags_start_state_handler(
+    findtags_state_T	*st,
+    int			*sortic,
+    tagsearch_info_T	*sinfo_p)
+{
+#ifdef FEAT_CSCOPE
+    int		use_cscope = (st->flags & TAG_CSCOPE);
+#endif
+    int		noic = (st->flags & TAG_NOIC);
+    off_T	filesize;
+
+    // The header ends when the line sorts below "!_TAG_".  When
+    // case is folded lower case letters sort before "_".
+    if (STRNCMP(st->lbuf, "!_TAG_", 6) <= 0
+	    || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1])))
+	return findtags_hdr_parse(st);
+
+    // The header ends here.
+
+    // When there is no tag head, or ignoring case, need to do a
+    // linear search.
+    // When no "!_TAG_" is found, default to binary search.  If
+    // the tag file isn't sorted, the second loop will find it.
+    // When "!_TAG_FILE_SORTED" found: start binary search if
+    // flag set.
+    // For cscope, it's always linear.
+# ifdef FEAT_CSCOPE
+    if (st->linear || use_cscope)
+# else
+    if (st->linear)
+# endif
+	st->state = TS_LINEAR;
+    else if (st->tag_file_sorted == NUL)
+	st->state = TS_BINARY;
+    else if (st->tag_file_sorted == '1')
+	st->state = TS_BINARY;
+    else if (st->tag_file_sorted == '2')
+    {
+	st->state = TS_BINARY;
+	*sortic = TRUE;
+	st->orgpat.regmatch.rm_ic = (p_ic || !noic);
+    }
+    else
+	st->state = TS_LINEAR;
+
+    if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic)
+    {
+	// Binary search won't work for ignoring case, use linear
+	// search.
+	st->linear = TRUE;
+	st->state = TS_LINEAR;
+    }
+
+    // When starting a binary search, get the size of the file and
+    // compute the first offset.
+    if (st->state == TS_BINARY)
+    {
+	if (vim_fseek(st->fp, 0L, SEEK_END) != 0)
+	    // can't seek, don't use binary search
+	    st->state = TS_LINEAR;
+	else
+	{
+	    // Get the tag file size (don't use mch_fstat(), it's
+	    // not portable).  Don't use lseek(), it doesn't work
+	    // properly on MacOS Catalina.
+	    filesize = vim_ftell(st->fp);
+	    vim_fseek(st->fp, 0L, SEEK_SET);
+
+	    // Calculate the first read offset in the file.  Start
+	    // the search in the middle of the file.
+	    sinfo_p->low_offset = 0;
+	    sinfo_p->low_char = 0;
+	    sinfo_p->high_offset = filesize;
+	    sinfo_p->curr_offset = 0;
+	    sinfo_p->high_char = 0xff;
+	}
+	return FALSE;
+    }
+
     return TRUE;
 }
 
 /*
+ * Parse a tag line read from a tags file.
+ * Returns OK if a tags line is successfully parsed.
+ * Returns FAIL if an error is encountered.
+ */
+    static int
+findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp)
+{
+    int		status;
+
+    // Figure out where the different strings are in this line.
+    // For "normal" tags: Do a quick check if the tag matches.
+    // This speeds up tag searching a lot!
+    if (st->orgpat.headlen
+#ifdef FEAT_EMACS_TAGS
+	    && !st->is_etag
+#endif
+       )
+    {
+	CLEAR_FIELD(*tagpp);
+	tagpp->tagname = st->lbuf;
+	tagpp->tagname_end = vim_strchr(st->lbuf, TAB);
+	if (tagpp->tagname_end == NULL)
+	{
+	    // Corrupted tag line.
+	    return FAIL;
+	}
+
+	// Can be a matching tag, isolate the file name and command.
+	tagpp->fname = tagpp->tagname_end + 1;
+	tagpp->fname_end = vim_strchr(tagpp->fname, TAB);
+	if (tagpp->fname_end == NULL)
+	    status = FAIL;
+	else
+	{
+	    tagpp->command = tagpp->fname_end + 1;
+	    status = OK;
+	}
+    }
+    else
+	status = parse_tag_line(st->lbuf,
+#ifdef FEAT_EMACS_TAGS
+		st->is_etag,
+#endif
+		tagpp);
+
+    if (status == FAIL)
+	return FAIL;
+
+#ifdef FEAT_EMACS_TAGS
+    if (st->is_etag)
+	tagpp->fname = st->ebuf;
+#endif
+
+    return OK;
+}
+
+/*
+ * Initialize the structure used for tag matching.
+ */
+    static void
+findtags_matchargs_init(findtags_match_args_T *margs, int flags)
+{
+    margs->matchoff = 0;			// match offset
+    margs->match_re = FALSE;			// match with regexp
+    margs->match_no_ic = FALSE;			// matches with case
+    margs->has_re = (flags & TAG_REGEXP);	// regexp used
+    margs->sortic = FALSE;			// tag file sorted in nocase
+    margs->sort_error = FALSE;			// tags file not sorted
+}
+
+/*
+ * Compares the tag name in 'tagpp->tagname' with a search pattern in
+ * 'st->orgpat.head'.
+ * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag
+ * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a
+ * binary search) and TAG_MATCH_STOP if all the tags are processed without a
+ * match. Uses the values in 'margs' for doing the comparison.
+ */
+    static tagmatch_status_T
+findtags_match_tag(
+    findtags_state_T	*st,
+    tagptrs_T		*tagpp,
+    findtags_match_args_T *margs,
+    tagsearch_info_T	*sinfo_p)
+{
+    int		match = FALSE;
+    int		cmplen;
+    int		i;
+    int		tagcmp;
+
+    // Skip this line if the length of the tag is different and
+    // there is no regexp, or the tag is too short.
+    if (st->orgpat.headlen
+#ifdef FEAT_EMACS_TAGS
+	    && !st->is_etag
+#endif
+       )
+    {
+	cmplen = (int)(tagpp->tagname_end - tagpp->tagname);
+	if (p_tl != 0 && cmplen > p_tl)	    // adjust for 'taglength'
+	    cmplen = p_tl;
+	if (margs->has_re && st->orgpat.headlen < cmplen)
+	    cmplen = st->orgpat.headlen;
+	else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen)
+	    return TAG_MATCH_FAIL;
+
+	if (st->state == TS_BINARY)
+	{
+	    // Simplistic check for unsorted tags file.
+	    i = (int)tagpp->tagname[0];
+	    if (margs->sortic)
+		i = (int)TOUPPER_ASC(tagpp->tagname[0]);
+	    if (i < sinfo_p->low_char || i > sinfo_p->high_char)
+		margs->sort_error = TRUE;
+
+	    // Compare the current tag with the searched tag.
+	    if (margs->sortic)
+		tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat.head,
+							(size_t)cmplen);
+	    else
+		tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen);
+
+	    // A match with a shorter tag means to search forward.
+	    // A match with a longer tag means to search backward.
+	    if (tagcmp == 0)
+	    {
+		if (cmplen < st->orgpat.headlen)
+		    tagcmp = -1;
+		else if (cmplen > st->orgpat.headlen)
+		    tagcmp = 1;
+	    }
+
+	    if (tagcmp == 0)
+	    {
+		// We've located the tag, now skip back and search
+		// forward until the first matching tag is found.
+		st->state = TS_SKIP_BACK;
+		sinfo_p->match_offset = sinfo_p->curr_offset;
+		return TAG_MATCH_NEXT;
+	    }
+	    if (tagcmp < 0)
+	    {
+		sinfo_p->curr_offset = vim_ftell(st->fp);
+		if (sinfo_p->curr_offset < sinfo_p->high_offset)
+		{
+		    sinfo_p->low_offset = sinfo_p->curr_offset;
+		    if (margs->sortic)
+			sinfo_p->low_char = TOUPPER_ASC(tagpp->tagname[0]);
+		    else
+			sinfo_p->low_char = tagpp->tagname[0];
+		    return TAG_MATCH_NEXT;
+		}
+	    }
+	    if (tagcmp > 0 && sinfo_p->curr_offset != sinfo_p->high_offset)
+	    {
+		sinfo_p->high_offset = sinfo_p->curr_offset;
+		if (margs->sortic)
+		    sinfo_p->high_char = TOUPPER_ASC(tagpp->tagname[0]);
+		else
+		    sinfo_p->high_char = tagpp->tagname[0];
+		return TAG_MATCH_NEXT;
+	    }
+
+	    // No match yet and are at the end of the binary search.
+	    return TAG_MATCH_STOP;
+	}
+	else if (st->state == TS_SKIP_BACK)
+	{
+	    if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+		st->state = TS_STEP_FORWARD;
+	    else
+		// Have to skip back more.  Restore the curr_offset
+		// used, otherwise we get stuck at a long line.
+		sinfo_p->curr_offset = sinfo_p->curr_offset_used;
+	    return TAG_MATCH_NEXT;
+	}
+	else if (st->state == TS_STEP_FORWARD)
+	{
+	    if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+	    {
+		if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset)
+		    return TAG_MATCH_STOP;	// past last match
+		else
+		    return TAG_MATCH_NEXT;	// before first match
+	    }
+	}
+	else
+	    // skip this match if it can't match
+	    if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0)
+		return TAG_MATCH_FAIL;
+    }
+
+    // First try matching with the pattern literally (also when it is
+    // a regexp).
+    cmplen = (int)(tagpp->tagname_end - tagpp->tagname);
+    if (p_tl != 0 && cmplen > p_tl)	    // adjust for 'taglength'
+	cmplen = p_tl;
+    // if tag length does not match, don't try comparing
+    if (st->orgpat.len != cmplen)
+	match = FALSE;
+    else
+    {
+	if (st->orgpat.regmatch.rm_ic)
+	{
+	    match =
+		(MB_STRNICMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+	    if (match)
+		margs->match_no_ic =
+		    (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+	}
+	else
+	    match = (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0);
+    }
+
+    // Has a regexp: Also find tags matching regexp.
+    margs->match_re = FALSE;
+    if (!match && st->orgpat.regmatch.regprog != NULL)
+    {
+	int	cc;
+
+	cc = *tagpp->tagname_end;
+	*tagpp->tagname_end = NUL;
+	match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0);
+	if (match)
+	{
+	    margs->matchoff = (int)(st->orgpat.regmatch.startp[0] -
+							tagpp->tagname);
+	    if (st->orgpat.regmatch.rm_ic)
+	    {
+		st->orgpat.regmatch.rm_ic = FALSE;
+		margs->match_no_ic = vim_regexec(&st->orgpat.regmatch,
+			tagpp->tagname, (colnr_T)0);
+		st->orgpat.regmatch.rm_ic = TRUE;
+	    }
+	}
+	*tagpp->tagname_end = cc;
+	margs->match_re = TRUE;
+    }
+
+    return match ? TAG_MATCH_SUCCESS : TAG_MATCH_FAIL;
+}
+
+/*
  * Convert the encoding of a line read from a tags file in 'st->lbuf'.
  * Converting the pattern from 'enc' to the tags file encoding doesn't work,
- * because characters are not recognized.
+ * because characters are not recognized. The converted line is saved in
+ * st->lbuf.
  */
     static void
-findtags_string_convert(findtags_state_T *st, vimconv_T *vcp)
+findtags_string_convert(findtags_state_T *st)
 {
     char_u	*conv_line;
     int		len;
 
-    conv_line = string_convert(vcp, st->lbuf, NULL);
+    conv_line = string_convert(&st->vimconv, st->lbuf, NULL);
     if (conv_line == NULL)
 	return;
 
@@ -2002,23 +2512,23 @@ findtags_string_convert(findtags_state_T *st, vimconv_T *vcp)
     }
 }
 
+/*
+ * Add a matching tag found in a tags file to st->ht_match and st->ga_match.
+ * Returns OK if successfully added the match and FAIL on memory allocation
+ * failure.
+ */
     static int
 findtags_add_match(
     findtags_state_T	*st,
-    tagptrs_T		*tagp,
+    tagptrs_T		*tagpp,
+    findtags_match_args_T   *margs,
     char_u		*buf_ffname,
-    int			flags UNUSED,
-    hash_T		*hash,
-    int			match_re,
-    int			match_no_ic,
-    int			matchoff,
-    int			is_etag UNUSED,
-    char_u		*help_lang UNUSED,
-    int			help_pri UNUSED)
+    hash_T		*hash)
 {
 #ifdef FEAT_CSCOPE
-    int		use_cscope = (flags & TAG_CSCOPE);
+    int		use_cscope = (st->flags & TAG_CSCOPE);
 #endif
+    int		name_only = (st->flags & TAG_NAMES);
     int		mtt;
     int		len = 0;
     int		is_current;		// file name matches
@@ -2039,15 +2549,14 @@ findtags_add_match(
 	// Decide in which array to store this match.
 	is_current = test_for_current(
 #ifdef FEAT_EMACS_TAGS
-		is_etag,
+		st->is_etag,
 #endif
-		tagp->fname, tagp->fname_end, st->tag_fname,
-		buf_ffname);
+		tagpp->fname, tagpp->fname_end, st->tag_fname, buf_ffname);
 #ifdef FEAT_EMACS_TAGS
 	is_static = FALSE;
-	if (!is_etag)	// emacs tags are never static
+	if (!st->is_etag)	// emacs tags are never static
 #endif
-	    is_static = test_for_static(tagp);
+	    is_static = test_for_static(tagpp);
 
 	// decide in which of the sixteen tables to store this
 	// match
@@ -2065,9 +2574,9 @@ findtags_add_match(
 	    else
 		mtt = MT_GL_OTH;
 	}
-	if (st->orgpat.regmatch.rm_ic && !match_no_ic)
+	if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic)
 	    mtt += MT_IC_OFF;
-	if (match_re)
+	if (margs->match_re)
 	    mtt += MT_RE_OFF;
     }
 
@@ -2085,35 +2594,36 @@ findtags_add_match(
 	// sorting it later.  The heuristic is ignored for
 	// detecting duplicates.
 	// The format is {tagname}@{lang}NUL{heuristic}NUL
-	*tagp->tagname_end = NUL;
-	len = (int)(tagp->tagname_end - tagp->tagname);
+	*tagpp->tagname_end = NUL;
+	len = (int)(tagpp->tagname_end - tagpp->tagname);
 	mfp = alloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1);
 	if (mfp != NULL)
 	{
 	    int heuristic;
 
 	    p = mfp;
-	    STRCPY(p, tagp->tagname);
+	    STRCPY(p, tagpp->tagname);
 #ifdef FEAT_MULTI_LANG
 	    p[len] = '@';
-	    STRCPY(p + len + 1, help_lang);
+	    STRCPY(p + len + 1, st->help_lang);
 #endif
 
-	    heuristic = help_heuristic(tagp->tagname,
-		    match_re ? matchoff : 0, !match_no_ic);
+	    heuristic = help_heuristic(tagpp->tagname,
+				margs->match_re ? margs->matchoff : 0,
+				!margs->match_no_ic);
 #ifdef FEAT_MULTI_LANG
-	    heuristic += help_pri;
+	    heuristic += st->help_pri;
 #endif
 	    sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
 		    heuristic);
 	}
-	*tagp->tagname_end = TAB;
+	*tagpp->tagname_end = TAB;
     }
-    else if (st->name_only)
+    else if (name_only)
     {
 	if (st->get_searchpat)
 	{
-	    char_u *temp_end = tagp->command;
+	    char_u *temp_end = tagpp->command;
 
 	    if (*temp_end == '/')
 		while (*temp_end && *temp_end != '\r'
@@ -2121,12 +2631,12 @@ findtags_add_match(
 			&& *temp_end != '$')
 		    temp_end++;
 
-	    if (tagp->command + 2 < temp_end)
+	    if (tagpp->command + 2 < temp_end)
 	    {
-		len = (int)(temp_end - tagp->command - 2);
+		len = (int)(temp_end - tagpp->command - 2);
 		mfp = alloc(len + 2);
 		if (mfp != NULL)
-		    vim_strncpy(mfp, tagp->command + 2, len);
+		    vim_strncpy(mfp, tagpp->command + 2, len);
 	    }
 	    else
 		mfp = NULL;
@@ -2134,10 +2644,10 @@ findtags_add_match(
 	}
 	else
 	{
-	    len = (int)(tagp->tagname_end - tagp->tagname);
+	    len = (int)(tagpp->tagname_end - tagpp->tagname);
 	    mfp = alloc(sizeof(char_u) + len + 1);
 	    if (mfp != NULL)
-		vim_strncpy(mfp, tagp->tagname, len);
+		vim_strncpy(mfp, tagpp->tagname, len);
 
 	    // if wanted, re-read line to get long form too
 	    if (State & INSERT)
@@ -2161,7 +2671,7 @@ findtags_add_match(
 	// Here <mtt> is the "mtt" value plus 1 to avoid NUL.
 	len = (int)tag_fname_len + (int)STRLEN(st->lbuf) + 3;
 #ifdef FEAT_EMACS_TAGS
-	if (is_etag)
+	if (st->is_etag)
 	{
 	    ebuf_len = STRLEN(st->ebuf);
 	    len += (int)ebuf_len + 1;
@@ -2183,7 +2693,7 @@ findtags_add_match(
 	    p[tag_fname_len + 1] = TAG_SEP;
 	    s = p + 1 + tag_fname_len + 1;
 #ifdef FEAT_EMACS_TAGS
-	    if (is_etag)
+	    if (st->is_etag)
 	    {
 		STRCPY(s, st->ebuf);
 		s[ebuf_len] = TAG_SEP;
@@ -2222,12 +2732,10 @@ findtags_add_match(
 		st->stop_searching = TRUE;
 		return FAIL;
 	    }
-	    else
-	    {
-		((char_u **)(st->ga_match[mtt].ga_data))
-		    [st->ga_match[mtt].ga_len++] = mfp;
-		st->match_count++;
-	    }
+
+	    ((char_u **)(st->ga_match[mtt].ga_data))
+		[st->ga_match[mtt].ga_len++] = mfp;
+	    st->match_count++;
 	}
 	else
 	    // duplicate tag, drop it
@@ -2238,117 +2746,37 @@ findtags_add_match(
 }
 
 /*
- * Search for tags matching 'st->orgpat.pat' in the 'st->tag_fname' tags file.
- * Information needed to search for the tags is in the 'st' state structure.
- * The matching tags are returned in 'st'.
- * Returns OK if successfully processed the file and FAIL on memory allocation
- * failure.
+ * Read and get all the tags from file st->tag_fname.
+ * Returns OK if all the tags are processed successfully and FAIL if a tag
+ * format error is encountered.
  */
     static int
-find_tags_in_file(
-    findtags_state_T	*st,
-    int			flags,
-    char_u		*buf_ffname)
+findtags_get_all_tags(
+    findtags_state_T		*st,
+    findtags_match_args_T	*margs,
+    char_u			*buf_ffname)
 {
-    FILE       *fp = NULL;
-    tagptrs_T	tagp;
-    int		eof = FALSE;		// found end-of-file
-    int		i;
-#ifdef FEAT_MULTI_LANG
-    int		help_pri = 0;
-    char_u	help_lang[3] = "";	// lang of current tags file
-#endif
-    int		tag_file_sorted = NUL;	// !_TAG_FILE_SORTED value
-    off_T	filesize;
-    int		tagcmp;
-    off_T	offset;
-    enum
-    {
-	TS_START,		// at start of file
-	TS_LINEAR,		// linear searching forward, till EOF
-	TS_BINARY,		// binary searching
-	TS_SKIP_BACK,		// skipping backwards
-	TS_STEP_FORWARD		// stepping forwards
-    } state;			// Current search state
-    struct tag_search_info	// Binary search file offsets
-    {
-	off_T	low_offset;	// offset for first char of first line that
-				// could match
-	off_T	high_offset;	// offset of char after last line that could
-				// match
-	off_T	curr_offset;	// Current file offset in search range
-	off_T	curr_offset_used; // curr_offset used when skipping back
-	off_T	match_offset;	// Where the binary search found a tag
-	int	low_char;	// first char at low_offset
-	int	high_char;	// first char at high_offset
-    } search_info;
-
-    int		cmplen;
-    int		match;		// matches
-    int		match_no_ic = 0;// matches with rm_ic == FALSE
-    int		match_re;	// match with regexp
-    int		matchoff = 0;
-
-    int		is_etag;		// current file is emaces style
-
-    hash_T	hash = 0;
-
-    int		sort_error = FALSE;		// tags file not sorted
-    int		sortic = FALSE;			// tag file sorted in nocase
-    int		noic = (flags & TAG_NOIC);
-    int		line_error = FALSE;		// syntax error
-    int		has_re = (flags & TAG_REGEXP);	// regexp used
+    tagptrs_T		tagp;
+    tagsearch_info_T	search_info;
+    int			retval;
 #ifdef FEAT_CSCOPE
-    int		use_cscope = (flags & TAG_CSCOPE);
+    int			use_cscope = (st->flags & TAG_CSCOPE);
 #endif
-    vimconv_T	vimconv;
-
-    vimconv.vc_type = CONV_NONE;
+    hash_T		hash = 0;
author	Yegappan Lakshmanan <yegappan@yahoo.com>	2022-03-10 18:36:54 +0000
committer	Bram Moolenaar <Bram@vim.org>	2022-03-10 18:36:54 +0000
commit	bf40e90dfeb1d3d0280077e65782beb3fee31c9f (patch)
tree	0d25c44b993fb8c6909eab71b8e274143255381d
parent	48f83c31d14702b40ad0bf3c61091871d9db072d (diff)