updated for version 7.0063

author: Bram Moolenaar <Bram@vim.org> 2005-03-22 22:54:12 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-03-22 22:54:12 +0000
commit: fc73515f7ba66b47705265bb8d01c6bec5df09c4 (patch)
tree: f98ccbf9a30c460d0c8f59e4de38d54e843cb6d8 /src
parent: dbc08a34592881124ddb1e397bf7b124c55789cc (diff)
5 files changed, 648 insertions, 261 deletions
diff --git a/src/Make_bc5.mak b/src/Make_bc5.mak
index 25145d94dc..195d79f9c1 100644
--- a/src/Make_bc5.mak
+++ b/src/Make_bc5.mak
@@ -564,6 +564,7 @@ vimobj =  \
 	$(OBJDIR)\regexp.obj \
 	$(OBJDIR)\screen.obj \
 	$(OBJDIR)\search.obj \
+	$(OBJDIR)\spell.obj \
 	$(OBJDIR)\syntax.obj \
 	$(OBJDIR)\tag.obj \
 	$(OBJDIR)\term.obj \
diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak
index 24a52ce8aa..51e36189a8 100644
--- a/src/Make_cyg.mak
+++ b/src/Make_cyg.mak
@@ -1,6 +1,6 @@
 #
 # Makefile for VIM on Win32, using Cygnus gcc
-# Last updated by Dan Sharp.  Last Change: 2005 Jan 29
+# Last updated by Dan Sharp.  Last Change: 2005 Mar 21
 #
 # Also read INSTALLpc.txt!
 #
@@ -424,6 +424,7 @@ OBJ = \
 	$(OUTDIR)/regexp.o \
 	$(OUTDIR)/screen.o \
 	$(OUTDIR)/search.o \
+	$(OUTDIR)/spell.o \
 	$(OUTDIR)/syntax.o \
 	$(OUTDIR)/tag.o \
 	$(OUTDIR)/term.o \
diff --git a/src/normal.c b/src/normal.c
index 458b8155ea..8780b954d6 100644
--- a/src/normal.c
+++ b/src/normal.c
@@ -3874,6 +3874,7 @@ check_scrollbind(topline_diff, leftcol_diff)
 nv_ignore(cap)
     cmdarg_T	*cap;
 {
+    cap->retval |= CA_COMMAND_BUSY;	/* don't call edit() now */
 }
 
 /*
@@ -8675,6 +8676,7 @@ nv_cursorhold(cap)
 {
     apply_autocmds(EVENT_CURSORHOLD, NULL, NULL, FALSE, curbuf);
     did_cursorhold = TRUE;
+    cap->retval |= CA_COMMAND_BUSY;	/* don't call edit() now */
 }
 #endif
 
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index afc8562cec..a3d4c145d0 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -20,6 +20,8 @@ int vim_isIDc __ARGS((int c));
 int vim_iswordc __ARGS((int c));
 int vim_iswordp __ARGS((char_u *p));
 int vim_iswordc_buf __ARGS((char_u *p, buf_T *buf));
+void init_spell_chartab __ARGS((void));
+int spell_iswordc __ARGS((char_u *p));
 int vim_isfilec __ARGS((int c));
 int vim_isprintc __ARGS((int c));
 int vim_isprintc_strict __ARGS((int c));
diff --git a/src/spell.c b/src/spell.c
index a167ad1f12..c92eb65bf1 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -9,6 +9,11 @@
 
 /*
  * spell.c: code for spell checking
+ *
+ * Terminology:
+ * "dword" is a dictionary word, made out of letters and digits.
+ * "nword" is a word with a character that's not a letter or digit.
+ * "word"  is either a "dword" or an "nword".
  */
 
 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
@@ -23,6 +28,8 @@
 # include <fcntl.h>
 #endif
 
+#define MAXWLEN 100		/* assume max. word len is this many bytes */
+
 /*
  * Structure that is used to store the text from the language file.  This
  * avoids the need to allocate each individual word and copying it.  It's
@@ -36,24 +43,67 @@ struct sblock_S
     char_u	sb_data[1];	/* data, actually longer */
 };
 
+/* Structure to store words and additions.  Used twice: once for case-folded
+ * and once for keep-case words. */
+typedef struct winfo_S
+{
+    hashtab_T	wi_ht;		/* hashtable with all words, both dword_T and
+				   nword_T (check flags for DW_NWORD) */
+    garray_T	wi_add;		/* table with pointers to additions in a
+				   dword_T */
+    int		wi_addlen;	/* longest addition length */
+} winfo_T;
+
 /*
  * Structure used to store words and other info for one language.
  */
 typedef struct slang_S slang_T;
-
 struct slang_S
 {
     slang_T	*sl_next;	/* next language */
     char_u	sl_name[2];	/* language name "en", "nl", etc. */
-    hashtab_T	sl_ht;		/* hashtable with all words */
-    garray_T	sl_match;	/* table with pointers to matches */
-    garray_T	sl_add;		/* table with pointers to additions */
-    char_u	sl_regions[13];	/* table with up to 6 region names */
+    winfo_T	sl_fwords;	/* case-folded words and additions */
+    winfo_T	sl_kwords;	/* keep-case words and additions */
+    char_u	sl_regions[17];	/* table with up to 8 region names plus NUL */
     sblock_T	*sl_block;	/* list with allocated memory blocks */
 };
 
 static slang_T *first_lang = NULL;
 
+/* Entry for dword in "wi_ht".  Also used for part of an nword, starting with
+ * the first non-word character.  And used for additions in wi_add. */
+typedef struct dword_S
+{
+    char_u	dw_region;	/* one bit per region where it's valid */
+    char_u	dw_flags;	/* WF_ flags */
+    char_u	dw_word[1];	/* actually longer, NUL terminated */
+} dword_T;
+
+#define REGION_ALL 0xff
+
+#define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2)
+
+/* Entry for an nword in "wi_ht".  Note that the last three items must be
+ * identical to dword_T, so that they can be in the same hashtable. */
+typedef struct nword_S
+{
+    garray_T	nw_ga;		/* table with pointers to dword_T for part
+				   starting with non-word character */
+    int		nw_maxlen;	/* longest nword length (after the dword) */
+    char_u	nw_region;	/* one bit per region where it's valid */
+    char_u	nw_flags;	/* WF_ flags */
+    char_u	nw_word[1];	/* actually longer, NUL terminated */
+} nword_T;
+
+/* Get nword_T pointer from hashitem that uses nw_word */
+static nword_T dumnw;
+#define HI2NWORD(hi)	((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
+
+#define DW_CAP	    0x01	/* word must start with capital */
+#define DW_RARE	    0x02	/* rare word */
+#define DW_NWORD    0x04	/* this is an nword_T */
+#define DW_DWORD    0x08	/* (also) use as dword without nword */
+
 /*
  * Structure used in "b_langp", filled from 'spelllang'.
  */
@@ -64,25 +114,15 @@ typedef struct langp_S
 } langp_T;
 
 #define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
-#define MATCH_ENTRY(gap, i)	*(((char_u **)(gap)->ga_data) + i)
-
-/*
- * The byte before a word in the hashtable indicates the type of word.
- * Also used for the byte just before a match.
- * The top two bits are used to indicate rare and case-sensitive words.
- * The lower bits are used to indicate the region in which the word is valid.
- * Words valid in all regions use REGION_ALL.
- */
-#define REGION_MASK	0x3f
-#define REGION_ALL	0x3f
-#define CASE_MASK	0x40
-#define RARE_MASK	0x80
+#define DWORD_ENTRY(gap, i)	*(((dword_T **)(gap)->ga_data) + i)
 
 #define SP_OK		0
 #define SP_BAD		1
 #define SP_RARE		2
 #define SP_LOCAL	3
 
+static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
+
 static slang_T *spell_load_lang __ARGS((char_u *lang));
 static void spell_load_file __ARGS((char_u *fname));
 static int find_region __ARGS((char_u *rp, char_u *region));
@@ -102,19 +142,34 @@ spell_check(wp, ptr, attrp)
     char_u	*ptr;
     int		*attrp;
 {
-    char_u	*e;
+    char_u	*e;		/* end of word */
+    char_u	*ne;		/* new end of word */
+    char_u	*me;		/* max. end of match */
     langp_T	*lp;
     int		result;
     int		len = 0;
-    hash_T	hash;
     hashitem_T	*hi;
-    int		c;
-#define MAXWLEN 80	/* assume max. word len is 80 */
-    char_u	word[MAXWLEN + 1];
+    int		round;
+    char_u	kword[MAXWLEN + 1];	/* word copy */
+    char_u	fword[MAXWLEN + 1];	/* word with case folded */
+    char_u	match[MAXWLEN + 1];	/* fword with additional chars */
+    char_u	kwordclen[MAXWLEN + 1];	/* len of orig chars after kword[] */
+    char_u	fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
+    char_u	*clen;
+    int		cidx = 0;		/* char index in xwordclen[] */
+    hash_T	fhash;			/* hash for fword */
+    hash_T	khash;			/* hash for kword */
+    int		match_len = 0;		/* length of match[] */
+    int		fmatch_len = 0;		/* length of nword match in chars */
     garray_T	*gap;
-    int		l, h, t;
-    char_u	*p;
+    int		l, t;
+    char_u	*p, *tp;
     int		n;
+    dword_T	*dw;
+    dword_T	*tdw;
+    winfo_T	*wi;
+    nword_T	*nw;
+    int		w_isupper;
 
     /* Find the end of the word.  We already know that *ptr is a word char. */
     e = ptr;
@@ -122,119 +177,265 @@ spell_check(wp, ptr, attrp)
     {
 	mb_ptr_adv(e);
 	++len;
-    } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));
+    } while (*e != NUL && spell_iswordc(e));
+
+    /* A word starting with a number is always OK. */
+    if (*ptr >= '0' && *ptr <= '9')
+	return (int)(e - ptr);
+
+#ifdef FEAT_MBYTE
+    w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
+#else
+    w_isupper = MB_ISUPPER(*ptr);
+#endif
+
+    /* Make a copy of the word so that it can be NUL terminated.
+     * Compute hash value. */
+    mch_memmove(kword, ptr, e - ptr);
+    kword[e - ptr] = NUL;
+    khash = hash_hash(kword);
+
+    /* Make a case-folded copy of the word.  Compute its hash value. */
+    (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
+    fhash = hash_hash(fword);
+
+    /* Further case-folded characters to check for an nword match go in
+     * match[]. */
+    me = e;
+
+    /* "ne" is the end for the longest match */
+    ne = e;
 
     /* The word is bad unless we find it in the dictionary. */
     result = SP_BAD;
 
-    /* Words are always stored with folded case. */
-    (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
-    hash = hash_hash(word);
-
     /*
      * Loop over the languages specified in 'spelllang'.
-     * We check them all, because a match may find a longer word.
+     * We check them all, because a matching nword may be longer than an
+     * already found dword or nword.
      */
-    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
-								     ++lp)
+    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
     {
-	/* Check words when it wasn't recognized as a good word yet. */
-	if (result != SP_OK)
+	/*
+	 * Check for a matching word in the hashtable.
+	 * Check both the keep-case word and the fold-case word.
+	 */
+	for (round = 0; round <= 1; ++round)
 	{
-	    /* Word lookup.  Using a hash table is fast. */
-	    hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
+	    if (round == 0)
+	    {
+		wi = &lp->lp_slang->sl_kwords;
+		hi = hash_lookup(&wi->wi_ht, kword, khash);
+	    }
+	    else
+	    {
+		wi = &lp->lp_slang->sl_fwords;
+		hi = hash_lookup(&wi->wi_ht, fword, fhash);
+	    }
 	    if (!HASHITEM_EMPTY(hi))
 	    {
-		/* The character before the key indicates the type of word. */
-		c = hi->hi_key[-1];
-		if ((c & CASE_MASK) != 0)
+		/*
+		 * If this is an nword entry, check for match with remainder.
+		 */
+		dw = HI2DWORD(hi);
+		if (dw->dw_flags & DW_NWORD)
 		{
-		    /* Need to check first letter is uppercase.  If it is,
-		     * check region.  If it isn't it may be a rare word. */
-		    if (
+		    /* If the word is not defined as a dword we must find an
+		     * nword. */
+		    if ((dw->dw_flags & DW_DWORD) == 0)
+			dw = NULL;
+
+		    /* Fold more characters when needed for the nword.  Need
+		     * to do one extra to check for a non-word character after
+		     * the nword.  Also keep the byte-size of each character,
+		     * both before and after folding case. */
+		    nw = HI2NWORD(hi);
+		    while ((round == 0
+				? me - e <= nw->nw_maxlen
+				: match_len <= nw->nw_maxlen)
+			    && *me != NUL)
+		    {
 #ifdef FEAT_MBYTE
-			    MB_ISUPPER(mb_ptr2char(ptr))
+			l = mb_ptr2len_check(me);
 #else
-			    MB_ISUPPER(*ptr)
+			l = 1;
 #endif
-			    )
+			(void)str_foldcase(me, l, match + match_len,
+						     MAXWLEN - match_len + 1);
+			me += l;
+			kwordclen[cidx] = l;
+			fwordclen[cidx] = STRLEN(match + match_len);
+			match_len += fwordclen[cidx];
+			++cidx;
+		    }
+
+		    if (round == 0)
 		    {
-			if ((c & lp->lp_region) == 0)
-			    result = SP_LOCAL;
-			else
-			    result = SP_OK;
+			clen = kwordclen;
+			tp = e;
 		    }
-		    else if (c & RARE_MASK)
-			result = SP_RARE;
-		}
-		else
-		{
-		    if ((c & lp->lp_region) == 0)
-			result = SP_LOCAL;
-		    else if (c & RARE_MASK)
-			result = SP_RARE;
 		    else
-			result = SP_OK;
+		    {
+			clen = fwordclen;
+			tp = match;
+		    }
+
+		    /* Match with each item.  The longest match wins:
+		     * "you've" is longer than "you". */
+		    gap = &nw->nw_ga;
+		    for (t = 0; t < gap->ga_len; ++t)
+		    {
+			/* Skip entries with wrong case for first char.
+			 * Continue if it's a rare word without a capital. */
+			tdw = DWORD_ENTRY(gap, t);
+			if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
+								&& !w_isupper)
+			    continue;
+
+			p = tdw->dw_word;
+			l = 0;
+			for (n = 0; p[n] != 0; n += clen[l++])
+			    if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+				break;
+
+			/* Use a match if it's longer than previous matches
+			 * and the next character is not a word character. */
+			if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
+						   || !spell_iswordc(tp + n)))
+			{
+			    dw = tdw;
+			    fmatch_len = l;
+			    if (round == 0)
+				ne = tp + n;
+			    else
+			    {
+				/* Need to use the length of the original
+				 * chars, not the fold-case ones. */
+				ne = e;
+				for (l = 0; l < fmatch_len; ++l)
+				    ne += kwordclen[l];
+			    }
+			    if ((lp->lp_region & tdw->dw_region) == 0)
+				result = SP_LOCAL;
+			    else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
+				result = SP_RARE;
+			    else
+				result = SP_OK;
+			}
+		    }
+
 		}
-	    }
-	}
 
-	/* Match lookup.  Uses a binary search.  If there is a match adjust
-	 * "e" to the end.  This is also done when a word matched, because
-	 * "you've" is longer than "you". */
-	gap = &lp->lp_slang->sl_match;
-	l = 0;			/* low index */
-	h = gap->ga_len - 1;	/* high index */
-	/* keep searching, the match must be between "l" and "h" (inclusive) */
-	while (h >= l)
-	{
-	    t = (h + l) / 2;
-	    p = MATCH_ENTRY(gap, t) + 1;
-	    for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
-		;
-	    if (p[n] == 0)
-	    {
-		if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer)))
+		if (dw != NULL)
 		{
-		    /* match! */
-		    e = ptr + n;
-		    if (result != SP_OK)
+		    if (dw->dw_flags & DW_CAP)
 		    {
-			if ((lp->lp_region & p[-1]) == 0)
+			/* Need to check first letter is uppercase.  If it is,
+			 * check region.  If it isn't it may be a rare word.
+			 * */
+			if (w_isupper)
+			{
+			    if ((dw->dw_region & lp->lp_region) == 0)
+				result = SP_LOCAL;
+			    else
+				result = SP_OK;
+			}
+			else if (dw->dw_flags & DW_RARE)
+			    result = SP_RARE;
+		    }
+		    else
+		    {
+			if ((dw->dw_region & lp->lp_region) == 0)
 			    result = SP_LOCAL;
+			else if (dw->dw_flags & DW_RARE)
+			    result = SP_RARE;
 			else
 			    result = SP_OK;
 		    }
-		    break;
 		}
-		/* match is too short, next item is new low index */
-		l = t + 1;
 	    }
-	    else if (p[n] < ptr[n])
-		/* match is before word, next item is new low index */
-		l = t + 1;
-	    else
-		/* match is after word, previous item is new high index */
-		h = t - 1;
 	}
 
-	/* Addition lookup.  Uses a linear search, there should be very few.
-	 * If there is a match adjust "e" to the end.  This doesn't change
-	 * whether a word was good or bad, only the length. */
-	gap = &lp->lp_slang->sl_add;
-	for (t = 0; t < gap->ga_len; ++t)
-	{
-	    p = MATCH_ENTRY(gap, t) + 1;
-	    for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
-		;
-	    if (p[n] == 0
-		      && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer)))
+	/*
+	 * Check for an addition.
+	 * Only after a dword, not after an nword.
+	 * Check both the keep-case word and the fold-case word.
+	 */
+	if (fmatch_len == 0)
+	    for (round = 0; round <= 1; ++round)
 	    {
-		/* match */
-		e += n;
-		break;
+		if (round == 0)
+		    wi = &lp->lp_slang->sl_kwords;
+		else
+		    wi = &lp->lp_slang->sl_fwords;
+		gap = &wi->wi_add;
+		if (gap->ga_len == 0)   /* no additions, skip quickly */
+		    continue;
+
+		/* Fold characters when needed for the addition.  Need to do one
+		 * extra to check for a word character after the addition. */
+		while ((round == 0
+			    ? me - e <= wi->wi_addlen
+			    : match_len <= wi->wi_addlen)
+			&& *me != NUL)
+		{
+#ifdef FEAT_MBYTE
+		    l = mb_ptr2len_check(me);
+#else
+		    l = 1;
+#endif
+		    (void)str_foldcase(me, l, match + match_len,
+							 MAXWLEN - match_len + 1);
+		    me += l;
+		    kwordclen[cidx] = l;
+		    fwordclen[cidx] = STRLEN(match + match_len);
+		    match_len += fwordclen[cidx];
+		    ++cidx;
+		}
+
+		if (round == 0)
+		{
+		    clen = kwordclen;
+		    tp = e;
+		}
+		else
+		{
+		    clen = fwordclen;
+		    tp = match;
+		}
+
+		/* Addition lookup.  Uses a linear search, there should be
+		 * very few.  If there is a match adjust "ne" to the end.
+		 * This doesn't change whether a word was good or bad, only
+		 * the length. */
+		for (t = 0; t < gap->ga_len; ++t)
+		{
+		    tdw = DWORD_ENTRY(gap, t);
+		    p = tdw->dw_word;
+		    l = 0;
+		    for (n = 0; p[n] != 0; n += clen[l++])
+			if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+			    break;
+
+		    /* Use a match if it's longer than previous matches
+		     * and the next character is not a word character. */
+		    if (p[n] == 0 && l > fmatch_len
+				    && (tp[n] == 0 || !spell_iswordc(tp + n)))
+		    {
+			fmatch_len = l;
+			if (round == 0)
+			    ne = tp + n;
+			else
+			{
+			    /* Need to use the length of the original
+			     * chars, not the fold-case ones. */
+			    ne = e;
+			    for (l = 0; l < fmatch_len; ++l)
+				ne += kwordclen[l];
+			}
+		    }
+		}
 	    }
-	}
     }
 
     if (result != SP_OK)
@@ -247,7 +448,7 @@ spell_check(wp, ptr, attrp)
 	    *attrp = highlight_attr[HLF_SPL];
     }
 
-    return (int)(e - ptr);
+    return (int)(ne - ptr);
 }
 
 static slang_T	    *load_lp;	/* passed from spell_load_lang() to
@@ -264,15 +465,19 @@ spell_load_lang(lang)
     char_u	fname_enc[80];
     char_u	fname_ascii[20];
     char_u	*p;
+    int		r;
 
     lp = (slang_T *)alloc(sizeof(slang_T));
     if (lp != NULL)
     {
 	lp->sl_name[0] = lang[0];
 	lp->sl_name[1] = lang[1];
-	hash_init(&lp->sl_ht);
-	ga_init2(&lp->sl_match, sizeof(char_u *), 20);
-	ga_init2(&lp->sl_add, sizeof(char_u *), 4);
+	hash_init(&lp->sl_fwords.wi_ht);
+	ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4);
+	lp->sl_fwords.wi_addlen = 0;
+	hash_init(&lp->sl_kwords.wi_ht);
+	ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4);
+	lp->sl_kwords.wi_addlen = 0;
 	lp->sl_regions[0] = NUL;
 	lp->sl_block = NULL;
 
@@ -286,17 +491,20 @@ spell_load_lang(lang)
 	    p = (char_u *)"latin1";
 	load_lp = lp;
 	sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
-	if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)
+	r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
+	if (r == FAIL)
 	{
 	    /* Try again to find an ASCII spell file. */
 	    sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
-	    if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)
-	    {
-		vim_free(lp);
-		lp = NULL;
-		smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
+	    r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
+	}
+
+	if (r == FAIL)
+	{
+	    vim_free(lp);
+	    lp = NULL;
+	    smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
 							       fname_enc + 6);
-	    }
 	}
 	else
 	{
@@ -319,229 +527,398 @@ spell_load_file(fname)
     int		fd;
     size_t	len;
     size_t	l;
-    size_t	rest = 0;
     char_u	*p = NULL, *np;
-    sblock_T	*bl;
+    sblock_T	*bl = NULL;
+    int		bl_used = 0;
+    size_t	rest = 0;
+    char_u	*rbuf;		/* read buffer */
+    char_u	*rbuf_end;	/* past last valid char in "rbuf" */
     hash_T	hash;
     hashitem_T	*hi;
     int		c;
+    int		cc;
     int		region = REGION_ALL;
-    char_u	word[MAXWLEN + 1];
-    int		n;
+    int		wlen;
+    winfo_T	*wi;
+    dword_T	*dw, *edw;
+    nword_T	*nw = NULL;
+    int		flags;
+    char_u	*save_sourcing_name = sourcing_name;
+    linenr_T	save_sourcing_lnum = sourcing_lnum;
+
+    rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1));
+    if (rbuf == NULL)
+	return;
 
     fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
     if (fd < 0)
     {
 	EMSG2(_(e_notopen), fname);
-	return;
+	goto theend;
     }
 
+    sourcing_name = fname;
+    sourcing_lnum = 0;
+
     /* Get the length of the whole file. */
     len = lseek(fd, (off_t)0, SEEK_END);
     lseek(fd, (off_t)0, SEEK_SET);
 
-    /* Loop, reading the file one block at a time.
+    /*
+     * Read the file one block at a time.
      * "rest" is the length of an incomplete line at the previous block.
-     * "p" points to the remainder. */
+     * "p" points to the remainder.
+     */
     while (len > 0)
     {
-	/* Allocate a block of memory to store the info in.  This is not freed
-	 * until spell_reload() is called. */
+	/* Read a block from the file.  Prepend the remainder of the previous
+	 * block, if any. */
+	if (rest > 0)
+	{
+	    if (rest > MAXWLEN)	    /* truncate long line (should be comment) */
+		rest = MAXWLEN;
+	    mch_memmove(rbuf, p, rest);
+	    --sourcing_lnum;
+	}
 	if (len > SBLOCKSIZE)
 	    l = SBLOCKSIZE;
 	else
 	    l = len;
 	len -= l;
-	bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
-	if (bl == NULL)
-	    break;
-	bl->sb_next = load_lp->sl_block;
-	load_lp->sl_block = bl;
-
-	/* Read a block from the file.  Prepend the remainder of the previous
-	 * block. */
-	if (rest > 0)
-	    mch_memmove(bl->sb_data, p, rest);
-	if (read(fd, bl->sb_data + rest, l) != l)
+	if (read(fd, rbuf + rest, l) != l)
 	{
 	    EMSG2(_(e_notread), fname);
 	    break;
 	}
-	l += rest;
+	rbuf_end = rbuf + l + rest;
 	rest = 0;
 
 	/* Deal with each line that was read until we finish the block. */
-	for (p = bl->sb_data; l > 0; p = np)
+	for (p = rbuf; p < rbuf_end; p = np)
 	{
-	    /* "np" points to the char after the line (CR or NL). */
-	    for (np = p; l > 0 && *np >= ' '; ++np)
-		--l;
-	    if (l == 0)
+	    ++sourcing_lnum;
+
+	    /* "np" points to the first char after the line (CR, NL or white
+	     * space). */
+	    for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np))
+		;
+	    if (np >= rbuf_end)
 	    {
-		/* Incomplete line (or end of file). */
+		/* Incomplete line or end of file. */
 		rest = np - p;
 		if (len == 0)
-		    EMSG2(_("E751: Truncated spell file: %s"), fname);
+		    EMSG(_("E751: Truncated spell file"));
 		break;
 	    }
 	    *np = NUL;	    /* terminate the line with a NUL */
 
-	    /* Skip comment and empty lines. */
-	    c = *p;
-	    if (c != '#' && np > p)
+	    if (*p == '-')
 	    {
-		if (c == '=' || c == '+')
+		/*
+		 * Region marker: ---, -xx, -xx-yy, etc.
+		 */
+		++p;
+		if (*p == '-')
 		{
-		    garray_T *gap;
-
-		    /* Match or Add item. */
-		    if (c == '=')
-			gap = &load_lp->sl_match;
-		    else
-			gap = &load_lp->sl_add;
-
-		    if (ga_grow(gap, 1) == OK)
+		    if (p[1] != '-' || p[2] != NUL)
 		    {
-			for (n = 0; n < gap->ga_len; ++n)
-			    if ((c = STRCMP(p + 1,
-						MATCH_ENTRY(gap, n) + 1)) < 0)
-				break;
-			if (c == 0)
-			{
-			    if (p_verbose > 0)
-				smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
-								p + 1, fname);
-			}
-			else
-			{
-			    mch_memmove((char_u **)gap->ga_data + n + 1,
-				    (char_u **)gap->ga_data + n,
-				    (gap->ga_len - n) * sizeof(char_u *));
-			    *(((char_u **)gap->ga_data) + n) = p;
-			    *p = region;
-			    ++gap->ga_len;
-			}
+			EMSG2(_(e_invchar2), p - 1);
+			len = 0;
+			break;
 		    }
+		    region = REGION_ALL;
 		}
-		else if (c == '-')
+		else
 		{
-		    /* region item */
-		    ++p;
-		    if (*p == '-')
-			/* end of a region */
-			region = REGION_ALL;
-		    else
-		    {
-			char_u	*rp = load_lp->sl_regions;
-			int	r;
+		    char_u	*rp = load_lp->sl_regions;
+		    int		r;
 
-			/* The region may be repeated: "-ca-uk".  Fill
-			 * "region" with the bit mask for the ones we find. */
-			region = 0;
-			for (;;)
+		    /* Start of a region.  The region may be repeated:
+		     * "-ca-uk".  Fill "region" with the bit mask for the
+		     * ones we find. */
+		    region = 0;
+		    for (;;)
+		    {
+			r = find_region(rp, p);
+			if (r == REGION_ALL)
 			{
-			    /* start of a region */
-			    r = find_region(rp, p);
-			    if (r == REGION_ALL)
+			    /* new region, add it to sl_regions[] */
+			    r = STRLEN(rp);
+			    if (r >= 16)
 			    {
-				/* new region, add it */
-				r = STRLEN(rp);
-				if (r >= 12)
-				{
-				    EMSG2(_("E752: Too many regions in %s"),
-								       fname);
-				    r = REGION_ALL;
-				}
-				else
-				{
-				    rp[r] = p[0];
-				    rp[r + 1] = p[1];
-				    rp[r + 2] = NUL;
-				    r = 1 << (r / 2);
-				}
+				EMSG2(_("E752: Too many regions: %s"), p);
+				len = 0;
+				break;
 			    }
 			    else
-				r = 1 << r;
+			    {
+				rp[r] = p[0];
+				rp[r + 1] = p[1];
+				rp[r + 2] = NUL;
+				r = 1 << (r / 2);
+			    }
+			}
+			else
+			    r = 1 << r;
 
-			    region |= r;
-			    if (p[2] != '-')
+			region |= r;
+			if (p[2] != '-')
+			{
+			    if (p[2] > ' ')
 			    {
-				if (p[2] != NUL)
-				    EMSG2(_("E753: Invalid character in \"%s\""),
-								       p - 1);
-				break;
+				EMSG2(_(e_invchar2), p - 1);
+				len = 0;
 			    }
-			    p += 3;
+			    break;
 			}
+			p += 3;
 		    }
 		}
+	    }
+	    else if (*p != '#' && *p != NUL)
+	    {
+		/*
+		 * Not an empty line or comment.
+		 */
+		if (*p == '!')
+		{
+		    wi = &load_lp->sl_kwords;	    /* keep case */
+		    ++p;
+		}
 		else
+		    wi = &load_lp->sl_fwords;	    /* fold case */
+
+		flags = 0;
+		c = *p;
+		if (c == '>')		/* rare word */
 		{
-		    /* add the word */
-		    if (c == '>')
-			c = region | RARE_MASK;
-		    else
+		    flags = DW_RARE;
+		    ++p;
+		}
+		else if (*p == '+')	/* addition */
+		    ++p;
+
+		if (c != '+' && !spell_iswordc(p))
+		{
+		    EMSG2(_(e_invchar2), p);
+		    len = 0;
+		    break;
+		}
+
+		/* Make sure there is room for the word.  Folding case may
+		 * double the size. */
+		wlen = np - p;
+		if (bl == NULL || bl_used + sizeof(dword_T) + wlen
+#ifdef FEAT_MBYTE
+					    * (has_mbyte ? 2 : 1)
+#endif
+							    >= SBLOCKSIZE)
+		{
+		    /* Allocate a block of memory to store the dword_T in.
+		     * This is not freed until spell_reload() is called. */
+		    bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T)
+							   + SBLOCKSIZE));
+		    if (bl == NULL)
 		    {
-			if (c != ' ')
-			    EMSG2(_("E753: Invalid character in \"%s\""), p);
-			c = region;
+			len = 0;
+			break;
 		    }
+		    bl->sb_next = load_lp->sl_block;
+		    load_lp->sl_block = bl;
+		    bl_used = 0;
+		}
+		dw = (dword_T *)(bl->sb_data + bl_used);
+
+		/* For fold-case words fold the case and check for start
+		 * with uppercase letter. */
+		if (wi == &load_lp->sl_fwords)
+		{
 #ifdef FEAT_MBYTE
-		    if (MB_ISUPPER(mb_ptr2char(p + 1)))
+		    if (MB_ISUPPER(mb_ptr2char(p)))
 #else
-		    if (MB_ISUPPER(p[1]))
+		    if (MB_ISUPPER(*p))
 #endif
-			c |= CASE_MASK;
-		    *p++ = c;
-		    (void)str_foldcase(p, np - p, word, MAXWLEN + 1);
-		    n = STRLEN(word);
-		    if (n > np - p)
-		    {
-			sblock_T	*s;
+			flags |= DW_CAP;
+
+		    /* Fold case. */
+		    (void)str_foldcase(p, np - p, dw->dw_word, wlen
+#ifdef FEAT_MBYTE
+						     * (has_mbyte ? 2 : 1)
+#endif
+								     + 1);
+#ifdef FEAT_MBYTE
+		    /* case folding may change length of word */
+		    wlen = STRLEN(dw->dw_word);
+#endif
+		}
+		else
+		{
+		    /* Keep case: copy the word as-is. */
+		    mch_memmove(dw->dw_word, p, wlen + 1);
+		}
 
-			/* Folding case made word longer!  We need to allocate
-			 * memory for it. */
-			s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)
-								     + n + 1);
-			if (s != NULL)
+		if (c == '+')
+		{
+		    garray_T    *gap = &wi->wi_add;
+
+		    /* Addition.  TODO: search for matching entry? */
+		    if (wi->wi_addlen < wlen)
+			wi->wi_addlen = wlen;
+		    if (ga_grow(gap, 1) == FAIL)
+		    {
+			len = 0;
+			break;
+		    }
+		    *(((dword_T **)gap->ga_data) + gap->ga_len) = dw;
+		    ++gap->ga_len;
+		    dw->dw_region = region;
+		    dw->dw_flags = flags;
+		    bl_used += sizeof(dword_T) + wlen;
+		}
+		else
+		{
+		    /*
+		     * Check for a non-word character.  If found it's
+		     * going to be an nword.
+		     * For an nword we split in two: the leading dword and
+		     * the remainder.  The dword goes in the hashtable
+		     * with an nword_T, the remainder is put in the
+		     * dword_T (starting with the first non-word
+		     * character).
+		     */
+		    cc = NUL;
+		    for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p))
+			if (!spell_iswordc(p))
 			{
-			    s->sb_next = load_lp->sl_block;
-			    load_lp->sl_block = s;
-			    s->sb_data[0] = p[-1];
-			    p = s->sb_data + 1;
+			    cc = *p;
+			    *p = NUL;
+			    break;
 			}
-		    }
-		    mch_memmove(p, word, n + 1);
 
-		    hash = hash_hash(p);
-		    hi = hash_lookup(&load_lp->sl_ht, p, hash);
+		    /* check if we already have this dword */
+		    hash = hash_hash(dw->dw_word);
+		    hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash);
 		    if (!HASHITEM_EMPTY(hi))
 		    {
-			c = hi->hi_key[-1];
-			if ((c & (CASE_MASK | RARE_MASK))
-					 == (p[-1] & (CASE_MASK | RARE_MASK)))
+			/* Existing entry. */
+			edw = HI2DWORD(hi);
+			if ((edw->dw_flags & (DW_CAP | DW_RARE))
+				   == (dw->dw_flags & (DW_CAP | DW_RARE)))
 			{
 			    if (p_verbose > 0)
 				smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
-								    p, fname);
+						      dw->dw_word, fname);
+			}
+		    }
+
+		    if (cc != NUL) /* nword */
+		    {
+			if (HASHITEM_EMPTY(hi)
+				       || (edw->dw_flags & DW_NWORD) == 0)
+			{
+			    sblock_T *sb;
+
+			    /* Need to allocate a new nword_T.  Put it in an
+			     * sblock_T, so that we can free it later. */
+			    sb = (sblock_T *)alloc(
+				    (unsigned)(sizeof(sblock_T)
+					       + sizeof(nword_T) + wlen));
+			    if (sb == NULL)
+			    {
+				len = 0;
+				break;
+			    }
+			    sb->sb_next = load_lp->sl_block;
+			    load_lp->sl_block = sb;
+			    nw = (nword_T *)sb->sb_data;
+
+			    ga_init2(&nw->nw_ga, sizeof(dword_T *), 4);
+			    nw->nw_maxlen = 0;
+			    STRCPY(nw->nw_word, dw->dw_word);
+			    if (!HASHITEM_EMPTY(hi))
+			    {
+				/* Note: the nw_region and nw_flags are for
+				 * the dword that matches with the start
+				 * of this nword, not for the nword
+				 * itself! */
+				nw->nw_region = edw->dw_region;
+				nw->nw_flags = edw->dw_flags | DW_NWORD;
+
+				/* Remove the dword item so that we can
+				 * add it as an nword. */
+				hash_remove(&wi->wi_ht, hi);
+				hi = hash_lookup(&wi->wi_ht,
+						       nw->nw_word, hash);
+			    }
+			    else
+			    {
+				nw->nw_region = 0;
+				nw->nw_flags = DW_NWORD;
+			    }
 			}
 			else
-			    hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK));
+			    nw = HI2NWORD(hi);
+		    }
+
+		    if (HASHITEM_EMPTY(hi))
+		    {
+			/* Add new dword or nword entry. */
+			hash_add_item(&wi->wi_ht, hi, cc == NUL
+				       ? dw->dw_word : nw->nw_word, hash);
+			if (cc == NUL)
+			{
+			    /* New dword: init the values and count the
+			     * used space.  */
+			    dw->dw_flags = DW_DWORD | flags;
+			    dw->dw_region = region;
+			    bl_used += sizeof(dword_T) + wlen;
+			}
+		    }
+		    else if (cc == NUL)
+		    {
+			/* existing dword: add the region and flags */
+			dw = edw;
+			dw->dw_region |= region;
+			dw->dw_flags |= DW_DWORD | flags;
+		    }
+
+		    if (cc != NUL)
+		    {
+			/* Use the dword for the non-word character and
+			 * following characters. */
+			dw->dw_region = region;
+			dw->dw_flags = flags;
+			STRCPY(dw->dw_word + 1, p + 1);
+			dw->dw_word[0] = cc;
+			l = wlen - (p - dw->dw_word);
+
author	Bram Moolenaar <Bram@vim.org>	2005-03-22 22:54:12 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-03-22 22:54:12 +0000
commit	fc73515f7ba66b47705265bb8d01c6bec5df09c4 (patch)
tree	f98ccbf9a30c460d0c8f59e4de38d54e843cb6d8 /src
parent	dbc08a34592881124ddb1e397bf7b124c55789cc (diff)