updated for version 7.0079

author: Bram Moolenaar <Bram@vim.org> 2005-06-04 21:55:20 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-06-04 21:55:20 +0000
commit: 51485f06246966898f7c00e2e53b1ba4c6855cf7 (patch)
tree: c1cfe02ab088ea7a4423f15829e4083303d62a89
parent: 4debb442bd885d182d7f77d1dfcdf143fd7cbf88 (diff)
9 files changed, 1308 insertions, 3671 deletions
diff --git a/runtime/doc/develop.txt b/runtime/doc/develop.txt
index bc8b0e8c09..cceda32b97 100644
--- a/runtime/doc/develop.txt
+++ b/runtime/doc/develop.txt
@@ -1,4 +1,4 @@
-*develop.txt*   For Vim version 7.0aa.  Last change: 2005 Mar 29
+*develop.txt*   For Vim version 7.0aa.  Last change: 2005 Jun 04
 
 
 		  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -381,10 +381,10 @@ checking engine in Vim, for various reasons:
   fly (while redrawing), just like syntax highlighting.  But the mechanisms
   used by other code are much slower.  Myspell uses a simplistic hashtable,
   for example.
-- For a program like aspell a communication mechanism would have to be setup.
-  That's complicated to do in a portable way (Unix-only would be relatively
-  simple, but that's not good enough).  And performance will become a problem
-  (lots of process switching involved).
+- For using an external program like aspell a communication mechanism would
+  have to be set up.  That's complicated to do in a portable way (Unix-only
+  would be relatively simple, but that's not good enough).  And performance
+  will become a problem (lots of process switching involved).
 - Missing support for words with non-word characters, such as "Etten-Leur" and
   "et al.", would require marking the pieces of them OK, lowering the
   reliability.
diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt
index 1119618d79..18da9ccc06 100644
--- a/runtime/doc/map.txt
+++ b/runtime/doc/map.txt
@@ -1,4 +1,4 @@
-*map.txt*       For Vim version 7.0aa.  Last change: 2005 Mar 29
+*map.txt*       For Vim version 7.0aa.  Last change: 2005 Jun 03
 
 
 		  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -1032,6 +1032,7 @@ The valid escape sequences are
 If the first two characters of an escape sequence are "q-" (for example,
 <q-args>) then the value is quoted in such a way as to make it a valid value
 for use in an expression.  This uses the argument as one single value.
+When there is no argument <q-args> is an empty string.
 
 To allow commands to pass their arguments on to a user-defined function, there
 is a special form <f-args> ("function args").  This splits the command
diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl
index 156c0979e9..300f6ce807 100644
--- a/runtime/spell/en.utf-8.spl
+++ b/runtime/spell/en.utf-8.spl
diff --git a/src/buffer.c b/src/buffer.c
index 294bc9b166..c54df88175 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -854,11 +854,11 @@ do_bufdel(command, arg, addr_count, start_bnr, end_bnr, forceit)
 	if (deleted == 0)
 	{
 	    if (command == DOBUF_UNLOAD)
-		sprintf((char *)IObuff, _("E515: No buffers were unloaded"));
+		STRCPY(IObuff, _("E515: No buffers were unloaded"));
 	    else if (command == DOBUF_DEL)
-		sprintf((char *)IObuff, _("E516: No buffers were deleted"));
+		STRCPY(IObuff, _("E516: No buffers were deleted"));
 	    else
-		sprintf((char *)IObuff, _("E517: No buffers were wiped out"));
+		STRCPY(IObuff, _("E517: No buffers were wiped out"));
 	    errormsg = IObuff;
 	}
 	else if (deleted >= p_report)
@@ -2450,7 +2450,7 @@ buflist_list(eap)
 	else
 	    home_replace(buf, buf->b_fname, NameBuff, MAXPATHL, TRUE);
 
-	sprintf((char *)IObuff, "%3d%c%c%c%c%c \"",
+	vim_snprintf((char *)IObuff, IOSIZE - 20, "%3d%c%c%c%c%c \"%s\"",
 		buf->b_fnum,
 		buf->b_p_bl ? ' ' : 'u',
 		buf == curbuf ? '%' :
@@ -2459,18 +2459,11 @@ buflist_list(eap)
 			(buf->b_nwindows == 0 ? 'h' : 'a'),
 		!buf->b_p_ma ? '-' : (buf->b_p_ro ? '=' : ' '),
 		(buf->b_flags & BF_READERR) ? 'x'
-					    : (bufIsChanged(buf) ? '+' : ' ')
-		);
-
-	len = (int)STRLEN(IObuff);
-	STRNCPY(IObuff + len, NameBuff, IOSIZE - 20 - len);
-	IObuff[IOSIZE - 20 - len] = NUL;    /* make sure it's terminated */
-
-	len = (int)STRLEN(IObuff);
-	IObuff[len++] = '"';
+					    : (bufIsChanged(buf) ? '+' : ' '),
+		NameBuff);
 
 	/* put "line 999" in column 40 or after the file name */
-	IObuff[len] = NUL;
+	len = STRLEN(IObuff);
 	i = 40 - vim_strsize(IObuff);
 	do
 	{
diff --git a/src/getchar.c b/src/getchar.c
index 2ece91aa2e..6d26d6d557 100644
--- a/src/getchar.c
+++ b/src/getchar.c
@@ -4342,7 +4342,7 @@ put_escstr(fd, strstart, what)
 	if (p != NULL)
 	{
 	    while (*p != NUL)
-		if (putc(*p++, fd) < 0)
+		if (fputc(*p++, fd) < 0)
 		    return FAIL;
 	    --str;
 	    continue;
diff --git a/src/mark.c b/src/mark.c
index 6a149bface..9d74b4a7c9 100644
--- a/src/mark.c
+++ b/src/mark.c
@@ -1445,6 +1445,7 @@ removable(name)
     char_u  *p;
     char_u  part[51];
     int	    retval = FALSE;
+    int	    n;
 
     name = home_replace_save(NULL, name);
     if (name != NULL)
@@ -1452,11 +1453,14 @@ removable(name)
 	for (p = p_viminfo; *p; )
 	{
 	    copy_option_part(&p, part, 51, ", ");
-	    if (part[0] == 'r'
-			&& MB_STRNICMP(part + 1, name, STRLEN(part + 1)) == 0)
+	    if (part[0] == 'r')
 	    {
-		retval = TRUE;
-		break;
+		n = STRLEN(part + 1);
+		if (MB_STRNICMP(part + 1, name, n) == 0)
+		{
+		    retval = TRUE;
+		    break;
+		}
 	    }
 	}
 	vim_free(name);
diff --git a/src/normal.c b/src/normal.c
index 207576698e..c3c7627575 100644
--- a/src/normal.c
+++ b/src/normal.c
@@ -2823,6 +2823,7 @@ do_mouse(oap, c, dir, count, fixindent)
 	if ((mod_mask & MOD_MASK_MULTI_CLICK) == MOD_MASK_2CLICK)
 	{
 	    pos_T	*pos = NULL;
+	    int		gc;
 
 	    if (is_click)
 	    {
@@ -2830,7 +2831,7 @@ do_mouse(oap, c, dir, count, fixindent)
 		 * not a word character, try finding a match and select a (),
 		 * {}, [], #if/#endif, etc. block. */
 		end_visual = curwin->w_cursor;
-		while (vim_iswhite(gchar_pos(&end_visual)))
+		while (gc = gchar_pos(&end_visual), vim_iswhite(gc))
 		    inc(&end_visual);
 		if (oap != NULL)
 		    oap->motion_type = MCHAR;
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index e839d6374c..9fae4063f4 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -40,6 +40,7 @@ void getvvcol __ARGS((win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, co
 void getvcols __ARGS((win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, colnr_T *right));
 char_u *skipwhite __ARGS((char_u *p));
 char_u *skipdigits __ARGS((char_u *p));
+char_u *skiphex __ARGS((char_u *p));
 char_u *skiptodigit __ARGS((char_u *p));
 char_u *skiptohex __ARGS((char_u *p));
 int vim_isdigit __ARGS((int c));
diff --git a/src/spell.c b/src/spell.c
index 9d010f912e..2d80389d96 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -10,25 +10,91 @@
 /*
  * spell.c: code for spell checking
  *
- * The basic spell checking mechanism is:
- * 1. Isolate a word, up to the next non-word character.
- * 2. Find the word in the hashtable of basic words.
- * 3. If not found, look in the hashtable with "prewords".  These are prefixes
- *    with a non-word character following a word character, e.g., "de-".
- * 4. If still not found, for each matching a prefix try if the word matches
- *    without the prefix (and with the "chop" string added back).
- * 5. If still still not found, for each matching suffix try if the word
- *    matches without the suffix (and with the "chop" string added back).
+ * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
+ * has a list of bytes that can appear (siblings).  For each byte there is a
+ * pointer to the node with the byte that follows in the word (child).
+ * A NUL byte is used where the word may end.
+ *
+ * There are two trees: one with case-folded words and one with words in
+ * original case.  The second one is only used for keep-case words and is
+ * usually small.
+ *
+ * Thanks to Olaf Seibert for providing an example implementation of this tree
+ * and the compression mechanism.
  *
  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
- * After finding a matching word check for a leadstring (non-word characters
- * before the word) and addstring (more text following, starting with a
- * non-word character).
  *
  * Why doesn't Vim use aspell/ispell/myspell/etc.?
  * See ":help develop-spell".
  */
 
+/*
+ * Vim spell file format:  <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
+ *
+ * <HEADER>: <fileID> <regioncnt> <regionname> ...
+ *		 <charflagslen> <charflags> <fcharslen> <fchars>
+ *
+ * <fileID>     10 bytes    "VIMspell05"
+ * <regioncnt>  1 byte	    number of regions following (8 supported)
+ * <regionname>	2 bytes     Region name: ca, au, etc.
+ *			    First <regionname> is region 1.
+ *
+ * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
+ * <charflags>  N bytes     List of flags (first one is for character 128):
+ *			    0x01  word character
+ *			    0x02  upper-case character
+ * <fcharslen>  2 bytes     Number of bytes in <fchars>.
+ * <fchars>     N bytes	    Folded characters, first one is for character 128.
+ *
+ *
+ * <SUGGEST> : <suggestlen> <more> ...
+ *
+ * <suggestlen> 4 bytes	    Length of <SUGGEST> in bytes, excluding
+ *			    <suggestlen>.  MSB first.
+ * <more>		    To be defined.
+ *
+ *
+ * <LWORDTREE>: <wordtree>
+ *
+ * <wordtree>: <nodecount> <nodedata> ...
+ *
+ * <nodecount>	4 bytes	    Number of nodes following.  MSB first.
+ *
+ * <nodedata>: <siblingcount> <sibling> ...
+ *
+ * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
+ *			    follow in sorted order.
+ *
+ * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
+ *
+ * <byte>	1 byte	    Byte value of the sibling.  Special cases:
+ *			    BY_NOFLAGS: End of word without flags and for all
+ *					regions.
+ *			    BY_FLAGS: End of word, <flags> follow.
+ *			    BY_INDEX: Child of sibling is shared, <nodeidx>
+ *					and <xbyte> follow.
+ *
+ * <nodeidx>	3 bytes	    Index of child for this sibling, MSB first.
+ *
+ * <xbyte>	1 byte	    byte value of the sibling.
+ *
+ * <flags>	1 byte	    bitmask of:
+ *			    WF_ALLCAP	word must have only capitals
+ *			    WF_ONECAP   first char of word must be capital
+ *			    WF_RARE	rare word
+ *			    WF_REGION	<region> follows
+ *
+ * <region>	1 byte	    Bitmask for regions in which word is valid.  When
+ *			    omitted it's valid in all regions.
+ *			    Lowest bit is for region 1.
+ *
+ * <KWORDTREE>: <wordtree>
+ *
+ *
+ * All text characters are in 'encoding', but stored as single bytes.
+ * The region name is ASCII.
+ */
+
 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
 # include <io.h>	/* for lseek(), must be before vim.h */
 #endif
@@ -41,21 +107,20 @@
 # include <fcntl.h>
 #endif
 
-#define MAXWLEN 100		/* assume max. word len is this many bytes */
+#define MAXWLEN 250		/* assume max. word len is this many bytes */
 
-/*
- * Structure that is used to store the structures and strings from the
- * language file.  This avoids the need to allocate space for each individual
- * word.  It's allocated in big chunks for speed.  It's freed all at once when
- * 'encoding' changes.
- */
-#define  SBLOCKSIZE 4096	/* default size of sb_data */
-typedef struct sblock_S sblock_T;
-struct sblock_S
-{
-    sblock_T	*sb_next;	/* next block in list */
-    char_u	sb_data[1];	/* data, actually longer */
-};
+/* Flags used for a word. */
+#define WF_REGION   0x01	/* region byte follows */
+#define WF_ONECAP   0x02	/* word with one capital (or all capitals) */
+#define WF_ALLCAP   0x04	/* word must be all capitals */
+#define WF_RARE	    0x08	/* rare word */
+
+#define WF_KEEPCAP  0x100	/* keep-case word */
+
+#define BY_NOFLAGS  0		/* end of word without flags or region */
+#define BY_FLAGS    1		/* end of word, flag byte follows */
+#define BY_INDEX    2		/* child is shared, index follows */
+#define BY_SPECIAL  BY_INDEX	/* highest special byte value */
 
 /* Info from "REP" entries in ".aff" file used in af_rep.
  * TODO: This is not used yet.  Either use it or remove it. */
@@ -66,58 +131,33 @@ typedef struct repentry_S
 } repentry_T;
 
 /*
- * Structure to store affix info.
- */
-typedef struct affitem_S affitem_T;
-struct affitem_S
-{
-    affitem_T	*ai_next;	/* next affix with same ai_add[] or NULL */
-    short_u	ai_nr;		/* affix number */
-    char_u	ai_flags;	/* AFF_ flags */
-    char_u	ai_choplen;	/* length of chop string in bytes */
-    char_u	ai_addlen;	/* length of ai_add in bytes */
-    char_u	ai_leadlen;	/* for AFF_PREWORD: length of lead string */
-    char_u	ai_taillen;	/* for AFF_PREWORD: length of tail string */
-    char_u	ai_add[1];	/* Text added to basic word. This stores:
-				 * 0: word for AFF_PREWORD or whole addition
-				 * ai_addlen + 1: chop string
-				 * + ai_choplen + 1: lead string for AFF_PREWORD
-				 * + ai_leadlen + 1: trail string f. AFF_PREWORD
-				 */
-};
-
-/* Get affitem_T pointer from hashitem that uses ai_add */
-static affitem_T dumai;
-#define HI2AI(hi)	((affitem_T *)((hi)->hi_key - (dumai.ai_add - (char_u *)&dumai)))
-
-/* ai_flags: Affix item flags */
-#define AFF_COMBINE	0x01	/* prefix combines with suffix */
-#define AFF_PREWORD	0x02	/* prefix includes word */
-
-/*
  * Structure used to store words and other info for one language, loaded from
  * a .spl file.
- * The main access is through hashtable "sl_word", using the case-folded
- * word as the key.  This finds a linked list of fword_T.
+ * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
+ * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
+ *
+ * The "byts" array stores the possible bytes in each tree node, preceded by
+ * the number of possible bytes, sorted on byte value:
+ *	<len> <byte1> <byte2> ...
+ * The "idxs" array stores the index of the child node corresponding to the
+ * byte in "byts".
+ * Exception: when the byte is zero, the word may end here and "idxs" holds
+ * the flags and region for the word.  There may be several zeros in sequence
+ * for alternative flag/region combinations.
  */
 typedef struct slang_S slang_T;
 struct slang_S
 {
     slang_T	*sl_next;	/* next language */
     char_u	*sl_name;	/* language name "en", "en.rare", "nl", etc. */
-    hashtab_T	sl_words;	/* main word table, fword_T */
-    int		sl_prefcnt;	/* number of prefix NRs */
-    garray_T	sl_preftab;	/* list of hashtables to lookup prefixes */
-    affitem_T	*sl_prefzero;	/* list of prefixes with zero add length */
-    hashtab_T	sl_prewords;	/* prefixes that include a word */
-    int		sl_suffcnt;	/* number of suffix NRs */
-    garray_T	sl_sufftab;	/* list of hashtables to lookup suffixes */
-    affitem_T	*sl_suffzero;	/* list of suffixes with zero add length */
+    char_u	*sl_fbyts;	/* case-folded word bytes */
+    int		*sl_fidxs;	/* case-folded word indexes */
+    char_u	*sl_kbyts;	/* keep-case word bytes */
+    int		*sl_kidxs;	/* keep-case word indexes */
     char_u	*sl_try;	/* "TRY" from .aff file  TODO: not used */
     garray_T	sl_rep;		/* list of repentry_T entries from REP lines
 				 * TODO not used */
     char_u	sl_regions[17];	/* table with up to 8 region names plus NUL */
-    sblock_T	*sl_block;	/* list with allocated memory blocks */
     int		sl_error;	/* error while loading */
 };
 
@@ -125,57 +165,6 @@ struct slang_S
  * languages. */
 static slang_T *first_lang = NULL;
 
-/*
- * Structure to store an addition to a basic word.
- * There are many of these, keep it small!
- */
-typedef struct addword_S addword_T;
-struct addword_S
-{
-    addword_T	*aw_next;	/* next addition */
-    char_u	aw_flags;	/* ADD_ flags */
-    char_u	aw_region;	/* region for word with this addition */
-    char_u	aw_leadlen;	/* byte length of lead in aw_word */
-    char_u	aw_wordlen;	/* byte length of first word in aw_word */
-    char_u	aw_saveb;	/* saved byte where aw_word[] is truncated at
-				   end of hashtable key; NUL when not using
-				   hashtable */
-    char_u	aw_word[1];	/* text, actually longer: case-folded addition
-				   plus, with ADD_KEEPCAP: keep-case addition */
-};
-
-/* Get addword_T pointer from hashitem that uses aw_word */
-static addword_T dumaw;
-#define HI2ADDWORD(hi)	((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw)))
-
-/*
- * Structure to store a basic word.
- * There are many of these, keep it small!
- * The list of prefix and suffix NRs is stored after "fw_word" to avoid the
- * need for two extra pointers.
- */
-typedef struct fword_S fword_T;
-struct fword_S
-{
-    fword_T	*fw_next;	/* same basic word with different caps and/or
-				 * affixes */
-    addword_T	*fw_adds;	/* first addword_T entry */
-    short_u	fw_flags;	/* BWF_ flags */
-    char_u	fw_region;	/* region bits */
-    char_u	fw_prefixcnt;	/* number of prefix NRs */
-    char_u	fw_suffixcnt;	/* number of suffix NRs */
-    char_u	fw_word[1];	/* actually longer:
-				 * 0:  case folded word or keep-case word when
-				 *     (flags & BWF_KEEPCAP)
-				 * + word length + 1: list of prefix NRs
-				 * + fw_prefixcnt [* 2]: list of suffix NRs
-				 */
-};
-
-/* Get fword_T pointer from hashitem that uses fw_word */
-static fword_T dumfw;
-#define HI2FWORD(hi)	((fword_T *)((hi)->hi_key - (dumfw.fw_word - (char_u *)&dumfw)))
-
 #define REGION_ALL 0xff
 
 
@@ -195,39 +184,7 @@ typedef struct langp_S
 #define SP_RARE		2
 #define SP_LOCAL	3
 
-/* flags used for basic words in the spell file */
-#define BWF_VALID	0x01	    /* word is valid without additions */
-#define BWF_REGION	0x02	    /* region byte follows */
-#define BWF_ONECAP	0x04	    /* first letter must be capital */
-#define BWF_SUFFIX	0x08	    /* has suffix NR list */
-#define BWF_SECOND	0x10	    /* second flags byte follows */
-
-#define BWF_ADDS	0x0100	    /* there are additions */
-#define BWF_PREFIX	0x0200	    /* has prefix NR list */
-#define BWF_ALLCAP	0x0400	    /* all letters must be capital (not used
-				       for single-letter words) */
-#define BWF_KEEPCAP	0x0800	    /* Keep case as-is */
-#define BWF_ADDS_M	0x1000	    /* there are more than 255 additions */
-
-#define BWF_ADDHASH	0x8000	    /* Internal: use hashtab for additions */
-
-#define NOWC_KEY (char_u *)"x"	    /* hashtab key used for additions without
-				       any word character */
-
-/* flags used for addition in the spell file */
-#define ADD_REGION	0x02	    /* region byte follows */
-#define ADD_ONECAP	0x04	    /* first letter must be capital */
-#define ADD_LEADLEN	0x10	    /* there is a leadlen byte */
-#define ADD_COPYLEN	0x20	    /* there is a copylen byte */
-#define ADD_ALLCAP	0x40	    /* all letters must be capital (not used
-				       for single-letter words) */
-#define ADD_KEEPCAP	0x80	    /* fixed case */
-
-/* Translate ADD_ flags to BWF_ flags.
- * (Needed to keep ADD_ flags in one byte.) */
-#define ADD2BWF(x)	(((x) & 0x0f) | (((x) & 0xf0) << 4))
-
-#define VIMSPELLMAGIC "VIMspell04"  /* string at start of Vim spell file */
+#define VIMSPELLMAGIC "VIMspell05"  /* string at start of Vim spell file */
 #define VIMSPELLMAGICL 10
 
 /*
@@ -239,49 +196,33 @@ typedef struct matchinf_S
     slang_T	*mi_slang;		/* info for the language */
 
     /* pointers to original text to be checked */
-    char_u	*mi_line;		/* start of line containing word */
     char_u	*mi_word;		/* start of word being checked */
-    char_u	*mi_end;		/* first non-word char after mi_word */
-    char_u	*mi_wend;		/* end of matching word (is mi_end
-					 * or further) */
+    char_u	*mi_end;		/* end of matching word */
     char_u	*mi_fend;		/* next char to be added to mi_fword */
+    char_u	*mi_cend;		/* char after what was used for
+					   mi_capflags */
 
     /* case-folded text */
     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
-    int		mi_fendlen;		/* byte length of first word in
-					   mi_fword */
-    int		mi_faddlen;		/* byte length of text in mi_fword
-					   after first word */
-    char_u	*mi_cword;		/* word to check, points in mi_fword */
-    char_u	*mi_awend;		/* after next word, to check for
-					   addition (NULL when not done yet) */
-    int		mi_did_awend;		/* did compute mi_awend */
+    int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
 
     /* others */
     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
-    int		mi_capflags;		/* BWF_ONECAP BWF_ALLCAP BWF_KEEPCAP */
+    int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
 } matchinf_T;
 
-static int word_match __ARGS((matchinf_T *mip));
-static int check_adds __ARGS((matchinf_T *mip, fword_T *fw, int req_pref, int req_suf));
-static void fill_awend __ARGS((matchinf_T *mip));
-static void fold_addchars __ARGS((matchinf_T *mip, int addlen));
-static int supports_affix __ARGS((int cnt, char_u *afflist, int afflistlen, int nr));
-static int prefix_match __ARGS((matchinf_T *mip));
-static int noprefix_match __ARGS((matchinf_T *mip, char_u *pword, char_u *cstart, affitem_T *ai));
-static int suffix_match __ARGS((matchinf_T *mip));
-static int match_caps __ARGS((int flags, char_u *caseword, matchinf_T *mip, char_u *cword, char_u *end));
 static slang_T *slang_alloc __ARGS((char_u *lang));
 static void slang_free __ARGS((slang_T *lp));
+static void find_word __ARGS((matchinf_T *mip, int keepcap));
 static slang_T *spell_load_lang __ARGS((char_u *lang));
 static void spell_load_file __ARGS((char_u *fname, void *cookie));
-static void *getroom __ARGS((slang_T *lp, int *bl_used, int len));
+static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx));
 static int find_region __ARGS((char_u *rp, char_u *region));
 static int captype __ARGS((char_u *word, char_u *end));
 
 /*
  * Main spell-checking function.
- * "ptr" points to the start of a word.
+ * "ptr" points to a character that could be the start of a word.
  * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
  * or when it's OK it remains unchanged.
  * This must only be called when 'spelllang' is not empty.
@@ -289,885 +230,328 @@ static int captype __ARGS((char_u *word, char_u *end));
  * caller can skip over the word.
  */
     int
-spell_check(wp, line, ptr, attrp)
+spell_check(wp, ptr, attrp)
     win_T	*wp;		/* current window */
-    char_u	*line;		/* start of line where "ptr" points into */
     char_u	*ptr;
     int		*attrp;
 {
     matchinf_T	mi;		/* Most things are put in "mi" so that it can
 				   be passed to functions quickly. */
 
-    /* Find the end of the word.  We already know that *ptr is a word char. */
+    /* Find the end of the word. */
     mi.mi_word = ptr;
     mi.mi_end = ptr;
-    do
-    {
-	mb_ptr_adv(mi.mi_end);
-    } while (*mi.mi_end != NUL && spell_iswordc(mi.mi_end));
 
-    /* A word starting with a number is always OK. */
+    /* A word starting with a number is always OK.  Also skip hexadecimal
+     * numbers 0xFF99 and 0X99FF. */
     if (*ptr >= '0' && *ptr <= '9')
-	return (int)(mi.mi_end - ptr);
-
-    /* Make case-folded copy of the word. */
-    (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
-    mi.mi_cword = mi.mi_fword;
-    mi.mi_fendlen = STRLEN(mi.mi_fword);
-    mi.mi_faddlen = 0;
-    mi.mi_fend = mi.mi_end;
-
-    /* Check the caps type of the word. */
-    mi.mi_capflags = captype(ptr, mi.mi_end);
-
-    /* The word is bad unless we recognize it. */
-    mi.mi_result = SP_BAD;
-    mi.mi_wend = mi.mi_end;
-
-    mi.mi_awend = NULL;
-    mi.mi_did_awend = FALSE;
-    mi.mi_line = line;
-
-    /*
-     * Loop over the languages specified in 'spelllang'.
-     * We check them all, because a matching word may have additions that are
-     * longer than an already found matching word.
-     */
-    for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
-				       mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
-    {
-	/*
-	 * Check for a matching word.
-	 * If not found or wrong region try removing prefixes (and then
-	 * suffixes).
-	 * If still not found or wrong region try removing suffixes.
-	 */
-	mi.mi_slang = mi.mi_lp->lp_slang;
-	if (!word_match(&mi) || mi.mi_result != SP_OK)
-	    if (!prefix_match(&mi) || mi.mi_result != SP_OK)
-		suffix_match(&mi);
-    }
-
-    if (mi.mi_result != SP_OK)
     {
-	if (mi.mi_result == SP_BAD)
-	    *attrp = highlight_attr[HLF_SPB];
-	else if (mi.mi_result == SP_RARE)
-	    *attrp = highlight_attr[HLF_SPR];
+	if (*ptr == '0' && (ptr[1] == 'x' || ptr[2] == 'X'))
+	    mi.mi_end = skiphex(ptr);
 	else
-	    *attrp = highlight_attr[HLF_SPL];
+	    mi.mi_end = skipdigits(ptr);
     }
-
-    return (int)(mi.mi_wend - ptr);
-}
-
-/*
- * Check if the word "mip->mi_word" matches.
- * "mip->mi_fword" is the same word case-folded;
- *
- * This checks the word as a whole and for prefixes that include a word.
- *
- * Note that when called mi_fword only contains the word up to mip->mi_end,
- * but when checking additions it gets longer.
- */
-    static int
-word_match(mip)
-    matchinf_T *mip;
-{
-    hash_T	fhash = hash_hash(mip->mi_fword);
-    hashitem_T	*hi;
-    fword_T	*fw;
-    int		valid = FALSE;
-    char_u	*p;
-    char_u	pword[MAXWLEN + 1];
-    int		charlen;
-    int		capflags_save;
-    affitem_T	*ai;
-    char_u	*cstart;
-    int		addlen;
-    int		n;
-    char_u	*save_end;
-    int		cc;
-
-    hi = hash_lookup(&mip->mi_slang->sl_words, mip->mi_fword, fhash);
-    if (!HASHITEM_EMPTY(hi))
-    {
-	/*
-	 * Find a basic word for which the case of "mi_word" is correct.
-	 * If it is, check additions and use the longest one.
-	 */
-	for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next)
-	    if (match_caps(fw->fw_flags, fw->fw_word, mip,
-						   mip->mi_word, mip->mi_end))
-		valid |= check_adds(mip, fw, -1, -1);
-    }
-
-    /*
-     * Try finding a matching preword for "mip->mi_word".  These are
-     * prefixes that have a non-word character after a word character:
-     * "d'", "de-", "'s-", "l'de-".  But not "'s".
-     * Also need to do this when a matching word was already found, because we
-     * might find a longer match this way (French: "qu" and "qu'a-t-elle").
-     * The check above may have added characters to mi_fword, thus we need to
-     * truncate it after the basic word for the hash lookup.
-     */
-    cc = mip->mi_fword[mip->mi_fendlen];
-    mip->mi_fword[mip->mi_fendlen] = NUL;
-    hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash);
-    mip->mi_fword[mip->mi_fendlen] = cc;
-    if (!HASHITEM_EMPTY(hi))
+    else
     {
-	capflags_save = mip->mi_capflags;
-
-	/* Go through the list of matching prewords. */
-	for (ai = HI2AI(hi); ai != NULL; ai = ai->ai_next)
+	mi.mi_fend = ptr;
+	if (spell_iswordc(mi.mi_fend))
 	{
-	    /* Check that the lead string matches before the word. */
-	    p = ai->ai_add + ai->ai_addlen + ai->ai_choplen + 2;
-	    if (ai->ai_leadlen > 0)
+	    /* Make case-folded copy of the characters until the next non-word
+	     * character. */
+	    do
 	    {
-		if (mip->mi_word - mip->mi_line < ai->ai_leadlen
-			|| STRNCMP(mip->mi_word - ai->ai_leadlen, p,
-						     ai->ai_leadlen) != 0)
-		    continue;
-		p += ai->ai_leadlen + 1;	/* advance "p" to tail */
-	    }
-	    else
-		++p;			/* advance "p" to tail */
+		mb_ptr_adv(mi.mi_fend);
+	    } while (*mi.mi_fend != NUL && spell_iswordc(mi.mi_fend));
 
-	    /* Check that the tail string matches after the word.  Need
-	     * to fold case first.  */
-	    if (ai->ai_taillen > 0)
-	    {
-		if (ai->ai_taillen >= mip->mi_faddlen)
-		{
-		    fold_addchars(mip, ai->ai_taillen);
-		    if (ai->ai_taillen > mip->mi_faddlen)
-			continue;	/* not enough chars, can't match */
-		}
-		if (STRNCMP(mip->mi_fword + mip->mi_fendlen,
-						  p, ai->ai_taillen) != 0)
-		    continue;
-	    }
+	    (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
+								 MAXWLEN + 1);
+	    mi.mi_fwordlen = STRLEN(mi.mi_fword);
 
-	    /*
-	     * This preword matches.  Remove the preword and check that
-	     * the resulting word exits.
-	     */
-
-	    /* Find the place in the original word where the tail ends,
-	     * needed for case checks. */
-#ifdef FEAT_MBYTE
-	    charlen = mb_charlen(p);
-#else
-	    charlen = ai->ai_taillen;
-#endif
-	    cstart = mip->mi_end;
-	    for (n = 0; n < charlen; ++n)
-		mb_ptr_adv(cstart);
-
-	    /* The new word starts with the chop. Then add up to the next
-	     * non-word char. */
-	    mch_memmove(pword, ai->ai_add + ai->ai_addlen + 1,
-							  ai->ai_choplen);
-	    p = mip->mi_fword + mip->mi_fendlen + ai->ai_taillen;
-	    addlen = ai->ai_taillen;
-	    while (spell_iswordc(p))
-	    {
-		++charlen;
-#ifdef FEAT_MBYTE
-		addlen += (*mb_ptr2len_check)(p);
-#else
-		++addlen;
-#endif
-		mb_ptr_adv(p);
-		if (addlen >= mip->mi_faddlen)
-		{
-		    /* Get more folded characters in mip->mi_fword. */
-		    fold_addchars(mip, addlen);
-		    if (addlen >= mip->mi_faddlen)
-			break;	/* not enough chars, can't match */
-		}
-	    }
-	    mch_memmove(pword + ai->ai_choplen,
-		    mip->mi_fword + mip->mi_fendlen + ai->ai_taillen,
-						 addlen - ai->ai_taillen);
-	    pword[ai->ai_choplen + addlen - ai->ai_taillen] = NUL;
-
-	    /* Need to set mi_end to find additions.  Also set mi_fendlen
-	     * and mi_faddlen. */
-	    save_end = mip->mi_end;
-	    while (--charlen >= 0)
-		mb_ptr_adv(mip->mi_end);
-	    mip->mi_fendlen += addlen;
-	    mip->mi_faddlen -= addlen;
-
-	    /* Find the word "pword", caseword "cstart". */
-	    n = noprefix_match(mip, pword, cstart, ai);
-	    mip->mi_end = save_end;
-	    mip->mi_fendlen -= addlen;
-	    mip->mi_faddlen += addlen;
-	    if (n)
-		valid = TRUE;
+	    /* Check the caps type of the word. */
+	    mi.mi_capflags = captype(ptr, mi.mi_fend);
 
-	    /* If we found a valid word, we still need to try other
-	     * suffixes, because it may have an addition that's longer. */
+	    /* We always use the characters up to the next non-word character,
+	     * also for bad words. */
+	    mi.mi_end = mi.mi_fend;
 	}
-
-	mip->mi_capflags = capflags_save;
-    }
-
-    return valid;
-}
-
-/*
- * Check a matching basic word for additions.
- * Return TRUE if we have a valid match.
- */
-    static int
-check_adds(mip, fw, req_pref, req_suf)
-    matchinf_T	*mip;
-    fword_T	*fw;
-    int		req_pref;	/* required prefix nr, -1 if none */
-    int		req_suf;	/* required suffix nr, -1 if none */
-{
-    int		valid = FALSE;
-    addword_T	*aw;
-    addword_T	*naw = NULL;
-    char_u	*p;
-    int		addlen;
-    int		cc;
-    hashitem_T	*hi;
-    char_u	*cp = NULL;
-    int		n;
-
-    /* Check if required prefixes and suffixes are supported.  These are on
-     * the basic word, not on each addition. */
-    if (req_pref >= 0 || req_suf >= 0)
-    {
-	/* Prefix NRs are stored just after the word in fw_word. */
-	cp = fw->fw_word + STRLEN(fw->fw_word) + 1;
-	if (req_pref >= 0 && !supports_affix(mip->mi_slang->sl_prefcnt,
-					      cp, fw->fw_prefixcnt, req_pref))
-	    return FALSE;
-	if (req_suf >= 0)
+	else
 	{
-	    /* Suffix NRs are stored just after the Prefix NRs. */
-	    if (fw->fw_prefixcnt > 0)
-	    {
-		if (mip->mi_slang->sl_prefcnt > 256)
-		    cp += fw->fw_prefixcnt * 2;
-		else
-		    cp += fw->fw_prefixcnt;
-	    }
-	    if (!supports_affix(mip->mi_slang->sl_suffcnt,
-					       cp, fw->fw_suffixcnt, req_suf))
-		return FALSE;
+	    /* No word characters.  Don't case-fold anything, we may quickly
+	     * find out this is not a word (but it could be!). */
+	    mi.mi_fwordlen = 0;
+	    mi.mi_capflags = 0;
 	}
-    }
 
-    /* A word may be valid without an addition. */
-    if (fw->fw_flags & BWF_VALID)
-    {
-	valid = TRUE;
-	if (mip->mi_result != SP_OK)
-	{
-	    if ((fw->fw_region & mip->mi_lp->lp_region) == 0)
-		mip->mi_result = SP_LOCAL;
-	    else
-		mip->mi_result = SP_OK;
-	}
-	/* Set word end, required when matching a word after a preword. */
-	if (mip->mi_wend < mip->mi_end)
-	    mip->mi_wend = mip->mi_end;
-    }
+	mi.mi_cend = mi.mi_fend;
 
-    /*
-     * Check additions, both before and after the word.
-     * This may make the word longer, thus we also need to check
-     * when we already found a matching word.
-     * When the BWF_ADDHASH flag is present then fw_adds points to a hashtable
-     * for quick lookup.  Otherwise it points to the list of all possible
-     * additions.
-     */
-    if (fw->fw_flags & BWF_ADDHASH)
-    {
-	/* Locate the text up to the next end-of-word. */
-	if (!mip->mi_did_awend)
-	    fill_awend(mip);
-	if (mip->mi_awend == NULL)
-	    return valid;	    /* there is no next word */
-
-	cc = *mip->mi_awend;
-	*mip->mi_awend = NUL;
-	hi = hash_find((hashtab_T *)fw->fw_adds,
-					     mip->mi_fword + mip->mi_fendlen);
-	*mip->mi_awend = cc;
-	if (HASHITEM_EMPTY(hi))
-	    return valid;		/* no matching addition */
-	aw = HI2ADDWORD(hi);
-
-	/* Also check additions without word characters.  If they are there,
-	 * skip the first dummy entry. */
-	hi = hash_find((hashtab_T *)fw->fw_adds, NOWC_KEY);
-	if (!HASHITEM_EMPTY(hi))
-	    naw = HI2ADDWORD(hi)->aw_next;
-    }
-    else
-	aw = fw->fw_adds;
+	/* The word is bad unless we recognize it. */
+	mi.mi_result = SP_BAD;
 
-    for ( ; ; aw = aw->aw_next)
-    {
-	if (aw == NULL)
+	/*
+	 * Loop over the languages specified in 'spelllang'.
+	 * We check them all, because a matching word may be longer than an
+	 * already found matching word.
+	 */
+	for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
+				       mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
 	{
-	    /* At end of list: may also try additions without word chars. */
-	    if (naw == NULL)
-		break;
-	    aw = naw;
-	    naw = NULL;
-	}
+	    /* Check for a matching word in case-folded words. */
+	    find_word(&mi, FALSE);
 
-	if (aw->aw_leadlen > 0)
-	{
-	    /* There is a leader, verify that it matches. */
-	    if (aw->aw_leadlen > mip->mi_word - mip->mi_line
-		    || STRNCMP(mip->mi_word - aw->aw_leadlen,
-					    aw->aw_word, aw->aw_leadlen) != 0)
-		continue;
-	    if (mip->mi_word - aw->aw_leadlen > mip->mi_line)
-	    {
-		/* There must not be a word character just before the
-		 * leader. */
-		p = mip->mi_word - aw->aw_leadlen;
-		mb_ptr_back(mip->mi_line, p);
-		if (spell_iswordc(p))
-		    continue;
-	    }
-	    /* Leader matches.  Addition is rest of "aw_word". */
-	    p = aw->aw_word + aw->aw_leadlen;
+	    /* Try keep-case words
author	Bram Moolenaar <Bram@vim.org>	2005-06-04 21:55:20 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-06-04 21:55:20 +0000
commit	51485f06246966898f7c00e2e53b1ba4c6855cf7 (patch)
tree	c1cfe02ab088ea7a4423f15829e4083303d62a89
parent	4debb442bd885d182d7f77d1dfcdf143fd7cbf88 (diff)