10 files changed, 5335 insertions, 389 deletions
diff --git a/runtime/compiler/splint.vim b/runtime/compiler/splint.vim
index 6f40af9b62..5ba5e036dd 100644
--- a/runtime/compiler/splint.vim
+++ b/runtime/compiler/splint.vim
@@ -2,7 +2,7 @@
 " Compiler:     splint/lclint (C source code checker)
 " Maintainer:   Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
 " Splint Home:	http://www.splint.org/
-" Last Change:  2004 Oct 02
+" Last Change:  2005 Apr 21
 " $Revision$
 
 if exists("current_compiler")
@@ -53,6 +53,7 @@ CompilerSet makeprg=splint\ %
 
 CompilerSet errorformat=%OLCLint*m,
 	\%OSplint*m,
+	\%f(%l\\,%c):\ %m,
 	\%*[\ ]%f:%l:%c:\ %m,
 	\%*[\ ]%f:%l:\ %m,
 	\%*[^\"]\"%f\"%*\\D%l:\ %m,
diff --git a/runtime/doc/usr_03.txt b/runtime/doc/usr_03.txt
index 1690e79aaa..d8e4c506a9 100644
--- a/runtime/doc/usr_03.txt
+++ b/runtime/doc/usr_03.txt
@@ -1,4 +1,4 @@
-*usr_03.txt*	For Vim version 7.0aa.  Last change: 2005 Apr 01
+*usr_03.txt*	For Vim version 7.0aa.  Last change: 2005 Apr 22
 
 		     VIM USER MANUAL - by Bram Moolenaar
 
@@ -601,7 +601,7 @@ The ":jumps" command gives a list of positions you jumped to.  The entry which
 you used last is marked with a ">".
 
 
-NAMED MARKS
+NAMED MARKS							*bookmark*
 
 Vim enables you to place your own marks in the text.  The command "ma" marks
 the place under the cursor as mark a.  You can place 26 marks (a through z) in
diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl
index 96e7288082..156c0979e9 100644
--- a/runtime/spell/en.utf-8.spl
+++ b/runtime/spell/en.utf-8.spl
diff --git a/src/buffer.c b/src/buffer.c
index 993e14d34b..9772336448 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -2390,6 +2390,11 @@ get_winopts(buf)
     if (p_fdls >= 0)
 	curwin->w_p_fdl = p_fdls;
 #endif
+
+#ifdef FEAT_SYN_HL
+    if (curwin->w_p_spell && *buf->b_p_spl != NUL)
+	did_set_spelllang(buf);
+#endif
 }
 
 /*
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index a3d4c145d0..dc33f654b3 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -21,7 +21,12 @@ int vim_iswordc __ARGS((int c));
 int vim_iswordp __ARGS((char_u *p));
 int vim_iswordc_buf __ARGS((char_u *p, buf_T *buf));
 void init_spell_chartab __ARGS((void));
+int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
+int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
+void write_spell_chartab __ARGS((FILE *fd));
 int spell_iswordc __ARGS((char_u *p));
+int spell_isupper __ARGS((int c));
+int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
 int vim_isfilec __ARGS((int c));
 int vim_isprintc __ARGS((int c));
 int vim_isprintc_strict __ARGS((int c));
diff --git a/src/spell.c b/src/spell.c
index cc71b8d7da..e8ac35217c 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -97,6 +97,8 @@ static affitem_T dumai;
 /*
  * Structure used to store words and other info for one language, loaded from
  * a .spl file.
+ * The main access is through hashtable "sl_words", using the case-folded
+ * word as the key.  This finds a linked list of fword_T.
  */
 typedef struct slang_S slang_T;
 struct slang_S
@@ -149,6 +151,8 @@ static addword_T dumaw;
 /*
  * Structure to store a basic word.
  * There are many of these, keep it small!
+ * The list of prefix and suffix NRs is stored after "fw_word" to avoid the
+ * need for two extra pointers.
  */
 typedef struct fword_S fword_T;
 struct fword_S
@@ -223,7 +227,7 @@ typedef struct langp_S
  * (Needed to keep ADD_ flags in one byte.) */
 #define ADD2BWF(x)	(((x) & 0x0f) | (((x) & 0xf0) << 4))
 
-#define VIMSPELLMAGIC "VIMspell03"  /* string at start of Vim spell file */
+#define VIMSPELLMAGIC "VIMspell04"  /* string at start of Vim spell file */
 #define VIMSPELLMAGICL 10
 
 /*
@@ -307,7 +311,7 @@ spell_check(wp, line, ptr, attrp)
 	return (int)(mi.mi_end - ptr);
 
     /* Make case-folded copy of the word. */
-    (void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
+    (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
     mi.mi_cword = mi.mi_fword;
     mi.mi_fendlen = STRLEN(mi.mi_fword);
     mi.mi_faddlen = 0;
@@ -404,6 +408,8 @@ word_match(mip)
      * "d'", "de-", "'s-", "l'de-".  But not "'s".
      * Also need to do this when a matching word was already found, because we
      * might find a longer match this way (French: "qu" and "qu'a-t-elle").
+     * The check above may have added characters to mi_fword, thus we need to
+     * truncate it after the basic word for the hash lookup.
      */
     cc = mip->mi_fword[mip->mi_fendlen];
     mip->mi_fword[mip->mi_fendlen] = NUL;
@@ -772,7 +778,7 @@ fold_addchars(mip, addlen)
 	else
 #endif
 	    l = 1;
-	(void)str_foldcase(mip->mi_fend, l, p + mip->mi_faddlen,
+	(void)spell_casefold(mip->mi_fend, l, p + mip->mi_faddlen,
 				 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen);
 	mip->mi_fend += l;
 	mip->mi_faddlen += STRLEN(p + mip->mi_faddlen);
@@ -992,6 +998,8 @@ suffix_match(mip)
      * Stop checking if there are no suffixes with so many characters.
      */
     sufp = endw;
+    *endw = NUL;	/* truncate after possible suffix */
+
     for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen)
     {
 	/* Move the pointer to the possible suffix back one character, unless
@@ -1012,13 +1020,11 @@ suffix_match(mip)
 	    if (ht->ht_used == 0)
 		continue;
 
-	    *endw = NUL;	/* truncate after possible suffix */
 	    hi = hash_find(ht, sufp);
 	    if (HASHITEM_EMPTY(hi))
 		ai = NULL;
 	    else
 		ai = HI2AI(hi);
-	    *endw = endw_c;
 	}
 
 	if (ai != NULL)
@@ -1027,6 +1033,7 @@ suffix_match(mip)
 	     * we can use. */
 	    tlen = sufp - mip->mi_cword;    /* length of word without suffix */
 	    mch_memmove(pword, mip->mi_cword, tlen);
+	    *endw = endw_c;
 
 	    for ( ; ai != NULL; ai = ai->ai_next)
 	    {
@@ -1068,9 +1075,12 @@ suffix_match(mip)
 		    }
 		}
 	    }
+
+	    *endw = NUL;	/* truncate after possible suffix */
 	}
     }
 
+    *endw = endw_c;
     mip->mi_capflags = capflags_save;
     return FALSE;
 }
@@ -1115,7 +1125,7 @@ match_caps(flags, caseword, mip, cword, end)
 		else
 #endif
 		    c = *p++;
-		if (MB_ISUPPER(c))
+		if (spell_isupper(c))
 		{
 		    if (capflags == 0 || (capflags & BWF_ONECAP))
 		    {
@@ -1460,7 +1470,7 @@ spell_load_file(fname, cookie)
     int		round;
     char_u	*save_sourcing_name = sourcing_name;
     linenr_T	save_sourcing_lnum = sourcing_lnum;
-    int		cnt;
+    int		cnt, ccnt;
     int		choplen;
     int		addlen;
     int		leadlen;
@@ -1474,39 +1484,41 @@ spell_load_file(fname, cookie)
     addword_T	*aw, *naw;
     int		flen;
     int		xlen;
+    char_u	*fol;
 
     fd = fopen((char *)fname, "r");
     if (fd == NULL)
     {
 	EMSG2(_(e_notopen), fname);
-	goto errorend;
+	goto endFAIL;
     }
 
     /* Set sourcing_name, so that error messages mention the file name. */
     sourcing_name = fname;
     sourcing_lnum = 0;
 
-    /* <HEADER>: <fileID> <regioncnt> <regionname> ... */
+    /* <HEADER>: <fileID> <regioncnt> <regionname> ...
+     *		 <charflagslen> <charflags>  <fcharslen> <fchars> */
     for (i = 0; i < VIMSPELLMAGICL; ++i)
 	buf[i] = getc(fd);				/* <fileID> */
     if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
     {
 	EMSG(_("E757: Wrong file ID in spell file"));
-	goto errorend;
+	goto endFAIL;
     }
 
     cnt = getc(fd);					/* <regioncnt> */
-    if (cnt == EOF)
+    if (cnt < 0)
     {
 truncerr:
 	EMSG(_("E758: Truncated spell file"));
-	goto errorend;
+	goto endFAIL;
     }
     if (cnt > 8)
     {
 formerr:
 	EMSG(_("E759: Format error in spell file"));
-	goto errorend;
+	goto endFAIL;
     }
     for (i = 0; i < cnt; ++i)
     {
@@ -1515,8 +1527,39 @@ formerr:
     }
     lp->sl_regions[cnt * 2] = NUL;
 
-    /* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ...
-     * round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ...  */
+    cnt = getc(fd);					/* <charflagslen> */
+    if (cnt > 0)
+    {
+	p = (char_u *)getroom(lp, &bl_used, cnt);
+	if (p == NULL)
+	    goto endFAIL;
+	for (i = 0; i < cnt; ++i)
+	    p[i] = getc(fd);				/* <charflags> */
+
+	ccnt = (getc(fd) << 8) + getc(fd);		/* <fcharslen> */
+	if (ccnt <= 0)
+	    goto formerr;
+	fol = (char_u *)getroom(lp, &bl_used, ccnt + 1);
+	if (fol == NULL)
+	    goto endFAIL;
+	for (i = 0; i < ccnt; ++i)
+	    fol[i] = getc(fd);				/* <fchars> */
+	fol[i] = NUL;
+
+	/* Set the word-char flags and fill spell_isupper() table. */
+	if (set_spell_charflags(p, cnt, fol) == FAIL)
+	    goto formerr;
+    }
+    else
+    {
+	/* When <charflagslen> is zero then <fcharslen> must also be zero. */
+	cnt = (getc(fd) << 8) + getc(fd);
+	if (cnt != 0)
+	    goto formerr;
+    }
+
+    /* round 1: <PREFIXLIST>: <affcount> <affix> ...
+     * round 2: <SUFFIXLIST>: <affcount> <affix> ...  */
     for (round = 1; round <= 2; ++round)
     {
 	affcount = (getc(fd) << 8) + getc(fd);		/* <affcount> */
@@ -1537,9 +1580,6 @@ formerr:
 	    suffm = affcount > 256 ? 2 : 1;
 	}
 
-	i = (getc(fd) << 8) + getc(fd);		/* <afftotcnt> */
-	/* afftotcnt is not used */
-
 	/*
 	 * For each affix NR there can be several affixes.
 	 */
@@ -1555,7 +1595,7 @@ formerr:
 		 *				    <affaddlen> <affadd> */
 		affflags = getc(fd);			/* <affflags> */
 		choplen = getc(fd);			/* <affchoplen> */
-		if (choplen == EOF)
+		if (choplen < 0)
 		    goto truncerr;
 		if (choplen >= MAXWLEN)
 		    goto formerr;
@@ -1563,7 +1603,7 @@ formerr:
 		    buf[i] = getc(fd);
 		buf[i] = NUL;
 		addlen = getc(fd);			/* <affaddlen> */
-		if (addlen == EOF)
+		if (addlen < 0)
 		    goto truncerr;
 		if (affflags & AFF_PREWORD)
 		    xlen = addlen + 2;	/* space for lead and trail string */
@@ -1571,12 +1611,11 @@ formerr:
 		    xlen = 0;
 
 		/* Get room to store the affitem_T, chop and add strings. */
-		p = (char_u *)getroom(lp, &bl_used,
+		ai = (affitem_T *)getroom(lp, &bl_used,
 			     sizeof(affitem_T) + addlen + choplen + 1 + xlen);
-		if (p == NULL)
-		    goto errorend;
+		if (ai == NULL)
+		    goto endFAIL;
 
-		ai = (affitem_T *)p;
 		ai->ai_nr = affnr;
 		ai->ai_flags = affflags;
 		ai->ai_choplen = choplen;
@@ -1596,8 +1635,12 @@ formerr:
 		    int	    l, leadoff, trailoff;
 
 		    /*
-		     * Separate lead and trail string, put word at ai_add, so
-		     * that it can be used as hashtable key.
+		     * A preword is a prefix that's recognized as a word: it
+		     * contains word characters followed by a non-word
+		     * character.
+		     * <affadd> is the whole prefix.  Separate lead and trail
+		     * string, put the word itself at ai_add, so that it can
+		     * be used as hashtable key.
 		     */
 		    /* lead string: up to first word char */
 		    while (*p != NUL && !spell_iswordc(p))
@@ -1623,13 +1666,13 @@ formerr:
 		    hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash);
 		    if (HASHITEM_EMPTY(hi))
 		    {
-			/* First affix with this word, add to hashtable. */
+			/* First preword with this word, add to hashtable. */
 			hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash);
 			ai->ai_next = NULL;
 		    }
 		    else
 		    {
-			/* There already is an affix with this word, link in
+			/* There already is a preword with this word, link in
 			 * the list.  */
 			ai2 = HI2AI(hi);
 			ai->ai_next = ai2->ai_next;
@@ -1660,7 +1703,7 @@ formerr:
 			{
 			    /* Longer affix, need more hashtables. */
 			    if (ga_grow(gap, addlen - gap->ga_len) == FAIL)
-				goto errorend;
+				goto endFAIL;
 
 			    /* Re-allocating ga_data means that an ht_array
 			     * pointing to ht_smallarray becomes invalid.  We
@@ -1733,14 +1776,14 @@ formerr:
 	 */
 	/* Use <nr> bytes from the previous word. */
 	wlen = getc(fd);				/* <nr> */
-	if (wlen == EOF)
+	if (wlen < 0)
 	{
 	    if (widx >= wordcount)	/* normal way to end the file */
 		break;
 	    goto truncerr;
 	}
 
-	/* Read further word bytes until one below 0x20, that must be the
+	/* Read further word bytes until one below 0x20, that one must be the
 	 * flags.  Keep this fast! */
 	for (;;)
 	{
@@ -1760,10 +1803,12 @@ formerr:
 	{
 	    /* Read <caselen> and <caseword> first, its length may differ from
 	     * the case-folded word.  Note: this should only happen after the
-	     * basic word! */
+	     * basic word without KEEPCAP! */
 	    wlen = getc(fd);
 	    if (wlen < 0)
 		goto truncerr;
+	    if (wlen >= MAXWLEN)
+		goto formerr;
 	    for (i = 0; i < wlen; ++i)
 		cbuf[i] = getc(fd);
 	    cbuf[i] = NUL;
@@ -1800,7 +1845,7 @@ formerr:
 	fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen
 							    + (p - affixbuf));
 	if (fw == NULL)
-	    goto errorend;
+	    goto endFAIL;
 	mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1);
 
 	/* Put the affix NRs just after the word, if any. */
@@ -1811,12 +1856,15 @@ formerr:
 	fw->fw_prefixcnt = prefixcnt;
 	fw->fw_suffixcnt = suffixcnt;
 
+	/* We store the word in the hashtable case-folded.  For a KEEPCAP word
+	 * the entry must already exist, because fw_word can't be used as the
+	 * key, it differs from "buf"! */
 	hash = hash_hash(buf);
 	hi = hash_lookup(&lp->sl_words, buf, hash);
 	if (HASHITEM_EMPTY(hi))
 	{
 	    if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL)
-		goto errorend;
+		goto endFAIL;
 	    fw->fw_next = NULL;
 	}
 	else
@@ -1826,7 +1874,7 @@ formerr:
 	    fw2 = HI2FWORD(hi);
 	    fw->fw_next = fw2->fw_next;
 	    fw2->fw_next = fw;
-	    --widx;			/* don't count this one */
+	    --widx;		/* don't count this one as a basic word */
 	}
 
 	if (flags & BWF_REGION)
@@ -1841,15 +1889,20 @@ formerr:
 		adds = (getc(fd) << 8) + getc(fd);	/* <addcnt> */
 	    else
 		adds = getc(fd);			/* <addcnt> */
+	    if (adds < 0)
+		goto formerr;
 
 	    if (adds > 30)
 	    {
-		/* Use a hashtable to loopup the part until the next word end.
+		/* Use a hashtable to lookup the part until the next word end.
+		 * Thus for "de-bur-die" "de" is the basic word, "-bur" is key
+		 * in the addition hashtable, "-bur<NUL>die" the whole
+		 * addition and "aw_saveb" is '-'.
 		 * This uses more memory and involves some overhead, thus only
-		 * do it when there are many additions (e.g., for French).  */
+		 * do it when there are many additions (e.g., for French). */
 		ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T));
 		if (ht == NULL)
-		    goto errorend;
+		    goto endFAIL;
 		hash_init(ht);
 		fw->fw_adds = (addword_T *)ht;
 		fw->fw_flags |= BWF_ADDHASH;
@@ -1860,19 +1913,26 @@ formerr:
 	    else
 		ht = NULL;
 
+	    /*
+	     * Note: uses cbuf[] to copy bytes from previous addition.
+	     */
 	    while (--adds >= 0)
 	    {
 		/* <add>: <addflags> <addlen> [<leadlen>] [<copylen>]
 		 *				[<addstring>] [<region>] */
 		flags = getc(fd);			/* <addflags> */
 		addlen = getc(fd);			/* <addlen> */
-		if (addlen == EOF)
+		if (addlen < 0)
 		    goto truncerr;
 		if (addlen >= MAXWLEN)
 		    goto formerr;
 
 		if (flags & ADD_LEADLEN)
+		{
 		    leadlen = getc(fd);			/* <leadlen> */
+		    if (leadlen > addlen)
+			goto formerr;
+		}
 		else
 		    leadlen = 0;
 
@@ -1891,7 +1951,7 @@ formerr:
 		{
 		    /* <addstring> is in original case, need to get
 		     * case-folded word too. */
-		    (void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN);
+		    (void)spell_casefold(cbuf, addlen, fbuf, MAXWLEN);
 		    flen = addlen - leadlen + 1;
 		    addlen = STRLEN(fbuf);
 		}
@@ -1901,7 +1961,7 @@ formerr:
 		aw = (addword_T *)getroom(lp, &bl_used,
 					   sizeof(addword_T) + addlen + flen);
 		if (aw == NULL)
-		    goto errorend;
+		    goto endFAIL;
 
 		if (flags & ADD_KEEPCAP)
 		{
@@ -1954,7 +2014,7 @@ formerr:
 			    naw = (addword_T *)getroom(lp, &bl_used,
 					sizeof(addword_T) + STRLEN(NOWC_KEY));
 			    if (naw == NULL)
-				goto errorend;
+				goto endFAIL;
 			    STRCPY(naw->aw_word, NOWC_KEY);
 			    hash_add_item(ht, hi, naw->aw_word, hash);
 			    naw->aw_next = aw;
@@ -1994,11 +2054,12 @@ formerr:
 	    }
 	}
     }
-    goto end_OK;
+    goto endOK;
 
-errorend:
+endFAIL:
     lp->sl_error = TRUE;
-end_OK:
+
+endOK:
     if (fd != NULL)
 	fclose(fd);
     hash_unlock(&lp->sl_words);
@@ -2187,7 +2248,7 @@ captype(word, end)
 #else
     c = *p++;
 #endif
-    firstcap = allcap = MB_ISUPPER(c);
+    firstcap = allcap = spell_isupper(c);
 
     /*
      * Need to check all letters to find a word with mixed upper/lower.
@@ -2201,7 +2262,7 @@ captype(word, end)
 #else
 	    c = *p;
 #endif
-	    if (!MB_ISUPPER(c))
+	    if (!spell_isupper(c))
 	    {
 		/* UUl -> KEEPCAP */
 		if (past_second && allcap)
@@ -2345,9 +2406,9 @@ struct basicword_S
     garray_T	bw_prefix;	/* table with prefix numbers */
     garray_T	bw_suffix;	/* table with suffix numbers */
     int		bw_region;	/* region bits */
-    char_u	*bw_caseword;	/* keep-case word */
-    char_u	*bw_leadstring;	/* must come before bw_word */
-    char_u	*bw_addstring;	/* must come after bw_word */
+    char_u	*bw_caseword;	/* keep-case word or NULL */
+    char_u	*bw_leadstring;	/* must come before bw_word or NULL */
+    char_u	*bw_addstring;	/* must come after bw_word or NULL */
     char_u	bw_word[1];	/* actually longer: word case folded */
 };
 
@@ -2391,12 +2452,12 @@ static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr));
 static void clear_affhash __ARGS((hashtab_T *ht));
 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords));
 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask));
+static basicword_T *get_basicword __ARGS((char_u *word, int asize));
 static void combine_regions __ARGS((hashtab_T *newwords));
 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw));
-static void expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap));
-static void expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae));
-static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
-static void put_bytes __ARGS((FILE *fd, long_u nr, int len));
+static int expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap));
+static int expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae));
+static int add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
 static void write_affix __ARGS((FILE *fd, affheader_T *ah));
 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes));
 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars));
@@ -2428,6 +2489,9 @@ spell_read_aff(fname, conv, ascii)
     affheader_T	*cur_aff = NULL;
     int		aff_todo = 0;
     hashtab_T	*tp;
+    char_u	*low = NULL;
+    char_u	*fol = NULL;
+    char_u	*upp = NULL;
 
     fd = fopen((char *)fname, "r");
     if (fd == NULL)
@@ -2449,8 +2513,9 @@ spell_read_aff(fname, conv, ascii)
     /*
      * Read all the lines in the file one by one.
      */
-    while (!vim_fgets(rline, MAXLINELEN, fd))
+    while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
     {
+	line_breakcheck();
 	++lnum;
 
 	/* Skip comment lines. */
@@ -2462,6 +2527,12 @@ spell_read_aff(fname, conv, ascii)
 	if (conv->vc_type != CONV_NONE)
 	{
 	    pc = string_convert(conv, rline, NULL);
+	    if (pc == NULL)
+	    {
+		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
+							   fname, lnum, rline);
+		continue;
+	    }
 	    line = pc;
 	}
 	else
@@ -2587,6 +2658,30 @@ spell_read_aff(fname, conv, ascii)
 		    cur_aff->ah_first = aff_entry;
 		}
 	    }
+	    else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
+	    {
+		if (fol != NULL)
+		    smsg((char_u *)_("Duplicate FOL in %s line %d"),
+								 fname, lnum);
+		else
+		    fol = vim_strsave(items[1]);
+	    }
+	    else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
+	    {
+		if (low != NULL)
+		    smsg((char_u *)_("Duplicate LOW in %s line %d"),
+								 fname, lnum);
+		else
+		    low = vim_strsave(items[1]);
+	    }
+	    else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
+	    {
+		if (upp != NULL)
+		    smsg((char_u *)_("Duplicate UPP in %s line %d"),
+								 fname, lnum);
+		else
+		    upp = vim_strsave(items[1]);
+	    }
 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
 		/* Ignore REP count */;
 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
@@ -2608,6 +2703,18 @@ spell_read_aff(fname, conv, ascii)
 
     }
 
+    if (fol != NULL || low != NULL || upp != NULL)
+    {
+	if (fol == NULL || low == NULL || upp == NULL)
+	    smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
+	else
+	    set_spell_chartab(fol, low, upp);
+
+	vim_free(fol);
+	vim_free(low);
+	vim_free(upp);
+    }
+
     vim_free(pc);
     fclose(fd);
     return aff;
@@ -2720,8 +2827,9 @@ spell_read_dic(ht, fname, conv, ascii)
      * The words are converted to 'encoding' here, before being added to
      * the hashtable.
      */
-    while (!vim_fgets(line, MAXLINELEN, fd))
+    while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
     {
+	line_breakcheck();
 	++lnum;
 
 	/* Remove CR, LF and white space from end. */
@@ -2745,6 +2853,12 @@ spell_read_dic(ht, fname, conv, ascii)
 	if (conv->vc_type != CONV_NONE)
 	{
 	    pc = string_convert(conv, line, NULL);
+	    if (pc == NULL)
+	    {
+		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
+						       fname, lnum, line);
+		continue;
+	    }
 	    w = pc;
 	}
 	else
@@ -2756,7 +2870,10 @@ spell_read_dic(ht, fname, conv, ascii)
 	dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T)
 							     + STRLEN(w));
 	if (dw == NULL)
+	{
+	    vim_free(pc);
 	    break;
+	}
 	STRCPY(dw->dw_word, w);
 	vim_free(pc);
 
@@ -3136,7 +3253,7 @@ trans_affixes(dw, bw, oldaff, newwords)
     char_u	key[2];
     char_u	*p;
     char_u	*affnm;
-    garray_T	*gap;
+    garray_T	*gap, *agap;
     hashitem_T	*aff_hi;
     affheader_T	*ah;
     affentry_T	*ae;
@@ -3144,7 +3261,6 @@ trans_affixes(dw, bw, oldaff, newwords)
     int		i;
     basicword_T *nbw;
     int		alen;
-    int		wlen;
     garray_T	suffixga;	/* list of words with non-word suffixes */
     garray_T	prefixga;	/* list of words with non-word prefixes */
     char_u	nword[MAXWLEN];
@@ -3179,7 +3295,7 @@ trans_affixes(dw, bw, oldaff, newwords)
 	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
 	{
 	    /* Setup for regexp matching.  Note that we don't ignore case.
-	     * This is weird, because he rules in an .aff file don't care
+	     * This is weird, because the rules in an .aff file don't care
 	     * about case, but it's necessary for compatibility with Myspell.
 	     */
 	    regmatch.regprog = ae->ae_prog;
@@ -3190,23 +3306,19 @@ trans_affixes(dw, bw, oldaff, newwords)
 		if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL)
 			&& (gap != &bw->bw_suffix || bw->bw_addstring == NULL))
 		{
-		    /* Affix has a non-word character and isn't prepended to
+		    /*
+		     * Affix has a non-word character and isn't prepended to
 		     * leader or appended to addition.  Need to use another
-		     * word with an addition.  It's a copy of the basicword_T
-		     * "bw". */
-		    if (gap == &bw->bw_suffix)
+		     * word with a leadstring and/or addstring.
+		     */
+		    if (gap == &bw->bw_suffix || ae->ae_add_nw == NULL)
 		    {
-			alen = ae->ae_add_nw - ae->ae_add;
-			nbw = (basicword_T *)alloc((unsigned)(
-				    sizeof(basicword_T) + STRLEN(bw->bw_word)
-								 + alen + 1));
-			if (nbw != NULL)
+			/* Suffix or prefix with only non-word chars.
+			 * Build the new basic word in "nword": Remove chop
+			 * string and append/prepend addition. */
+			if (gap == &bw->bw_suffix)
 			{
-			    *nbw = *bw;
-			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
-			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
-
-			    /* Adding the suffix may change the caps. */
+			    /* suffix goes at the end of the word */
 			    STRCPY(nword, dw->dw_word);
 			    if (ae->ae_chop != NULL)
 			    {
@@ -3217,64 +3329,11 @@ trans_affixes(dw, bw, oldaff, newwords)
 				*p = NUL;
 			    }
 			    STRCAT(nword, ae->ae_add);
-			    flags = captype(nword, nword + STRLEN(nword));
-			    if (flags & BWF_KEEPCAP)
-			    {
-				/* "caseword" excludes the addition */
-				nword[STRLEN(dw->dw_word) + alen] = NUL;
-				nbw->bw_caseword = vim_strsave(nword);
-			    }
-			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
-							       | BWF_KEEPCAP);
-			    nbw->bw_flags |= flags;
-
-			    if (bw->bw_leadstring != NULL)
-				nbw->bw_leadstring =
-					       vim_strsave(bw->bw_leadstring);
-			    nbw->bw_addstring = vim_strsave(ae->ae_add_nw);
-
-			    STRCPY(nbw->bw_word, bw->bw_word);
-			    if (alen > 0 || ae->ae_chop != NULL)
-			    {
-				/* Suffix starts with word character and/or
-				 * chop off something.  Append it to the word.
-				 * Add new word entry. */
-				wlen = STRLEN(nbw->bw_word);
-				if (ae->ae_chop != NULL)
-				    wlen -= STRLEN(ae->ae_chop);
-				mch_memmove(nbw->bw_word + wlen, ae->ae_add,
-									alen);
-				nbw->bw_word[wlen + alen] = NUL;
-				add_to_wordlist(newwords, nbw);
-			    }
-			    else
-				/* Basic word is the same, link "nbw" after
-				 * "bw". */
-				bw->bw_next = nbw;
-
-			    /* Remember this word, we need to set bw_prefix
-			     * and bw_prefix later. */
-			    if (ga_grow(&suffixga, 1) == OK)
-				((basicword_T **)suffixga.ga_data)
-						    [suffixga.ga_len++] = nbw;
+			    agap = &suffixga;
 			}
-		    }
-		    else if (ae->ae_add_nw == NULL)
-		    {
-			/* Prefix that starts with non-word char(s) and may be
-			 * followed by word chars: Make a leadstring and
-			 * prepend word chars before the word. */
-			alen = STRLEN(ae->ae_add_pw);
-			nbw = (basicword_T *)alloc((unsigned)(
-				    sizeof(basicword_T) + STRLEN(bw->bw_word)
-								 + alen + 1));
-			if (nbw != NULL)
+			else
 			{
-			    *nbw = *bw;
-			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
-			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
-
-			    /* Adding the prefix may change the caps. */
+			    /* prefix goes before the word */
 			    STRCPY(nword, ae->ae_add);
 			    p = dw->dw_word;
 			    if (ae->ae_chop != NULL)
@@ -3282,51 +3341,33 @@ trans_affixes(dw, bw, oldaff, newwords)
 				for (i = mb_charlen(ae->ae_chop); i > 0; --i)
 				    mb_ptr_adv( p);
 			    STRCAT(nword, p);
+			    agap = &prefixga;
+			}
 
-			    flags = captype(nword, nword + STRLEN(nword));
-			    if (flags & BWF_KEEPCAP)
-				/* "caseword" excludes the addition */
-				nbw->bw_caseword = vim_strsave(nword
-					      + (ae->ae_add_pw - ae->ae_add));
-			    else
-				nbw->bw_caseword = NULL;
-			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
-							       | BWF_KEEPCAP);
-			    nbw->bw_flags |= flags;
-
-			    if (bw->bw_addstring != NULL)
-				nbw->bw_addstring =
-					       vim_strsave(bw->bw_addstring);
-			    else
-				nbw->bw_addstring = NULL;
-			    nbw->bw_leadstring = vim_strnsave(ae->ae_add,
-						  ae->ae_add_pw - ae->ae_add);
+			/* Create a basicword_T from the word. */
+			nbw = get_basicword(nword, 1);
+			if (nbw != NULL)
+			{
+			    nbw->bw_region = bw->bw_region;
+			    nbw->bw_flags |= bw->bw_flags
+				   & ~(BWF_ONECAP | BWF_ALLCAP | BWF_KEEPCAP);
 
-			    if (alen > 0 || ae->ae_chop != NULL)
-			    {
-				/* Prefix ends in word character and/or chop
-				 * off something.  Prepend it to the word.
-				 * Add new word entry. */
-				STRCPY(nbw->bw_word, ae->ae_add_pw);
-				p = bw->bw_word;
-				if (ae->ae_chop != NULL)
-				    p += STRLEN(ae->ae_chop);
-				STRCAT(nbw->bw_word, p);
-				add_to_wordlist(newwords, nbw);
-			    }
+			    if (STRCMP(bw->bw_word, nbw->bw_word) != 0)
+				/* Basic word differs, add new word entry. */
+				(void)add_to_wordlist(newwords, nbw);
 			    else
 			    {
 				/* Basic word is the same, link "nbw" after
 				 * "bw". */
-				STRCPY(nbw->bw_word, bw->bw_word);
+				nbw->bw_next = bw->bw_next;
 				bw->bw_next = nbw;
 			    }
 
-			    /* Remember this word, we need to set bw_suffix
-			     * and bw_suffix later. */
-			    if (ga_grow(&prefixga, 1) == OK)
-				((basicword_T **)prefixga.ga_data)
-						    [prefixga.ga_len++] = nbw;
+			    /* Remember this word, we need to set bw_prefix
+			     * or bw_suffix later. */
+			    if (ga_grow(agap, 1) == OK)
+				((basicword_T **)agap->ga_data)
+						       [agap->ga_len++] = nbw;
 			}
 		    }
 		    else
@@ -3345,7 +3386,7 @@ trans_affixes(dw, bw, oldaff, newwords)
 #else
 			    n = 1;
 #endif
-			    (void)str_foldcase(p, n, nword + alen,
+			    (void)spell_casefold(p, n, nword + alen,
 							      MAXWLEN - alen);
 			    alen += STRLEN(nword + alen);
 			}
@@ -3393,7 +3434,7 @@ trans_affixes(dw, bw, oldaff, newwords)
 			    else
 				nbw->bw_leadstring = NULL;
 
-			    add_to_wordlist(newwords, nbw);
+			    (void)add_to_wordlist(newwords, nbw);
 
 			    /* Remember this word, we need to set bw_suffix
 			     * and bw_suffix later. */
@@ -3482,17 +3523,6 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
     hashitem_T	*old_hi;
     dicword_T	*dw;
     basicword_T *bw;
-    char_u	foldword[MAXLINELEN];
-    int		leadlen;
-    char_u	leadstring[MAXLINELEN];
-    int		addlen;
-    char_u	addstring[MAXLINELEN];
-    int		dwlen;
-    char_u	*p;
-    int		clen;
-    int		flags;
-    char_u	*cp = NULL;
-    int		l;
     char_u	message[MAXLINELEN + MAXWLEN];
 
     todo = oldwords->ht_used;
@@ -3519,107 +3549,15 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
 		    break;
 	    }
 
-	    /* The basic words are always stored with folded case. */
-	    dwlen = STRLEN(dw->dw_word);
-	    (void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN);
-	    flags = captype(dw->dw_word, dw->dw_word + dwlen);
-
-	    /* Check for non-word characters before the word. */
-	    clen = 0;
-	    leadlen = 0;
-	    if (!spell_iswordc(foldword))
-	    {
-		p = foldword;
-		for (;;)
-		{
-		    mb_ptr_adv(p);
-		    ++clen;
-		    if (*p == NUL)	/* Only non-word chars (bad word!) */
-		    {
-			if (p_verbose > 0)
-			    smsg((char_u *)_("Warning: word without word characters: \"%s\""),
-								    foldword);
-			break;
-		    }
-		    if (spell_iswordc(p))
-		    {
-			/* Move the leader to "leadstring" and remove it from
-			 * "foldword". */
-			leadlen = p - foldword;
-			mch_memmove(leadstring, foldword, leadlen);
-			leadstring[leadlen] = NUL;
-			mch_memmove(foldword, p, STRLEN(p) + 1);
-			break;
-		    }
-		}
-	    }
-
-	    /* Check for non-word characters after word characters. */
-	    addlen = 0;
-	    for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
-	    {
-		if (*p == NUL)
-		    break;
-		++clen;
-	    }
-	    if (*p != NUL)
-	    {
-		/* Move the addition to "addstring" and truncate "foldword". */
-		if (flags & BWF_KEEPCAP)
-		{
-		    /* Preserve caps, need to skip the right number of
-		     * characters in the original word (case folding may
-		     * change the byte count). */
-		    l = 0;
-		    for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp))
-			++l;
-		    addlen = STRLEN(cp);
-		    mch_memmove(addstring, cp, addlen + 1);
-		}
-		else
-		{
-		    addlen = STRLEN(p);
-		    mch_memmove(addstring, p, addlen + 1);
-		}
-		*p = NUL;
-	    }
-
-	    bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
-							  + STRLEN(foldword));
+	    bw = get_basicword(dw->dw_word, 10);
 	    if (bw == NULL)
 		break;
-	    STRCPY(bw->bw_word, foldword);
 	    bw->bw_region = regionmask;
 
-	    if (leadlen > 0)
-		bw->bw_leadstring = vim_strsave(leadstring);
-	    else
-		bw->bw_leadstring = NULL;
-	    if (addlen > 0)
-		bw->bw_addstring = vim_strsave(addstring);
-	    else
-		bw->bw_addstring = NULL;
-
-	    add_to_wordlist(newwords, bw);
-
-	    if (flags & BWF_KEEPCAP)
-	    {
-		if (addlen == 0)
-		    /* use the whole word */
-		    bw->bw_caseword = vim_strsave(dw->dw_word + leadlen);
-		else
-		    /* use only up to the addition */
-		    bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen,
-						  cp - dw->dw_word - leadlen);
-		if (bw->bw_caseword == NULL)	/* out of memory */
-		    flags &= ~BWF_KEEPCAP;
-	    }
-	    bw->bw_flags = flags;
+	    (void)add_to_wordlist(newwords, bw);