updated for version 7.0067

author: Bram Moolenaar <Bram@vim.org> 2005-04-17 20:18:43 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-04-17 20:18:43 +0000
commit: 5482f33f78d469b2ace7a5aa9af90d022915f3ec (patch)
tree: c31dbec0e219d0b40ef5d455f0c3a0497ef2b645
parent: 075757c932e00bad7f80325f2f944e4e1ed4a223 (diff)
2 files changed, 327 insertions, 944 deletions
diff --git a/Makefile b/Makefile
index afd8ad64c4..7e916edb24 100644
--- a/Makefile
+++ b/Makefile
@@ -282,6 +282,7 @@ unixrt: dist prepare
 	tar cf - \
 		$(RT_SCRIPTS) \
 		$(LANG_GEN) \
+		$(LANG_GEN_BIN) \
 		| (cd dist/$(VIMRTDIR); tar xf -)
 	cd dist && tar cf $(VIMVER)-rt2.tar $(VIMRTDIR)
 	gzip -9 dist/$(VIMVER)-rt2.tar
@@ -439,6 +440,7 @@ dosrt_unix2dos: dist prepare no_title.vim
 		$(RT_UNIX_DOS_BIN) \
 		$(RT_ALL_BIN) \
 		$(RT_DOS_BIN) \
+		$(LANG_GEN_BIN) \
 		| (cd dist/vim/$(VIMRTDIR); tar xf -)
 	mv dist/vim/$(VIMRTDIR)/runtime/* dist/vim/$(VIMRTDIR)
 	rmdir dist/vim/$(VIMRTDIR)/runtime
diff --git a/src/spell.c b/src/spell.c
index e3a97fc833..9f1f00c007 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -903,6 +903,13 @@ spell_load_lang(lang)
 	sprintf((char *)fname_enc, "spell/%s.%s.spl", lang, p);
 
 	r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
+	if (r == FAIL && !lp->sl_error)
+	{
+	    /* Try loading the ASCII version. */
+	    sprintf((char *)fname_enc, "spell/%s.ascii.spl", lang);
+
+	    r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
+	}
 	if (r == FAIL || lp->sl_error)
 	{
 	    slang_free(lp);
@@ -1001,8 +1008,8 @@ spell_load_file(fname, cookie)
     int		affitemcnt;
     int		bl_used = SBLOCKSIZE;
     int		widx;
-    int		prefm;	    /* 1 if <= 256 prefixes, sizeof(short_u) otherw. */
-    int		suffm;	    /* 1 if <= 256 suffixes, sizeof(short_u) otherw. */
+    int		prefm = 0;  /* 1 if <= 256 prefixes, sizeof(short_u) otherw. */
+    int		suffm = 0;  /* 1 if <= 256 suffixes, sizeof(short_u) otherw. */
     int		wlen;
     int		flags;
     affitem_T	*ai, *ai2, **aip;
@@ -1480,6 +1487,7 @@ did_set_spelllang(buf)
 	e = vim_strchr(lang, ',');
 	if (e == NULL)
 	    e = lang + STRLEN(lang);
+	region = NULL;
 	if (e > lang + 2)
 	{
 	    if (e - lang >= MAXWLEN)
@@ -1490,8 +1498,6 @@ did_set_spelllang(buf)
 	    if (lang[2] == '_')
 		region = lang + 3;
 	}
-	else
-	    region = NULL;
 
 	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
 	    if (STRNICMP(lp->sl_name, lang, 2) == 0)
@@ -1726,7 +1732,13 @@ struct affentry_S
     affentry_T	*ae_next;	/* next affix with same name/number */
     char_u	*ae_chop;	/* text to chop off basic word (can be NULL) */
     char_u	*ae_add;	/* text to add to basic word (can be NULL) */
-    char_u	*ae_add_nw;	/* first non-word character in "ae_add" */
+    char_u	*ae_add_nw;	/* For a suffix: first non-word char in
+				 * "ae_add"; for a prefix with only non-word
+				 * chars: equal to "ae_add", for a prefix with
+				 * word and non-word chars: first non-word
+				 * char after word char.  NULL otherwise. */
+    char_u	*ae_add_pw;	/* For a prefix with both word and non-word
+			         * chars: first word char.  NULL otherwise. */
     char_u	*ae_cond;	/* condition (NULL for ".") */
     regprog_T	*ae_prog;	/* regexp program for ae_cond or NULL */
     short_u	ae_affnr;	/* for old affix: new affix number */
@@ -1778,10 +1790,11 @@ static affhash_T dumas;
 #define HI2AS(hi)	((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas)))
 
 
-static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv));
+static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii));
 static void spell_free_aff __ARGS((afffile_T *aff));
-static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv));
-static int get_new_aff __ARGS((hashtab_T *oldaff, garray_T *gap));
+static int has_non_ascii __ARGS((char_u *s));
+static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii));
+static int get_new_aff __ARGS((hashtab_T *oldaff, garray_T *gap, int prefix));
 static void spell_free_dic __ARGS((hashtab_T *dic));
 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2));
 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr));
@@ -1801,15 +1814,17 @@ static void write_bword __ARGS((FILE *fd, basicword_T *bw, int lowcap, basicword
 static void free_wordtable __ARGS((hashtab_T *ht));
 static void free_basicword __ARGS((basicword_T *bw));
 static void free_affixentries __ARGS((affentry_T *first));
+static void free_affix_entry __ARGS((affentry_T *ap));
 
 /*
  * Read an affix ".aff" file.
  * Returns an afffile_T, NULL for failure.
  */
     static afffile_T *
-spell_read_aff(fname, conv)
+spell_read_aff(fname, conv, ascii)
     char_u	*fname;
     vimconv_T	*conv;		/* info for encoding conversion */
+    int		ascii;		/* Only accept ASCII characters */
 {
     FILE	*fd;
     afffile_T	*aff;
@@ -1895,7 +1910,7 @@ spell_read_aff(fname, conv)
 		{
 		    /* Setup for conversion from "ENC" to 'encoding'. */
 		    aff->af_enc = enc_canonize(items[1]);
-		    if (aff->af_enc != NULL
+		    if (aff->af_enc != NULL && !ascii
 			    && convert_setup(conv, aff->af_enc, p_enc) == FAIL)
 			smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
 						   fname, aff->af_enc, p_enc);
@@ -1952,8 +1967,7 @@ spell_read_aff(fname, conv)
 						(unsigned)sizeof(affentry_T));
 		if (aff_entry == NULL)
 		    break;
-		aff_entry->ae_next = cur_aff->ah_first;
-		cur_aff->ah_first = aff_entry;
+
 		if (STRCMP(items[2], "0") != 0)
 		    aff_entry->ae_chop = vim_strsave(items[2]);
 		if (STRCMP(items[3], "0") != 0)
@@ -1969,6 +1983,19 @@ spell_read_aff(fname, conv)
 			sprintf((char *)buf, "%s$", items[4]);
 		    aff_entry->ae_prog = vim_regcomp(buf, RE_MAGIC + RE_STRING);
 		}
+
+		if (ascii && (has_non_ascii(aff_entry->ae_chop)
+					  || has_non_ascii(aff_entry->ae_add)))
+		{
+		    /* Don't use an affix entry with non-ASCII characters when
+		     * "ascii" is TRUE. */
+		    free_affix_entry(aff_entry);
+		}
+		else
+		{
+		    aff_entry->ae_next = cur_aff->ah_first;
+		    cur_aff->ah_first = aff_entry;
+		}
 	    }
 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
 		/* Ignore REP count */;
@@ -1997,6 +2024,23 @@ spell_read_aff(fname, conv)
 }
 
 /*
+ * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
+ * Return FALSE when "s" is NULL or when no such character is found.
+ */
+    static int
+has_non_ascii(s)
+    char_u	*s;		/* NUL-terminated string to scan; may be NULL */
+{
+    char_u	*p;
+
+    if (s != NULL)
+	for (p = s; *p != NUL; ++p)	/* stop at the terminating NUL */
+	    if (*p >= 128)		/* value outside the 0..127 range */
+		return TRUE;
+    return FALSE;
+}
+
+/*
  * Free the structure filled by spell_read_aff().
  */
     static void
@@ -2049,10 +2093,11 @@ spell_free_aff(aff)
  * Each entry in the hashtab_T is a dicword_T.
  */
     static int
-spell_read_dic(ht, fname, conv)
+spell_read_dic(ht, fname, conv, ascii)
     hashtab_T	*ht;
     char_u	*fname;
     vimconv_T	*conv;		/* info for encoding conversion */
+    int		ascii;		/* only accept ASCII words */
 {
     char_u	line[MAXLINELEN];
     char_u	*p;
@@ -2102,6 +2147,10 @@ spell_read_dic(ht, fname, conv)
 	if (p != NULL)
 	    *p++ = NUL;
 
+	/* Skip non-ASCII words when "ascii" is TRUE. */
+	if (ascii && has_non_ascii(line))
+	    continue;
+
 	/* Convert from "SET" to 'encoding' when needed. */
 	if (conv->vc_type != CONV_NONE)
 	{
@@ -2170,9 +2219,11 @@ spell_free_dic(dic)
  * Returns OK or FAIL;
  */
     static int
-get_new_aff(oldaff, gap)
+get_new_aff(oldaff, gap, prefix)
     hashtab_T	*oldaff;	/* hashtable with affheader_T */
     garray_T	*gap;		/* table with new affixes */
+    int		prefix;		/* TRUE when doing prefixes, FALSE for
+				   suffixes */
 {
     int		oldtodo;
     affheader_T	*oldah, *newah, *gapah;
@@ -2218,17 +2269,44 @@ get_new_aff(oldaff, gap)
 						       oldae = oldae->ae_next)
 	    {
 		oldae->ae_add_nw = NULL;
+		oldae->ae_add_pw = NULL;
 		if (oldae->ae_add != NULL)
 		{
-		    /* Check for non-word characters in the suffix.  If there
+		    /* Check for non-word characters in the affix.  If there
 		     * is one this affix will be turned into an addition.
 		     * This is stored with the old affix, that is where
 		     * trans_affixes() will check. */
 		    for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
 			if (!spell_iswordc(p))
+			{
+			    oldae->ae_add_nw = p;
 			    break;
-		    if (*p != NUL)
-			oldae->ae_add_nw = p;
+			}
+
+		    if (prefix && oldae->ae_add_nw != NULL)
+		    {
+			/* If a prefix has both word and non-word characters
+			 * special treatment is necessary.  If it has only
+			 * non-word characters it becomes a leadstring. */
+			for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
+			    if (spell_iswordc(p))
+			    {
+				oldae->ae_add_pw = p;
+				break;
+			    }
+			if (oldae->ae_add_pw != NULL)
+			{
+			    /* Mixed prefix, set ae_add_nw to first non-word
+			     * char after ae_add_pw (if there is one). */
+			    oldae->ae_add_nw = NULL;
+			    for ( ; *p != NUL; mb_ptr_adv(p))
+				if (!spell_iswordc(p))
+				{
+				    oldae->ae_add_nw = p;
+				    break;
+				}
+			}
+		    }
 		}
 
 		if (oldae->ae_cond == NULL)
@@ -2458,12 +2536,14 @@ trans_affixes(dw, bw, oldaff, newwords)
     basicword_T *nbw;
     int		alen;
     int		wlen;
-    garray_T	fixga;
+    garray_T	suffixga;	/* list of words with non-word suffixes */
+    garray_T	prefixga;	/* list of words with non-word prefixes */
     char_u	nword[MAXWLEN];
     int		flags;
     int		n;
 
-    ga_init2(&fixga, (int)sizeof(basicword_T *), 5);
+    ga_init2(&suffixga, (int)sizeof(basicword_T *), 5);
+    ga_init2(&prefixga, (int)sizeof(basicword_T *), 5);
 
     /* Loop over all the affix names of the old word. */
     key[1] = NUL;
@@ -2494,8 +2574,8 @@ trans_affixes(dw, bw, oldaff, newwords)
 	    if (ae->ae_prog == NULL
 			   || vim_regexec(&regmatch, dw->dw_word, (colnr_T)0))
 	    {
-		if (ae->ae_add_nw != NULL && (gap == &bw->bw_suffix
-			    ? bw->bw_addstring : bw->bw_leadstring) == NULL)
+		if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL)
+			&& (gap != &bw->bw_suffix || bw->bw_addstring == NULL))
 		{
 		    /* Affix has a non-word character and isn't prepended to
 		     * leader or appended to addition.  Need to use another
@@ -2527,6 +2607,7 @@ trans_affixes(dw, bw, oldaff, newwords)
 			    flags = captype(nword, nword + STRLEN(nword));
 			    if (flags & BWF_KEEPCAP)
 			    {
+				/* "caseword" excludes the addition */
 				nword[STRLEN(dw->dw_word) + alen] = NUL;
 				nbw->bw_caseword = vim_strsave(nword);
 			    }
@@ -2542,8 +2623,9 @@ trans_affixes(dw, bw, oldaff, newwords)
 			    STRCPY(nbw->bw_word, bw->bw_word);
 			    if (alen > 0 || ae->ae_chop != NULL)
 			    {
-				/* Suffix starts with word character.  Append
-				 * it to the word.  Add new word entry. */
+				/* Suffix starts with word character and/or
+				 * chop off something.  Append it to the word.
+				 * Add new word entry. */
 				wlen = STRLEN(nbw->bw_word);
 				if (ae->ae_chop != NULL)
 				    wlen -= STRLEN(ae->ae_chop);
@@ -2558,15 +2640,154 @@ trans_affixes(dw, bw, oldaff, newwords)
 				bw->bw_next = nbw;
 
 			    /* Remember this word, we need to set bw_prefix
+			     * later. */
+			    if (ga_grow(&suffixga, 1) == OK)
+				((basicword_T **)suffixga.ga_data)
+						    [suffixga.ga_len++] = nbw;
+			}
+		    }
+		    else if (ae->ae_add_nw == NULL)
+		    {
+			/* Prefix that starts with non-word char(s) and may be
+			 * followed by word chars: Make a leadstring and
+			 * prepend word chars before the word. */
+			alen = STRLEN(ae->ae_add_pw);
+			nbw = (basicword_T *)alloc((unsigned)(
+				    sizeof(basicword_T) + STRLEN(bw->bw_word)
+								 + alen + 1));
+			if (nbw != NULL)
+			{
+			    *nbw = *bw;
+			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
+			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
+
+			    /* Adding the prefix may change the caps. */
+			    STRCPY(nword, ae->ae_add);
+			    p = dw->dw_word;
+			    if (ae->ae_chop != NULL)
+				/* Skip chop string. */
+				for (i = mb_charlen(ae->ae_chop); i > 0; --i)
+				    mb_ptr_adv( p);
+			    STRCAT(nword, p);
+
+			    flags = captype(nword, nword + STRLEN(nword));
+			    if (flags & BWF_KEEPCAP)
+				/* "caseword" excludes the leadstring */
+				nbw->bw_caseword = vim_strsave(nword
+					      + (ae->ae_add_pw - ae->ae_add));
+			    else
+				nbw->bw_caseword = NULL;
+			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
+							       | BWF_KEEPCAP);
+			    nbw->bw_flags |= flags;
+
+			    if (bw->bw_addstring != NULL)
+				nbw->bw_addstring =
+					       vim_strsave(bw->bw_addstring);
+			    else
+				nbw->bw_addstring = NULL;
+			    nbw->bw_leadstring = vim_strnsave(ae->ae_add,
+						  ae->ae_add_pw - ae->ae_add);
+
+			    if (alen > 0 || ae->ae_chop != NULL)
+			    {
+				/* Prefix ends in word character and/or chop
+				 * off something.  Prepend it to the word.
+				 * Add new word entry. */
+				STRCPY(nbw->bw_word, ae->ae_add_pw);
+				p = bw->bw_word;
+				if (ae->ae_chop != NULL)
+				    p += STRLEN(ae->ae_chop);
+				STRCAT(nbw->bw_word, p);
+				add_to_wordlist(newwords, nbw);
+			    }
+			    else
+			    {
+				/* Basic word is the same, link "nbw" after
+				 * "bw". */
+				STRCPY(nbw->bw_word, bw->bw_word);
+				bw->bw_next = nbw;
+			    }
+
+			    /* Remember this word, we need to set bw_suffix
 			     * and bw_suffix later. */
-			    if (ga_grow(&fixga, 1) == OK)
-				((basicword_T **)fixga.ga_data)[fixga.ga_len++]
-									= nbw;
+			    if (ga_grow(&prefixga, 1) == OK)
+				((basicword_T **)prefixga.ga_data)
+						    [prefixga.ga_len++] = nbw;
 			}
 		    }
 		    else
 		    {
-			/* TODO: prefix with non-word char */
+			/* Prefix with both non-word and word characters: Turn
+			 * prefix into basic word, original word becomes an
+			 * addstring. */
+
+			/* Fold-case the word characters in the prefix into
+			 * nword[]. */
+			alen = 0;
+			for (p = ae->ae_add_pw; p < ae->ae_add_nw; p += n)
+			{
+#ifdef FEAT_MBYTE
+			    n = (*mb_ptr2len_check)(p);
+#else
+			    n = 1;
+#endif
+			    (void)str_foldcase(p, n, nword + alen,
+							      MAXWLEN - alen);
+			    alen += STRLEN(nword + alen);
+			}
+
+			/* Allocate a new word entry. */
+			nbw = (basicword_T *)alloc((unsigned)(
+					     sizeof(basicword_T) + alen + 1));
+			if (nbw != NULL)
+			{
+			    *nbw = *bw;
+			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
+			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
+
+			    mch_memmove(nbw->bw_word, nword, alen);
+			    nbw->bw_word[alen] = NUL;
+
+			    /* Use the cap type of the prefix. */
+			    alen = ae->ae_add_nw - ae->ae_add_pw;
+			    mch_memmove(nword, ae->ae_add_pw, alen);
+			    nword[alen] = NUL;
+			    flags = captype(nword, nword + STRLEN(nword));
+			    if (flags & BWF_KEEPCAP)
+				nbw->bw_caseword = vim_strsave(nword);
+			    else
+				nbw->bw_caseword = NULL;
+			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
+							       | BWF_KEEPCAP);
+			    nbw->bw_flags |= flags;
+
+			    /* The addstring is the prefix after the word
+			     * characters, the original word excluding "chop",
+			     * plus any addition. */
+			    STRCPY(nword, ae->ae_add_nw);
+			    p = bw->bw_word;
+			    if (ae->ae_chop != NULL)
+				p += STRLEN(ae->ae_chop);
+			    STRCAT(nword, p);
+			    if (bw->bw_addstring != NULL)
+				STRCAT(nword, bw->bw_addstring);
+			    nbw->bw_addstring = vim_strsave(nword);
+
+			    if (ae->ae_add_pw > ae->ae_add)
+				nbw->bw_leadstring = vim_strnsave(ae->ae_add,
+						  ae->ae_add_pw - ae->ae_add);
+			    else
+				nbw->bw_leadstring = NULL;
+
+			    add_to_wordlist(newwords, nbw);
+
+			    /* Remember this word, we need to set bw_suffix
+			     * later. */
+			    if (ga_grow(&prefixga, 1) == OK)
+				((basicword_T **)prefixga.ga_data)
+						    [prefixga.ga_len++] = nbw;
+			}
 		    }
 		}
 		else
@@ -2601,11 +2822,10 @@ trans_affixes(dw, bw, oldaff, newwords)
     /*
      * For the words that we added for suffixes with non-word characters: Use
      * the prefix list of the main word.
-     * TODO: do the same for prefixes.
      */
-    for (i = 0; i < fixga.ga_len; ++i)
+    for (i = 0; i < suffixga.ga_len; ++i)
     {
-	nbw = ((basicword_T **)fixga.ga_data)[i];
+	nbw = ((basicword_T **)suffixga.ga_data)[i];
 	if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
 	{
 	    mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
@@ -2614,7 +2834,23 @@ trans_affixes(dw, bw, oldaff, newwords)
 	}
     }
 
-    ga_clear(&fixga);
+    /*
+     * For the words that we added for prefixes with non-word characters: Use
+     * the suffix list of the main word.
+     */
+    for (i = 0; i < prefixga.ga_len; ++i)
+    {
+	nbw = ((basicword_T **)prefixga.ga_data)[i];
+	if (ga_grow(&nbw->bw_suffix, bw->bw_suffix.ga_len) == OK)
+	{
+	    mch_memmove(nbw->bw_suffix.ga_data, bw->bw_suffix.ga_data,
+				      bw->bw_suffix.ga_len * sizeof(short_u));
+	    nbw->bw_suffix.ga_len = bw->bw_suffix.ga_len;
+	}
+    }
+
+    ga_clear(&suffixga);
+    ga_clear(&prefixga);
 }
 
 /*
@@ -2642,8 +2878,9 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
     char_u	*p;
     int		clen;
     int		flags;
-    char_u	*cp;
+    char_u	*cp = NULL;
     int		l;
+    char_u	message[MAXLINELEN + MAXWLEN];
 
     todo = oldwords->ht_used;
     for (old_hi = oldwords->ht_array; todo > 0; ++old_hi)
@@ -2654,14 +2891,15 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
 	    dw = HI2DW(old_hi);
 
 	    /* This takes time, print a message now and then. */
-	    if ((todo & 0x3ff) == 0 || todo == oldwords->ht_used - 1)
+	    if ((todo & 0x3ff) == 0 || todo == (int)oldwords->ht_used - 1)
 	    {
-		if (todo != oldwords->ht_used - 1)
-		{
-		    msg_didout = FALSE;
-		    msg_col = 0;
-		}
-		smsg((char_u *)_("%6d todo - %s"), todo, dw->dw_word);
+		sprintf((char *)message, _("%6d todo - %s"),
+							   todo, dw->dw_word);
+		msg_start();
+		msg_outtrans_attr(message, 0);
+		msg_clr_eos();
+		msg_didout = FALSE;
+		msg_col = 0;
 		out_flush();
 		ui_breakcheck();
 		if (got_int)
@@ -2874,6 +3112,7 @@ expand_affixes(newwords, prefgap, suffgap)
     affentry_T	*pae, *sae;
     garray_T	add_words;
     int		n;
+    char_u	message[MAXLINELEN + MAXWLEN];
 
     ga_init2(&add_words, sizeof(basicword_T *), 10);
 
@@ -2883,6 +3122,23 @@ expand_affixes(newwords, prefgap, suffgap)
 	if (!HASHITEM_EMPTY(hi))
 	{
 	    --todo;
+
+	    /* This takes time, print a message now and then. */
+	    if ((todo & 0x3ff) == 0 || todo == (int)newwords->ht_used - 1)
+	    {
+		sprintf((char *)message, _("%6d todo - %s"),
+						    todo, HI2BW(hi)->bw_word);
+		msg_start();
+		msg_outtrans_attr(message, 0);
+		msg_clr_eos();
+		msg_didout = FALSE;
+		msg_col = 0;
+		out_flush();
+		ui_breakcheck();
+		if (got_int)
+		    break;
+	    }
+
 	    for (bw = HI2BW(hi); bw != NULL; bw = bw->bw_next)
 	    {
 		/*
@@ -3318,7 +3574,7 @@ write_vim_spell(fname, prefga, suffga, newwords, regcount, regchars)
     char_u	**wtab;
     int		todo;
     int		flags, aflags;
-    basicword_T	*bw, *bwf, *bw2, *prevbw = NULL;
+    basicword_T	*bw, *bwf, *bw2 = NULL, *prevbw = NULL;
     int		regionmask;	/* mask for all relevant region bits */
     int		i;
     int		cnt;
@@ -3397,7 +3653,7 @@ write_vim_spell(fname, prefga, suffga, newwords, regcount, regchars)
 
 	/* Now write each basic word to the spell file. */
 	ga_init2(&bwga, sizeof(basicword_T *), 10);
-	for (todo = 0; todo < newwords->ht_used; ++todo)
+	for (todo = 0; (long_u)todo < newwords->ht_used; ++todo)
 	{
 	    bwf = KEY2BW(wtab[todo]);
 
@@ -3661,9 +3917,17 @@ ex_mkspell(eap)
     struct stat	st;
     int		round;
     vimconv_T	conv;
+    int		ascii = FALSE;
+    char_u	*arg = eap->arg;
 
-    /* Expand all the arguments (e.g., $VIMRUNTIME). */
-    if (get_arglist_exp(eap->arg, &fcount, &fnames) == FAIL)
+    if (STRNCMP(arg, "-ascii", 6) == 0)
+    {
+	ascii = TRUE;
+	arg = skipwhite(arg + 6);
+    }
+
+    /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
+    if (get_arglist_exp(arg, &fcount, &fnames) == FAIL)
 	return;
     if (fcount < 2)
 	EMSG(_(e_invarg));	/* need at least output and input names */
@@ -3673,7 +3937,8 @@ ex_mkspell(eap)
     {
 	/* Check for overwriting before doing things that may take a lot of
 	 * time. */
-	sprintf((char *)wfname, "%s.%s.spl", fnames[0], p_enc);
+	sprintf((char *)wfname, "%s.%s.spl", fnames[0],
+					   ascii ? (char_u *)"ascii" : p_enc);
 	if (!eap->forceit && mch_stat((char *)wfname, &st) >= 0)
 	{
 	    EMSG(_(e_exists));
@@ -3719,12 +3984,12 @@ ex_mkspell(eap)
 	    /* Read the .aff file.  Will init "conv" based on the "SET" line. */
 	    conv.vc_type = CONV_NONE;
 	    sprintf((char *)fname, "%s.aff", fnames[i]);
-	    if ((afile[i - 1] = spell_read_aff(fname, &conv)) == NULL)
+	    if ((afile[i - 1] = spell_read_aff(fname, &conv, ascii)) == NULL)
 		break;
 
 	    /* Read the .dic file. */
 	    sprintf((char *)fname, "%s.dic", fnames[i]);
-	    if (spell_read_dic(&dfile[i - 1], fname, &conv) == FAIL)
+	    if (spell_read_dic(&dfile[i - 1], fname, &conv, ascii) == FAIL)
 		break;
 
 	    /* Free any conversion stuff. */
@@ -3755,7 +4020,8 @@ ex_mkspell(eap)
 		ga_init2(gap, sizeof(affheader_T), 50);
 		for (i = 1; i < fcount; ++i)
 		    get_new_aff(round == 1 ? &afile[i - 1]->af_pref
-					   : &afile[i - 1]->af_suff, gap);
+					   : &afile[i - 1]->af_suff,
+					   gap, round == 1);
 	    }
 
 	    /*
@@ -3863,7 +4129,7 @@ free_basicword(bw)
 }
 
 /*
- * Free a list of affentry_T.
+ * Free a list of affentry_T and what they contain.
  */
     static void
 free_affixentries(first)
@@ -3874,909 +4140,24 @@ free_affixentries(first)
     for (ap = first; ap != NULL; ap = an)
     {
 	an = ap->ae_next;
-	vim_free(ap->ae_chop);
-	vim_free(ap->ae_add);
-	vim_free(ap->ae_cond);
-	vim_free(ap->ae_prog);
-	vim_free(ap);
+	free_affix_entry(ap);
     }
 }
 
-#endif  /* FEAT_MBYTE */
-
-#endif  /* FEAT_SYN_HL */
-
-#if 0  /* old spell code with words in .spl file */
-/*
- * Structure that is used to store the text from the language file.  This
- * avoids the need to allocate space for each individual word.  It's allocated
- * in big chunks for speed.
- */
-#define  SBLOCKSIZE 4096	/* default size of sb_data */
-typedef struct sblock_S sblock_T;
-struct sblock_S
-{
-    sblock_T	*sb_next;	/* next block in list */
-    char_u	sb_data[1];	/* data, actually longer */
-};
-
-/* Structure to store words and additions.  Used twice : once for case-folded
- * and once for keep-case words. */
-typedef struct winfo_S
-{
-    hashtab_T	wi_ht;		/* hashtable with all words, both dword_T and
-				   nword_T (check flags for DW_NWORD) */
-    garray_T	wi_add;		/* table with pointers to additions in a
-				   dword_T */
-    int		wi_addlen;	/* longest addition length */
-} winfo_T;
-
 /*
- * Structure used to store words and other info for one language.
- */
-typedef struct slang_S slang_T;
-struct slang_S
-{
-    slang_T	*sl_next;	/* next language */
-    char_u	sl_name[2];	/* language name "en", "nl", etc. */
-    winfo_T	sl_fwords;	/* case-folded words and additions */
-    winfo_T	sl_kwords;	/* keep-case words and additions */
-    char_u	sl_regions[17];	/* table with up to 8 region names plus NUL */
-    sblock_T	*sl_block;	/* list with allocated memory blocks */
-};
-
-static slang_T *first_lang = NULL;
-
-/* Entry for dword in "sl_ht".  Also used for part of an nword, starting with
- * the first non-word character.  And used for additions in wi_add. */
-typedef struct dword_S
-{
-    char_u	dw_region;	/* one bit per region where it's valid */
-    char_u	dw_flags;	/* DW_ flags */
-    char_u	dw_word[1];	/* actually longer, NUL terminated */
-} dword_T;
-
-#define REGION_ALL 0xff
-
-#define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2)
-
-/* Entry for a nword in "sl_ht".  Note that the last three items must be
- * identical to dword_T, so that they can be in the same hashtable. */
-typedef struct nword_S
-{
-    garray_T	nw_ga;		/* table with pointers to dword_T for part
-				   starting with non-word character */
-    int		nw_maxlen;	/* longest nword length (after the dword) */
-    char_u	nw_region;	/* one bit per region where it's valid */
-    char_u	nw_flags;	/* DW_ flags */
-    char_u	nw_word[1];	/* actually longer, NUL terminated */
-} nword_T;
-
-/* Get nword_T pointer from hashitem that uses nw_word */
-static nword_T dumnw;
-#define HI2NWORD(hi)	((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
-
-#define DW_CAP	    0x01	/* word must start with capital */
-#define DW_RARE	    0x02	/* rare word */
-#define DW_NWORD    0x04	/* this is an nword_T */
-#define DW_DWORD    0x08	/* (also) use as dword without nword */
-
-/*
- * Structure used in "b_langp", filled from 'spelllang'.
- */
-typedef struct langp_S
-{
-    slang_T	*lp_slang;	/* info for this language (NULL for last one) */
-    int		lp_region;	/* bitmask for region or REGION_ALL */
-} langp_T;
-
-#define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
-#define DWORD_ENTRY(gap, i)	*(((dword_T **)(gap)->ga_data) + i)
-
-#define SP_OK		0
-#define SP_BAD		1
-#define SP_RARE		2
-#define SP_LOCAL	3
-
-static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
-
-static slang_T *spell_load_lang __ARGS((char_u *lang));
-static void spell_load_file __ARGS((char_u *fname));
-static int find_region __ARGS((char_u *rp, char_u *region));
-
-/*
- * Main spell-checking function.
- * "ptr" points to the start of a word.
- * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
- * or when it's OK it remains unchanged.
- * This must only be called when 'spelllang' is not empty.
- * Returns the length of the word in bytes, also when it's OK, so that the
- * caller can skip over the word.
- */
-    int
-spell_check(wp, ptr, attrp)
-    win_T	*wp;		/* current window */
-    char_u	*ptr;
-    int		*attrp;
-{
-    char_u	*e;		/* end of word */
-    char_u	*ne;		/* new end of word */
-    char_u	*me;		/* max. end of match */
-    langp_T	*lp;
-    int		result;
-    int		len = 0;
-    hashitem_T	*hi;
-    int		round;
-    char_u	kword[MAXWLEN + 1];	/* word copy */
-    char_u	fword[MAXWLEN + 1];	/* word with case folded */
-    char_u	match[MAXWLEN + 1];	/* fword with additional chars */
-    char_u	kwordclen[MAXWLEN + 1];	/* len of orig chars after kword[] */
-    char_u	fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
-    char_u	*clen;
-    int		cidx = 0;		/* char index in xwordclen[] */
-    hash_T	fhash;			/* hash for fword */
-    hash_T	khash;			/* hash for kword */
-    int		match_len = 0;		/* length of match[] */
-    int		fmatch_len = 0;		/* length of nword match in chars */
-    garray_T	*gap;
-    int		l, t;
-    char_u	*p, *tp;
-    int		n;
-    dword_T	*dw;
-    dword_T	*tdw;
-    winfo_T	*wi;
-    nword_T	*nw;
-    int		w_isupper;
-
-    /* Find the end of the word.  We already know that *ptr is a word char. */
-    e = ptr;
-    do
-    {
-	mb_ptr_adv(e);
-	++len;
-    } while (*e != NUL && spell_iswordc(e));
-
-    /* A word starting with a number is always OK. */
-    if (*ptr >= '0' && *ptr <= '9')
-	return (int)(e - ptr);
-
-#ifdef FEAT_MBYTE
-    w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
-#else
-    w_isupper = MB_ISUPPER(*ptr);
-#endif
-
-    /* Make a copy of the word so that it can be NUL terminated.
-     * Compute hash value. */
-    mch_memmove(kword, ptr, e - ptr);
-    kword[e - ptr] = NUL;
-    khash = hash_hash(kword);
-
-    /* Make case-folded copy of the Word.  Compute its hash value. */
-    (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
-    fhash = hash_hash(fword);
-
-    /* Further case-folded characters to check for an nword match go in
-     * match[]. */
-    me = e;
-
-    /* "ne" is the end for the longest match */
-    ne = e;
-
-    /* The word is bad unless we find it in the dictionary. */
-    result = SP_BAD;
-
-    /*
-     * Loop over the languages specified in 'spelllang'.
-     * We check them all, because a matching nword may be longer than an
-     * already found dword or nword.
-     */
-    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
-    {
-	/*
-	 * Check for a matching word in the hashtable.
-	 * Check both the keep-case word and the fold-case word.
-	 */
-	for (round = 0; round <= 1; ++round)
-	{
-	    if (round == 0)
-	    {
-		wi = &lp->lp_slang->sl_kwords;
-		hi = hash_lookup(&wi->wi_ht, kword, khash);
-	    }
-	    else
-	    {
-		wi = &lp->lp_slang->sl_fwords;
-		hi = hash_lookup(&wi->wi_ht, fword, fhash);
-	    }
-	    if (!HASHITEM_EMPTY(hi))
-	    {
-		/*
-		 * If this is an nword entry, check for match with remainder.
-		 */
-		dw = HI2DWORD(hi);
-		if (dw->dw_flags & DW_NWORD)
-		{
-		    /* If the word is not defined as a dword we must find an
-		     * nword. */
-		    if ((dw->dw_flags & DW_DWORD) == 0)
-			dw = NULL;
-
-		    /* Fold more characters when needed for the nword.  Need
-		     * to do one extra to check for a non-word character after
-		     * the nword.  Also keep the byte-size of each character,
-		     * both before and after folding case. */
-		    nw = HI2NWORD(hi);
-		    while ((round == 0
-				? me - e <= nw->nw_maxlen
-				: match_len <= nw->nw_maxlen)
-			    && *me != NUL)
-		    {
-#ifdef FEAT_MBYTE
-			l = mb_ptr2len_check(me);
-#else
-			l = 1;
-#endif
-			(void)str_foldcase(me, l, match + match_len,
-						     MAXWLEN - match_len + 1);
-			me += l;
-			kwordclen[cidx] = l;
-			fwordclen[cidx] = STRLEN(match + match_len);
-			match_len += fwordclen[cidx];
-			++cidx;
-		    }
-
-		    if (round == 0)
-		    {
-			clen = kwordclen;
-			tp = e;
-		    }
-		    else
-		    {
-			clen = fwordclen;
-			tp = match;
-		    }
-
-		    /* Match with each item.  The longest match wins:
-		     * "you've" is longer than "you". */
-		    gap = &nw->nw_ga;
-		    for (t = 0; t < gap->ga_len; ++t)
-		    {
-			/* Skip entries with wrong case for first char.
-			 * Continue if it's a rare word without a captial. */
-			tdw = DWORD_ENTRY(gap, t);
-			if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
-								&& !w_isupper)
-			    continue;
-
-			p = tdw->dw_word;
-			l = 0;
-			for (n = 0; p[n] != 0; n += clen[l++])
-			    if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
-				break;
-
-			/* Use a match if it's longer than previous matches
-			 * and the next character is not a word character. */
-			if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
-						   || !spell_iswordc(tp + n)))
-			{
-			    dw = tdw;
-			    fmatch_len = l;
-			    if (round == 0)
-				ne = tp + n;
-			    else
-			    {
-				/* Need to use the length of the original
-				 * chars, not the fold-case ones. */
-				ne = e;
-				for (l = 0; l < fmatch_len; ++l)
-				    ne += kwordclen[l];
-			    }
-			    if ((lp->lp_region & tdw->dw_region) == 0)
-				result = SP_LOCAL;
-			    else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
-				result = SP_RARE;
-			    else
-				result = SP_OK;
-			}
-		    }
-
-		}
-
-		if (dw != NULL)
-		{
-		    if (dw->dw_flags & DW_CAP)
-		    {
-			/* Need to check first letter is uppercase.  If it is,
-			 * check region.  If it isn't it may be a rare word.
-			 * */
-			if (w_isupper)
-			{
-			    if ((dw->dw_region & lp->lp_region) == 0)
-				result = SP_LOCAL;
-			    else
-				result = SP_OK;
-			}
-			else if (dw->dw_flags & DW_RARE)
-			    result = SP_RARE;
-		    }
-		    else
-		    {
-			if ((dw->dw_region & lp->lp_region) == 0)
-			    result = SP_LOCAL;
-			else if (dw->dw_flags & DW_RARE)
-			    result = SP_RARE;
-			else
-			    result = SP_OK;
-		    }
-		}
-	    }
-	}
-
-	/*
-	 * Check for an addition.
-	 * Only after a dword, not after an nword.
-	 * Check both the keep-case word and the fold-case word.
-	 */
-	if (fmatch_len == 0)
-	    for (round = 0; round <= 1; ++round)
-	    {
-		if (round == 0)
-		    wi = &lp->lp_slang->sl_kwords;
-		else
-		    wi = &lp->lp_slang->sl_fwords;
-		gap = &wi->wi_add;
-		if (gap->ga_len == 0)   /* no additions, skip quickly */
-		    continue;
-
-		/* Fold characters when needed for the addition.  Need to do one
-		 * extra to check for a word character after the addition. */
-		while ((round == 0
-			    ? me - e <= wi->wi_addlen
-			    : match_len <= wi->wi_addlen)
-			&& *me != NUL)
-		{
-#ifdef FEAT_MBYTE
-		    l = mb_ptr2len_check(me);
-#else
-		    l = 1;
-#endif
-		    (void)str_foldcase(me, l, match + match_len,
-							 MAXWLEN - match_len + 1);
-		    me += l;
-		    kwordclen[cidx] = l;
-		    fwordclen[cidx] = STRLEN(match + match_len);
-		    match_len += fwordclen[cidx];
-		    ++cidx;
-		}
-
-		if (round == 0)
-		{
-		    clen = kwordclen;
-		    tp = e;
-		}
-		else
-		{
-		    clen = fwordclen;
-		    tp = match;
-		}
-
-		/* Addition lookup.  Uses a linear search, there should be
-		 * very few.  If there is a match adjust "ne" to the end.
-		 * This doesn't change whether a word was good or bad, only
-		 * the length. */
-		for (t = 0; t < gap->ga_len; ++t)
-		{
-		    tdw = DWORD_ENTRY(gap, t);
-		    p = tdw->dw_word;
-		    l = 0;
-		    for (n = 0; p[n] != 0; n += clen[l++])
-			if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
-			    break;
-
-		    /* Use a match if it's longer than previous matches
-		     * and the next character is not a word character. */
-		    if (p[n] == 0 && l > fmatch_len
-				    && (tp[n] == 0 || !spell_iswordc(tp + n)))
-		    {
-			fmatch_len = l;
-			if (round == 0)
-			    ne = tp + n;
-			else
-			{
-			    /* Need to use the length of the original
-			     * chars, not the fold-case ones. */
-			    ne = e;
-			    for (l = 0; l < fmatch_len; ++l)
-				ne += kwordclen[l];
-			}
-		    }
-		}
-	    }
-    }
-
-    if (result != SP_OK)
-    {
-	if (result == SP_BAD)
-	    *attrp = highlight_attr[HLF_SPB];
-	else if (result == SP_RARE)
-	    *attrp = highlight_attr[HLF_SPR];
-	else
-	    *attrp = highlight_attr[HLF_SPL];
-    }
-
-    return (int)(ne - ptr);
-}
-
-static slang_T	    *load_lp;	/* passed from spell_load_
author	Bram Moolenaar <Bram@vim.org>	2005-04-17 20:18:43 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-04-17 20:18:43 +0000
commit	5482f33f78d469b2ace7a5aa9af90d022915f3ec (patch)
tree	c31dbec0e219d0b40ef5d455f0c3a0497ef2b645
parent	075757c932e00bad7f80325f2f944e4e1ed4a223 (diff)