updated for version 7.0091

author: Bram Moolenaar <Bram@vim.org> 2005-06-23 22:29:21 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-06-23 22:29:21 +0000
commit: f417f2b6af7926edf70c6a95f6972a2dec63bdf0 (patch)
tree: 5e7f4c1a83b4fc7f4fa29a9a54edac19c41b6795
parent: f81b0fe01158b415575e67ec4e92a8f5daae9526 (diff)
2 files changed, 512 insertions, 206 deletions
diff --git a/src/ex_docmd.c b/src/ex_docmd.c
index 1c1ecaa33c..fa94424f62 100644
--- a/src/ex_docmd.c
+++ b/src/ex_docmd.c
@@ -227,6 +227,7 @@ static void	ex_popup __ARGS((exarg_T *eap));
 # define ex_syntax		ex_ni
 # define ex_spell		ex_ni
 # define ex_mkspell		ex_ni
+# define ex_spelldump		ex_ni
 #endif
 #ifndef FEAT_MZSCHEME
 # define ex_mzscheme		ex_script_ni
diff --git a/src/spell.c b/src/spell.c
index 6251b93538..4ff413a714 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -379,7 +379,7 @@ typedef struct suggest_S
 /* Number of suggestions kept when cleaning up.  When rescore_suggestions() is
  * called the score may change, thus we need to keep more than what is
  * displayed. */
-#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 25 ? 25 : (su)->su_maxcount)
+#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount)
 
 /* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
  * of suggestions that are not going to be displayed. */
@@ -530,9 +530,11 @@ static slang_T *slang_alloc __ARGS((char_u *lang));
 static void slang_free __ARGS((slang_T *lp));
 static void slang_clear __ARGS((slang_T *lp));
 static void find_word __ARGS((matchinf_T *mip, int mode));
+static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang));
 static void find_prefix __ARGS((matchinf_T *mip));
 static int fold_more __ARGS((matchinf_T *mip));
 static int spell_valid_case __ARGS((int origflags, int treeflags));
+static int no_spell_checking __ARGS((void));
 static void spell_load_lang __ARGS((char_u *lang));
 static char_u *spell_enc __ARGS((void));
 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
@@ -555,20 +557,22 @@ static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int s
 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
 static void score_comp_sal __ARGS((suginfo_T *su));
 static void score_combine __ARGS((suginfo_T *su));
+static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
 static void suggest_try_soundalike __ARGS((suginfo_T *su));
 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
 static void set_map_str __ARGS((slang_T *lp, char_u *map));
 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
-static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int use_score, int had_bonus));
+static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus));
 static void add_banned __ARGS((suginfo_T *su, char_u *word));
 static int was_banned __ARGS((suginfo_T *su, char_u *word));
 static void free_banned __ARGS((suginfo_T *su));
 static void rescore_suggestions __ARGS((suginfo_T *su));
 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
-static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u	*badsound));
 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
+static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum));
+static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
 
 /*
  * Use our own character-case definitions, because the current locale may
@@ -770,15 +774,10 @@ find_word(mip, mode)
     char_u	*p;
 #endif
     int		res = SP_BAD;
-    int		valid;
     slang_T	*slang = mip->mi_lp->lp_slang;
     unsigned	flags;
     char_u	*byts;
     idx_T	*idxs;
-    int		prefcnt;
-    int		pidx;
-    regmatch_T	regmatch;
-    regprog_T	*rp;
     int		prefid;
 
     if (mode == FIND_KEEPWORD)
@@ -964,35 +963,9 @@ find_word(mip, mode)
 	    {
 		/* The prefix ID is stored two bytes above the flags. */
 		prefid = (unsigned)flags >> 16;
-
-		valid = FALSE;
-		for (prefcnt = mip->mi_prefcnt - 1; prefcnt >= 0; --prefcnt)
-		{
-		    pidx = slang->sl_pidxs[mip->mi_prefarridx + prefcnt];
-
-		    /* Check the prefix ID. */
-		    if (prefid != (pidx & 0xff))
-			continue;
-
-		    /* Check the condition, if there is one.  The
-		     * condition index is stored above the prefix ID byte.
-		     */
-		    rp = slang->sl_prefprog[(unsigned)pidx >> 8];
-		    if (rp != NULL)
-		    {
-			regmatch.regprog = rp;
-			regmatch.rm_ic = FALSE;
-			if (!vim_regexec(&regmatch,
-					mip->mi_fword + mip->mi_prefixlen, 0))
-			    continue;
-		    }
-
-		    /* It's a match, use it. */
-		    valid = TRUE;
-		    break;
-		}
-
-		if (!valid)
+		if (!valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
+				   prefid, mip->mi_fword + mip->mi_prefixlen,
+								       slang))
 		    continue;
 	    }
 
@@ -1017,8 +990,7 @@ find_word(mip, mode)
 		mip->mi_result = res;
 		mip->mi_end = mip->mi_word + wlen;
 	    }
-	    else if (mip->mi_result == res
-				 && mip->mi_end < mip->mi_word + wlen)
+	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
 		mip->mi_end = mip->mi_word + wlen;
 
 	    if (res == SP_OK)
@@ -1031,6 +1003,48 @@ find_word(mip, mode)
 }
 
 /*
+ * Return TRUE if the prefix indicated by "arridx" matches with the prefix ID
+ * "prefid" for the word "word".
+ */
+    static int
+valid_word_prefix(totprefcnt, arridx, prefid, word, slang)
+    int		totprefcnt;	/* nr of prefix IDs */
+    int		arridx;		/* idx in sl_pidxs[] */
+    int		prefid;
+    char_u	*word;
+    slang_T	*slang;
+{
+    int		prefcnt;
+    int		pidx;
+    regprog_T	*rp;
+    regmatch_T	regmatch;
+
+    for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
+    {
+	pidx = slang->sl_pidxs[arridx + prefcnt];
+
+	/* Check the prefix ID. */
+	if (prefid != (pidx & 0xff))
+	    continue;
+
+	/* Check the condition, if there is one.  The condition index is
+	 * stored above the prefix ID byte.  */
+	rp = slang->sl_prefprog[(unsigned)pidx >> 8];
+	if (rp != NULL)
+	{
+	    regmatch.regprog = rp;
+	    regmatch.rm_ic = FALSE;
+	    if (!vim_regexec(&regmatch, word, 0))
+		continue;
+	}
+
+	/* It's a match! */
+	return TRUE;
+    }
+    return FALSE;
+}
+
+/*
  * Check if the word at "mip->mi_word" has a matching prefix.
  * If it does, then check the following word.
  *
@@ -1178,6 +1192,19 @@ spell_valid_case(origflags, treeflags)
 		&& ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
 }
 
+/*
+ * Give error E756 and return TRUE if spell checking is not enabled.
+ */
+    static int
+no_spell_checking()
+{
+    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
+    {
+	EMSG(_("E756: Spell checking is not enabled"));
+	return TRUE;
+    }
+    return FALSE;
+}
 
 /*
  * Move to next spell error.
@@ -1204,11 +1231,8 @@ spell_move_to(dir, allwords, curline)
     int		buflen = 0;
     int		skip = 0;
 
-    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
-    {
-	EMSG(_("E756: Spell checking not enabled"));
+    if (no_spell_checking())
 	return FAIL;
-    }
 
     /*
      * Start looking for bad word at the start of the line, because we can't
@@ -1679,8 +1703,10 @@ formerr:
 	i = set_spell_charflags(p, cnt, fol);
 	vim_free(p);
 	vim_free(fol);
+#if 0	/* tolerate the differences */
 	if (i == FAIL)
 	    goto formerr;
+#endif
     }
     else
     {
@@ -2063,69 +2089,67 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
 
 /*
  * Parse 'spelllang' and set buf->b_langp accordingly.
- * Returns an error message or NULL.
+ * Returns NULL if it's OK, an error message otherwise.
  */
     char_u *
 did_set_spelllang(buf)
     buf_T	*buf;
 {
     garray_T	ga;
-    char_u	*lang;
-    char_u	*e;
+    char_u	*splp;
     char_u	*region;
     int		region_mask;
     slang_T	*lp;
     int		c;
-    char_u	lbuf[MAXWLEN + 1];
+    char_u	lang[MAXWLEN + 1];
     char_u	spf_name[MAXPATHL];
-    int		did_spf = FALSE;
+    int		load_spf;
+    int		len;
+    char_u	*p;
 
     ga_init2(&ga, sizeof(langp_T), 2);
 
-    /* Get the name of the .spl file associated with 'spellfile'. */
+    /* Make the name of the .spl file associated with 'spellfile'. */
     if (*buf->b_p_spf == NUL)
-	did_spf = TRUE;
+	load_spf = FALSE;
     else
+    {
 	vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
 								buf->b_p_spf);
+	load_spf = TRUE;
+    }
 
-    /* loop over comma separated languages. */
-    for (lang = buf->b_p_spl; *lang != NUL; lang = e)
+    /* loop over comma separated language names. */
+    for (splp = buf->b_p_spl; *splp != NUL; )
     {
-	e = vim_strchr(lang, ',');
-	if (e == NULL)
-	    e = lang + STRLEN(lang);
+	/* Get one language name. */
+	copy_option_part(&splp, lang, MAXWLEN, ",");
+
+	/* If there is a region name let "region" point to it and remove it
+	 * from the name. */
 	region = NULL;
-	if (e > lang + 2)
+	len = STRLEN(lang);
+	if (len > 3 && lang[len - 3] == '_')
 	{
-	    if (e - lang >= MAXWLEN)
-	    {
-		ga_clear(&ga);
-		return e_invarg;
-	    }
-	    if (lang[2] == '_')
-		region = lang + 3;
+	    region = lang + len - 2;
+	    len -= 3;
+	    lang[len] = NUL;
 	}
 
 	/* Check if we loaded this language before. */
 	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
-	    if (STRNICMP(lp->sl_name, lang, 2) == 0)
+	    if (STRICMP(lp->sl_name, lang) == 0)
 		break;
 
+	/* If not found try loading the language now. */
 	if (lp == NULL)
-	{
-	    /* Not found, load the language. */
-	    vim_strncpy(lbuf, lang, e - lang);
-	    if (region != NULL)
-		mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
-	    spell_load_lang(lbuf);
-	}
+	    spell_load_lang(lang);
 
 	/*
-	 * Loop over the languages, there can be several files for each.
+	 * Loop over the languages, there can be several files for "lang".
 	 */
 	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
-	    if (STRNICMP(lp->sl_name, lang, 2) == 0)
+	    if (STRICMP(lp->sl_name, lang) == 0)
 	    {
 		region_mask = REGION_ALL;
 		if (region != NULL)
@@ -2135,13 +2159,9 @@ did_set_spelllang(buf)
 		    if (c == REGION_ALL)
 		    {
 			if (!lp->sl_add)
-			{
-			    c = *e;
-			    *e = NUL;
-			    smsg((char_u *)_("Warning: region %s not supported"),
-									lang);
-			    *e = c;
-			}
+			    smsg((char_u *)
+				    _("Warning: region %s not supported"),
+								      region);
 		    }
 		    else
 			region_mask = 1 << c;
@@ -2156,28 +2176,32 @@ did_set_spelllang(buf)
 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
 		++ga.ga_len;
 
-		/* Check if this is the 'spellfile' spell file. */
-		if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
-		    did_spf = TRUE;
+		/* Check if this is the spell file related to 'spellfile'. */
+		if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE)
+								  == FPC_SAME)
+		    load_spf = FALSE;
 	    }
-
-	if (*e == ',')
-	    ++e;
     }
 
     /*
      * Make sure the 'spellfile' file is loaded.  It may be in 'runtimepath',
      * then it's probably loaded above already.  Otherwise load it here.
      */
-    if (!did_spf)
+    if (load_spf)
     {
+	/* Check if it was loaded already. */
 	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
 	    if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
 		break;
 	if (lp == NULL)
 	{
-	    vim_strncpy(lbuf, gettail(spf_name), 2);
-	    lp = spell_load_file(spf_name, lbuf, NULL, TRUE);
+	    /* Not loaded, try loading it now.  The language name includes the
+	     * region name, the region is ignored otherwise. */
+	    vim_strncpy(lang, gettail(buf->b_p_spf), MAXWLEN);
+	    p = vim_strchr(lang, '.');
+	    if (p != NULL)
+		*p = NUL;	/* truncate at ".encoding.add" */
+	    lp = spell_load_file(spf_name, lang, NULL, TRUE);
 	}
 	if (lp != NULL && ga_grow(&ga, 1) == OK)
 	{
@@ -2457,6 +2481,7 @@ typedef struct spellinfo_S
     sblock_T	*si_blocks;	/* memory blocks used */
     int		si_ascii;	/* handling only ASCII words */
     int		si_add;		/* addition file */
+    int		si_clear_chartab;   /* when TRUE clear char tables */
     int		si_region;	/* region mask */
     vimconv_T	si_conv;	/* for conversion to 'encoding' */
     int		si_memtot;	/* runtime memory used */
@@ -2909,6 +2934,14 @@ spell_read_aff(fname, spin)
 
     if (fol != NULL || low != NULL || upp != NULL)
     {
+	if (spin->si_clear_chartab)
+	{
+	    /* Clear the char type tables, don't want to use any of the
+	     * currently used spell properties. */
+	    init_spell_chartab();
+	    spin->si_clear_chartab = FALSE;
+	}
+
 	/*
 	 * Don't write a word table for an ASCII file, so that we don't check
 	 * for conflicts with a word table that matches 'encoding'.
@@ -3107,6 +3140,8 @@ spell_read_dic(fname, spin, affile)
     {
 	line_breakcheck();
 	++lnum;
+	if (line[0] == '#')
+	    continue;	/* comment line */
 
 	/* Remove CR, LF and white space from the end.  White space halfway
 	 * the word is kept to allow e.g., "et al.". */
@@ -4395,6 +4430,8 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
 
     if (incount <= 0)
 	EMSG(_(e_invarg));	/* need at least output and input names */
+    else if (vim_strchr(gettail(wfname), '_') != NULL)
+	EMSG(_("E751: Output file name must not have region name"));
     else if (incount > 8)
 	EMSG(_("E754: Only up to 8 regions supported"));
     else
@@ -4436,11 +4473,6 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
 	}
 	spin.si_region_count = incount;
 
-	if (!spin.si_add)
-	    /* Clear the char type tables, don't want to use any of the
-	     * currently used spell properties. */
-	    init_spell_chartab();
-
 	spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
 	spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
 	spin.si_prefroot = wordtree_alloc(&spin.si_blocks);
@@ -4452,6 +4484,14 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
 	    return;
 	}
 
+	/* When not producing a .add.spl file clear the character table when
+	 * we encounter one in the .aff file.  This means we dump the current
+	 * one in the .spl file if the .aff file doesn't define one.  That's
+	 * better than guessing the contents, the table will match a
+	 * previously loaded spell file. */
+	if (!spin.si_add)
+	    spin.si_clear_chartab = TRUE;
+
 	/*
 	 * Read all the .aff and .dic files.
 	 * Text is converted to 'encoding'.
@@ -4591,9 +4631,16 @@ spell_add_word(word, len, bad)
 {
     FILE	*fd;
     buf_T	*buf;
+    int		new_spf = FALSE;
+    struct stat	st;
 
+    /* If 'spellfile' isn't set figure out a good default value. */
     if (*curbuf->b_p_spf == NUL)
+    {
 	init_spellfile();
+	new_spf = TRUE;
+    }
+
     if (*curbuf->b_p_spf == NUL)
 	EMSG(_("E764: 'spellfile' is not set"));
     else
@@ -4607,6 +4654,23 @@ spell_add_word(word, len, bad)
 	else
 	{
 	    fd = mch_fopen((char *)curbuf->b_p_spf, "a");
+	    if (fd == NULL && new_spf)
+	    {
+		/* We just initialized the 'spellfile' option and can't open
+		 * the file.  We may need to create the "spell" directory
+		 * first.  We already checked the runtime directory is
+		 * writable in init_spellfile(). */
+		STRCPY(NameBuff, curbuf->b_p_spf);
+		*gettail_sep(NameBuff) = NUL;
+		if (mch_stat((char *)NameBuff, &st) < 0)
+		{
+		    /* The directory doesn't exist.  Try creating it and
+		     * opening the file again. */
+		    vim_mkdir(NameBuff, 0755);
+		    fd = mch_fopen((char *)curbuf->b_p_spf, "a");
+		}
+	    }
+
 	    if (fd == NULL)
 		EMSG2(_(e_notopen), curbuf->b_p_spf);
 	    else
@@ -4640,10 +4704,17 @@ init_spellfile()
     int		l;
     slang_T	*sl;
     char_u	*rtp;
+    char_u	*lend;
 
     if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
     {
-	/* Loop over all entries in 'runtimepath'. */
+	/* Find the end of the language name.  Exclude the region. */
+	for (lend = curbuf->b_p_spl; *lend != NUL
+			&& vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
+	    ;
+
+	/* Loop over all entries in 'runtimepath'.  Use the first one where we
+	 * are allowed to write. */
 	rtp = p_rtp;
 	while (*rtp != NUL)
 	{
@@ -4657,7 +4728,7 @@ init_spellfile()
 		l = STRLEN(buf);
 		vim_snprintf((char *)buf + l, MAXPATHL - l,
 			"/spell/%.*s.%s.add",
-			2, curbuf->b_p_spl,
+			(int)(lend - curbuf->b_p_spl), curbuf->b_p_spl,
 			strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
 					   ? (char_u *)"ascii" : spell_enc());
 		set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
@@ -5113,7 +5184,7 @@ spell_suggest()
 	    if (p_verbose > 0)
 	    {
 		/* Add the score. */
-		if (sps_flags & SPS_DOUBLE)
+		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
 		    vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"),
 			stp->st_salscore ? "s " : "",
 			stp->st_score, stp->st_altscore);
@@ -5421,7 +5492,7 @@ suggest_try_special(su)
 	su->su_fbadword[len] = NUL;
 	make_case_word(su->su_fbadword, word, su->su_badflags);
 	su->su_fbadword[len] = c;
-	add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, TRUE);
+	add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 0, TRUE);
     }
 }
 
@@ -5584,7 +5655,7 @@ suggest_try_change(su)
 		    /* The badword also ends: add suggestions, */
 		    add_suggestion(su, &su->su_ga, preword,
 			    sp->ts_fidx - repextra,
-					      sp->ts_score + newscore, FALSE);
+					   sp->ts_score + newscore, 0, FALSE);
 		}
 		else if (sp->ts_fidx >= sp->ts_fidxtry
 #ifdef FEAT_MBYTE
@@ -6386,8 +6457,6 @@ score_comp_sal(su)
     int		i;
     suggest_T   *stp;
     suggest_T   *sstp;
-    char_u	fword[MAXWLEN];
-    char_u	goodsound[MAXWLEN];
     int		score;
 
     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
@@ -6405,11 +6474,9 @@ score_comp_sal(su)
 	    {
 		stp = &SUG(su->su_ga, i);
 
-		/* Case-fold the suggested word and sound-fold it. */
-		(void)spell_casefold(stp->st_word, STRLEN(stp->st_word),
-							      fword, MAXWLEN);
-		spell_soundfold(lp->lp_slang, fword, goodsound);
-		score = soundalike_score(goodsound, badsound);
+		/* Case-fold the suggested word, sound-fold it and compute the
+		 * sound-a-like score. */
+		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
 		if (score < SCORE_MAXMAX)
 		{
 		    /* Add the suggestion. */
@@ -6444,9 +6511,6 @@ score_combine(su)
     suggest_T	*stp;
     char_u	*p;
     char_u	badsound[MAXWLEN];
-    char_u	badsound2[MAXWLEN];
-    char_u	goodsound[MAXWLEN];
-    char_u	fword[MAXWLEN];
     int		round;
 
     /* Add the alternate score to su_ga. */
@@ -6461,25 +6525,8 @@ score_combine(su)
 	    for (i = 0; i < su->su_ga.ga_len; ++i)
 	    {
 		stp = &SUG(su->su_ga, i);
-
-		if (stp->st_orglen <= su->su_badlen)
-		    p = badsound;
-		else
-		{
-		    /* soundfold the bad word with a different length */
-		    (void)spell_casefold(su->su_badptr, stp->st_orglen,
-							      fword, MAXWLEN);
-		    spell_soundfold(lp->lp_slang, fword, badsound2);
-		    p = badsound2;
-		}
-
-		/* Case-fold the word, sound-fold the word and compute the
-		 * score for the difference. */
-		(void)spell_casefold(stp->st_word, STRLEN(stp->st_word),
-							      fword, MAXWLEN);
-		spell_soundfold(lp->lp_slang, fword, goodsound);
-
-		stp->st_altscore = soundalike_score(goodsound, p);
+		stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang,
+								    badsound);
 		if (stp->st_altscore == SCORE_MAXMAX)
 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
 		else
@@ -6549,6 +6596,50 @@ score_combine(su)
 }
 
 /*
+ * For the goodword in "stp" compute the soundalike score compared to the
+ * badword.
+ */
+    static int
+stp_sal_score(stp, su, slang, badsound)
+    suggest_T	*stp;
+    suginfo_T	*su;
+    slang_T	*slang;
+    char_u	*badsound;	/* sound-folded badword */
+{
+    char_u	*p;
+    char_u	badsound2[MAXWLEN];
+    char_u	fword[MAXWLEN];
+    char_u	goodsound[MAXWLEN];
+
+    if (stp->st_orglen <= su->su_badlen)
+	p = badsound;
+    else
+    {
+	/* soundfold the bad word with more characters following */
+	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
+
+	/* When joining two words the sound often changes a lot.  E.g., "t he"
+	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
+	 * removing the space.  Don't do it when the good word also contains a
+	 * space. */
+	if (vim_iswhite(su->su_badptr[su->su_badlen])
+					 && *skiptowhite(stp->st_word) == NUL)
+	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
+		mch_memmove(p, p + 1, STRLEN(p));
+
+	spell_soundfold(slang, fword, badsound2);
+	p = badsound2;
+    }
+
+    /* Case-fold the word, sound-fold the word and compute the score for the
+     * difference. */
+    (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), fword, MAXWLEN);
+    spell_soundfold(slang, fword, goodsound);
+
+    return soundalike_score(goodsound, p);
+}
+
+/*
  * Find suggestions by comparing the word in a sound-a-like form.
  */
     static void
@@ -6604,8 +6695,11 @@ suggest_try_soundalike(su)
 		while (depth >= 0 && !got_int)
 		{
 		    if (curi[depth] > byts[arridx[depth]])
+		    {
 			/* Done all bytes at this node, go up one level. */
 			--depth;
+			line_breakcheck();
+		    }
 		    else
 		    {
 			/* Do one more byte at this node. */
@@ -6642,7 +6736,7 @@ suggest_try_soundalike(su)
 				    char_u	*p;
 				    int		score;
 
-				    if (round == 1 && flags != 0)
+				    if (round == 1 && (flags & WF_CAPMASK) != 0)
 				    {
 					/* Need to fix case according to
 					 * "flags". */
@@ -6655,7 +6749,7 @@ suggest_try_soundalike(su)
 				    if (sps_flags & SPS_DOUBLE)
 					add_suggestion(su, &su->su_sga, p,
 						su->su_badlen,
-							  sound_score, FALSE);
+						       sound_score, 0, FALSE);
 				    else
 				    {
 					/* Compute the score. */
@@ -6668,11 +6762,11 @@ suggest_try_soundalike(su)
 					    add_suggestion(su, &su->su_ga, p,
 						    su->su_badlen,
 						  RESCORE(score, sound_score),
-									TRUE);
+							   sound_score, TRUE);
 					else
 					    add_suggestion(su, &su->su_ga, p,
 						    su->su_badlen,
-						  score + sound_score, FALSE);
+					       score + sound_score, 0, FALSE);
 				    }
 				}
 			    }
@@ -6692,8 +6786,6 @@ suggest_try_soundalike(su)
 			    curi[depth] = 1;
 			}
 		    }
-
-		    line_breakcheck();
 		}
 	    }
 	}
@@ -6859,12 +6951,13 @@ similar_chars(slang, c1, c2)
  * with spell_edit_score().
  */
     static void
-add_suggestion(su, gap, goodword, badlen, score, had_bonus)
+add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus)
     suginfo_T	*su;
     garray_T	*gap;
     char_u	*goodword;
     int		badlen;		/* length of bad word used */
     int		score;
+    int		altscore;
     int		had_bonus;	/* value for st_had_bonus */
 {
     suggest_T   *stp;
@@ -6918,7 +7011,7 @@ add_suggestion(su, gap, goodword, badlen, score, had_bonus)
 	    if (stp->st_word != NULL)
 	    {
 		stp->st_score = score;
-		stp->st_altscore = 0;
+		stp->st_altscore = altscore;
 		stp->st_had_bonus = had_bonus;
 		stp->st_orglen = badlen;
 		++gap->ga_len;
@@ -7003,10 +7096,6 @@ rescore_suggestions(su)
     langp_T	*lp;
     suggest_T	*stp;
     char_u	sal_badword[MAXWLEN];
-    char_u	tword[MAXWLEN];
-    char_u	salword[MAXWLEN];
-    char_u	*p;
-    int		score;
     int		i;
 
     for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
@@ -7022,18 +7111,11 @@ rescore_suggestions(su)
 		stp = &SUG(su->su_ga, i);
 		if (!stp->st_had_bonus)
 		{
-		    if (stp->st_orglen <= su->su_badlen)
-			p = sal_badword;
-		    else
-		    {
-			/* soundfold the bad word with a different length */
-			(void)spell_casefold(su->su_badptr, stp->st_orglen,
-							      tword, MAXWLEN);
-			spell_soundfold(lp->lp_slang, tword, salword);
-			p = salword;
-		    }
-		    score = spell_sound_score(lp->lp_slang, stp->st_word, p);
-		    stp->st_score = RESCORE(stp->st_score, score);
+		    stp->st_altscore = stp_sal_score(stp, su,
+						   lp->lp_slang, sal_badword);
+		    if (stp->st_altscore == SCORE_MAXMAX)
+			stp->st_altscore = SCORE_BIG;
+		    stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
 		}
 	    }
 	    break;
@@ -7424,53 +7506,38 @@ spell_soundfold(slang, inword, res)
 }
 
 /*
- * Return the score for how much words sound different.
- */
-    static int
-spell_sound_score(slang, goodword, badsound)
-    slang_T	*slang;
-    char_u	*goodword;	/* good word */
-    char_u	*badsound;	/* sound-folded bad word */
-{
-    char_u	fword[MAXWLEN];
-    char_u	goodsound[MAXWLEN];
-    int		score;
-
-    /* Case-fold the goodword, needed for sound folding. */
-    (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN);
-
-    /* sound-fold the goodword */
-    spell_soundfold(slang, fword, goodsound);
-
-    /* Compute the edit distance-score of the sounds.  This is slow but we
-     * only do it for a small number of words. */
-    score = spell_edit_score(badsound, goodsound);
-
-    /* Correction: adding/inserting "*" at the start (word starts with vowel)
-     * shouldn't be counted so much, vowels halfway the word aren't counted at
-     * all. */
-    if (*badsound != *goodsound && (*badsound == '*' || *goodsound == '*'))
-	score -= SCORE_DEL / 2;
-
-    return score;
-}
-
-/*
  * Compute a score for two sound-a-like words.
  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
  * Instead of a generic loop we write out the code.  That keeps it fast by
  * avoiding checks that will not be possible.
  */
     static int
-soundalike_score(goodsound, badsound)
-    char_u	*goodsound;	/* sound-folded good word */
-    char_u	*badsound;	/* sound-folded bad word */
+soundalike_score(goodstart, badstart)
+    char_u	*goodstart;	/* sound-folded good word */
+    char_u	*badstart;	/* sound-folded bad word */
 {
-    int		goodlen = STRLEN(goodsound);
-    int		badlen = STRLEN(badsound);
+    char_u	*goodsound = goodstart;
+    char_u	*badsound = badstart;
+    int		goodlen;
+    int		badlen;
     int		n;
     char_u	*pl, *ps;
     char_u	*pl2, *ps2;
+    int		score = 0;
+
+    /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
+     * counted so much, vowels halfway the word aren't counted at all. */
+    if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
+    {
+	score = SCORE_DEL / 2;
+	if (*badsound == '*')
+	    ++badsound;
+	else
+	    ++goodsound;
+    }
+
+    goodlen = STRLEN(goodsound);
+    badlen = STRLEN(badsound);
 
     /* Return quickly if the lenghts are too different to be fixed by two
      * changes. */
@@ -7480,12 +7547,12 @@ soundalike_score(goodsound, badsound)
 
     if (n > 0)
     {
-	pl = goodsound;	    /* longest */
+	pl = goodsound;	    /* goodsound is longest */
 	ps = badsound;
     }
     else
     {
-	pl = badsound;	    /* longest */
+	pl = badsound;	    /* badsound is longest */
 	ps = goodsound;
     }
 
@@ -7511,7 +7578,7 @@ soundalike_score(goodsound, badsound)
 	    }
 	    /* strings must be equal after second delete */
 	    if (STRCMP(pl + 1, ps) == 0)
-		return SCORE_DEL * 2;
+		return score + SCORE_DEL * 2;
 
 	    /* Failed to compare. */
 	    break;
@@ -7528,7 +7595,7 @@ soundalike_score(goodsound, badsound)
 	    while (*pl2 == *ps2)
 	    {
 		if (*pl2 == NUL)	/* reached the end */
-		    return SCORE_DEL;
+		    return score + SCORE_DEL;
 		++pl2;
 		++ps2;
 	    }
@@ -7536,11 +7603,11 @@ soundalike_score(goodsound, badsound)
 	    /* 2: delete then swap, then rest must be equal */
 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
-		return SCORE_DEL + SCORE_SWAP;
+		return score + SCORE_DEL + SCORE_SWAP;
 
 	    /* 3: delete then substitute, then the rest must be equal */
 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
-		return SCORE_DEL + SCORE_SUBST;
+		return score + SCORE_DEL + SCORE_SUBST;
 
 	    /* 4: first swap then delete */
 	    if (pl[0] == ps[1] && pl[1] == ps[0])
@@ -7554,7 +7621,7 @@ soundalike_score(goodsound, badsound)
 		}
 		/* delete a char and then strings must be equal */
 		if (STRCMP(pl2 + 1, ps2) == 0)
-		    return SCORE_SWAP + SCORE_DEL;
+		    return score + SCORE_SWAP + SCORE_DEL;
 	    }
 
 	    /* 5: first substitute then delete */
@@ -7567,7 +7634,7 @@ soundalike_score(goodsound, badsound)
 	    }
 	    /* delete a char and then strings must be equal */
 	    if (STRCMP(pl2 + 1, ps2) == 0)
-		return SCORE_SUBST + SCORE_DEL;
+		return score + SCORE_SUBST + SCORE_DEL;
 
 	    /* Failed to compare. */
 	    break;
@@ -7579,7 +7646,7 @@ soundalike_score(goodsound, badsound)
 	     * 1: check if for identical strings
 	     */
 	    if (*pl == NUL)
-		return 0;
+		return score;
 
 	    /* 2: swap */
 	    if (pl[0] == ps[1] && pl[1] == ps[0])
@@ -7589,18 +7656,18 @@ soundalike_score(goodsound, badsound)
 		while (*pl2 == *ps2)
 		{
 		    if (*pl2 == NUL)	/* reached the end */
-			return SCORE_SWAP;
+			return score + SCORE_SWAP;
 		    ++pl2;
 		    ++ps2;
 		}
 		/* 3: swap and swap again */
 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
-		    return SCORE_SWAP + SCORE_SWAP;
+		    return score + SCORE_SWAP + SCORE_SWAP;
 
 		/* 4: swap and substitute */
 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
-		    return SCORE_SWAP + SCORE_SUBST;
+		    return score + SCORE_SWAP + SCORE_SUBST;
 	    }
 
 	    /* 5: substitute */
@@ -7609,7 +7676,7 @@ soundalike_score(goodsound, badsound)
 	    while (*pl2 == *ps2)
 	    {
 		if (*pl2 == NUL)	/* reached the end */
-		    return SCORE_SUBST;
+		    return score + SCORE_SUBST;
 		++pl2;
 		++ps2;
 	    }
@@ -7617,11 +7684,11 @@ soundalike_score(goodsound, badsound)
 	    /* 6: substitute and swap */
 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
-		return SCORE_SUBST + SCORE_SWAP;
+		return score + SCORE_SUBST + SCORE_SWAP;
 
 	    /* 7: substitute and substitute */
 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
-		return SCORE_SUBST + SCORE_SUBST;
+		return score + SCORE_SUBST + SCORE_SUBST;
 
 	    /* 8: insert then delete */
 	    pl2 = pl;
@@ -7632,7 +7699,7 @@ soundalike_score(goodsound, badsound)
 		++ps2;
 	    }
 	    if (STRCMP(pl2 + 1, ps2) == 0)
-		return SCORE_INS + SCORE_DEL;
+		return score + SCORE_INS + SCORE_DEL;
 
 	    /* 9: delete then insert */
 	    pl2 = pl + 1;
@@ -7643,7 +7710,7 @@ soundalike_score(goodsound, badsound)
 		++ps2;
 	    }
 	    if (STRCMP(pl2, ps2 + 1) == 0)
-		return SCORE_INS + SCORE_DEL;
+		return score + SCORE_INS + SCORE_DEL;
 
 	    /* Failed to compare. */
 	    break;
@@ -7768,4 +7835,242 @@ spell_edit_score(badword, goodword)
     return i;
 }
 
+/*
+ * ":spelldump": open a new window and list the words of the spell files used.
+ */
+/*ARGSUSED*/
+    void
+ex_spelldump(eap)
+    exarg_T *eap;
+{
+    buf_T	*buf = curbuf;
+    langp_T	*lp;
+    slang_T	*slang;
+    idx_T	arridx[MAXWLEN];
+    int		curi[MAXWLEN];
+    char_u	word[MAXWLEN];
+    int		c;
+    char_u	*byts;
+    idx_T	*idxs;
+    linenr_T	lnum = 0;
+    int		round;
+    int		depth;
+    int		n;
+    int		flags;
+
+    if (no_spell_checking())
+	return;
+
+    /* Create a new empty buffer by splitting the window. */
+    do_cmdline_cmd((char_u *)"new");
+    if (!bufempty() || !buf_valid(buf))
+	return;
+
+    for (lp = LANGP_ENTRY(buf->b_langp, 0); lp->lp_slang != NULL; ++lp)
+    {
+	slang = lp->lp_slang;
+
+	vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
+	ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
+
+	/* round 1: case-folded tree
+	 * round 2: keep-case tree */
+	for (round = 1; round <= 2; ++round)
+	{
+	    if (round == 1)
+	    {
+		byts = slang->sl_fbyts;
+		idxs = slang->sl_fidxs;
+	    }
+	    else
+	    {
+		byts = slang->sl_kbyts;
+		idxs = slang->sl_kidxs;
+	    }
+	    if (byts == NULL)
+		continue;		/* array is empty */
+
+	    depth = 0;
+	    arridx[0] = 0;
+	    curi[0] = 1;
+	    while (depth >= 0 && !got_int)
+	    {
+		if (curi[depth] > byts[arridx[depth]])
+		{
+		    /* Done all bytes at this node, go up one level. */
+		    --depth;
+		    line_breakcheck();
+		}
+		else
+		{
+		    /* Do one more byte at this node. */
+		    n = arridx[depth] + curi[depth];
+		    ++curi[depth];
+		    c = byts[n];
+		    if (c == 0)
+		    {
+			/* End of word, deal with the word.
+			 * Don't use keep-case words in the fold-case tree,
+			 * they will appear in the keep-case tree.
+			 * Only use the word when the region matches. */
+			flags = (int)idxs[n];
+			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
+				&& ((flags & WF_REGION) == 0
+					|| (((unsigned)flags >> 8)
+						       & lp->lp_region) != 0))
+			{
+			    word[depth] = NUL;
+			    dump_word(word, round, flags, lnum++);
+
+			    /* Apply the prefix, if there is one. */
+			    if ((unsigned)flags >> 16 != 0)
+				lnum = apply_prefixes(slang, word, round,
+								 flags, lnum);
+			}
+		    }
+		    else
+		    {
+			/* Normal char, go one level deeper. */
+			word[depth++] = c;
+			arridx[depth] = idxs[n];
+			curi[depth] = 1;
+		    }
+		}
+	    }
+	}
+    }
+
+    /* Delete the empty line that we started with. */
+    if (curbuf->b_ml.ml_line_count > 1)
+	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
+
+    redraw_later(NOT_VALID);
+}
+
+/*
+ * Dump one word: apply case modifications and append a line to the buffer.
+ */
+    static void
+dump_word(word, round, flags, lnum)
+    char_u	*word;
+    int		round;
+    int		flags;
+    linenr_T	lnum;
+{
+    int		keepcap = FALSE;
+    char_u	*p;
+    char_u	cword[MAXWLEN];
+    char_u	badword[MAXWLEN + 3];
+
+    if (round == 1 && (flags & WF_CAPMASK) != 0)
+    {
+	/* Need to fix case according to "flag
author	Bram Moolenaar <Bram@vim.org>	2005-06-23 22:29:21 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-06-23 22:29:21 +0000
commit	f417f2b6af7926edf70c6a95f6972a2dec63bdf0 (patch)
tree	5e7f4c1a83b4fc7f4fa29a9a54edac19c41b6795
parent	f81b0fe01158b415575e67ec4e92a8f5daae9526 (diff)