summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bram Moolenaar <Bram@vim.org> 2005-04-17 20:18:43 +0000
committer Bram Moolenaar <Bram@vim.org> 2005-04-17 20:18:43 +0000
commit 5482f33f78d469b2ace7a5aa9af90d022915f3ec (patch)
tree c31dbec0e219d0b40ef5d455f0c3a0497ef2b645
parent 075757c932e00bad7f80325f2f944e4e1ed4a223 (diff)
updated for version 7.0067
-rw-r--r-- Makefile 2
-rw-r--r-- src/spell.c 1269
2 files changed, 327 insertions, 944 deletions
diff --git a/Makefile b/Makefile
index afd8ad64c4..7e916edb24 100644
--- a/Makefile
+++ b/Makefile
@@ -282,6 +282,7 @@ unixrt: dist prepare
tar cf - \
$(RT_SCRIPTS) \
$(LANG_GEN) \
+ $(LANG_GEN_BIN) \
| (cd dist/$(VIMRTDIR); tar xf -)
cd dist && tar cf $(VIMVER)-rt2.tar $(VIMRTDIR)
gzip -9 dist/$(VIMVER)-rt2.tar
@@ -439,6 +440,7 @@ dosrt_unix2dos: dist prepare no_title.vim
$(RT_UNIX_DOS_BIN) \
$(RT_ALL_BIN) \
$(RT_DOS_BIN) \
+ $(LANG_GEN_BIN) \
| (cd dist/vim/$(VIMRTDIR); tar xf -)
mv dist/vim/$(VIMRTDIR)/runtime/* dist/vim/$(VIMRTDIR)
rmdir dist/vim/$(VIMRTDIR)/runtime
diff --git a/src/spell.c b/src/spell.c
index e3a97fc833..9f1f00c007 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -903,6 +903,13 @@ spell_load_lang(lang)
sprintf((char *)fname_enc, "spell/%s.%s.spl", lang, p);
r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
+ if (r == FAIL && !lp->sl_error)
+ {
+ /* Try loading the ASCII version. */
+ sprintf((char *)fname_enc, "spell/%s.ascii.spl", lang);
+
+ r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
+ }
if (r == FAIL || lp->sl_error)
{
slang_free(lp);
@@ -1001,8 +1008,8 @@ spell_load_file(fname, cookie)
int affitemcnt;
int bl_used = SBLOCKSIZE;
int widx;
- int prefm; /* 1 if <= 256 prefixes, sizeof(short_u) otherw. */
- int suffm; /* 1 if <= 256 suffixes, sizeof(short_u) otherw. */
+ int prefm = 0; /* 1 if <= 256 prefixes, sizeof(short_u) otherw. */
+ int suffm = 0; /* 1 if <= 256 suffixes, sizeof(short_u) otherw. */
int wlen;
int flags;
affitem_T *ai, *ai2, **aip;
@@ -1480,6 +1487,7 @@ did_set_spelllang(buf)
e = vim_strchr(lang, ',');
if (e == NULL)
e = lang + STRLEN(lang);
+ region = NULL;
if (e > lang + 2)
{
if (e - lang >= MAXWLEN)
@@ -1490,8 +1498,6 @@ did_set_spelllang(buf)
if (lang[2] == '_')
region = lang + 3;
}
- else
- region = NULL;
for (lp = first_lang; lp != NULL; lp = lp->sl_next)
if (STRNICMP(lp->sl_name, lang, 2) == 0)
@@ -1726,7 +1732,13 @@ struct affentry_S
affentry_T *ae_next; /* next affix with same name/number */
char_u *ae_chop; /* text to chop off basic word (can be NULL) */
char_u *ae_add; /* text to add to basic word (can be NULL) */
- char_u *ae_add_nw; /* first non-word character in "ae_add" */
+ char_u *ae_add_nw; /* For a suffix: first non-word char in
+ * "ae_add"; for a prefix with only non-word
+ * chars: equal to "ae_add", for a prefix with
+ * word and non-word chars: first non-word
+ * char after word char. NULL otherwise. */
+ char_u *ae_add_pw; /* For a prefix with both word and non-word
+ * chars: first word char. NULL otherwise. */
char_u *ae_cond; /* condition (NULL for ".") */
regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
short_u ae_affnr; /* for old affix: new affix number */
@@ -1778,10 +1790,11 @@ static affhash_T dumas;
#define HI2AS(hi) ((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas)))
-static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv));
+static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii));
static void spell_free_aff __ARGS((afffile_T *aff));
-static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv));
-static int get_new_aff __ARGS((hashtab_T *oldaff, garray_T *gap));
+static int has_non_ascii __ARGS((char_u *s));
+static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii));
+static int get_new_aff __ARGS((hashtab_T *oldaff, garray_T *gap, int prefix));
static void spell_free_dic __ARGS((hashtab_T *dic));
static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2));
static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr));
@@ -1801,15 +1814,17 @@ static void write_bword __ARGS((FILE *fd, basicword_T *bw, int lowcap, basicword
static void free_wordtable __ARGS((hashtab_T *ht));
static void free_basicword __ARGS((basicword_T *bw));
static void free_affixentries __ARGS((affentry_T *first));
+static void free_affix_entry __ARGS((affentry_T *ap));
/*
* Read an affix ".aff" file.
* Returns an afffile_T, NULL for failure.
*/
static afffile_T *
-spell_read_aff(fname, conv)
+spell_read_aff(fname, conv, ascii)
char_u *fname;
vimconv_T *conv; /* info for encoding conversion */
+ int ascii; /* Only accept ASCII characters */
{
FILE *fd;
afffile_T *aff;
@@ -1895,7 +1910,7 @@ spell_read_aff(fname, conv)
{
/* Setup for conversion from "ENC" to 'encoding'. */
aff->af_enc = enc_canonize(items[1]);
- if (aff->af_enc != NULL
+ if (aff->af_enc != NULL && !ascii
&& convert_setup(conv, aff->af_enc, p_enc) == FAIL)
smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
fname, aff->af_enc, p_enc);
@@ -1952,8 +1967,7 @@ spell_read_aff(fname, conv)
(unsigned)sizeof(affentry_T));
if (aff_entry == NULL)
break;
- aff_entry->ae_next = cur_aff->ah_first;
- cur_aff->ah_first = aff_entry;
+
if (STRCMP(items[2], "0") != 0)
aff_entry->ae_chop = vim_strsave(items[2]);
if (STRCMP(items[3], "0") != 0)
@@ -1969,6 +1983,19 @@ spell_read_aff(fname, conv)
sprintf((char *)buf, "%s$", items[4]);
aff_entry->ae_prog = vim_regcomp(buf, RE_MAGIC + RE_STRING);
}
+
+ if (ascii && (has_non_ascii(aff_entry->ae_chop)
+ || has_non_ascii(aff_entry->ae_add)))
+ {
+ /* Don't use an affix entry with non-ASCII characters when
+ * "ascii" is TRUE. */
+ free_affix_entry(aff_entry);
+ }
+ else
+ {
+ aff_entry->ae_next = cur_aff->ah_first;
+ cur_aff->ah_first = aff_entry;
+ }
}
else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
/* Ignore REP count */;
@@ -1997,6 +2024,23 @@ spell_read_aff(fname, conv)
}
/*
+ * Return TRUE if string "s" contains a non-ASCII byte (value 128 or higher).
+ * When "s" is NULL or holds no such byte FALSE is returned.
+ */
+ static int
+has_non_ascii(s)
+ char_u *s;
+{
+ char_u *p;
+
+ if (s != NULL)
+ for (p = s; *p != NUL; ++p)
+ if (*p >= 128) /* checked per byte, not per character */
+ return TRUE;
+ return FALSE;
+}
+
+/*
* Free the structure filled by spell_read_aff().
*/
static void
@@ -2049,10 +2093,11 @@ spell_free_aff(aff)
* Each entry in the hashtab_T is a dicword_T.
*/
static int
-spell_read_dic(ht, fname, conv)
+spell_read_dic(ht, fname, conv, ascii)
hashtab_T *ht;
char_u *fname;
vimconv_T *conv; /* info for encoding conversion */
+ int ascii; /* only accept ASCII words */
{
char_u line[MAXLINELEN];
char_u *p;
@@ -2102,6 +2147,10 @@ spell_read_dic(ht, fname, conv)
if (p != NULL)
*p++ = NUL;
+ /* Skip non-ASCII words when "ascii" is TRUE. */
+ if (ascii && has_non_ascii(line))
+ continue;
+
/* Convert from "SET" to 'encoding' when needed. */
if (conv->vc_type != CONV_NONE)
{
@@ -2170,9 +2219,11 @@ spell_free_dic(dic)
* Returns OK or FAIL;
*/
static int
-get_new_aff(oldaff, gap)
+get_new_aff(oldaff, gap, prefix)
hashtab_T *oldaff; /* hashtable with affheader_T */
garray_T *gap; /* table with new affixes */
+ int prefix; /* TRUE when doing prefixes, FALSE for
+ suffixes */
{
int oldtodo;
affheader_T *oldah, *newah, *gapah;
@@ -2218,17 +2269,44 @@ get_new_aff(oldaff, gap)
oldae = oldae->ae_next)
{
oldae->ae_add_nw = NULL;
+ oldae->ae_add_pw = NULL;
if (oldae->ae_add != NULL)
{
- /* Check for non-word characters in the suffix. If there
+ /* Check for non-word characters in the affix. If there
* is one this affix will be turned into an addition.
* This is stored with the old affix, that is where
* trans_affixes() will check. */
for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
if (!spell_iswordc(p))
+ {
+ oldae->ae_add_nw = p;
break;
- if (*p != NUL)
- oldae->ae_add_nw = p;
+ }
+
+ if (prefix && oldae->ae_add_nw != NULL)
+ {
+ /* If a prefix has both word and non-word characters
+ * special treatment is necessary. If it has only
+ * non-word characters it becomes a leadstring. */
+ for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
+ if (spell_iswordc(p))
+ {
+ oldae->ae_add_pw = p;
+ break;
+ }
+ if (oldae->ae_add_pw != NULL)
+ {
+ /* Mixed prefix, set ae_add_nw to first non-word
+ * char after ae_add_pw (if there is one). */
+ oldae->ae_add_nw = NULL;
+ for ( ; *p != NUL; mb_ptr_adv(p))
+ if (!spell_iswordc(p))
+ {
+ oldae->ae_add_nw = p;
+ break;
+ }
+ }
+ }
}
if (oldae->ae_cond == NULL)
@@ -2458,12 +2536,14 @@ trans_affixes(dw, bw, oldaff, newwords)
basicword_T *nbw;
int alen;
int wlen;
- garray_T fixga;
+ garray_T suffixga; /* list of words with non-word suffixes */
+ garray_T prefixga; /* list of words with non-word prefixes */
char_u nword[MAXWLEN];
int flags;
int n;
- ga_init2(&fixga, (int)sizeof(basicword_T *), 5);
+ ga_init2(&suffixga, (int)sizeof(basicword_T *), 5);
+ ga_init2(&prefixga, (int)sizeof(basicword_T *), 5);
/* Loop over all the affix names of the old word. */
key[1] = NUL;
@@ -2494,8 +2574,8 @@ trans_affixes(dw, bw, oldaff, newwords)
if (ae->ae_prog == NULL
|| vim_regexec(&regmatch, dw->dw_word, (colnr_T)0))
{
- if (ae->ae_add_nw != NULL && (gap == &bw->bw_suffix
- ? bw->bw_addstring : bw->bw_leadstring) == NULL)
+ if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL)
+ && (gap != &bw->bw_suffix || bw->bw_addstring == NULL))
{
/* Affix has a non-word character and isn't prepended to
* leader or appended to addition. Need to use another
@@ -2527,6 +2607,7 @@ trans_affixes(dw, bw, oldaff, newwords)
flags = captype(nword, nword + STRLEN(nword));
if (flags & BWF_KEEPCAP)
{
+ /* "caseword" excludes the addition */
nword[STRLEN(dw->dw_word) + alen] = NUL;
nbw->bw_caseword = vim_strsave(nword);
}
@@ -2542,8 +2623,9 @@ trans_affixes(dw, bw, oldaff, newwords)
STRCPY(nbw->bw_word, bw->bw_word);
if (alen > 0 || ae->ae_chop != NULL)
{
- /* Suffix starts with word character. Append
- * it to the word. Add new word entry. */
+ /* Suffix starts with word character and/or
+ * chop off something. Append it to the word.
+ * Add new word entry. */
wlen = STRLEN(nbw->bw_word);
if (ae->ae_chop != NULL)
wlen -= STRLEN(ae->ae_chop);
@@ -2558,15 +2640,154 @@ trans_affixes(dw, bw, oldaff, newwords)
bw->bw_next = nbw;
/* Remember this word, we need to set bw_prefix
+ * later. */
+ if (ga_grow(&suffixga, 1) == OK)
+ ((basicword_T **)suffixga.ga_data)
+ [suffixga.ga_len++] = nbw;
+ }
+ }
+ else if (ae->ae_add_nw == NULL)
+ {
+ /* Prefix that starts with non-word char(s) and may be
+ * followed by word chars: Make a leadstring and
+ * prepend word chars before the word. */
+ alen = STRLEN(ae->ae_add_pw);
+ nbw = (basicword_T *)alloc((unsigned)(
+ sizeof(basicword_T) + STRLEN(bw->bw_word)
+ + alen + 1));
+ if (nbw != NULL)
+ {
+ *nbw = *bw;
+ ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
+ ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
+
+ /* Adding the prefix may change the caps. */
+ STRCPY(nword, ae->ae_add);
+ p = dw->dw_word;
+ if (ae->ae_chop != NULL)
+ /* Skip chop string. */
+ for (i = mb_charlen(ae->ae_chop); i > 0; --i)
+ mb_ptr_adv( p);
+ STRCAT(nword, p);
+
+ flags = captype(nword, nword + STRLEN(nword));
+ if (flags & BWF_KEEPCAP)
+ /* "caseword" excludes the addition */
+ nbw->bw_caseword = vim_strsave(nword
+ + (ae->ae_add_pw - ae->ae_add));
+ else
+ nbw->bw_caseword = NULL;
+ nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
+ | BWF_KEEPCAP);
+ nbw->bw_flags |= flags;
+
+ if (bw->bw_addstring != NULL)
+ nbw->bw_addstring =
+ vim_strsave(bw->bw_addstring);
+ else
+ nbw->bw_addstring = NULL;
+ nbw->bw_leadstring = vim_strnsave(ae->ae_add,
+ ae->ae_add_pw - ae->ae_add);
+
+ if (alen > 0 || ae->ae_chop != NULL)
+ {
+ /* Prefix ends in word character and/or chop
+ * off something. Prepend it to the word.
+ * Add new word entry. */
+ STRCPY(nbw->bw_word, ae->ae_add_pw);
+ p = bw->bw_word;
+ if (ae->ae_chop != NULL)
+ p += STRLEN(ae->ae_chop);
+ STRCAT(nbw->bw_word, p);
+ add_to_wordlist(newwords, nbw);
+ }
+ else
+ {
+ /* Basic word is the same, link "nbw" after
+ * "bw". */
+ STRCPY(nbw->bw_word, bw->bw_word);
+ bw->bw_next = nbw;
+ }
+
+ /* Remember this word, we need to set bw_suffix
 * later. */
- if (ga_grow(&fixga, 1) == OK)
- ((basicword_T **)fixga.ga_data)[fixga.ga_len++]
- = nbw;
+ if (ga_grow(&prefixga, 1) == OK)
+ ((basicword_T **)prefixga.ga_data)
+ [prefixga.ga_len++] = nbw;
}
}
else
{
- /* TODO: prefix with non-word char */
+ /* Prefix with both non-word and word characters: Turn
+ * prefix into basic word, original word becomes an
+ * addstring. */
+
+ /* Fold-case the word characters in the prefix into
+ * nword[]. */
+ alen = 0;
+ for (p = ae->ae_add_pw; p < ae->ae_add_nw; p += n)
+ {
+#ifdef FEAT_MBYTE
+ n = (*mb_ptr2len_check)(p);
+#else
+ n = 1;
+#endif
+ (void)str_foldcase(p, n, nword + alen,
+ MAXWLEN - alen);
+ alen += STRLEN(nword + alen);
+ }
+
+ /* Allocate a new word entry. */
+ nbw = (basicword_T *)alloc((unsigned)(
+ sizeof(basicword_T) + alen + 1));
+ if (nbw != NULL)
+ {
+ *nbw = *bw;
+ ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
+ ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
+
+ mch_memmove(nbw->bw_word, nword, alen);
+ nbw->bw_word[alen] = NUL;
+
+ /* Use the cap type of the prefix. */
+ alen = ae->ae_add_nw - ae->ae_add_pw;
+ mch_memmove(nword, ae->ae_add_pw, alen);
+ nword[alen] = NUL;
+ flags = captype(nword, nword + STRLEN(nword));
+ if (flags & BWF_KEEPCAP)
+ nbw->bw_caseword = vim_strsave(nword);
+ else
+ nbw->bw_caseword = NULL;
+ nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
+ | BWF_KEEPCAP);
+ nbw->bw_flags |= flags;
+
+ /* The addstring is the prefix after the word
+ * characters, the original word excluding "chop",
+ * plus any addition. */
+ STRCPY(nword, ae->ae_add_nw);
+ p = bw->bw_word;
+ if (ae->ae_chop != NULL)
+ p += STRLEN(ae->ae_chop);
+ STRCAT(nword, p);
+ if (bw->bw_addstring != NULL)
+ STRCAT(nword, bw->bw_addstring);
+ nbw->bw_addstring = vim_strsave(nword);
+
+ if (ae->ae_add_pw > ae->ae_add)
+ nbw->bw_leadstring = vim_strnsave(ae->ae_add,
+ ae->ae_add_pw - ae->ae_add);
+ else
+ nbw->bw_leadstring = NULL;
+
+ add_to_wordlist(newwords, nbw);
+
+ /* Remember this word, we need to set bw_suffix
+ * later. */
+ if (ga_grow(&prefixga, 1) == OK)
+ ((basicword_T **)prefixga.ga_data)
+ [prefixga.ga_len++] = nbw;
+ }
}
}
else
@@ -2601,11 +2822,10 @@ trans_affixes(dw, bw, oldaff, newwords)
/*
* For the words that we added for suffixes with non-word characters: Use
* the prefix list of the main word.
- * TODO: do the same for prefixes.
*/
- for (i = 0; i < fixga.ga_len; ++i)
+ for (i = 0; i < suffixga.ga_len; ++i)
{
- nbw = ((basicword_T **)fixga.ga_data)[i];
+ nbw = ((basicword_T **)suffixga.ga_data)[i];
if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
{
mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
@@ -2614,7 +2834,23 @@ trans_affixes(dw, bw, oldaff, newwords)
}
}
- ga_clear(&fixga);
+ /*
+ * For the words that we added for prefixes with non-word characters: Use
+ * the suffix list of the main word.
+ */
+ for (i = 0; i < prefixga.ga_len; ++i)
+ {
+ nbw = ((basicword_T **)prefixga.ga_data)[i];
+ if (ga_grow(&nbw->bw_suffix, bw->bw_suffix.ga_len) == OK)
+ {
+ mch_memmove(nbw->bw_suffix.ga_data, bw->bw_suffix.ga_data,
+ bw->bw_suffix.ga_len * sizeof(short_u));
+ nbw->bw_suffix.ga_len = bw->bw_suffix.ga_len;
+ }
+ }
+
+ ga_clear(&suffixga);
+ ga_clear(&prefixga);
}
/*
@@ -2642,8 +2878,9 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
char_u *p;
int clen;
int flags;
- char_u *cp;
+ char_u *cp = NULL;
int l;
+ char_u message[MAXLINELEN + MAXWLEN];
todo = oldwords->ht_used;
for (old_hi = oldwords->ht_array; todo > 0; ++old_hi)
@@ -2654,14 +2891,15 @@ build_wordlist(newwords, oldwords, oldaff, regionmask)
dw = HI2DW(old_hi);
/* This takes time, print a message now and then. */
- if ((todo & 0x3ff) == 0 || todo == oldwords->ht_used - 1)
+ if ((todo & 0x3ff) == 0 || todo == (int)oldwords->ht_used - 1)
{
- if (todo != oldwords->ht_used - 1)
- {
- msg_didout = FALSE;
- msg_col = 0;
- }
- smsg((char_u *)_("%6d todo - %s"), todo, dw->dw_word);
+ sprintf((char *)message, _("%6d todo - %s"),
+ todo, dw->dw_word);
+ msg_start();
+ msg_outtrans_attr(message, 0);
+ msg_clr_eos();
+ msg_didout = FALSE;
+ msg_col = 0;
out_flush();
ui_breakcheck();
if (got_int)
@@ -2874,6 +3112,7 @@ expand_affixes(newwords, prefgap, suffgap)
affentry_T *pae, *sae;
garray_T add_words;
int n;
+ char_u message[MAXLINELEN + MAXWLEN];
ga_init2(&add_words, sizeof(basicword_T *), 10);
@@ -2883,6 +3122,23 @@ expand_affixes(newwords, prefgap, suffgap)
if (!HASHITEM_EMPTY(hi))
{
--todo;
+
+ /* This takes time, print a message now and then. */
+ if ((todo & 0x3ff) == 0 || todo == (int)newwords->ht_used - 1)
+ {
+ sprintf((char *)message, _("%6d todo - %s"),
+ todo, HI2BW(hi)->bw_word);
+ msg_start();
+ msg_outtrans_attr(message, 0);
+ msg_clr_eos();
+ msg_didout = FALSE;
+ msg_col = 0;
+ out_flush();
+ ui_breakcheck();
+ if (got_int)
+ break;
+ }
+
for (bw = HI2BW(hi); bw != NULL; bw = bw->bw_next)
{
/*
@@ -3318,7 +3574,7 @@ write_vim_spell(fname, prefga, suffga, newwords, regcount, regchars)
char_u **wtab;
int todo;
int flags, aflags;
- basicword_T *bw, *bwf, *bw2, *prevbw = NULL;
+ basicword_T *bw, *bwf, *bw2 = NULL, *prevbw = NULL;
int regionmask; /* mask for all relevant region bits */
int i;
int cnt;
@@ -3397,7 +3653,7 @@ write_vim_spell(fname, prefga, suffga, newwords, regcount, regchars)
/* Now write each basic word to the spell file. */
ga_init2(&bwga, sizeof(basicword_T *), 10);
- for (todo = 0; todo < newwords->ht_used; ++todo)
+ for (todo = 0; (long_u)todo < newwords->ht_used; ++todo)
{
bwf = KEY2BW(wtab[todo]);
@@ -3661,9 +3917,17 @@ ex_mkspell(eap)
struct stat st;
int round;
vimconv_T conv;
+ int ascii = FALSE;
+ char_u *arg = eap->arg;
- /* Expand all the arguments (e.g., $VIMRUNTIME). */
- if (get_arglist_exp(eap->arg, &fcount, &fnames) == FAIL)
+ if (STRNCMP(arg, "-ascii", 6) == 0)
+ {
+ ascii = TRUE;
+ arg = skipwhite(arg + 6);
+ }
+
+ /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
+ if (get_arglist_exp(arg, &fcount, &fnames) == FAIL)
return;
if (fcount < 2)
EMSG(_(e_invarg)); /* need at least output and input names */
@@ -3673,7 +3937,8 @@ ex_mkspell(eap)
{
/* Check for overwriting before doing things that may take a lot of
* time. */
- sprintf((char *)wfname, "%s.%s.spl", fnames[0], p_enc);
+ sprintf((char *)wfname, "%s.%s.spl", fnames[0],
+ ascii ? (char_u *)"ascii" : p_enc);
if (!eap->forceit && mch_stat((char *)wfname, &st) >= 0)
{
EMSG(_(e_exists));
@@ -3719,12 +3984,12 @@ ex_mkspell(eap)
/* Read the .aff file. Will init "conv" based on the "SET" line. */
conv.vc_type = CONV_NONE;
sprintf((char *)fname, "%s.aff", fnames[i]);
- if ((afile[i - 1] = spell_read_aff(fname, &conv)) == NULL)
+ if ((afile[i - 1] = spell_read_aff(fname, &conv, ascii)) == NULL)
break;
/* Read the .dic file. */
sprintf((char *)fname, "%s.dic", fnames[i]);
- if (spell_read_dic(&dfile[i - 1], fname, &conv) == FAIL)
+ if (spell_read_dic(&dfile[i - 1], fname, &conv, ascii) == FAIL)
break;
/* Free any conversion stuff. */
@@ -3755,7 +4020,8 @@ ex_mkspell(eap)
ga_init2(gap, sizeof(affheader_T), 50);
for (i = 1; i < fcount; ++i)
get_new_aff(round == 1 ? &afile[i - 1]->af_pref
- : &afile[i - 1]->af_suff, gap);
+ : &afile[i - 1]->af_suff,
+ gap, round == 1);
}
/*
@@ -3863,7 +4129,7 @@ free_basicword(bw)
}
/*
- * Free a list of affentry_T.
+ * Free a list of affentry_T and what they contain.
*/
static void
free_affixentries(first)
@@ -3874,909 +4140,24 @@ free_affixentries(first)
for (ap = first; ap != NULL; ap = an)
{
an = ap->ae_next;
- vim_free(ap->ae_chop);
- vim_free(ap->ae_add);
- vim_free(ap->ae_cond);
- vim_free(ap->ae_prog);
- vim_free(ap);
+ free_affix_entry(ap);
}
}
-#endif /* FEAT_MBYTE */
-
-#endif /* FEAT_SYN_HL */
-
-#if 0 /* old spell code with words in .spl file */
-/*
- * Structure that is used to store the text from the language file. This
- * avoids the need to allocate space for each individual word. It's allocated
- * in big chunks for speed.
- */
-#define SBLOCKSIZE 4096 /* default size of sb_data */
-typedef struct sblock_S sblock_T;
-struct sblock_S
-{
- sblock_T *sb_next; /* next block in list */
- char_u sb_data[1]; /* data, actually longer */
-};
-
-/* Structure to store words and additions. Used twice : once for case-folded
- * and once for keep-case words. */
-typedef struct winfo_S
-{
- hashtab_T wi_ht; /* hashtable with all words, both dword_T and
- nword_T (check flags for DW_NWORD) */
- garray_T wi_add; /* table with pointers to additions in a
- dword_T */
- int wi_addlen; /* longest addition length */
-} winfo_T;
-
/*
- * Structure used to store words and other info for one language.
- */
-typedef struct slang_S slang_T;
-struct slang_S
-{
- slang_T *sl_next; /* next language */
- char_u sl_name[2]; /* language name "en", "nl", etc. */
- winfo_T sl_fwords; /* case-folded words and additions */
- winfo_T sl_kwords; /* keep-case words and additions */
- char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
- sblock_T *sl_block; /* list with allocated memory blocks */
-};
-
-static slang_T *first_lang = NULL;
-
-/* Entry for dword in "sl_ht". Also used for part of an nword, starting with
- * the first non-word character. And used for additions in wi_add. */
-typedef struct dword_S
-{
- char_u dw_region; /* one bit per region where it's valid */
- char_u dw_flags; /* DW_ flags */
- char_u dw_word[1]; /* actually longer, NUL terminated */
-} dword_T;
-
-#define REGION_ALL 0xff
-
-#define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2)
-
-/* Entry for a nword in "sl_ht". Note that the last three items must be
- * identical to dword_T, so that they can be in the same hashtable. */
-typedef struct nword_S
-{
- garray_T nw_ga; /* table with pointers to dword_T for part
- starting with non-word character */
- int nw_maxlen; /* longest nword length (after the dword) */
- char_u nw_region; /* one bit per region where it's valid */
- char_u nw_flags; /* DW_ flags */
- char_u nw_word[1]; /* actually longer, NUL terminated */
-} nword_T;
-
-/* Get nword_T pointer from hashitem that uses nw_word */
-static nword_T dumnw;
-#define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
-
-#define DW_CAP 0x01 /* word must start with capital */
-#define DW_RARE 0x02 /* rare word */
-#define DW_NWORD 0x04 /* this is an nword_T */
-#define DW_DWORD 0x08 /* (also) use as dword without nword */
-
-/*
- * Structure used in "b_langp", filled from 'spelllang'.
- */
-typedef struct langp_S
-{
- slang_T *lp_slang; /* info for this language (NULL for last one) */
- int lp_region; /* bitmask for region or REGION_ALL */
-} langp_T;
-
-#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
-#define DWORD_ENTRY(gap, i) *(((dword_T **)(gap)->ga_data) + i)
-
-#define SP_OK 0
-#define SP_BAD 1
-#define SP_RARE 2
-#define SP_LOCAL 3
-
-static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
-
-static slang_T *spell_load_lang __ARGS((char_u *lang));
-static void spell_load_file __ARGS((char_u *fname));
-static int find_region __ARGS((char_u *rp, char_u *region));
-
-/*
- * Main spell-checking function.
- * "ptr" points to the start of a word.
- * "*attrp" is set to the attributes for a badly spelled word. For a non-word
- * or when it's OK it remains unchanged.
- * This must only be called when 'spelllang' is not empty.
- * Returns the length of the word in bytes, also when it's OK, so that the
- * caller can skip over the word.
- */
- int
-spell_check(wp, ptr, attrp)
- win_T *wp; /* current window */
- char_u *ptr;
- int *attrp;
-{
- char_u *e; /* end of word */
- char_u *ne; /* new end of word */
- char_u *me; /* max. end of match */
- langp_T *lp;
- int result;
- int len = 0;
- hashitem_T *hi;
- int round;
- char_u kword[MAXWLEN + 1]; /* word copy */
- char_u fword[MAXWLEN + 1]; /* word with case folded */
- char_u match[MAXWLEN + 1]; /* fword with additional chars */
- char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
- char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
- char_u *clen;
- int cidx = 0; /* char index in xwordclen[] */
- hash_T fhash; /* hash for fword */
- hash_T khash; /* hash for kword */
- int match_len = 0; /* length of match[] */
- int fmatch_len = 0; /* length of nword match in chars */
- garray_T *gap;
- int l, t;
- char_u *p, *tp;
- int n;
- dword_T *dw;
- dword_T *tdw;
- winfo_T *wi;
- nword_T *nw;
- int w_isupper;
-
- /* Find the end of the word. We already know that *ptr is a word char. */
- e = ptr;
- do
- {
- mb_ptr_adv(e);
- ++len;
- } while (*e != NUL && spell_iswordc(e));
-
- /* A word starting with a number is always OK. */
- if (*ptr >= '0' && *ptr <= '9')
- return (int)(e - ptr);
-
-#ifdef FEAT_MBYTE
- w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
-#else
- w_isupper = MB_ISUPPER(*ptr);
-#endif
-
- /* Make a copy of the word so that it can be NUL terminated.
- * Compute hash value. */
- mch_memmove(kword, ptr, e - ptr);
- kword[e - ptr] = NUL;
- khash = hash_hash(kword);
-
- /* Make case-folded copy of the Word. Compute its hash value. */
- (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
- fhash = hash_hash(fword);
-
- /* Further case-folded characters to check for an nword match go in
- * match[]. */
- me = e;
-
- /* "ne" is the end for the longest match */
- ne = e;
-
- /* The word is bad unless we find it in the dictionary. */
- result = SP_BAD;
-
- /*
- * Loop over the languages specified in 'spelllang'.
- * We check them all, because a matching nword may be longer than an
- * already found dword or nword.
- */
- for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
- {
- /*
- * Check for a matching word in the hashtable.
- * Check both the keep-case word and the fold-case word.
- */
- for (round = 0; round <= 1; ++round)
- {
- if (round == 0)
- {
- wi = &lp->lp_slang->sl_kwords;
- hi = hash_lookup(&wi->wi_ht, kword, khash);
- }
- else
- {
- wi = &lp->lp_slang->sl_fwords;
- hi = hash_lookup(&wi->wi_ht, fword, fhash);
- }
- if (!HASHITEM_EMPTY(hi))
- {
- /*
- * If this is an nword entry, check for match with remainder.
- */
- dw = HI2DWORD(hi);
- if (dw->dw_flags & DW_NWORD)
- {
- /* If the word is not defined as a dword we must find an
- * nword. */
- if ((dw->dw_flags & DW_DWORD) == 0)
- dw = NULL;
-
- /* Fold more characters when needed for the nword. Need
- * to do one extra to check for a non-word character after
- * the nword. Also keep the byte-size of each character,
- * both before and after folding case. */
- nw = HI2NWORD(hi);
- while ((round == 0
- ? me - e <= nw->nw_maxlen
- : match_len <= nw->nw_maxlen)
- && *me != NUL)
- {
-#ifdef FEAT_MBYTE
- l = mb_ptr2len_check(me);
-#else
- l = 1;
-#endif
- (void)str_foldcase(me, l, match + match_len,
- MAXWLEN - match_len + 1);
- me += l;
- kwordclen[cidx] = l;
- fwordclen[cidx] = STRLEN(match + match_len);
- match_len += fwordclen[cidx];
- ++cidx;
- }
-
- if (round == 0)
- {
- clen = kwordclen;
- tp = e;
- }
- else
- {
- clen = fwordclen;
- tp = match;
- }
-
- /* Match with each item. The longest match wins:
- * "you've" is longer than "you". */
- gap = &nw->nw_ga;
- for (t = 0; t < gap->ga_len; ++t)
- {
- /* Skip entries with wrong case for first char.
- * Continue if it's a rare word without a captial. */
- tdw = DWORD_ENTRY(gap, t);
- if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
- && !w_isupper)
- continue;
-
- p = tdw->dw_word;
- l = 0;
- for (n = 0; p[n] != 0; n += clen[l++])
- if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
- break;
-
- /* Use a match if it's longer than previous matches
- * and the next character is not a word character. */
- if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
- || !spell_iswordc(tp + n)))
- {
- dw = tdw;
- fmatch_len = l;
- if (round == 0)
- ne = tp + n;
- else
- {
- /* Need to use the length of the original
- * chars, not the fold-case ones. */
- ne = e;
- for (l = 0; l < fmatch_len; ++l)
- ne += kwordclen[l];
- }
- if ((lp->lp_region & tdw->dw_region) == 0)
- result = SP_LOCAL;
- else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
- result = SP_RARE;
- else
- result = SP_OK;
- }
- }
-
- }
-
- if (dw != NULL)
- {
- if (dw->dw_flags & DW_CAP)
- {
- /* Need to check first letter is uppercase. If it is,
- * check region. If it isn't it may be a rare word.
- * */
- if (w_isupper)
- {
- if ((dw->dw_region & lp->lp_region) == 0)
- result = SP_LOCAL;
- else
- result = SP_OK;
- }
- else if (dw->dw_flags & DW_RARE)
- result = SP_RARE;
- }
- else
- {
- if ((dw->dw_region & lp->lp_region) == 0)
- result = SP_LOCAL;
- else if (dw->dw_flags & DW_RARE)
- result = SP_RARE;
- else
- result = SP_OK;
- }
- }
- }
- }
-
- /*
- * Check for an addition.
- * Only after a dword, not after an nword.
- * Check both the keep-case word and the fold-case word.
- */
- if (fmatch_len == 0)
- for (round = 0; round <= 1; ++round)
- {
- if (round == 0)
- wi = &lp->lp_slang->sl_kwords;
- else
- wi = &lp->lp_slang->sl_fwords;
- gap = &wi->wi_add;
- if (gap->ga_len == 0) /* no additions, skip quickly */
- continue;
-
- /* Fold characters when needed for the addition. Need to do one
- * extra to check for a word character after the addition. */
- while ((round == 0
- ? me - e <= wi->wi_addlen
- : match_len <= wi->wi_addlen)
- && *me != NUL)
- {
-#ifdef FEAT_MBYTE
- l = mb_ptr2len_check(me);
-#else
- l = 1;
-#endif
- (void)str_foldcase(me, l, match + match_len,
- MAXWLEN - match_len + 1);
- me += l;
- kwordclen[cidx] = l;
- fwordclen[cidx] = STRLEN(match + match_len);
- match_len += fwordclen[cidx];
- ++cidx;
- }
-
- if (round == 0)
- {
- clen = kwordclen;
- tp = e;
- }
- else
- {
- clen = fwordclen;
- tp = match;
- }
-
- /* Addition lookup. Uses a linear search, there should be
- * very few. If there is a match adjust "ne" to the end.
- * This doesn't change whether a word was good or bad, only
- * the length. */
- for (t = 0; t < gap->ga_len; ++t)
- {
- tdw = DWORD_ENTRY(gap, t);
- p = tdw->dw_word;
- l = 0;
- for (n = 0; p[n] != 0; n += clen[l++])
- if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
- break;
-
- /* Use a match if it's longer than previous matches
- * and the next character is not a word character. */
- if (p[n] == 0 && l > fmatch_len
- && (tp[n] == 0 || !spell_iswordc(tp + n)))
- {
- fmatch_len = l;
- if (round == 0)
- ne = tp + n;
- else
- {
- /* Need to use the length of the original
- * chars, not the fold-case ones. */
- ne = e;
- for (l = 0; l < fmatch_len; ++l)
- ne += kwordclen[l];
- }
- }
- }
- }
- }
-
- if (result != SP_OK)
- {
- if (result == SP_BAD)
- *attrp = highlight_attr[HLF_SPB];
- else if (result == SP_RARE)
- *attrp = highlight_attr[HLF_SPR];
- else
- *attrp = highlight_attr[HLF_SPL];
- }
-
- return (int)(ne - ptr);
-}
-
-static slang_T *load_lp; /* passed from spell_load_