summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/ex_cmds.c 12
-rw-r--r-- src/ex_getln.c 11
-rw-r--r-- src/getchar.c 10
-rw-r--r-- src/mbyte.c 4
-rw-r--r-- src/regexp.c 34
-rw-r--r-- src/search.c 9
-rw-r--r-- src/spell.c 408
-rw-r--r-- src/structs.h 5
-rw-r--r-- src/undo.c 6
-rw-r--r-- src/version.h 4
-rw-r--r-- src/vim.h 1
11 files changed, 407 insertions, 97 deletions
diff --git a/src/ex_cmds.c b/src/ex_cmds.c
index 4b7d6fd46a..52104f41ce 100644
--- a/src/ex_cmds.c
+++ b/src/ex_cmds.c
@@ -425,6 +425,11 @@ ex_sort(eap)
nrs[lnum - eap->line1].col_nr = col;
nrs[lnum - eap->line1].lnum = lnum;
+
+ if (regmatch.regprog != NULL)
+ fast_breakcheck();
+ if (got_int)
+ goto theend;
}
/* Allocate a buffer that can hold the longest line. */
@@ -432,7 +437,7 @@ ex_sort(eap)
if (sortbuf == NULL)
goto theend;
- /* sort the array of line numbers */
+ /* Sort the array of line numbers. Note: can't be interrupted! */
qsort((void *)nrs, count, sizeof(sorti_T), sort_compare);
/* Insert the lines in the sorted order below the last one. */
@@ -448,6 +453,9 @@ ex_sort(eap)
if (unique)
STRCPY(sortbuf, s);
}
+ fast_breakcheck();
+ if (got_int)
+ goto theend;
}
/* delete the original lines if appending worked */
@@ -472,6 +480,8 @@ theend:
vim_free(nrs);
vim_free(sortbuf);
vim_free(regmatch.regprog);
+ if (got_int)
+ EMSG(_(e_interr));
}
/*
diff --git a/src/ex_getln.c b/src/ex_getln.c
index 5ba7d8b076..cd40a689dd 100644
--- a/src/ex_getln.c
+++ b/src/ex_getln.c
@@ -1647,16 +1647,21 @@ cmdline_changed:
if (i != 0)
{
+ pos_T save_pos = curwin->w_cursor;
+
/*
* First move cursor to end of match, then to start. This
* moves the whole match onto the screen when 'nowrap' is set.
*/
- i = curwin->w_cursor.col;
curwin->w_cursor.lnum += search_match_lines;
curwin->w_cursor.col = search_match_endcol;
+ if (curwin->w_cursor.lnum > curbuf->b_ml.ml_line_count)
+ {
+ curwin->w_cursor.lnum = curbuf->b_ml.ml_line_count;
+ coladvance((colnr_T)MAXCOL);
+ }
validate_cursor();
- curwin->w_cursor.lnum -= search_match_lines;
- curwin->w_cursor.col = i;
+ curwin->w_cursor = save_pos;
}
validate_cursor();
diff --git a/src/getchar.c b/src/getchar.c
index 085fc4dc68..69cebaee56 100644
--- a/src/getchar.c
+++ b/src/getchar.c
@@ -3325,6 +3325,9 @@ do_map(maptype, arg, mode, abbrev)
mp->m_noremap = noremap;
mp->m_silent = silent;
mp->m_mode = mode;
+#ifdef FEAT_EVAL
+ mp->m_script_ID = current_SID;
+#endif
did_it = TRUE;
}
}
@@ -3407,6 +3410,9 @@ do_map(maptype, arg, mode, abbrev)
mp->m_noremap = noremap;
mp->m_silent = silent;
mp->m_mode = mode;
+#ifdef FEAT_EVAL
+ mp->m_script_ID = current_SID;
+#endif
/* add the new entry in front of the abbrlist or maphash[] list */
if (abbrev)
@@ -3676,6 +3682,10 @@ showmap(mp, local)
msg_puts_attr((char_u *)"<Nop>", hl_attr(HLF_8));
else
msg_outtrans_special(mp->m_str, FALSE);
+#ifdef FEAT_EVAL
+ if (p_verbose > 0)
+ last_set_msg(mp->m_script_ID);
+#endif
out_flush(); /* show one line at a time */
}
diff --git a/src/mbyte.c b/src/mbyte.c
index 61fd6980a3..5fb45568b5 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -2876,6 +2876,10 @@ enc_canonize(enc)
/* Skip "2byte-" and "8bit-". */
p = enc_skip(r);
+ /* Change "microsoft-cp" to "cp". Used in some spell files. */
+ if (STRNCMP(p, "microsoft-cp", 12) == 0)
+ mch_memmove(p, p + 10, STRLEN(p + 10) + 1);
+
/* "iso8859" -> "iso-8859" */
if (STRNCMP(p, "iso8859", 7) == 0)
{
diff --git a/src/regexp.c b/src/regexp.c
index c111778fa1..1ff5a069be 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -606,6 +606,7 @@ static int reg_magic; /* magicness of the pattern: */
static int reg_string; /* matching with a string instead of a buffer
line */
+static int reg_strict; /* "[abc" is illegal */
/*
* META contains all characters that may be magic, except '^' and '$'.
@@ -1132,6 +1133,7 @@ regcomp_start(expr, re_flags)
else
reg_magic = MAGIC_OFF;
reg_string = (re_flags & RE_STRING);
+ reg_strict = (re_flags & RE_STRICT);
num_complex_braces = 0;
regnpar = 1;
@@ -2253,6 +2255,9 @@ collection:
*flagp |= HASWIDTH | SIMPLE;
break;
}
+ else if (reg_strict)
+ EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
+ reg_magic > MAGIC_OFF);
}
/* FALLTHROUGH */
@@ -3172,6 +3177,9 @@ reg_getline(lnum)
* can't go before line 1 */
if (reg_firstlnum + lnum < 1)
return NULL;
+ if (reg_firstlnum + lnum > reg_buf->b_ml.ml_line_count)
+ /* Must have matched the "\n" in the last line. */
+ return (char_u *)"";
return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
}
@@ -3455,8 +3463,8 @@ vim_regexec_both(line, col)
/* if not currently on the first line, get it again */
if (reglnum != 0)
{
- regline = reg_getline((linenr_T)0);
reglnum = 0;
+ regline = reg_getline((linenr_T)0);
}
if (regline[col] == NUL)
break;
@@ -3723,7 +3731,8 @@ regmatch(scan)
op = OP(scan);
/* Check for character class with NL added. */
- if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
+ if (!reg_line_lbr && WITH_NL(op) && *reginput == NUL
+ && reglnum <= reg_maxline)
{
reg_nextline();
}
@@ -4369,7 +4378,7 @@ regmatch(scan)
}
if (clnum == reg_endpos[no].lnum)
break; /* match and at end! */
- if (reglnum == reg_maxline)
+ if (reglnum >= reg_maxline)
{
status = RA_NOMATCH; /* text too short */
break;
@@ -4672,7 +4681,7 @@ regmatch(scan)
break;
case NEWL:
- if ((c != NUL || reglnum == reg_maxline)
+ if ((c != NUL || reglnum > reg_maxline || reg_line_lbr)
&& (c != '\n' || !reg_line_lbr))
status = RA_NOMATCH;
else if (reg_line_lbr)
@@ -5133,7 +5142,8 @@ regrepeat(p, maxcount)
++count;
mb_ptr_adv(scan);
}
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline || count == maxcount)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr
+ || count == maxcount)
break;
++count; /* count the line-break */
reg_nextline();
@@ -5157,7 +5167,7 @@ regrepeat(p, maxcount)
}
else if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5186,7 +5196,7 @@ regrepeat(p, maxcount)
}
else if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5215,7 +5225,7 @@ regrepeat(p, maxcount)
}
else if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5240,7 +5250,7 @@ regrepeat(p, maxcount)
{
if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5270,7 +5280,7 @@ do_class:
#endif
if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5433,7 +5443,7 @@ do_class:
#endif
if (*scan == NUL)
{
- if (!WITH_NL(OP(p)) || reglnum == reg_maxline)
+ if (!WITH_NL(OP(p)) || reglnum > reg_maxline || reg_line_lbr)
break;
reg_nextline();
scan = reginput;
@@ -5462,7 +5472,7 @@ do_class:
case NEWL:
while (count < maxcount
- && ((*scan == NUL && reglnum < reg_maxline)
+ && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr)
|| (*scan == '\n' && reg_line_lbr)))
{
count++;
diff --git a/src/search.c b/src/search.c
index bc5da67bb3..af3215b318 100644
--- a/src/search.c
+++ b/src/search.c
@@ -544,8 +544,13 @@ searchit(win, buf, pos, dir, pat, count, options, pat_use)
/* Watch out for the "col" being MAXCOL - 2, used in a closed fold. */
else if (has_mbyte && pos->lnum >= 1 && pos->lnum <= buf->b_ml.ml_line_count
&& pos->col < MAXCOL - 2)
- extra_col = (*mb_ptr2len)(ml_get_buf(buf, pos->lnum, FALSE)
- + pos->col);
+ {
+ ptr = ml_get_buf(buf, pos->lnum, FALSE) + pos->col;
+ if (*ptr == NUL)
+ extra_col = 1;
+ else
+ extra_col = (*mb_ptr2len)(ptr);
+ }
#endif
else
extra_col = 1;
diff --git a/src/spell.c b/src/spell.c
index 10390ece6d..114e3f2aa8 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -35,7 +35,7 @@
* original case. The second one is only used for keep-case words and is
* usually small.
*
- * There is one additional tree for when prefixes are not applied when
+ * There is one additional tree for when not all prefixes are applied when
* generating the .spl file. This tree stores all the possible prefixes, as
* if they were words. At each word (prefix) end the prefix nr is stored, the
* following word must support this prefix nr. And the condition nr is
@@ -72,21 +72,6 @@
#define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4)
/*
- * The double scoring mechanism is based on the principle that there are two
- * kinds of spelling mistakes:
- * 1. You know how to spell the word, but mistype something. This results in
- * a small editing distance (character swapped/omitted/inserted) and
- * possibly a word that sounds completely different.
- * 2. You don't know how to spell the word and type something that sounds
- * right. The edit distance can be big but the word is similar after
- * sound-folding.
- * Since scores for these two mistakes will be very different we use a list
- * for each.
- * The sound-folding is slow, only do double scoring when 'spellsuggest' is
- * "double".
- */
-
-/*
* Vim spell file format: <HEADER>
* <SUGGEST>
* <LWORDTREE>
@@ -98,9 +83,10 @@
* <charflagslen> <charflags>
* <fcharslen> <fchars>
* <midwordlen> <midword>
+ * <compoundlen> <compoundtype> <compoundinfo>
* <prefcondcnt> <prefcond> ...
*
- * <fileID> 10 bytes "VIMspell09"
+ * <fileID> 10 bytes "VIMspell10"
* <regioncnt> 1 byte number of regions following (8 supported)
* <regionname> 2 bytes Region name: ca, au, etc. Lower case.
* First <regionname> is region 1.
@@ -116,6 +102,17 @@
* <midword> N bytes Characters that are word characters only when used
* in the middle of a word.
*
+ * <compoundlen> 2 bytes Number of bytes following for compound info (can
+ * be used to skip it when it's not understood).
+ *
+ * <compoundtype> 1 byte 1: compound words using <comp1minlen> and
+ * <comp1flags>
+ *
+ * <comp1minlen> 1 byte minimal word length for compounding
+ *
+ * <comp1flags> N bytes flags used for compounding words
+ *
+ *
* <prefcondcnt> 2 bytes Number of <prefcond> items following.
*
* <prefcond> : <condlen> <condstr>
@@ -182,16 +179,16 @@
* follow in sorted order.
*
* <sibling>: <byte> [ <nodeidx> <xbyte>
- * | <flags> [<flags2>] [<region>] [<prefixID>]
- * | [<pflags>] <prefixID> <prefcondnr> ]
+ * | <flags> [<flags2>] [<region>] [<affixID>]
+ * | [<pflags>] <affixID> <prefcondnr> ]
*
* <byte> 1 byte Byte value of the sibling. Special cases:
* BY_NOFLAGS: End of word without flags and for all
* regions.
- * For PREFIXTREE <prefixID> and
+ * For PREFIXTREE <affixID> and
* <prefcondnr> follow.
* BY_FLAGS: End of word, <flags> follow.
- * For PREFIXTREE <pflags>, <prefixID>
+ * For PREFIXTREE <pflags>, <affixID>
* and <prefcondnr> follow.
* BY_FLAGS2: End of word, <flags> and <flags2>
* follow. Not used in PREFIXTREE.
@@ -210,7 +207,7 @@
* WF_RARE rare word
* WF_BANNED bad word
* WF_REGION <region> follows
- * WF_PFX <prefixID> follows
+ * WF_AFX <affixID> follows
*
* <flags2> 1 byte Only used when there are postponed prefixes.
* Bitmask of:
@@ -225,7 +222,7 @@
* omitted it's valid in all regions.
* Lowest bit is for region 1.
*
- * <prefixID> 1 byte ID of prefix that can be used with this word. For
+ * <affixID> 1 byte ID of affix that can be used with this word. In
* PREFIXTREE used for the required prefix ID.
*
* <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
@@ -265,7 +262,7 @@ typedef long idx_T;
#define WF_ALLCAP 0x04 /* word must be all capitals */
#define WF_RARE 0x08 /* rare word */
#define WF_BANNED 0x10 /* bad word */
-#define WF_PFX 0x20 /* prefix ID follows */
+#define WF_AFX 0x20 /* affix ID follows */
#define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */
#define WF_KEEPCAP 0x80 /* keep-case word */
@@ -279,7 +276,7 @@ typedef long idx_T;
#define WFP_NC 0x02 /* prefix is not combining */
#define WFP_UP 0x04 /* to-upper prefix */
-/* flags for postponed prefixes. Must be above prefixID (one byte)
+/* Flags for postponed prefixes. Must be above affixID (one byte)
* and prefcondnr (two bytes). */
#define WF_RAREPFX (WFP_RARE << 24) /* in sl_pidxs: flag for rare
* postponed prefix */
@@ -343,8 +340,8 @@ typedef short salfirst_T;
* The "idxs" array stores the index of the child node corresponding to the
* byte in "byts".
* Exception: when the byte is zero, the word may end here and "idxs" holds
- * the flags, region mask and prefixID for the word. There may be several
- * zeros in sequence for alternative flag/region combinations.
+ * the flags, region mask and affixID for the word. There may be several
+ * zeros in sequence for alternative flag/region/affixID combinations.
*/
typedef struct slang_S slang_T;
struct slang_S
@@ -365,6 +362,9 @@ struct slang_S
char_u *sl_midword; /* MIDWORD string or NULL */
+ int sl_compminlen; /* COMPOUNDMIN */
+ char_u *sl_compflags; /* COMPOUNDFLAGS (NULL when no compounding) */
+
int sl_prefixcnt; /* number of items in "sl_prefprog" */
regprog_T **sl_prefprog; /* table with regprogs for prefixes */
@@ -419,7 +419,7 @@ typedef struct langp_S
#define SP_LOCAL 2
#define SP_BAD 3
-#define VIMSPELLMAGIC "VIMspell09" /* string at start of Vim spell file */
+#define VIMSPELLMAGIC "VIMspell10" /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10
/* file used for "zG" and "zW" */
@@ -510,7 +510,7 @@ typedef struct matchinf_S
/* for when checking word after a prefix */
int mi_prefarridx; /* index in sl_pidxs with list of
- prefixID/condition */
+ affixID/condition */
int mi_prefcnt; /* number of entries at mi_prefarridx */
int mi_prefixlen; /* byte length of prefix */
#ifdef FEAT_MBYTE
@@ -520,6 +520,9 @@ typedef struct matchinf_S
# define mi_cprefixlen mi_prefixlen /* it's the same value */
#endif
+ /* for when checking a compound word */
+ int mi_compoff; /* start of following word offset */
+
/* others */
int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
@@ -614,9 +617,11 @@ typedef struct trystate_S
#define NOPREFIX 0xff /* not using prefixes */
/* mode values for find_word */
-#define FIND_FOLDWORD 0 /* find word case-folded */
-#define FIND_KEEPWORD 1 /* find keep-case word */
-#define FIND_PREFIX 2 /* find word after prefix */
+#define FIND_FOLDWORD 0 /* find word case-folded */
+#define FIND_KEEPWORD 1 /* find keep-case word */
+#define FIND_PREFIX 2 /* find word after prefix */
+#define FIND_COMPOUND 3 /* find case-folded compound word */
+#define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */
static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
@@ -928,14 +933,19 @@ find_word(mip, mode)
unsigned flags;
char_u *byts;
idx_T *idxs;
+ int word_ends;
- if (mode == FIND_KEEPWORD)
+ if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
{
/* Check for word with matching case in keep-case tree. */
ptr = mip->mi_word;
flen = 9999; /* no case folding, always enough bytes */
byts = slang->sl_kbyts;
idxs = slang->sl_kidxs;
+
+ if (mode == FIND_KEEPCOMPOUND)
+ /* Skip over the previously found word(s). */
+ wlen += mip->mi_compoff;
}
else
{
@@ -951,6 +961,13 @@ find_word(mip, mode)
wlen = mip->mi_prefixlen;
flen -= mip->mi_prefixlen;
}
+ else if (mode == FIND_COMPOUND)
+ {
+ /* Skip over the previously found word(s). */
+ wlen = mip->mi_compoff;
+ flen -= mip->mi_compoff;
+ }
+
}
if (byts == NULL)
@@ -1058,7 +1075,13 @@ find_word(mip, mode)
continue; /* not at first byte of character */
#endif
if (spell_iswordp(ptr + wlen, mip->mi_buf))
- continue; /* next char is a word character */
+ {
+ if (slang->sl_compflags == NULL)
+ continue; /* next char is a word character */
+ word_ends = FALSE;
+ }
+ else
+ word_ends = TRUE;
#ifdef FEAT_MBYTE
if (mode != FIND_KEEPWORD && has_mbyte)
@@ -1108,9 +1131,8 @@ find_word(mip, mode)
/* When mode is FIND_PREFIX the word must support the prefix:
* check the prefix ID and the condition. Do that for the list at
* mip->mi_prefarridx that find_prefix() filled. */
- if (mode == FIND_PREFIX)
+ else if (mode == FIND_PREFIX)
{
- /* The prefix ID is stored two bytes above the flags. */
c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
flags,
mip->mi_word + mip->mi_cprefixlen, slang,
@@ -1123,6 +1145,58 @@ find_word(mip, mode)
flags |= WF_RARE;
}
+ if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
+ || !word_ends)
+ {
+ /* Makes you wonder why someone puts a compound flag on a word
+ * that's too short... Myspell compatibility requires this
+ * anyway. */
+ if (wlen < slang->sl_compminlen)
+ continue;
+
+ /* The word doesn't end or it comes after another: it must
+ * have a compound flag. */
+ /* TODO: check more flags */
+ if (*slang->sl_compflags != ((unsigned)flags >> 24))
+ continue;
+ }
+
+ if (!word_ends)
+ {
+ /* Check that a valid word follows. If there is one, it will
+ * set "mi_result", thus we are always finished here.
+ * Recursive! */
+
+ /* Find following word in case-folded tree. */
+ mip->mi_compoff = endlen[endidxcnt];
+#ifdef FEAT_MBYTE
+ if (has_mbyte && mode == FIND_KEEPWORD)
+ {
+ /* Compute byte length in case-folded word from "wlen":
+ * byte length in keep-case word. Length may change when
+ * folding case. This can be slow, take a shortcut when
+ * the case-folded word is equal to the keep-case word. */
+ p = mip->mi_fword;
+ if (STRNCMP(ptr, p, wlen) != 0)
+ {
+ for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
+ mb_ptr_adv(p);
+ mip->mi_compoff = p - mip->mi_fword;
+ }
+ }
+#endif
+ find_word(mip, FIND_COMPOUND);
+ if (mip->mi_result == SP_OK)
+ break;
+
+ /* Find following word in keep-case tree. */
+ mip->mi_compoff = wlen;
+ find_word(mip, FIND_KEEPCOMPOUND);
+ if (mip->mi_result == SP_OK)
+ break;
+ continue;
+ }
+
if (flags & WF_BANNED)
res = SP_BANNED;
else if (flags & WF_REGION)
@@ -1758,6 +1832,9 @@ slang_clear(lp)
vim_free(lp->sl_midword);
lp->sl_midword = NULL;
+ vim_free(lp->sl_compflags);
+ lp->sl_compflags = NULL;
+
#ifdef FEAT_MBYTE
{
int todo = lp->sl_map_hash.ht_used;
@@ -1870,6 +1947,7 @@ spell_load_file(fname, lang, old_lp, silent)
* <charflagslen> <charflags>
* <fcharslen> <fchars>
* <midwordlen> <midword>
+ * <compoundlen> <compoundtype> <compoundinfo>
* <prefcondcnt> <prefcond> ...
*/
for (i = 0; i < VIMSPELLMAGICL; ++i)
@@ -1929,6 +2007,41 @@ formerr:
if (cnt < 0)
goto endFAIL;
+ /* <compoundlen> <compoundtype> <compoundinfo> */
+ cnt = (getc(fd) << 8) + getc(fd); /* <compoundlen> */
+ if (cnt < 0)
+ goto endFAIL;
+ if (cnt > 0)
+ {
+ --cnt;
+ c = getc(fd); /* <compoundtype> */
+ if (c != 1)
+ {
+ /* Unknown kind of compound words, skip the info. */
+ while (cnt-- > 0)
+ getc(fd);
+ }
+ else if (cnt < 2)
+ goto formerr;
+ else
+ {
+ --cnt;
+ c = getc(fd); /* <comp1minlen> */
+ if (c < 1 || c > 50)
+ c = 3;
+ lp->sl_compminlen = c;
+
+ p = alloc(cnt + 1);
+ if (p == NULL)
+ goto endFAIL;
+ lp->sl_compflags = p;
+ while (cnt-- > 0)
+ *p++ = getc(fd); /* <comp1flags> */
+ *p = NUL;
+ }
+ }
+
+
/* <prefcondcnt> <prefcond> ... */
cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
if (cnt > 0)
@@ -1943,7 +2056,7 @@ formerr:
{
/* <prefcond> : <condlen> <condstr> */
n = getc(fd); /* <condlen> */
- if (n < 0)
+ if (n < 0 || n >= MAXWLEN)
goto formerr;
/* When <condlen> is zero we have an empty condition. Otherwise
* compile the regexp program used to check for the condition. */
@@ -2518,7 +2631,7 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
else
c = 0;
- c |= getc(fd); /* <prefixID> */
+ c |= getc(fd); /* <affixID> */
n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */
if (n >= maxprefcondnr)
@@ -2536,8 +2649,8 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
c = (getc(fd) << 8) + c; /* <flags2> */
if (c & WF_REGION)
c = (getc(fd) << 16) + c; /* <region> */
- if (c & WF_PFX)
- c = (getc(fd) << 24) + c; /* <prefixID> */
+ if (c & WF_AFX)
+ c = (getc(fd) << 24) + c; /* <affixID> */
}
idxs[idx] = c;
@@ -3110,9 +3223,12 @@ spell_reload_one(fname, added_word)
typedef struct afffile_S
{
char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
+ int af_slash; /* character used in word for slash */
int af_rar; /* RAR ID for rare word */
int af_kep; /* KEP ID for keep-case word */
int af_bad; /* BAD ID for banned word */
+ char_u *af_compflags; /* COMPOUNDFLAG or COMPOUNDFLAGS */
+ int af_compminlen; /* COMPOUNDMIN */
int af_pfxpostpone; /* postpone prefixes without chop string */
hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
@@ -3187,7 +3303,7 @@ struct wordnode_S
siblings, in following siblings it is
always one. */
char_u wn_byte; /* Byte for this node. NUL for word end */
- char_u wn_prefixID; /* when "wn_byte" is NUL: supported/required
+ char_u wn_affixID; /* when "wn_byte" is NUL: supported/required
prefix ID or 0 */
short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */
short wn_region; /* when "wn_byte" is NUL: region mask; for
@@ -3245,6 +3361,8 @@ typedef struct spellinfo_S
int si_rem_accents; /* soundsalike: remove accents */
garray_T si_map; /* MAP info concatenated */
char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */
+ int si_compminlen; /* minimal length for compounding */
+ char_u *si_compflags; /* flags used for compounding */
garray_T si_prefcond; /* table with conditions for postponed
* prefixes, each stored as a string */
int si_newID; /* current value for ah_newID */
@@ -3258,6 +3376,7 @@ static int has_non_ascii __ARGS((char_u *s));
static void spell_free_aff __ARGS((afffile_T *aff));
static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
static char_u *get_pfxlist __ARGS((spellinfo_T *spin, afffile_T *affile, char_u *afflist));
+static char_u *get_compflags __ARGS((spellinfo_T *spin, char_u *afflist));
static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist));
static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
@@ -3265,7 +3384,7 @@ static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
static void free_blocks __ARGS((sblock_T *bl));
static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist));
-static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int prefixID));
+static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
@@ -3547,6 +3666,14 @@ spell_read_aff(spin, fname)
{
/* ignored, we look in the tree for what chars may appear */
}
+ else if (STRCMP(items[0], "SLASH") == 0 && itemcnt == 2
+ && aff->af_slash == 0)
+ {
+ aff->af_slash = items[1][0];
+ if (items[1][1] != NUL)
+ smsg((char_u *)_("Character used for SLASH must be ASCII; in %s line %d: %s"),
+ fname, lnum, items[1]);
+ }
else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
&& aff->af_rar == 0)
{
@@ -3568,6 +3695,26 @@ spell_read_aff(spin, fname)
if (items[1][1] != NUL)
smsg((char_u *)_(e_affname), fname, lnum, items[1]);
}
+ else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
+ && aff->af_compflags == 0)
+ {
+ aff->af_compflags = getroom_save(spin, items[1]);
+ if (items[1][1] != NUL)
+ smsg((char_u *)_(e_affname), fname, lnum, items[1]);
+ }
+ else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2
+ && aff->af_compflags == 0)
+ {
+ aff->af_compflags = getroom_save(spin, items[1]);
+ }
+ else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
+ && aff->af_compminlen == 0)
+ {
+ aff->af_compminlen = atoi((char *)items[1]);
+ if (aff->af_compminlen == 0)
+ smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
+ fname, lnum, items[1]);
+ }
else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
{
aff->af_pfxpostpone = TRUE;
@@ -3688,7 +3835,10 @@ spell_read_aff(spin, fname)
else
sprintf((char *)buf, "%s$", items[4]);
aff_entry->ae_prog = vim_regcomp(buf,
- RE_MAGIC + RE_STRING);
+ RE_MAGIC + RE_STRING + RE_STRICT);
+ if (aff_entry->ae_prog == NULL)
+ smsg((char_u *)_("Broken condition in %s line %d: %s"),
+ fname, lnum, items[4]);
}
/* For postponed prefixes we need an entry in si_prefcond
@@ -3908,7 +4058,7 @@ spell_read_aff(spin, fname)
spin->si_sofoto = vim_strsave(items[1]);
}
else
- smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
+ smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
fname, lnum, items[0]);
}
}
@@ -3952,6 +4102,28 @@ spell_read_aff(spin, fname)
vim_free(upp);
}
+ /* Use compound specifications of the .aff file for the spell info. */
+ if (aff->af_compminlen != 0)
+ {
+ if (spin->si_compminlen != 0
+ && spin->si_compminlen != aff->af_compminlen)
+ smsg((char_u *)_("COMPOUNDMIN value differs from what is used in another .aff file"));
+ else
+ spin->si_compminlen = aff->af_compminlen;
+ }
+
+ if (aff->af_compflags != NULL)
+ {
+ if (spin->si_compflags != NULL
+ && STRCMP(spin->si_compflags, aff->af_compflags) != 0)
+ smsg((char_u *)_("COMPOUNDFLAG(S) value differs from what is used in another .aff file"));
+ else
+ spin->si_compflags = aff->af_compflags;
+
+ if (aff->af_pfxpostpone)
+ smsg((char_u *)_("Cannot use both PFXPOSTPONE and COMPOUNDFLAG(S)"));
+ }
+
vim_free(pc);
fclose(fd);
return aff;
@@ -4072,8 +4244,9 @@ spell_read_dic(spin, fname, affile)
{
hashtab_T ht;
char_u line[MAXLINELEN];
+ char_u *p;
char_u *afflist;
- char_u *pfxlist;
+ char_u *store_afflist;
char_u *dw;
char_u *pc;
char_u *w;
@@ -4086,6 +4259,7 @@ spell_read_dic(spin, fname, affile)
int retval = OK;
char_u message[MAXLINELEN + MAXWLEN];
int flags;
+ int duplicate = 0;
/*
* Open the file.
@@ -4139,10 +4313,20 @@ spell_read_dic(spin, fname, affile)
continue; /* empty line */
line[l] = NUL;
- /* Find the optional affix names. */
- afflist = vim_strchr(line, '/');
- if (afflist != NULL)
- *afflist++ = NUL;
+ /* Find the optional affix names. Replace the SLASH character by a
+ * slash. */
+ afflist = NULL;
+ for (p = line; *p != NUL; mb_ptr_adv(p))
+ {
+ if (*p == affile->af_slash)
+ *p = '/';
+ else if (*p == '/')
+ {
+ *p = NUL;
+ afflist = p + 1;
+ break;
+ }
+ }
/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
if (spin->si_ascii && has_non_ascii(line))
@@ -4197,13 +4381,20 @@ spell_read_dic(spin, fname, affile)
hash = hash_hash(dw);
hi = hash_lookup(&ht, dw, hash);
if (!HASHITEM_EMPTY(hi))
- smsg((char_u *)_("Duplicate word in %s line %d: %s"),
+ {
+ if (p_verbose > 0)
+ smsg((char_u *)_("Duplicate word in %s line %d: %s"),
+ fname, lnum, dw);
+ else if (duplicate == 0)
+ smsg((char_u *)_("First duplicate word in %s line %d: %s"),
fname, lnum, dw);
+ ++duplicate;
+ }
else
hash_add_item(&ht, hi, dw, hash);
flags = 0;
- pfxlist = NULL;
+ store_afflist = NULL;
if (afflist != NULL)
{
/* Check for affix name that stands for keep-case word and stands
@@ -4220,11 +4411,15 @@ spell_read_dic(spin, fname, affile)
if (affile->af_pfxpostpone)
/* Need to store the list of prefix IDs with the word. */
- pfxlist = get_pfxlist(spin, affile, afflist);
+ store_afflist = get_pfxlist(spin, affile, afflist);
+ else if (spin->si_compflags)
+ /* Need to store the list of affix IDs for compounding with
+ * the word. */
+ store_afflist = get_compflags(spin, afflist);
}
/* Add the word to the word tree(s). */
- if (store_word(spin, dw, flags, spin->si_region, pfxlist) == FAIL)
+ if (store_word(spin, dw, flags, spin->si_region, store_afflist) == FAIL)
retval = FAIL;
if (afflist != NULL)
@@ -4233,20 +4428,22 @@ spell_read_dic(spin, fname, affile)
* Additionally do matching prefixes that combine. */
if (store_aff_word(spin, dw, afflist, affile,
&affile->af_suff, &affile->af_pref,
- FALSE, flags, pfxlist) == FAIL)
+ FALSE, flags, store_afflist) == FAIL)
retval = FAIL;
/* Find all matching prefixes and add the resulting words. */
if (store_aff_word(spin, dw, afflist, affile,
&affile->af_pref, NULL,
- FALSE, flags, pfxlist) == FAIL)
+ FALSE, flags, store_afflist) == FAIL)
retval = FAIL;
}
}
+ if (duplicate > 0)
+ smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
if (spin->si_ascii && non_ascii > 0)
- smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
- non_ascii);
+ smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
+ non_ascii, fname);
hash_clear(&ht);
fclose(fd);
@@ -4303,6 +4500,49 @@ get_pfxlist(spin, affile, afflist)
}
/*
+ * Get the list of affix IDs from the affix list "afflist" that are used for
+ * compound words.
+ * Returns a string allocated with getroom(). NULL when there are no relevant
+ * affixes or when out of memory.
+ */
+ static char_u *
+get_compflags(spin, afflist)
+ spellinfo_T *spin;
+ char_u *afflist;
+{
+ char_u *p;
+ int cnt; /* number of matching flags seen in the current round */
+ int round;
+ char_u *res = NULL;
+
+ /* Two passes over "afflist": round 1 counts the bytes that also occur in
+ * spin->si_compflags, round 2 copies those bytes into "res". */
+ for (round = 1; round <= 2; ++round)
+ {
+ cnt = 0;
+ for (p = afflist; *p != NUL; ++p)
+ {
+ if (*p != ',' && *p != '-'
+ && vim_strchr(spin->si_compflags, *p) != NULL)
+ {
+ /* This is a compound affix ID: count it, store it in round 2. */
+ if (round == 2)
+ res[cnt] = *p;
+ ++cnt;
+ }
+ }
+ if (round == 1 && cnt > 0)
+ res = getroom(spin, cnt + 1, FALSE); /* cnt flags plus the NUL */
+ if (res == NULL)
+ break; /* no matching flags, or getroom() returned NULL */
+ }
+
+ if (res != NULL)
+ res[cnt] = NUL; /* terminate the collected flags */
+ return res;
+}
+
+/*
* Apply affixes to a word and store the resulting words.
* "ht" is the hashtable with affentry_T that need to be applied, either
* prefixes or suffixes.
@@ -4335,6 +4575,7 @@ store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, pfxlist)
int use_flags;
char_u *use_pfxlist;
int c;
+ int wordlen = STRLEN(word);