summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- runtime/doc/insert.txt 9
-rw-r--r-- runtime/doc/map.txt 10
-rw-r--r-- runtime/spell/en.ascii.spl bin 566603 -> 566616 bytes
-rw-r--r-- runtime/spell/en.utf-8.spl bin 569129 -> 569151 bytes
-rw-r--r-- runtime/spell/hu/hu_HU.diff 78
-rw-r--r-- runtime/spell/main.aap 9
-rw-r--r-- src/spell.c 2117
7 files changed, 1487 insertions, 736 deletions
diff --git a/runtime/doc/insert.txt b/runtime/doc/insert.txt
index 43b3d4f416..ae90eb954c 100644
--- a/runtime/doc/insert.txt
+++ b/runtime/doc/insert.txt
@@ -1,4 +1,4 @@
-*insert.txt* For Vim version 7.0aa. Last change: 2005 Aug 11
+*insert.txt* For Vim version 7.0aa. Last change: 2005 Aug 17
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -899,8 +899,11 @@ CTRL-X CTRL-O Guess what kind of item is in front of the cursor and
Spelling suggestions *compl-spelling*
-The word in front of the cursor is located and correctly spelled words are
-suggested to replace it. The word doesn't actually have to be badly spelled.
+A word before or at the cursor is located and correctly spelled words are
+suggested to replace it. If there is a badly spelled word in the line, before
+or under the cursor, the cursor is moved to after it. Otherwise the word just
+before the cursor is used for suggestions, even though it isn't badly spelled.
+
NOTE: CTRL-S suspends display in many Unix terminals. Use 's' instead. Type
CTRL-Q to resume displaying.
diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt
index a135a481ab..5fb03e9165 100644
--- a/runtime/doc/map.txt
+++ b/runtime/doc/map.txt
@@ -281,10 +281,7 @@ last defined. Example: >
n <C-W>* * <C-W><C-S>*
Last set from /home/abcd/.vimrc
-When the map was defined by hand there is no "Last set" message. When the map
-was defined while executing a function, user command or autocommand, the
-script in which it was defined is reported.
-{not available when compiled without the +eval feature}
+See |:verbose-cmd| for more information.
*map_backslash*
Note that only CTRL-V is mentioned here as a special character for mappings
@@ -877,10 +874,7 @@ last defined. Example: >
TOhtml 0 % :call Convert2HTML(<line1>, <line2>)
Last set from /usr/share/vim/vim-7.0/plugin/tohtml.vim
<
-When the command was defined by hand there is no "Last set" message. When the
-command was defined while executing a function, user command or autocommand,
-the script in which it was defined is reported.
-{not available when compiled without the +eval feature}
+See |:verbose-cmd| for more information.
*E174* *E182*
:com[mand][!] [{attr}...] {cmd} {rep}
diff --git a/runtime/spell/en.ascii.spl b/runtime/spell/en.ascii.spl
index 9d2b068c44..360e1ae9ce 100644
--- a/runtime/spell/en.ascii.spl
+++ b/runtime/spell/en.ascii.spl
Binary files differ
diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl
index 8b2edc6923..9ba853cebf 100644
--- a/runtime/spell/en.utf-8.spl
+++ b/runtime/spell/en.utf-8.spl
Binary files differ
diff --git a/runtime/spell/hu/hu_HU.diff b/runtime/spell/hu/hu_HU.diff
index de22c794fe..2b9a6ddf0b 100644
--- a/runtime/spell/hu/hu_HU.diff
+++ b/runtime/spell/hu/hu_HU.diff
@@ -1,5 +1,5 @@
*** hu_HU.orig.aff Tue Aug 16 18:21:10 2005
---- hu_HU.aff Tue Aug 16 19:42:34 2005
+--- hu_HU.aff Fri Aug 19 21:28:45 2005
***************
*** 57,62 ****
@@ -16,8 +16,9 @@
! #VERSION Magyar 0.99.4.2
SET ISO8859-2
***************
-*** 65,77 ****
- COMPOUNDFLAG Y
+*** 64,77 ****
+ COMPOUNDMIN 2
+! COMPOUNDFLAG Y
! COMPOUNDWORD 2 y
! COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû
! SYLLABLENUM klmc
@@ -30,24 +31,85 @@
! WORDCHARS -.§%°0123456789
! HU_KOTOHANGZO Z
---- 65,80 ----
- COMPOUNDFLAG Y
+--- 64,116 ----
+ COMPOUNDMIN 2
! #COMPOUNDWORD 2 y
+! COMPOUNDMAX 2
+! # I don't understand what the "y" is for; if it's to disable compounding simply
+! # remove the compound flag from the word.
+!
! #COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû
+! COMPOUNDSYLMAX 6
+! SYLLABLE a/á/e/é/i/í/o/ó/ö/õ/u/ú/ü/û
+! # Strange that every vowel is counted as a syllable, that's how the hunspell
+! # code works.
+!
! #SYLLABLENUM klmc
+! # Don't understand what this is for
+!
+! #COMPOUNDFLAG Y
! #COMPOUNDFIRST v
! #COMPOUNDLAST x
+! COMPOUNDFLAGS Y+
+! COMPOUNDFLAGS vY*x
+! COMPOUNDFLAGS Y+x
+! COMPOUNDFLAGS vY+
+!
! #FORBIDDENWORD w
-! BAD w
+! # I don't understand what FORBIDDENWORD is needed for, using NEEDAFFIX
+! # (ONLYROOT) should be sufficient.
+!
! #ONLYROOT u
+! NEEDAFFIX u
+!
! #ACCENT áéíóöõúüû aeiooouuu
+! MAP 5
+! MAP aáä
+! MAP eé
+! MAP ií
+! MAP oóöõ
+! MAP uúüû
+!
! #CHECKNUM
+! # Vim always handles numbers in the same way.
+!
! #WORDCHARS -.§%°0123456789
+! FOL ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%°
+! LOW ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%°
+! UPP ¡¢£¥¦¨©ª«¬®¯±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßÿ-§%°
+! MIDWORD .
+!
! #HU_KOTOHANGZO Z
!
+! # There are soooo many affixes. Postpone the prefixes to keep the time needed
+! # for generating the .spl within reasonable limits.
! PFXPOSTPONE
***************
+*** 81,96 ****
+
+! REP 89
+! REP í i
+! REP i í
+! REP ó o
+! REP o ó
+! REP o õ
+! REP ú u
+! REP u ú
+! REP u û
+! REP û ü
+! REP ü û
+ REP j ly
+ REP ly j
+- REP a ä # Handel->Händel
+ REP S © # Skoda->©koda
+--- 120,124 ----
+
+! REP 78
+ REP j ly
+ REP ly j
+ REP S © # Skoda->©koda
+***************
*** 173,241 ****
- # character conversion table
@@ -118,11 +180,11 @@
- CHR 123 U3 Û
-
SFX z Y 6
---- 176,177 ----
+--- 201,202 ----
***************
*** 17678,17681 ****
PFX D 0 leg .
-
- 1
-
---- 17614 ----
+--- 17639 ----
diff --git a/runtime/spell/main.aap b/runtime/spell/main.aap
index 1e37c1cc3d..5aaaaa1022 100644
--- a/runtime/spell/main.aap
+++ b/runtime/spell/main.aap
@@ -4,19 +4,22 @@
# aap generate all the .spl files
# aap diff create all the diff files
-LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi
+LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi hu
+# "hu" is at the end, because it takes so much time.
+#
# TODO:
# Finnish doesn't work, the dictionary fi_FI.zip file contains hyphenation...
diff: $*LANG/diff
- :print done
+ :print Done.
@for l in string.split(_no.LANG):
:child $l/main.aap
# The existing .spl files need to be generated when the spell file format
# changes. Depending on the Vim executable does that, but results in doing it
-# much too often. Generate a dummy .spl file and check if it changed.
+# much too often. Generate a dummy .spl file and let the .spl depend on it, so
+# that they are outdated when it changes.
:child check/main.aap
*.spl: check/check.latin1.spl
diff --git a/src/spell.c b/src/spell.c
index a3fc24f720..30b08ae982 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -56,16 +56,6 @@
# define SPELL_PRINTTREE
#endif
-/* SPELL_COMPRESS_CNT is after how many allocated blocks we compress the tree
- * to limit the amount of memory used (esp. for Italian and Hungarian). The
- * amount of memory used for nodes then is SPELL_COMPRESS_CNT times
- * SBLOCKSIZE.
- * Then compress again after allocating SPELL_COMPRESS_INC more blocks or
- * adding SPELL_COMPRESS_ADDED words and running out of memory again. */
-#define SPELL_COMPRESS_CNT 30000
-#define SPELL_COMPRESS_INC 100
-#define SPELL_COMPRESS_ADDED 500000
-
/*
* Use this to adjust the score after finding suggestions, based on the
* suggested word sounding like the bad word. This is much faster than doing
@@ -78,94 +68,102 @@
/*
* Vim spell file format: <HEADER>
- * <SUGGEST>
+ * <SECTIONS>
* <LWORDTREE>
* <KWORDTREE>
* <PREFIXTREE>
*
- * <HEADER>: <fileID>
- * <regioncnt> <regionname> ...
- * <charflagslen> <charflags>
- * <fcharslen> <fchars>
- * <midwordlen> <midword>
- * <compoundlen> <compoundtype> <compoundinfo>
- * <prefcondcnt> <prefcond> ...
+ * <HEADER>: <fileID> <versionnr>
*
- * <fileID> 10 bytes "VIMspell10"
- * <regioncnt> 1 byte number of regions following (8 supported)
- * <regionname> 2 bytes Region name: ca, au, etc. Lower case.
- * First <regionname> is region 1.
+ * <fileID> 8 bytes "VIMspell"
+ * <versionnr> 1 byte VIMSPELLVERSION
*
- * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
- * <charflags> N bytes List of flags (first one is for character 128):
- * 0x01 word character CF_WORD
- * 0x02 upper-case character CF_UPPER
- * <fcharslen> 2 bytes Number of bytes in <fchars>.
- * <fchars> N bytes Folded characters, first one is for character 128.
*
- * <midwordlen> 2 bytes Number of bytes in <midword>.
- * <midword> N bytes Characters that are word characters only when used
- * in the middle of a word.
+ * Sections make it possible to add information to the .spl file without
+ * making it incompatible with previous versions. There are two kinds of
+ * sections:
+ * 1. Not essential for correct spell checking. E.g. for making suggestions.
+ * These are skipped when not supported.
+ * 2. Optional information, but essential for spell checking when present.
+ * E.g. conditions for affixes. When this section is present but not
+ * supported an error message is given.
*
- * <compoundlen> 2 bytes Number of bytes following for compound info (can
- * be used to skip it when it's not understood).
+ * <SECTIONS>: <section> ... <sectionend>
*
- * <compoundtype 1 byte 1: compound words using <comp1minlen> and
- * <comp1flags>
+ * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
*
- * <comp1minlen> 1 byte minimal word length for compounding
+ * <sectionID> 1 byte number from 0 to 254 identifying the section
*
- * <comp1flags> N bytes flags used for compounding words
+ * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
+ * spell checking
*
+ * <sectionlen> 4 bytes length of section contents, MSB first
*
- * <prefcondcnt> 2 bytes Number of <prefcond> items following.
- *
- * <prefcond> : <condlen> <condstr>
+ * <sectionend> 1 byte SN_END
*
- * <condlen> 1 byte Length of <condstr>.
*
- * <condstr> N bytes Condition for the prefix.
+ * sectionID == SN_REGION: <regionname> ...
+ * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
+ * First <regionname> is region 1.
*
+ * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
+ * <folcharslen> <folchars>
+ * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
+ * <charflags> N bytes List of flags (first one is for character 128):
+ * 0x01 word character CF_WORD
+ * 0x02 upper-case character CF_UPPER
+ * <folcharslen> 2 bytes Number of bytes in <folchars>.
+ * <folchars> N bytes Folded characters, first one is for character 128.
*
- * <SUGGEST> : <repcount> <rep> ...
- * <salflags> <salcount> <sal> ...
- * <maplen> <mapstr>
+ * sectionID == SN_MIDWORD: <midword>
+ * <midword> N bytes Characters that are word characters only when used
+ * in the middle of a word.
*
- * <repcount> 2 bytes number of <rep> items, MSB first.
+ * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
+ * <prefcondcnt> 2 bytes Number of <prefcond> items following.
+ * <prefcond> : <condlen> <condstr>
+ * <condlen> 1 byte Length of <condstr>.
+ * <condstr> N bytes Condition for the prefix.
*
+ * sectionID == SN_REP: <repcount> <rep> ...
+ * <repcount> 2 bytes number of <rep> items, MSB first.
* <rep> : <repfromlen> <repfrom> <reptolen> <repto>
+ * <repfromlen> 1 byte length of <repfrom>
+ * <repfrom> N bytes "from" part of replacement
+ * <reptolen> 1 byte length of <repto>
+ * <repto> N bytes "to" part of replacement
*
- * <repfromlen> 1 byte length of <repfrom>
- *
- * <repfrom> N bytes "from" part of replacement
- *
- * <reptolen> 1 byte length of <repto>
- *
- * <repto> N bytes "to" part of replacement
- *
- * <salflags> 1 byte flags for soundsalike conversion:
+ * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
+ * <salflags> 1 byte flags for soundsalike conversion:
* SAL_F0LLOWUP
* SAL_COLLAPSE
* SAL_REM_ACCENTS
- * SAL_SOFO: SOFOFROM and SOFOTO used instead of SAL
- *
- * <salcount> 2 bytes number of <sal> items following
- *
+ * <salcount> 2 bytes number of <sal> items following
* <sal> : <salfromlen> <salfrom> <saltolen> <salto>
+ * <salfromlen> 1 byte length of <salfrom>
+ * <salfrom> N bytes "from" part of soundsalike
+ * <saltolen> 1 byte length of <salto>
+ * <salto> N bytes "to" part of soundsalike
*
- * <salfromlen> 1-2 bytes length of <salfrom> (2 bytes for SAL_SOFO)
- *
- * <salfrom> N bytes "from" part of soundsalike
- *
- * <saltolen> 1-2 bytes length of <salto> (2 bytes for SAL_SOFO)
- *
- * <salto> N bytes "to" part of soundsalike
+ * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
+ * <sofofromlen> 2 bytes length of <sofofrom>
+ * <sofofrom> N bytes "from" part of soundfold
+ * <sofotolen> 2 bytes length of <sofoto>
+ * <sofoto> N bytes "to" part of soundfold
*
- * <maplen> 2 bytes length of <mapstr>, MSB first
- *
- * <mapstr> N bytes String with sequences of similar characters,
+ * sectionID == SN_MAP: <mapstr>
+ * <mapstr> N bytes String with sequences of similar characters,
* separated by slashes.
*
+ * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags>
+ * <compmax> 1 byte Maximum nr of words in compound word.
+ * <compminlen> 1 byte Minimal word length for compounding.
+ * <compsylmax> 1 byte Maximum nr of syllables in compound word.
+ * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by
+ * slashes.
+ *
+ * sectionID == SN_SYLLABLE: <syllable>
+ * <syllable> N bytes String from SYLLABLE item.
*
* <LWORDTREE>: <wordtree>
*
@@ -333,6 +331,12 @@ typedef int salfirst_T;
typedef short salfirst_T;
#endif
+/* Values for SP_*ERROR are negative, positive values are used by
+ * read_cnt_string(). */
+#define SP_TRUNCERROR -1 /* spell file truncated error */
+#define SP_FORMERROR -2 /* format error in spell file */
+#define SP_ERROR -3 /* other error while reading spell file */
+
/*
* Structure used to store words and other info for one language, loaded from
* a .spl file.
@@ -367,8 +371,14 @@ struct slang_S
char_u *sl_midword; /* MIDWORD string or NULL */
- int sl_compminlen; /* COMPOUNDMIN */
- char_u *sl_compflags; /* COMPOUNDFLAGS (NULL when no compounding) */
+ int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */
+ int sl_compminlen; /* COMPOUNDMIN (default: MAXWLEN) */
+ int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
+ regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp program
+ * (NULL when no compounding) */
+ char_u *sl_compstartflags; /* flags for first compound word */
+ char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
+ garray_T sl_syl_items; /* syllable items */
int sl_prefixcnt; /* number of items in "sl_prefprog" */
regprog_T **sl_prefprog; /* table with regprogs for prefixes */
@@ -402,7 +412,6 @@ static slang_T *first_lang = NULL;
#define SAL_F0LLOWUP 1
#define SAL_COLLAPSE 2
#define SAL_REM_ACCENTS 4
-#define SAL_SOFO 8 /* SOFOFROM and SOFOTO instead of SAL */
/*
* Structure used in "b_langp", filled from 'spelllang'.
@@ -417,6 +426,25 @@ typedef struct langp_S
#define REGION_ALL 0xff /* word valid in all regions */
+#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
+#define VIMSPELLMAGICL 8
+#define VIMSPELLVERSION 50
+
+/* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
+#define SN_REGION 0 /* <regionname> section */
+#define SN_CHARFLAGS 1 /* charflags section */
+#define SN_MIDWORD 2 /* <midword> section */
+#define SN_PREFCOND 3 /* <prefcond> section */
+#define SN_REP 4 /* REP items section */
+#define SN_SAL 5 /* SAL items section */
+#define SN_SOFO 6 /* soundfolding section */
+#define SN_MAP 7 /* MAP items section */
+#define SN_COMPOUND 8 /* compound words section */
+#define SN_SYLLABLE 9 /* syllable section */
+#define SN_END 255 /* end of sections */
+
+#define SNF_REQUIRED 1 /* <sectionflags>: required section */
+
/* Result values. Lower number is accepted over higher one. */
#define SP_BANNED -1
#define SP_OK 0
@@ -424,9 +452,6 @@ typedef struct langp_S
#define SP_LOCAL 2
#define SP_BAD 3
-#define VIMSPELLMAGIC "VIMspell10" /* string at start of Vim spell file */
-#define VIMSPELLMAGICL 10
-
/* file used for "zG" and "zW" */
static char_u *int_wordlist = NULL;
@@ -472,7 +497,7 @@ typedef struct suggest_S
/* score for various changes */
#define SCORE_SPLIT 149 /* split bad word */
#define SCORE_ICASE 52 /* slightly different case */
-#define SCORE_REGION 70 /* word is for different region */
+#define SCORE_REGION 200 /* word is for different region */
#define SCORE_RARE 180 /* rare word */
#define SCORE_SWAP 90 /* swap two characters */
#define SCORE_SWAP3 110 /* swap two characters in three */
@@ -527,6 +552,8 @@ typedef struct matchinf_S
/* for when checking a compound word */
int mi_compoff; /* start of following word offset */
+ char_u mi_compflags[MAXWLEN]; /* flags for compound words used */
+ int mi_complen; /* nr of compound words used */
/* others */
int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
@@ -559,7 +586,7 @@ static int spell_iswordp_nmw __ARGS((char_u *p));
#ifdef FEAT_MBYTE
static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
#endif
-static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
+static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
/*
* For finding suggestions: At each node in the tree these states are tried:
@@ -608,8 +635,9 @@ typedef struct trystate_S
char_u ts_isdiff; /* DIFF_ values */
char_u ts_fcharstart; /* index in fword where badword char started */
#endif
- char_u ts_save_prewordlen; /* saved "prewordlen" */
- char_u ts_save_splitoff; /* su_splitoff saved here */
+ char_u ts_prewordlen; /* length of word in "preword[]" */
+ char_u ts_splitoff; /* index in "tword" after last split */
+ char_u ts_complen; /* nr of compound words used */
char_u ts_save_badflags; /* su_badflags saved here */
} trystate_T;
@@ -634,7 +662,7 @@ static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
static void slang_clear __ARGS((slang_T *lp));
static void find_word __ARGS((matchinf_T *mip, int mode));
-static int can_compound __ARGS((slang_T *slang, int flags));
+static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
static void find_prefix __ARGS((matchinf_T *mip));
static int fold_more __ARGS((matchinf_T *mip));
@@ -646,6 +674,16 @@ static void int_wordlist_spl __ARGS((char_u *fname));
static void spell_load_cb __ARGS((char_u *fname, void *cookie));
static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
+static char_u *read_string __ARGS((FILE *fd, int cnt));
+static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
+static int read_charflags_section __ARGS((FILE *fd));
+static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
+static int read_rep_section __ARGS((FILE *fd, slang_T *slang));
+static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
+static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
+static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
+static int init_syl_tab __ARGS((slang_T *slang));
+static int count_syllables __ARGS((slang_T *slang, char_u *word));
static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
static void set_sal_first __ARGS((slang_T *lp));
#ifdef FEAT_MBYTE
@@ -658,9 +696,8 @@ static int find_region __ARGS((char_u *rp, char_u *region));
static int captype __ARGS((char_u *word, char_u *end));
static int badword_captype __ARGS((char_u *word, char_u *end));
static void spell_reload_one __ARGS((char_u *fname, int added_word));
-static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
+static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
-static void write_spell_chartab __ARGS((FILE *fd));
static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap));
@@ -777,11 +814,13 @@ spell_check(wp, ptr, attrp, capcol)
be passed to functions quickly. */
int nrlen = 0; /* found a number first */
int c;
+ int wrongcaplen = 0;
/* A word never starts at a space or a control character. Return quickly
* then, skipping over the character. */
if (*ptr <= ' ')
return 1;
+ vim_memset(&mi, 0, sizeof(matchinf_T));
/* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
* 0X99FF. But when a word character follows do check spelling to find
@@ -818,10 +857,7 @@ spell_check(wp, ptr, attrp, capcol)
/* Check word starting with capital letter. */
c = PTR2CHAR(ptr);
if (!SPELL_ISUPPER(c))
- {
- *attrp = highlight_attr[HLF_SPC];
- return (int)(mi.mi_fend - ptr);
- }
+ wrongcaplen = (int)(mi.mi_fend - ptr);
}
}
if (capcol != NULL)
@@ -832,12 +868,10 @@ spell_check(wp, ptr, attrp, capcol)
mi.mi_end = mi.mi_fend;
/* Check caps type later. */
- mi.mi_capflags = 0;
- mi.mi_cend = NULL;
mi.mi_buf = wp->w_buffer;
- /* Include one non-word character so that we can check for the
- * word end. */
+ /* case-fold the word with one non-word character, so that we can check
+ * for the word end. */
if (*mi.mi_fend != NUL)
mb_ptr_adv(mi.mi_fend);
@@ -897,6 +931,10 @@ spell_check(wp, ptr, attrp, capcol)
#endif
return 1;
}
+ else if (mi.mi_end == ptr)
+ /* Always include at least one character. Required for when there
+ * is a mixup in "midword". */
+ mb_ptr_adv(mi.mi_end);
if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
*attrp = highlight_attr[HLF_SPB];
@@ -906,6 +944,13 @@ spell_check(wp, ptr, attrp, capcol)
*attrp = highlight_attr[HLF_SPL];
}
+ if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
+ {
+ /* Report SpellCap only when the word isn't badly spelled. */
+ *attrp = highlight_attr[HLF_SPC];
+ return wrongcaplen;
+ }
+
return (int)(mi.mi_end - ptr);
}
@@ -1085,7 +1130,7 @@ find_word(mip, mode)
#endif
if (spell_iswordp(ptr + wlen, mip->mi_buf))
{
- if (slang->sl_compflags == NULL)
+ if (slang->sl_compprog == NULL)
continue; /* next char is a word character */
word_ends = FALSE;
}
@@ -1157,16 +1202,45 @@ find_word(mip, mode)
if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
|| !word_ends)
{
- /* Makes you wonder why someone puts a compound flag on a word
+ /* If there is no flag or the word is shorter than
+ * COMPOUNDMIN reject it quickly.
+ * Makes you wonder why someone puts a compound flag on a word
* that's too short... Myspell compatibility requires this
* anyway. */
- if (wlen < slang->sl_compminlen)
+ if (((unsigned)flags >> 24) == 0 || wlen < slang->sl_compminlen)
+ continue;
+
+ /* Limit the number of compound words to COMPOUNDMAX. */
+ if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax)
continue;
- /* The word doesn't end or it comes after another: it must
- * have a compound flag. */
- if (!can_compound(slang, flags))
+ /* At start of word quickly check if compounding is possible
+ * with this flag. */
+ if (mip->mi_complen == 0
+ && vim_strchr(slang->sl_compstartflags,
+ ((unsigned)flags >> 24)) == NULL)
continue;
+
+ /* If the word ends the sequence of compound flags of the
+ * words must match with one of the COMPOUNDFLAGS items and
+ * the number of syllables must not be too large. */
+ mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
+ mip->mi_compflags[mip->mi_complen + 1] = NUL;
+ if (word_ends)
+ {
+ char_u fword[MAXWLEN];
+
+ if (slang->sl_compsylmax < MAXWLEN)
+ {
+ /* "fword" is only needed for checking syllables. */
+ if (ptr == mip->mi_word)
+ (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
+ else
+ vim_strncpy(fword, ptr, endlen[endidxcnt]);
+ }
+ if (!can_compound(slang, fword, mip->mi_compflags))
+ continue;
+ }
}
if (!word_ends)
@@ -1193,13 +1267,17 @@ find_word(mip, mode)
}
}
#endif
+ ++mip->mi_complen;
find_word(mip, FIND_COMPOUND);
+ --mip->mi_complen;
if (mip->mi_result == SP_OK)
break;
/* Find following word in keep-case tree. */
mip->mi_compoff = wlen;
+ ++mip->mi_complen;
find_word(mip, FIND_KEEPCOMPOUND);
+ --mip->mi_complen;
if (mip->mi_result == SP_OK)
break;
continue;
@@ -1239,16 +1317,29 @@ find_word(mip, mode)
}
/*
- * Return TRUE if "flags" has a valid compound flag.
- * TODO: check flags in a more advanced way.
+ * Return TRUE if "flags" is a valid sequence of compound flags and
+ * "word" does not have too many syllables.
*/
static int
-can_compound(slang, flags)
+can_compound(slang, word, flags)
slang_T *slang;
- int flags;
+ char_u *word;
+ char_u *flags;
{
- return slang->sl_compflags != NULL
- && *slang->sl_compflags == ((unsigned)flags >> 24);
+ regmatch_T regmatch;
+
+ if (slang->sl_compprog == NULL)
+ return FALSE;
+ regmatch.regprog = slang->sl_compprog;
+ regmatch.rm_ic = FALSE;
+ if (!vim_regexec(&regmatch, flags, 0))
+ return FALSE;
+
+ /* Count the number of syllables. This may be slow, do it last. */
+ if (slang->sl_compsylmax < MAXWLEN
+ && count_syllables(slang, word) > slang->sl_compsylmax)
+ return FALSE;
+ return TRUE;
}
/*
@@ -1480,6 +1571,8 @@ no_spell_checking()
/*
* Move to next spell error.
* "curline" is TRUE for "z?": find word under/after cursor in the same line.
+ * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
+ * to after badly spelled word before the cursor.
* Return OK if found, FAIL otherwise.
*/
int
@@ -1599,6 +1692,10 @@ spell_move_to(dir, allwords, curline)
vim_free(buf);
return OK;
}
+ else if (curline)
+ /* Insert mode completion: put cursor after
+ * the bad word. */
+ found_pos.col += len;
}
}
}
@@ -1609,19 +1706,20 @@ spell_move_to(dir, allwords, curline)
capcol -= len;
}
+ if (dir == BACKWARD && found_pos.lnum != 0)
+ {
+ /* Use the last match in the line. */
+ curwin->w_cursor = found_pos;
+ vim_free(buf);
+ return OK;
+ }
+
if (curline)
break; /* only check cursor line */
/* Advance to next line. */
if (dir == BACKWARD)
{
- if (found_pos.lnum != 0)
- {
- /* Use the last match in the line. */
- curwin->w_cursor = found_pos;
- vim_free(buf);
- return OK;
- }
if (lnum == 1)
break;
--lnum;
@@ -1715,8 +1813,8 @@ spell_load_lang(lang)
}
if (r == FAIL)
- smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
- fname_enc + 6);
+ smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
+ lang, spell_enc(), lang);
else if (*langcp != NUL)
{
/* Load all the additions. */
@@ -1767,6 +1865,9 @@ slang_alloc(lang)
{
lp->sl_name = vim_strsave(lang);
ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
+ lp->sl_compmax = MAXWLEN;
+ lp->sl_compminlen = MAXWLEN;
+ lp->sl_compsylmax = MAXWLEN;
}
return lp;
}
@@ -1853,8 +1954,14 @@ slang_clear(lp)
vim_free(lp->sl_midword);
lp->sl_midword = NULL;
- vim_free(lp->sl_compflags);
- lp->sl_compflags = NULL;
+ vim_free(lp->sl_compprog);
+ vim_free(lp->sl_compstartflags);
+ lp->sl_compprog = NULL;
+ lp->sl_compstartflags = NULL;
+
+ vim_free(lp->sl_syllable);
+ lp->sl_syllable = NULL;
+ ga_clear(&lp->sl_syl_items);
#ifdef FEAT_MBYTE
{
@@ -1870,6 +1977,11 @@ slang_clear(lp)
}
hash_clear(&lp->sl_map_hash);
#endif
+
+ lp->sl_compmax = MAXWLEN;
+ lp->sl_compminlen = MAXWLEN;
+ lp->sl_compsylmax = MAXWLEN;
+ lp->sl_regions[0] = NUL;
}
/*
@@ -1902,7 +2014,7 @@ spell_load_file(fname, lang, old_lp, silent)
int silent; /* no error if file doesn't exist */
{
FILE *fd;
- char_u buf[MAXWLEN + 1];
+ char_u buf[VIMSPELLMAGICL];
char_u *p;
char_u *bp;
idx_T *ip;
@@ -1912,15 +2024,10 @@ spell_load_file(fname, lang, old_lp, silent)
int round;
char_u *save_sourcing_name = sourcing_name;
linenr_T save_sourcing_lnum = sourcing_lnum;
- int cnt, ccnt;
- char_u *fol;
slang_T *lp = NULL;
- garray_T *gap;
- fromto_T *ftp;
- salitem_T *smp;
- short *first;
idx_T idx;
int c = 0;
+ int res;
fd = mch_fopen((char *)fname, "r");
if (fd == NULL)
@@ -1964,332 +2071,122 @@ spell_load_file(fname, lang, old_lp, silent)
sourcing_lnum = 0;
/* <HEADER>: <fileID>
- * <regioncnt> <regionname> ...
- * <charflagslen> <charflags>
- * <fcharslen> <fchars>
- * <midwordlen> <midword>
- * <compoundlen> <compoundtype> <compoundinfo>
- * <prefcondcnt> <prefcond> ...
*/
for (i = 0; i < VIMSPELLMAGICL; ++i)
buf[i] = getc(fd); /* <fileID> */
if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
{
- EMSG(_("E757: Wrong file ID in spell file"));
+ EMSG(_("E757: This does not look like a spell file"));
goto endFAIL;
}
-
- cnt = getc(fd); /* <regioncnt> */
- if (cnt < 0)
+ c = getc(fd); /* <versionnr> */
+ if (c < VIMSPELLVERSION)
{
-truncerr:
- EMSG(_(e_spell_trunc));
+ EMSG(_("E771: Old spell file, needs to be updated"));
goto endFAIL;
}
- if (cnt > 8)
+ else if (c > VIMSPELLVERSION)
{
-formerr:
- EMSG(_(e_format));
+ EMSG(_("E772: Spell file is for newer version of Vim"));
goto endFAIL;
}
- for (i = 0; i < cnt; ++i)
- {
- lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
- lp->sl_regions[i * 2 + 1] = getc(fd);
- }
- lp->sl_regions[cnt * 2] = NUL;
- /* <charflagslen> <charflags> */
- p = read_cnt_string(fd, 1, &cnt);
- if (cnt < 0)
- goto endFAIL;
- /* <fcharslen> <fchars> */
- fol = read_cnt_string(fd, 2, &ccnt);
- if (ccnt < 0)
+ /*
+ * <SECTIONS>: <section> ... <sectionend>
+ * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
+ */
+ for (;;)
{
- vim_free(p);
- goto endFAIL;
- }
-
- /* Set the word-char flags and fill SPELL_ISUPPER() table. */
- if (p != NULL && fol != NULL)
- i = set_spell_charflags(p, cnt, fol);
-
- vim_free(p);
- vim_free(fol);
-
- /* When <charflagslen> is zero then <fcharlen> must also be zero. */
- if ((p == NULL) != (fol == NULL))
- goto formerr;
-
- /* <midwordlen> <midword> */
- lp->sl_midword = read_cnt_string(fd, 2, &cnt);
- if (cnt < 0)
- goto endFAIL;
+ n = getc(fd); /* <sectionID> or <sectionend> */
+ if (n == SN_END)
+ break;
+ c = getc(fd); /* <sectionflags> */
+ len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
+ /* <sectionlen> */
+ if (len < 0)
+ goto truncerr;
- /* <compoundlen> <compoundtype> <compoundinfo> */
- cnt = (getc(fd) << 8) + getc(fd); /* <compoundlen> */
- if (cnt < 0)
- goto endFAIL;
- if (cnt > 0)
- {
- --cnt;
- c = getc(fd); /* <compoundtype> */
- if (c != 1)
+ res = 0;
+ switch (n)
{
- /* Unknown kind of compound words, skip the info. */
- while (cnt-- > 0)
- getc(fd);
- }
- else if (cnt < 2)
- goto formerr;
- else
- {
- --cnt;
- c = getc(fd); /* <comp1minlen> */
- if (c < 1 || c > 50)
- c = 3;
- lp->sl_compminlen = c;
+ case SN_REGION:
+ res = read_region_section(fd, lp, len);
+ break;
- p = alloc(cnt + 1);
- if (p == NULL)
- goto endFAIL;
- lp->sl_compflags = p;
- while (cnt-- > 0)
- *p++ = getc(fd); /* <comp1flags> */
- *p = NUL;
- }
- }
+ case SN_CHARFLAGS:
+ res = read_charflags_section(fd);
+ break;
+ case SN_MIDWORD:
+ lp->sl_midword = read_string(fd, len); /* <midword> */
+ if (lp->sl_midword == NULL)
+ goto endFAIL;
+ break;
- /* <prefcondcnt> <prefcond> ... */
- cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
- if (cnt > 0)
- {
- lp->sl_prefprog = (regprog_T **)alloc_clear(
- (unsigned)sizeof(regprog_T *) * cnt);
- if (lp->sl_prefprog == NULL)
- goto endFAIL;
- lp->sl_prefixcnt = cnt;
+ case SN_PREFCOND:
+ res = read_prefcond_section(fd, lp);
+ break;
- for (i = 0; i < cnt; ++i)
- {
- /* <prefcond> : <condlen> <condstr> */
- n = getc(fd); /* <condlen> */
- if (n < 0 || n >= MAXWLEN)
- goto formerr;
- /* When <condlen