7 files changed, 1487 insertions, 736 deletions
diff --git a/runtime/doc/insert.txt b/runtime/doc/insert.txt
index 43b3d4f416..ae90eb954c 100644
--- a/runtime/doc/insert.txt
+++ b/runtime/doc/insert.txt
@@ -1,4 +1,4 @@
-*insert.txt*    For Vim version 7.0aa.  Last change: 2005 Aug 11
+*insert.txt*    For Vim version 7.0aa.  Last change: 2005 Aug 17
 
 
 		  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -899,8 +899,11 @@ CTRL-X CTRL-O		Guess what kind of item is in front of the cursor and
 
 Spelling suggestions					*compl-spelling*
 
-The word in front of the cursor is located and correctly spelled words are
-suggested to replace it.  The word doesn't actually have to be badly spelled.
+A word before or at the cursor is located and correctly spelled words are
+suggested to replace it.  If there is a badly spelled word in the line, before
+or under the cursor, the cursor is moved to after it.  Otherwise the word just
+before the cursor is used for suggestions, even though it isn't badly spelled.
+
 NOTE: CTRL-S suspends display in many Unix terminals.  Use 's' instead.  Type
 CTRL-Q to resume displaying.
 
diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt
index a135a481ab..5fb03e9165 100644
--- a/runtime/doc/map.txt
+++ b/runtime/doc/map.txt
@@ -281,10 +281,7 @@ last defined.  Example: >
 	n  <C-W>*      * <C-W><C-S>*
 	        Last set from /home/abcd/.vimrc
 
-When the map was defined by hand there is no "Last set" message.  When the map
-was defined while executing a function, user command or autocommand, the
-script in which it was defined is reported.
-{not available when compiled without the +eval feature}
+See |:verbose-cmd| for more information.
 
 							*map_backslash*
 Note that only CTRL-V is mentioned here as a special character for mappings
@@ -877,10 +874,7 @@ last defined. Example: >
 	TOhtml      0    %               :call Convert2HTML(<line1>, <line2>)
 	    Last set from /usr/share/vim/vim-7.0/plugin/tohtml.vim
 <
-When the command was defined by hand there is no "Last set" message.  When the
-command was defined while executing a function, user command or autocommand,
-the script in which it was defined is reported.
-{not available when compiled without the +eval feature}
+See |:verbose-cmd| for more information.
 
 							*E174* *E182*
 :com[mand][!] [{attr}...] {cmd} {rep}
diff --git a/runtime/spell/en.ascii.spl b/runtime/spell/en.ascii.spl
index 9d2b068c44..360e1ae9ce 100644
--- a/runtime/spell/en.ascii.spl
+++ b/runtime/spell/en.ascii.spl
diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl
index 8b2edc6923..9ba853cebf 100644
--- a/runtime/spell/en.utf-8.spl
+++ b/runtime/spell/en.utf-8.spl
diff --git a/runtime/spell/hu/hu_HU.diff b/runtime/spell/hu/hu_HU.diff
index de22c794fe..2b9a6ddf0b 100644
--- a/runtime/spell/hu/hu_HU.diff
+++ b/runtime/spell/hu/hu_HU.diff
@@ -1,5 +1,5 @@
 *** hu_HU.orig.aff	Tue Aug 16 18:21:10 2005
---- hu_HU.aff	Tue Aug 16 19:42:34 2005
+--- hu_HU.aff	Fri Aug 19 21:28:45 2005
 ***************
 *** 57,62 ****
   
@@ -16,8 +16,9 @@
 ! #VERSION Magyar 0.99.4.2
   SET ISO8859-2
 ***************
-*** 65,77 ****
-  COMPOUNDFLAG Y
+*** 64,77 ****
+  COMPOUNDMIN 2
+! COMPOUNDFLAG Y
 ! COMPOUNDWORD 2 y
 ! COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû
 ! SYLLABLENUM klmc
@@ -30,24 +31,85 @@
 ! WORDCHARS -.§%°0123456789
 ! HU_KOTOHANGZO Z
   
---- 65,80 ----
-  COMPOUNDFLAG Y
+--- 64,116 ----
+  COMPOUNDMIN 2
 ! #COMPOUNDWORD 2 y
+! COMPOUNDMAX 2
+! # I don't understand what the "y" is for; if it's to disable compounding simply
+! # remove the compound flag from the word.
+! 
 ! #COMPOUNDSYLLABLE 6 aáeéiíoóöõuúüû
+! COMPOUNDSYLMAX 6
+! SYLLABLE a/á/e/é/i/í/o/ó/ö/õ/u/ú/ü/û
+! # Strange that every vowel is counted as a syllable, that's how the hunspell
+! # code works.
+! 
 ! #SYLLABLENUM klmc
+! # Don't understand what this is for
+! 
+! #COMPOUNDFLAG Y
 ! #COMPOUNDFIRST v
 ! #COMPOUNDLAST x
+! COMPOUNDFLAGS Y+
+! COMPOUNDFLAGS vY*x
+! COMPOUNDFLAGS Y+x
+! COMPOUNDFLAGS vY+
+! 
 ! #FORBIDDENWORD w
-! BAD w
+! #  I don't understand what FORBIDDENWORD is needed for, using NEEDAFFIX
+! #  (ONLYROOT) should be sufficient.
+! 
 ! #ONLYROOT u
+! NEEDAFFIX u
+! 
 ! #ACCENT áéíóöõúüû aeiooouuu
+! MAP 5
+! MAP aáä
+! MAP eé
+! MAP ií
+! MAP oóöõ
+! MAP uúüû
+! 
 ! #CHECKNUM
+! # Vim always handles numbers in the same way.
+! 
 ! #WORDCHARS -.§%°0123456789
+! FOL ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%°
+! LOW ±¢³µ¶¨¹º»¼¾¿±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ-§%°
+! UPP ¡¢£¥¦¨©ª«¬®¯±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßÿ-§%°
+! MIDWORD .
+! 
 ! #HU_KOTOHANGZO Z
 ! 
+! # There are soooo many affixes.  Postpone the prefixes to keep the time needed
+! # for generating the .spl within reasonable limits.
 ! PFXPOSTPONE
   
 ***************
+*** 81,96 ****
+  
+! REP 89
+! REP í i
+! REP i í
+! REP ó o
+! REP o ó
+! REP o õ
+! REP ú u
+! REP u ú
+! REP u û
+! REP û ü
+! REP ü û
+  REP j ly
+  REP ly j
+- REP a ä # Handel->Händel
+  REP S © # Skoda->©koda
+--- 120,124 ----
+  
+! REP 78
+  REP j ly
+  REP ly j
+  REP S © # Skoda->©koda
+***************
 *** 173,241 ****
   
 - # character conversion table
@@ -118,11 +180,11 @@
 - CHR 123 U3 Û
 - 
   SFX z Y 6
---- 176,177 ----
+--- 201,202 ----
 ***************
 *** 17678,17681 ****
   PFX D   0    leg          .
 - 
 -    1
 - 
---- 17614 ----
+--- 17639 ----
diff --git a/runtime/spell/main.aap b/runtime/spell/main.aap
index 1e37c1cc3d..5aaaaa1022 100644
--- a/runtime/spell/main.aap
+++ b/runtime/spell/main.aap
@@ -4,19 +4,22 @@
 # aap        generate all the .spl files
 # aap diff   create all the diff files
 
-LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi
+LANG = af bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi hu
 
+# "hu" is at the end, because it takes so much time.
+#
 # TODO:
 # Finnish doesn't work, the dictionary fi_FI.zip file contains hyphenation...
 
 diff: $*LANG/diff
-        :print done
+        :print Done.
 
 @for l in string.split(_no.LANG):
         :child $l/main.aap
 
 # The existing .spl files need to be generated when the spell file format
 # changes.  Depending on the Vim executable does that, but results in doing it
-# much too often.  Generate a dummy .spl file and check if it changed.
+# much too often.  Generate a dummy .spl file and let the .spl files depend on
+# it, so that they are outdated when it changes.
 :child check/main.aap
 *.spl: check/check.latin1.spl
diff --git a/src/spell.c b/src/spell.c
index a3fc24f720..30b08ae982 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -56,16 +56,6 @@
 # define SPELL_PRINTTREE
 #endif
 
-/* SPELL_COMPRESS_CNT is after how many allocated blocks we compress the tree
- * to limit the amount of memory used (esp. for Italian and Hungarian).  The
- * amount of memory used for nodes then is SPELL_COMPRESS_CNT times
- * SBLOCKSIZE.
- * Then compress again after allocating SPELL_COMPRESS_INC more blocks or
- * adding SPELL_COMPRESS_ADDED words and running out of memory again.  */
-#define SPELL_COMPRESS_CNT 30000
-#define SPELL_COMPRESS_INC 100
-#define SPELL_COMPRESS_ADDED 500000
-
 /*
  * Use this to adjust the score after finding suggestions, based on the
  * suggested word sounding like the bad word.  This is much faster than doing
@@ -78,94 +68,102 @@
 
 /*
  * Vim spell file format: <HEADER>
- *			  <SUGGEST>
+ *			  <SECTIONS>
  *			  <LWORDTREE>
  *			  <KWORDTREE>
  *			  <PREFIXTREE>
  *
- * <HEADER>: <fileID>
- *		<regioncnt> <regionname> ...
- *		<charflagslen> <charflags>
- *		<fcharslen> <fchars>
- *		<midwordlen> <midword>
- *		<compoundlen> <compoundtype> <compoundinfo>
- *		<prefcondcnt> <prefcond> ...
+ * <HEADER>: <fileID> <versionnr>
  *
- * <fileID>     10 bytes    "VIMspell10"
- * <regioncnt>  1 byte	    number of regions following (8 supported)
- * <regionname>	2 bytes     Region name: ca, au, etc.  Lower case.
- *			    First <regionname> is region 1.
+ * <fileID>     8 bytes    "VIMspell"
+ * <versionnr>  1 byte	    VIMSPELLVERSION
  *
- * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
- * <charflags>  N bytes     List of flags (first one is for character 128):
- *			    0x01  word character	CF_WORD
- *			    0x02  upper-case character	CF_UPPER
- * <fcharslen>  2 bytes     Number of bytes in <fchars>.
- * <fchars>     N bytes	    Folded characters, first one is for character 128.
  *
- * <midwordlen> 2 bytes     Number of bytes in <midword>.
- * <midword>    N bytes	    Characters that are word characters only when used
- *			    in the middle of a word.
+ * Sections make it possible to add information to the .spl file without
+ * making it incompatible with previous versions.  There are two kinds of
+ * sections:
+ * 1. Not essential for correct spell checking.  E.g. for making suggestions.
+ *    These are skipped when not supported.
+ * 2. Optional information, but essential for spell checking when present.
+ *    E.g. conditions for affixes.  When this section is present but not
+ *    supported an error message is given.
  *
- * <compoundlen> 2 bytes    Number of bytes following for compound info (can
- *			    be used to skip it when it's not understood).
+ * <SECTIONS>: <section> ... <sectionend>
  *
- * <compoundtype 1 byte	    1: compound words using <comp1minlen> and
- *			    <comp1flags>
+ * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
  *
- * <comp1minlen> 1 byte     minimal word length for compounding
+ * <sectionID>	  1 byte    number from 0 to 254 identifying the section
  *
- * <comp1flags>  N bytes    flags used for compounding words
+ * <sectionflags> 1 byte    SNF_REQUIRED: this section is required for correct
+ *					    spell checking
  *
+ * <sectionlen>   4 bytes   length of section contents, MSB first
  *
- * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
- *
- * <prefcond> : <condlen> <condstr>
+ * <sectionend>	  1 byte    SN_END
  *
- * <condlen>	1 byte	    Length of <condstr>.
  *
- * <condstr>	N bytes	    Condition for the prefix.
+ * sectionID == SN_REGION: <regionname> ...
+ * <regionname>	 2 bytes    Up to 8 region names: ca, au, etc.  Lower case.
+ *			    First <regionname> is region 1.
  *
+ * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
+ *				<folcharslen> <folchars>
+ * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
+ * <charflags>  N bytes     List of flags (first one is for character 128):
+ *			    0x01  word character	CF_WORD
+ *			    0x02  upper-case character	CF_UPPER
+ * <folcharslen>  2 bytes   Number of bytes in <folchars>.
+ * <folchars>     N bytes   Folded characters, first one is for character 128.
  *
- * <SUGGEST> : <repcount> <rep> ...
- *	       <salflags> <salcount> <sal> ...
- *	       <maplen> <mapstr>
+ * sectionID == SN_MIDWORD: <midword>
+ * <midword>     N bytes    Characters that are word characters only when used
+ *			    in the middle of a word.
  *
- * <repcount>	2 bytes	    number of <rep> items, MSB first.
+ * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
+ * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
+ * <prefcond> : <condlen> <condstr>
+ * <condlen>	1 byte	    Length of <condstr>.
+ * <condstr>	N bytes	    Condition for the prefix.
  *
+ * sectionID == SN_REP: <repcount> <rep> ...
+ * <repcount>	 2 bytes    number of <rep> items, MSB first.
  * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
+ * <repfromlen>	 1 byte	    length of <repfrom>
+ * <repfrom>	 N bytes    "from" part of replacement
+ * <reptolen>	 1 byte	    length of <repto>
+ * <repto>	 N bytes    "to" part of replacement
  *
- * <repfromlen>	1 byte	    length of <repfrom>
- *
- * <repfrom>	N bytes	    "from" part of replacement
- *
- * <reptolen>	1 byte	    length of <repto>
- *
- * <repto>	N bytes	    "to" part of replacement
- *
- * <salflags>	1 byte	    flags for soundsalike conversion:
+ * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
+ * <salflags>	 1 byte	    flags for soundsalike conversion:
  *			    SAL_F0LLOWUP
  *			    SAL_COLLAPSE
  *			    SAL_REM_ACCENTS
- *			    SAL_SOFO: SOFOFROM and SOFOTO used instead of SAL
- *
- * <salcount>   2 bytes	    number of <sal> items following
- *
+ * <salcount>    2 bytes    number of <sal> items following
  * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
+ * <salfromlen>	 1 byte	    length of <salfrom>
+ * <salfrom>	 N bytes    "from" part of soundsalike
+ * <saltolen>	 1 byte	    length of <salto>
+ * <salto>	 N bytes    "to" part of soundsalike
  *
- * <salfromlen>	1-2 bytes    length of <salfrom> (2 bytes for SAL_SOFO)
- *
- * <salfrom>	N bytes	    "from" part of soundsalike
- *
- * <saltolen>	1-2 bytes    length of <salto> (2 bytes for SAL_SOFO)
- *
- * <salto>	N bytes	    "to" part of soundsalike
+ * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
+ * <sofofromlen> 2 bytes    length of <sofofrom>
+ * <sofofrom>	 N bytes    "from" part of soundfold
+ * <sofotolen>	 2 bytes    length of <sofoto>
+ * <sofoto>	 N bytes    "to" part of soundfold
  *
- * <maplen>	2 bytes	    length of <mapstr>, MSB first
- *
- * <mapstr>	N bytes	    String with sequences of similar characters,
+ * sectionID == SN_MAP: <mapstr>
+ * <mapstr>	 N bytes    String with sequences of similar characters,
  *			    separated by slashes.
  *
+ * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags>
+ * <compmax>     1 byte	    Maximum nr of words in compound word.
+ * <compminlen>  1 byte	    Minimal word length for compounding.
+ * <compsylmax>  1 byte	    Maximum nr of syllables in compound word.
+ * <compflags>   N bytes    Flags from COMPOUNDFLAGS items, separated by
+ *			    slashes.
+ *
+ * sectionID == SN_SYLLABLE: <syllable>
+ * <syllable>    N bytes    String from SYLLABLE item.
  *
  * <LWORDTREE>: <wordtree>
  *
@@ -333,6 +331,12 @@ typedef int salfirst_T;
 typedef short salfirst_T;
 #endif
 
+/* Values for SP_*ERROR are negative; positive values are used by
+ * read_cnt_string(). */
+#define	SP_TRUNCERROR	-1	/* spell file truncated error */
+#define	SP_FORMERROR	-2	/* format error in spell file */
+#define SP_ERROR	-3	/* other error while reading spell file */
+
 /*
  * Structure used to store words and other info for one language, loaded from
  * a .spl file.
@@ -367,8 +371,14 @@ struct slang_S
 
     char_u	*sl_midword;	/* MIDWORD string or NULL */
 
-    int		sl_compminlen;	/* COMPOUNDMIN */
-    char_u	*sl_compflags;	/* COMPOUNDFLAGS (NULL when no compounding) */
+    int		sl_compmax;	/* COMPOUNDMAX (default: MAXWLEN) */
+    int		sl_compminlen;	/* COMPOUNDMIN (default: MAXWLEN) */
+    int		sl_compsylmax;	/* COMPOUNDSYLMAX (default: MAXWLEN) */
+    regprog_T	*sl_compprog;	/* COMPOUNDFLAGS turned into a regexp program
+				 * (NULL when no compounding) */
+    char_u	*sl_compstartflags; /* flags for first compound word */
+    char_u	*sl_syllable;	/* SYLLABLE repeatable chars or NULL */
+    garray_T	sl_syl_items;	/* syllable items */
 
     int		sl_prefixcnt;	/* number of items in "sl_prefprog" */
     regprog_T	**sl_prefprog;	/* table with regprogs for prefixes */
@@ -402,7 +412,6 @@ static slang_T *first_lang = NULL;
 #define SAL_F0LLOWUP		1
 #define SAL_COLLAPSE		2
 #define SAL_REM_ACCENTS		4
-#define SAL_SOFO		8   /* SOFOFROM and SOFOTO instead of SAL */
 
 /*
  * Structure used in "b_langp", filled from 'spelllang'.
@@ -417,6 +426,25 @@ typedef struct langp_S
 
 #define REGION_ALL 0xff		/* word valid in all regions */
 
+#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
+#define VIMSPELLMAGICL 8
+#define VIMSPELLVERSION 50
+
+/* Section IDs.  Only renumber them when VIMSPELLVERSION changes! */
+#define SN_REGION	0	/* <regionname> section */
+#define SN_CHARFLAGS	1	/* charflags section */
+#define SN_MIDWORD	2	/* <midword> section */
+#define SN_PREFCOND	3	/* <prefcond> section */
+#define SN_REP		4	/* REP items section */
+#define SN_SAL		5	/* SAL items section */
+#define SN_SOFO		6	/* soundfolding section */
+#define SN_MAP		7	/* MAP items section */
+#define SN_COMPOUND	8	/* compound words section */
+#define SN_SYLLABLE	9	/* syllable section */
+#define SN_END		255	/* end of sections */
+
+#define SNF_REQUIRED	1	/* <sectionflags>: required section */
+
 /* Result values.  Lower number is accepted over higher one. */
 #define SP_BANNED	-1
 #define SP_OK		0
@@ -424,9 +452,6 @@ typedef struct langp_S
 #define SP_LOCAL	2
 #define SP_BAD		3
 
-#define VIMSPELLMAGIC "VIMspell10"  /* string at start of Vim spell file */
-#define VIMSPELLMAGICL 10
-
 /* file used for "zG" and "zW" */
 static char_u	*int_wordlist = NULL;
 
@@ -472,7 +497,7 @@ typedef struct suggest_S
 /* score for various changes */
 #define SCORE_SPLIT	149	/* split bad word */
 #define SCORE_ICASE	52	/* slightly different case */
-#define SCORE_REGION	70	/* word is for different region */
+#define SCORE_REGION	200	/* word is for different region */
 #define SCORE_RARE	180	/* rare word */
 #define SCORE_SWAP	90	/* swap two characters */
 #define SCORE_SWAP3	110	/* swap two characters in three */
@@ -527,6 +552,8 @@ typedef struct matchinf_S
 
     /* for when checking a compound word */
     int		mi_compoff;		/* start of following word offset */
+    char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
+    int		mi_complen;		/* nr of compound words used */
 
     /* others */
     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
@@ -559,7 +586,7 @@ static int spell_iswordp_nmw __ARGS((char_u *p));
 #ifdef FEAT_MBYTE
 static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
 #endif
-static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
+static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
 
 /*
  * For finding suggestions: At each node in the tree these states are tried:
@@ -608,8 +635,9 @@ typedef struct trystate_S
     char_u	ts_isdiff;	/* DIFF_ values */
     char_u	ts_fcharstart;	/* index in fword where badword char started */
 #endif
-    char_u	ts_save_prewordlen; /* saved "prewordlen" */
-    char_u	ts_save_splitoff;   /* su_splitoff saved here */
+    char_u	ts_prewordlen;	/* length of word in "preword[]" */
+    char_u	ts_splitoff;	/* index in "tword" after last split */
+    char_u	ts_complen;	/* nr of compound words used */
     char_u	ts_save_badflags;   /* su_badflags saved here */
 } trystate_T;
 
@@ -634,7 +662,7 @@ static slang_T *slang_alloc __ARGS((char_u *lang));
 static void slang_free __ARGS((slang_T *lp));
 static void slang_clear __ARGS((slang_T *lp));
 static void find_word __ARGS((matchinf_T *mip, int mode));
-static int can_compound __ARGS((slang_T *slang, int flags));
+static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
 static void find_prefix __ARGS((matchinf_T *mip));
 static int fold_more __ARGS((matchinf_T *mip));
@@ -646,6 +674,16 @@ static void int_wordlist_spl __ARGS((char_u *fname));
 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
+static char_u *read_string __ARGS((FILE *fd, int cnt));
+static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
+static int read_charflags_section __ARGS((FILE *fd));
+static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
+static int read_rep_section __ARGS((FILE *fd, slang_T *slang));
+static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
+static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
+static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
+static int init_syl_tab __ARGS((slang_T *slang));
+static int count_syllables __ARGS((slang_T *slang, char_u *word));
 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
 static void set_sal_first __ARGS((slang_T *lp));
 #ifdef FEAT_MBYTE
@@ -658,9 +696,8 @@ static int find_region __ARGS((char_u *rp, char_u *region));
 static int captype __ARGS((char_u *word, char_u *end));
 static int badword_captype __ARGS((char_u *word, char_u *end));
 static void spell_reload_one __ARGS((char_u *fname, int added_word));
-static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
+static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
-static void write_spell_chartab __ARGS((FILE *fd));
 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap));
@@ -777,11 +814,13 @@ spell_check(wp, ptr, attrp, capcol)
 				   be passed to functions quickly. */
     int		nrlen = 0;	/* found a number first */
     int		c;
+    int		wrongcaplen = 0;
 
     /* A word never starts at a space or a control character.  Return quickly
      * then, skipping over the character. */
     if (*ptr <= ' ')
 	return 1;
+    vim_memset(&mi, 0, sizeof(matchinf_T));
 
     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
      * 0X99FF.  But when a word character follows do check spelling to find
@@ -818,10 +857,7 @@ spell_check(wp, ptr, attrp, capcol)
 	    /* Check word starting with capital letter. */
 	    c = PTR2CHAR(ptr);
 	    if (!SPELL_ISUPPER(c))
-	    {
-		*attrp = highlight_attr[HLF_SPC];
-		return (int)(mi.mi_fend - ptr);
-	    }
+		wrongcaplen = (int)(mi.mi_fend - ptr);
 	}
     }
     if (capcol != NULL)
@@ -832,12 +868,10 @@ spell_check(wp, ptr, attrp, capcol)
     mi.mi_end = mi.mi_fend;
 
     /* Check caps type later. */
-    mi.mi_capflags = 0;
-    mi.mi_cend = NULL;
     mi.mi_buf = wp->w_buffer;
 
-    /* Include one non-word character so that we can check for the
-     * word end. */
+    /* case-fold the word with one non-word character, so that we can check
+     * for the word end. */
     if (*mi.mi_fend != NUL)
 	mb_ptr_adv(mi.mi_fend);
 
@@ -897,6 +931,10 @@ spell_check(wp, ptr, attrp, capcol)
 #endif
 	    return 1;
 	}
+	else if (mi.mi_end == ptr)
+	    /* Always include at least one character.  Required for when there
+	     * is a mixup in "midword". */
+	    mb_ptr_adv(mi.mi_end);
 
 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
 	    *attrp = highlight_attr[HLF_SPB];
@@ -906,6 +944,13 @@ spell_check(wp, ptr, attrp, capcol)
 	    *attrp = highlight_attr[HLF_SPL];
     }
 
+    if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
+    {
+	/* Report SpellCap only when the word isn't badly spelled. */
+	*attrp = highlight_attr[HLF_SPC];
+	return wrongcaplen;
+    }
+
     return (int)(mi.mi_end - ptr);
 }
 
@@ -1085,7 +1130,7 @@ find_word(mip, mode)
 #endif
 	if (spell_iswordp(ptr + wlen, mip->mi_buf))
 	{
-	    if (slang->sl_compflags == NULL)
+	    if (slang->sl_compprog == NULL)
 		continue;	    /* next char is a word character */
 	    word_ends = FALSE;
 	}
@@ -1157,16 +1202,45 @@ find_word(mip, mode)
 	    if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
 								|| !word_ends)
 	    {
-		/* Makes you wonder why someone puts a compound flag on a word
+		/* If there is no  flag or the word is shorter than
+		 * COMPOUNDMIN reject it quickly.
+		 * Makes you wonder why someone puts a compound flag on a word
 		 * that's too short...  Myspell compatibility requires this
 		 * anyway. */
-		if (wlen < slang->sl_compminlen)
+		if (((unsigned)flags >> 24) == 0 || wlen < slang->sl_compminlen)
+		    continue;
+
+		/* Limit the number of compound words to COMPOUNDMAX. */
+		if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax)
 		    continue;
 
-		/* The word doesn't end or it comes after another: it must
-		 * have a compound flag. */
-		if (!can_compound(slang, flags))
+		/* At start of word quickly check if compounding is possible
+		 * with this flag. */
+		if (mip->mi_complen == 0
+				&& vim_strchr(slang->sl_compstartflags,
+					    ((unsigned)flags >> 24)) == NULL)
 		    continue;
+
+		/* If the word ends the sequence of compound flags of the
+		 * words must match with one of the COMPOUNDFLAGS items and
+		 * the number of syllables must not be too large. */
+		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
+		mip->mi_compflags[mip->mi_complen + 1] = NUL;
+		if (word_ends)
+		{
+		    char_u	fword[MAXWLEN];
+
+		    if (slang->sl_compsylmax < MAXWLEN)
+		    {
+			/* "fword" is only needed for checking syllables. */
+			if (ptr == mip->mi_word)
+			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
+			else
+			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
+		    }
+		    if (!can_compound(slang, fword, mip->mi_compflags))
+			continue;
+		}
 	    }
 
 	    if (!word_ends)
@@ -1193,13 +1267,17 @@ find_word(mip, mode)
 		    }
 		}
 #endif
+		++mip->mi_complen;
 		find_word(mip, FIND_COMPOUND);
+		--mip->mi_complen;
 		if (mip->mi_result == SP_OK)
 		    break;
 
 		/* Find following word in keep-case tree. */
 		mip->mi_compoff = wlen;
+		++mip->mi_complen;
 		find_word(mip, FIND_KEEPCOMPOUND);
+		--mip->mi_complen;
 		if (mip->mi_result == SP_OK)
 		    break;
 		continue;
@@ -1239,16 +1317,29 @@ find_word(mip, mode)
 }
 
 /*
- * Return TRUE if "flags" has a valid compound flag.
- * TODO: check flags in a more advanced way.
+ * Return TRUE if "flags" is a valid sequence of compound flags and
+ * "word[len]" does not have too many syllables.
  */
     static int
-can_compound(slang, flags)
+can_compound(slang, word, flags)
     slang_T	*slang;
-    int		flags;
+    char_u	*word;
+    char_u	*flags;
 {
-    return slang->sl_compflags != NULL
-			   && *slang->sl_compflags == ((unsigned)flags >> 24);
+    regmatch_T	regmatch;
+
+    if (slang->sl_compprog == NULL)
+	return FALSE;
+    regmatch.regprog = slang->sl_compprog;
+    regmatch.rm_ic = FALSE;
+    if (!vim_regexec(&regmatch, flags, 0))
+	return FALSE;
+
+    /* Count the number of syllables.  This may be slow, do it last. */
+    if (slang->sl_compsylmax < MAXWLEN
+		       && count_syllables(slang, word) > slang->sl_compsylmax)
+	return FALSE;
+    return TRUE;
 }
 
 /*
@@ -1480,6 +1571,8 @@ no_spell_checking()
 /*
  * Move to next spell error.
  * "curline" is TRUE for "z?": find word under/after cursor in the same line.
+ * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
+ * to after badly spelled word before the cursor.
  * Return OK if found, FAIL otherwise.
  */
     int
@@ -1599,6 +1692,10 @@ spell_move_to(dir, allwords, curline)
 				vim_free(buf);
 				return OK;
 			    }
+			    else if (curline)
+				/* Insert mode completion: put cursor after
+				 * the bad word. */
+				found_pos.col += len;
 			}
 		    }
 		}
@@ -1609,19 +1706,20 @@ spell_move_to(dir, allwords, curline)
 	    capcol -= len;
 	}
 
+	if (dir == BACKWARD && found_pos.lnum != 0)
+	{
+	    /* Use the last match in the line. */
+	    curwin->w_cursor = found_pos;
+	    vim_free(buf);
+	    return OK;
+	}
+
 	if (curline)
 	    break;	/* only check cursor line */
 
 	/* Advance to next line. */
 	if (dir == BACKWARD)
 	{
-	    if (found_pos.lnum != 0)
-	    {
-		/* Use the last match in the line. */
-		curwin->w_cursor = found_pos;
-		vim_free(buf);
-		return OK;
-	    }
 	    if (lnum == 1)
 		break;
 	    --lnum;
@@ -1715,8 +1813,8 @@ spell_load_lang(lang)
     }
 
     if (r == FAIL)
-	smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
-							       fname_enc + 6);
+	smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
+						     lang, spell_enc(), lang);
     else if (*langcp != NUL)
     {
 	/* Load all the additions. */
@@ -1767,6 +1865,9 @@ slang_alloc(lang)
     {
 	lp->sl_name = vim_strsave(lang);
 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
+	lp->sl_compmax = MAXWLEN;
+	lp->sl_compminlen = MAXWLEN;
+	lp->sl_compsylmax = MAXWLEN;
     }
     return lp;
 }
@@ -1853,8 +1954,14 @@ slang_clear(lp)
     vim_free(lp->sl_midword);
     lp->sl_midword = NULL;
 
-    vim_free(lp->sl_compflags);
-    lp->sl_compflags = NULL;
+    vim_free(lp->sl_compprog);
+    vim_free(lp->sl_compstartflags);
+    lp->sl_compprog = NULL;
+    lp->sl_compstartflags = NULL;
+
+    vim_free(lp->sl_syllable);
+    lp->sl_syllable = NULL;
+    ga_clear(&lp->sl_syl_items);
 
 #ifdef FEAT_MBYTE
     {
@@ -1870,6 +1977,11 @@ slang_clear(lp)
     }
     hash_clear(&lp->sl_map_hash);
 #endif
+
+    lp->sl_compmax = MAXWLEN;
+    lp->sl_compminlen = MAXWLEN;
+    lp->sl_compsylmax = MAXWLEN;
+    lp->sl_regions[0] = NUL;
 }
 
 /*
@@ -1902,7 +2014,7 @@ spell_load_file(fname, lang, old_lp, silent)
     int		silent;		/* no error if file doesn't exist */
 {
     FILE	*fd;
-    char_u	buf[MAXWLEN + 1];
+    char_u	buf[VIMSPELLMAGICL];
     char_u	*p;
     char_u	*bp;
     idx_T	*ip;
@@ -1912,15 +2024,10 @@ spell_load_file(fname, lang, old_lp, silent)
     int		round;
     char_u	*save_sourcing_name = sourcing_name;
     linenr_T	save_sourcing_lnum = sourcing_lnum;
-    int		cnt, ccnt;
-    char_u	*fol;
     slang_T	*lp = NULL;
-    garray_T	*gap;
-    fromto_T	*ftp;
-    salitem_T	*smp;
-    short	*first;
     idx_T	idx;
     int		c = 0;
+    int		res;
 
     fd = mch_fopen((char *)fname, "r");
     if (fd == NULL)
@@ -1964,332 +2071,122 @@ spell_load_file(fname, lang, old_lp, silent)
     sourcing_lnum = 0;
 
     /* <HEADER>: <fileID>
-     *		<regioncnt> <regionname> ...
-     *		<charflagslen> <charflags>
-     *		<fcharslen> <fchars>
-     *		<midwordlen> <midword>
-     *		<compoundlen> <compoundtype> <compoundinfo>
-     *		<prefcondcnt> <prefcond> ...
      */
     for (i = 0; i < VIMSPELLMAGICL; ++i)
 	buf[i] = getc(fd);				/* <fileID> */
     if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
     {
-	EMSG(_("E757: Wrong file ID in spell file"));
+	EMSG(_("E757: This does not look like a spell file"));
 	goto endFAIL;
     }
-
-    cnt = getc(fd);					/* <regioncnt> */
-    if (cnt < 0)
+    c = getc(fd);					/* <versionnr> */
+    if (c < VIMSPELLVERSION)
     {
-truncerr:
-	EMSG(_(e_spell_trunc));
+	EMSG(_("E771: Old spell file, needs to be updated"));
 	goto endFAIL;
     }
-    if (cnt > 8)
+    else if (c > VIMSPELLVERSION)
     {
-formerr:
-	EMSG(_(e_format));
+	EMSG(_("E772: Spell file is for newer version of Vim"));
 	goto endFAIL;
     }
-    for (i = 0; i < cnt; ++i)
-    {
-	lp->sl_regions[i * 2] = getc(fd);		/* <regionname> */
-	lp->sl_regions[i * 2 + 1] = getc(fd);
-    }
-    lp->sl_regions[cnt * 2] = NUL;
 
-    /* <charflagslen> <charflags> */
-    p = read_cnt_string(fd, 1, &cnt);
-    if (cnt < 0)
-	goto endFAIL;
 
-    /* <fcharslen> <fchars> */
-    fol = read_cnt_string(fd, 2, &ccnt);
-    if (ccnt < 0)
+    /*
+     * <SECTIONS>: <section> ... <sectionend>
+     * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
+     */
+    for (;;)
     {
-	vim_free(p);
-	goto endFAIL;
-    }
-
-    /* Set the word-char flags and fill SPELL_ISUPPER() table. */
-    if (p != NULL && fol != NULL)
-	i = set_spell_charflags(p, cnt, fol);
-
-    vim_free(p);
-    vim_free(fol);
-
-    /* When <charflagslen> is zero then <fcharlen> must also be zero. */
-    if ((p == NULL) != (fol == NULL))
-	goto formerr;
-
-    /* <midwordlen> <midword> */
-    lp->sl_midword = read_cnt_string(fd, 2, &cnt);
-    if (cnt < 0)
-	goto endFAIL;
+	n = getc(fd);			    /* <sectionID> or <sectionend> */
+	if (n == SN_END)
+	    break;
+	c = getc(fd);					/* <sectionflags> */
+	len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
+							/* <sectionlen> */
+	if (len < 0)
+	    goto truncerr;
 
-    /* <compoundlen> <compoundtype> <compoundinfo> */
-    cnt = (getc(fd) << 8) + getc(fd);			/* <compoundlen> */
-    if (cnt < 0)
-	goto endFAIL;
-    if (cnt > 0)
-    {
-	--cnt;
-	c = getc(fd);					/* <compoundtype> */
-	if (c != 1)
+	res = 0;
+	switch (n)
 	{
-	    /* Unknown kind of compound words, skip the info. */
-	    while (cnt-- > 0)
-		getc(fd);
-	}
-	else if (cnt < 2)
-	    goto formerr;
-	else
-	{
-	    --cnt;
-	    c = getc(fd);				/* <comp1minlen> */
-	    if (c < 1 || c > 50)
-		c = 3;
-	    lp->sl_compminlen = c;
+	    case SN_REGION:
+		res = read_region_section(fd, lp, len);
+		break;
 
-	    p = alloc(cnt + 1);
-	    if (p == NULL)
-		goto endFAIL;
-	    lp->sl_compflags = p;
-	    while (cnt-- > 0)
-		*p++ = getc(fd);			/* <comp1flags> */
-	    *p = NUL;
-	}
-    }
+	    case SN_CHARFLAGS:
+		res = read_charflags_section(fd);
+		break;
 
+	    case SN_MIDWORD:
+		lp->sl_midword = read_string(fd, len);	/* <midword> */
+		if (lp->sl_midword == NULL)
+		    goto endFAIL;
+		break;
 
-    /* <prefcondcnt> <prefcond> ... */
-    cnt = (getc(fd) << 8) + getc(fd);			/* <prefcondcnt> */
-    if (cnt > 0)
-    {
-	lp->sl_prefprog = (regprog_T **)alloc_clear(
-					 (unsigned)sizeof(regprog_T *) * cnt);
-	if (lp->sl_prefprog == NULL)
-	    goto endFAIL;
-	lp->sl_prefixcnt = cnt;
+	    case SN_PREFCOND:
+		res = read_prefcond_section(fd, lp);
+		break;
 
-	for (i = 0; i < cnt; ++i)
-	{
-	    /* <prefcond> : <condlen> <condstr> */
-	    n = getc(fd);				/* <condlen> */
-	    if (n < 0 || n >= MAXWLEN)
-		goto formerr;
-	    /* When <condlen