updated for version 7.0135

author: Bram Moolenaar <Bram@vim.org> 2005-08-23 21:00:13 +0000
committer: Bram Moolenaar <Bram@vim.org> 2005-08-23 21:00:13 +0000
commit: 7862282f2edc76533c5c0dcf49b57bf747a30ebc (patch)
tree: 8feddb7d932bf1d473752be7460746d706935df1
parent: a6c840d7d4d51af2a15f64db5e5b908457b572cf (diff)
12 files changed, 346 insertions, 60 deletions
diff --git a/runtime/spell/am/am_ET.diff b/runtime/spell/am/am_ET.diff
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/runtime/spell/am/am_ET.diff
diff --git a/runtime/spell/main.aap b/runtime/spell/main.aap
index e98b365337..b33b45e7d3 100644
--- a/runtime/spell/main.aap
+++ b/runtime/spell/main.aap
@@ -4,7 +4,7 @@
 # aap        generate all the .spl files
 # aap diff   create all the diff files
 
-LANG = af am bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi hu
+LANG = af am bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk th yi hu
 
 # "hu" is at the end, because it takes very long.
 #
diff --git a/runtime/spell/nl/nl_NL.diff b/runtime/spell/nl/nl_NL.diff
index dc6c599811..d66d76dbf4 100644
--- a/runtime/spell/nl/nl_NL.diff
+++ b/runtime/spell/nl/nl_NL.diff
@@ -1,12 +1,12 @@
 *** nl_NL.orig.aff	Sun Jul  3 18:24:07 2005
---- nl_NL.aff	Tue Aug 16 22:39:54 2005
+--- nl_NL.aff	Tue Aug 23 14:03:48 2005
 ***************
 *** 3,6 ****
 --- 3,30 ----
   
-+ FOL  àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ
-+ LOW  àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ
-+ UPP  ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßÿ
++ FOL  ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
++ LOW  ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
++ UPP  ßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞÿ
 + 
 + SOFOFROM abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ¿
 + SOFOTO   ebctefghejklnnepkrstevvkesebctefghejklnnepkrstevvkeseeeeeeeceeeeeeeedneeeeeeeeeeepseeeeeeeeceeeeeeeedneeeeeeeeeeep?
diff --git a/runtime/spell/yi/yi_tr.diff b/runtime/spell/yi/yi_tr.diff
index 4b5148f0c3..1616f303c5 100644
--- a/runtime/spell/yi/yi_tr.diff
+++ b/runtime/spell/yi/yi_tr.diff
@@ -6,7 +6,7 @@
 + 84608
   gruntelement
   dzhobendiks
-*** /dev/null	Fri Aug 19 23:01:14 2005
+*** /dev/null	Tue Aug 23 22:51:11 2005
 --- yi_tr.aff	Tue Aug 16 10:48:01 2005
 ***************
 *** 0 ****
diff --git a/src/charset.c b/src/charset.c
index c14bef03e1..c769486cf1 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -1531,6 +1531,122 @@ vim_isxdigit(c)
 	|| (c >= 'A' && c <= 'F');
 }
 
+#if defined(FEAT_MBYTE) || defined(PROTO)
+/*
+ * Vim's own character class functions.  These exist because many library
+ * islower()/toupper() etc. do not work properly: they crash when used with
+ * invalid values or can't handle latin1 when the locale is C.
+ * Speed is most important here.
+ */
+#define LATIN1LOWER 'l'
+#define LATIN1UPPER 'U'
+
+/*                                                                 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~                                  ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ */
+static char_u latin1flags[256] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
+static char_u latin1upper[256] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ÷ØÙÚÛÜÝÞÿ";
+static char_u latin1lower[256] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿àáâãäåæçèéêëìíîïðñòóôõö×øùúûüýþßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
+
+    int
+vim_islower(c)
+    int	    c;
+{
+    if (c <= '@')
+	return FALSE;
+    if (c >= 0x80)
+    {
+	if (enc_utf8)
+	    return utf_islower(c);
+	if (c >= 0x100)
+	{
+#ifdef HAVE_ISWLOWER
+	    if (has_mbyte)
+		return iswlower(c);
+#endif
+	    /* islower() can't handle these chars and may crash */
+	    return FALSE;
+	}
+	if (enc_latin1like)
+	    return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
+    }
+    return islower(c);
+}
+
+    int
+vim_isupper(c)
+    int	    c;
+{
+    if (c <= '@')
+	return FALSE;
+    if (c >= 0x80)
+    {
+	if (enc_utf8)
+	    return utf_isupper(c);
+	if (c >= 0x100)
+	{
+#ifdef HAVE_ISWUPPER
+	    if (has_mbyte)
+		return iswupper(c);
+#endif
+	    /* islower() can't handle these chars and may crash */
+	    return FALSE;
+	}
+	if (enc_latin1like)
+	    return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
+    }
+    return isupper(c);
+}
+
+    int
+vim_toupper(c)
+    int	    c;
+{
+    if (c <= '@')
+	return c;
+    if (c >= 0x80)
+    {
+	if (enc_utf8)
+	    return utf_toupper(c);
+	if (c >= 0x100)
+	{
+#ifdef HAVE_TOWUPPER
+	    if (has_mbyte)
+		return towupper(c);
+#endif
+	    /* toupper() can't handle these chars and may crash */
+	    return c;
+	}
+	if (enc_latin1like)
+	    return latin1upper[c];
+    }
+    return TOUPPER_LOC(c);
+}
+
+    int
+vim_tolower(c)
+    int	    c;
+{
+    if (c <= '@')
+	return c;
+    if (c >= 0x80)
+    {
+	if (enc_utf8)
+	    return utf_tolower(c);
+	if (c >= 0x100)
+	{
+#ifdef HAVE_TOWLOWER
+	    if (has_mbyte)
+		return towlower(c);
+#endif
+	    /* tolower() can't handle these chars and may crash */
+	    return c;
+	}
+	if (enc_latin1like)
+	    return latin1lower[c];
+    }
+    return TOLOWER_LOC(c);
+}
+#endif
+
 /*
  * skiptowhite: skip over text until ' ' or '\t' or NUL.
  */
diff --git a/src/macros.h b/src/macros.h
index d18813ce20..9310bdd202 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -81,13 +81,13 @@
 
 /*
  * MB_ISLOWER() and MB_ISUPPER() are to be used on multi-byte characters.  But
- * don't use them for negative values.
+ * don't use them for negative values!
  */
 #ifdef FEAT_MBYTE
-# define MB_ISLOWER(c)	(enc_utf8 && (c) >= 0x80 ? utf_islower(c) : (has_mbyte && c > 255 ? FALSE : islower(c)))
-# define MB_ISUPPER(c)	(enc_utf8 && (c) >= 0x80 ? utf_isupper(c) : (has_mbyte && c > 255 ? FALSE : isupper(c)))
-# define MB_TOLOWER(c)	(enc_utf8 && (c) >= 0x80 ? utf_tolower(c) : (has_mbyte && c > 255 ? c : TOLOWER_LOC(c)))
-# define MB_TOUPPER(c)	(enc_utf8 && (c) >= 0x80 ? utf_toupper(c) : (has_mbyte && c > 255 ? c : TOUPPER_LOC(c)))
+# define MB_ISLOWER(c)	vim_islower(c)
+# define MB_ISUPPER(c)	vim_isupper(c)
+# define MB_TOLOWER(c)	vim_tolower(c)
+# define MB_TOUPPER(c)	vim_toupper(c)
 #else
 # define MB_ISLOWER(c)	islower(c)
 # define MB_ISUPPER(c)	isupper(c)
diff --git a/src/mbyte.c b/src/mbyte.c
index 5fb45568b5..9f2df3ad40 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -584,6 +584,10 @@ codepage_invalid:
     enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0);
 #endif
 
+    /* Detect an encoding that uses latin1 characters. */
+    enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0
+					|| STRCMP(p_enc, "iso-8859-15") == 0);
+
     /*
      * Set the function pointers.
      */
diff --git a/src/os_macosx.c b/src/os_macosx.c
index d361958ade..8e9d55d38a 100644
--- a/src/os_macosx.c
+++ b/src/os_macosx.c
@@ -85,6 +85,7 @@ get_textencoding(const char* encodename)
 	{"iso-8859-8", kTextEncodingISOLatinHebrew},
 	{"iso-8859-9", kTextEncodingISOLatin5},
 	{"iso-8859-10", kTextEncodingISOLatin6},
+	{"iso-8859-15", kTextEncodingISOLatin9},
 
 	/* Unicode encodings. */
 	/* TODO: Add other type of unicode */
diff --git a/src/po/Makefile b/src/po/Makefile
index f4281878b8..a06488870b 100644
--- a/src/po/Makefile
+++ b/src/po/Makefile
@@ -26,8 +26,7 @@ LANGUAGES = \
 		zh_CN \
 		zh_CN.UTF-8 \
 		zh_TW \
-		zh_TW.UTF-8 \
-# end marker
+		zh_TW.UTF-8
 
 MOFILES = \
 		af.mo \
@@ -51,8 +50,7 @@ MOFILES = \
 		zh_CN.UTF-8.mo \
 		zh_CN.mo \
 		zh_TW.UTF-8.mo \
-		zh_TW.mo \
-# end marker
+		zh_TW.mo
 
 CONVERTED = \
 		cs.cp1250.mo \
@@ -61,8 +59,7 @@ CONVERTED = \
 		ru.cp1251.mo \
 		sk.cp1250.mo \
 		uk.cp1251.mo \
-		zh_CN.cp936.mo \
-# end marker
+		zh_CN.cp936.mo
 
 CHECKFILES = \
 		af.ck \
@@ -93,8 +90,7 @@ CHECKFILES = \
 		ru.cp1251.ck \
 		sk.cp1250.ck \
 		uk.cp1251.ck \
-		zh_CN.cp936.ck \
-# end marker
+		zh_CN.cp936.ck
 
 PACKAGE = vim
 SHELL = /bin/sh
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index 437e885311..4b6890d18b 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -38,6 +38,10 @@ char_u *skiptodigit __ARGS((char_u *p));
 char_u *skiptohex __ARGS((char_u *p));
 int vim_isdigit __ARGS((int c));
 int vim_isxdigit __ARGS((int c));
+int vim_islower __ARGS((int c));
+int vim_isupper __ARGS((int c));
+int vim_toupper __ARGS((int c));
+int vim_tolower __ARGS((int c));
 char_u *skiptowhite __ARGS((char_u *p));
 char_u *skiptowhite_esc __ARGS((char_u *p));
 long getdigits __ARGS((char_u **pp));
diff --git a/src/regexp.c b/src/regexp.c
index f8b86168d1..e212cc0dc7 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -742,7 +742,7 @@ reg_equi_class(c)
 {
 #ifdef FEAT_MBYTE
     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
-					      || STRCMP(p_enc, "latin9") == 0)
+					 || STRCMP(p_enc, "iso-8859-15") == 0)
 #endif
     {
 	switch (c)
diff --git a/src/spell.c b/src/spell.c
index dd0fe834f8..128373a7fe 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -162,6 +162,8 @@
  * <compflags>   N bytes    Flags from COMPOUNDFLAGS items, separated by
  *			    slashes.
  *
+ * sectionID == SN_NOBREAK: (empty, its presence is enough)
+ *
  * sectionID == SN_SYLLABLE: <syllable>
  * <syllable>    N bytes    String from SYLLABLE item.
  *
@@ -378,6 +380,7 @@ struct slang_S
 				 * (NULL when no compounding) */
     char_u	*sl_compstartflags; /* flags for first compound word */
     char_u	*sl_compallflags; /* all flags for compound words */
+    char_u	sl_nobreak;	/* When TRUE: no spaces between words */
     char_u	*sl_syllable;	/* SYLLABLE repeatable chars or NULL */
     garray_T	sl_syl_items;	/* syllable items */
 
@@ -442,6 +445,7 @@ typedef struct langp_S
 #define SN_MAP		7	/* MAP items section */
 #define SN_COMPOUND	8	/* compound words section */
 #define SN_SYLLABLE	9	/* syllable section */
+#define SN_NOBREAK	10	/* NOBREAK section */
 #define SN_END		255	/* end of sections */
 
 #define SNF_REQUIRED	1	/* <sectionflags>: required section */
@@ -560,6 +564,10 @@ typedef struct matchinf_S
     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
     buf_T	*mi_buf;		/* buffer being checked */
+
+    /* for NOBREAK */
+    int		mi_result2;		/* "mi_resul" without following word */
+    char_u	*mi_end2;		/* "mi_end" without following word */
 } matchinf_T;
 
 /*
@@ -638,6 +646,7 @@ typedef struct trystate_S
 #endif
     char_u	ts_prewordlen;	/* length of word in "preword[]" */
     char_u	ts_splitoff;	/* index in "tword" after last split */
+    char_u	ts_splitfidx;	/* "ts_fidx" at word split */
     char_u	ts_complen;	/* nr of compound words used */
     char_u	ts_compsplit;	/* index for "compflags" where word was spit */
     char_u	ts_save_badflags;   /* su_badflags saved here */
@@ -887,6 +896,7 @@ spell_check(wp, ptr, attrp, capcol)
 
     /* The word is bad unless we recognize it. */
     mi.mi_result = SP_BAD;
+    mi.mi_result2 = SP_BAD;
 
     /*
      * Loop over the languages specified in 'spelllang'.
@@ -904,6 +914,15 @@ spell_check(wp, ptr, attrp, capcol)
 
 	/* Check for matching prefixes. */
 	find_prefix(&mi, FIND_FOLDWORD);
+
+	/* For a NOBREAK language, may want to use a word without a following
+	 * word as a backup. */
+	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
+						   && mi.mi_result2 != SP_BAD)
+	{
+	    mi.mi_result = mi.mi_result2;
+	    mi.mi_end = mi.mi_end2;
+	}
     }
 
     if (mi.mi_result != SP_OK)
@@ -941,6 +960,33 @@ spell_check(wp, ptr, attrp, capcol)
 	    /* Always include at least one character.  Required for when there
 	     * is a mixup in "midword". */
 	    mb_ptr_adv(mi.mi_end);
+	else if (mi.mi_result == SP_BAD
+		&& LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
+	{
+	    char_u	*p, *fp;
+	    int		save_result = mi.mi_result;
+
+	    /* First language in 'spelllang' is NOBREAK.  Find first position
+	     * at which any word would be valid. */
+	    mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
+	    p = mi.mi_word;
+	    fp = mi.mi_fword;
+	    for (;;)
+	    {
+		mb_ptr_adv(p);
+		mb_ptr_adv(fp);
+		if (p >= mi.mi_end)
+		    break;
+		mi.mi_compoff = fp - mi.mi_fword;
+		find_word(&mi, FIND_COMPOUND);
+		if (mi.mi_result != SP_BAD)
+		{
+		    mi.mi_end = p;
+		    break;
+		}
+	    }
+	    mi.mi_result = save_result;
+	}
 
 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
 	    *attrp = highlight_attr[HLF_SPB];
@@ -995,6 +1041,7 @@ find_word(mip, mode)
     idx_T	*idxs;
     int		word_ends;
     int		prefix_found;
+    int		nobreak_result;
 
     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
     {
@@ -1137,7 +1184,7 @@ find_word(mip, mode)
 #endif
 	if (spell_iswordp(ptr + wlen, mip->mi_buf))
 	{
-	    if (slang->sl_compprog == NULL)
+	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
 		continue;	    /* next char is a word character */
 	    word_ends = FALSE;
 	}
@@ -1210,8 +1257,20 @@ find_word(mip, mode)
 		prefix_found = TRUE;
 	    }
 
-	    if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
-								|| !word_ends)
+	    if (slang->sl_nobreak)
+	    {
+		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
+			&& (flags & WF_BANNED) == 0)
+		{
+		    /* NOBREAK: found a valid following word.  That's all we
+		     * need to know, so return. */
+		    mip->mi_result = SP_OK;
+		    break;
+		}
+	    }
+
+	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
+								|| !word_ends))
 	    {
 		/* If there is no  flag or the word is shorter than
 		 * COMPOUNDMIN reject it quickly.
@@ -1295,11 +1354,20 @@ find_word(mip, mode)
 		}
 	    }
 
+	    nobreak_result = SP_OK;
+
 	    if (!word_ends)
 	    {
-		/* Check that a valid word follows.  If there is one, it will
-		 * set "mi_result", thus we are always finished here.
+		int	save_result = mip->mi_result;
+		char_u	*save_end = mip->mi_end;
+
+		/* Check that a valid word follows.  If there is one and we
+		 * are compounding, it will set "mi_result", thus we are
+		 * always finished here.  For NOBREAK we only check that a
+		 * valid word follows.
 		 * Recursive! */
+		if (slang->sl_nobreak)
+		    mip->mi_result = SP_BAD;
 
 		/* Find following word in case-folded tree. */
 		mip->mi_compoff = endlen[endidxcnt];
@@ -1323,18 +1391,36 @@ find_word(mip, mode)
 		++mip->mi_complen;
 		find_word(mip, FIND_COMPOUND);
 
-		/* Find following word in keep-case tree. */
-		mip->mi_compoff = wlen;
-		find_word(mip, FIND_KEEPCOMPOUND);
+		/* When NOBREAK any word that matches is OK.  Otherwise we
+		 * need to find the longest match, thus try with keep-case and
+		 * prefix too. */
+		if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
+		{
+		    /* Find following word in keep-case tree. */
+		    mip->mi_compoff = wlen;
+		    find_word(mip, FIND_KEEPCOMPOUND);
 
-		/* Check for following word with prefix. */
-		mip->mi_compoff = c;
-		find_prefix(mip, FIND_COMPOUND);
+		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
+		    {
+			/* Check for following word with prefix. */
+			mip->mi_compoff = c;
+			find_prefix(mip, FIND_COMPOUND);
+		    }
+		}
 		--mip->mi_complen;
 
-		if (mip->mi_result == SP_OK)
-		    break;
-		continue;
+		if (slang->sl_nobreak)
+		{
+		    nobreak_result = mip->mi_result;
+		    mip->mi_result = save_result;
+		    mip->mi_end = save_end;
+		}
+		else
+		{
+		    if (mip->mi_result == SP_OK)
+			break;
+		    continue;
+		}
 	    }
 
 	    if (flags & WF_BANNED)
@@ -1352,8 +1438,21 @@ find_word(mip, mode)
 	    else
 		res = SP_OK;
 
-	    /* Always use the longest match and the best result. */
-	    if (mip->mi_result > res)
+	    /* Always use the longest match and the best result.  For NOBREAK
+	     * we separately keep the longest match without a following good
+	     * word as a fall-back. */
+	    if (nobreak_result == SP_BAD)
+	    {
+		if (mip->mi_result2 > res)
+		{
+		    mip->mi_result2 = res;
+		    mip->mi_end2 = mip->mi_word + wlen;
+		}
+		else if (mip->mi_result2 == res
+					&& mip->mi_end2 < mip->mi_word + wlen)
+		    mip->mi_end2 = mip->mi_word + wlen;
+	    }
+	    else if (mip->mi_result > res)
 	    {
 		mip->mi_result = res;
 		mip->mi_end = mip->mi_word + wlen;
@@ -1361,11 +1460,11 @@ find_word(mip, mode)
 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
 		mip->mi_end = mip->mi_word + wlen;
 
-	    if (res == SP_OK)
+	    if (mip->mi_result == SP_OK)
 		break;
 	}
 
-	if (res == SP_OK)
+	if (mip->mi_result == SP_OK)
 	    break;
     }
 }
@@ -2224,6 +2323,10 @@ spell_load_file(fname, lang, old_lp, silent)
 		res = read_compound(fd, lp, len);
 		break;
 
+	    case SN_NOBREAK:
+		lp->sl_nobreak = TRUE;
+		break;
+
 	    case SN_SYLLABLE:
 		lp->sl_syllable = read_string(fd, len);	/* <syllable> */
 		if (lp->sl_syllable == NULL)
@@ -3951,6 +4054,7 @@ typedef struct spellinfo_S
     int		si_compminlen;	/* minimal length for compounding */
     int		si_compsylmax;	/* max nr of syllables for compounding */
     char_u	*si_compflags;	/* flags used for compounding */
+    char_u	si_nobreak;	/* NOBREAK */
     char_u	*si_syllable;	/* syllable string */
     garray_T	si_prefcond;	/* table with conditions for postponed
 				 * prefixes, each stored as a string */
@@ -4362,6 +4466,10 @@ spell_read_aff(spin, fname)
 	    {
 		aff->af_syllable = getroom_save(spin, items[1]);
 	    }
+	    else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
+	    {
+		spin->si_nobreak = TRUE;
+	    }
 	    else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
 	    {
 		aff->af_pfxpostpone = TRUE;
@@ -6441,6 +6549,16 @@ write_vim_spell(spin, fname)
 	fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd);
     }
 
+    /* SN_NOBREAK: NOBREAK flag */
+    if (spin->si_nobreak)
+    {
+	putc(SN_NOBREAK, fd);				/* <sectionID> */
+	putc(0, fd);					/* <sectionflags> */
+
+	/* It's empty, the precense of the section flags the feature. */
+	put_bytes(fd, (long_u)0, 4);			/* <sectionlen> */
+    }
+
     /* SN_SYLLABLE: syllable info.
      * We don't mark it required, when not supported syllables will not be
      * counted. */
@@ -6861,6 +6979,9 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
 #endif
 	}
 
+	if (spin.si_compflags != NULL && spin.si_nobreak)
+	    MSG(_("Warning: both compounding and NOBREAK specified"));
+
 	if (!error)
 	{
 	    /*
@@ -8360,7 +8481,20 @@ allcap_copy(word, wcopy)
 	else
 #endif
 	    c = *s++;
-	c = SPELL_TOUPPER(c);
+
+#ifdef FEAT_MBYTE
+	/* We only change ß to SS when we are certain latin1 is used.  It
+	 * would cause weird errors in other 8-bit encodings. */
+	if (enc_latin1like && c == 0xdf)
+	{
+	    c = 'S';
+	    if (d - wcopy >= MAXWLEN - 1)
+		break;
+	    *d++ = c;
+	}
+	else
+#endif
+	    c = SPELL_TOUPPER(c);
 
 #ifdef FEAT_MBYTE
 	if (has_mbyte)
@@ -8629,29 +8763,52 @@ suggest_try_change(su)
 
 		if (sp->ts_complen > sp->ts_compsplit)
 		{
-		    /* There was a compound word before this word.  If this
-		     * word does not support compounding then give up
-		     * (splitting is tried for the word without compound
-		     * flag). */
-		    if (((unsigned)flags >> 24) == 0
-			    || sp->ts_twordlen - sp->ts_splitoff
+		    if (slang->sl_nobreak)
+		    {
+			/* There was a word before this word.  When there was
+			 * no change in this word (it was correct) add the
+			 * first word as a suggestion.  If this word was
+			 * corrected too, we need to check if a correct word
+			 * follows. */
+			if (sp->ts_fidx - sp->ts_splitfidx
+					  == sp->ts_twordlen - sp->ts_splitoff
+				&& STRNCMP(fword + sp->ts_splitfidx,
+					    tword + sp->ts_splitoff,
+					 sp->ts_fidx - sp->ts_splitfidx) == 0)
+			{
+			    preword[sp->ts_prewordlen] = NUL;
+			    add_suggestion(su, &su->su_ga, preword,
+					sp->ts_splitfidx - repextra,
+						      sp->ts_score, 0, FALSE);
+			    break;
+			}
+		    }
+		    else
+		    {
+			/* There was a compound word before this word.  If
+			 * this word does not support compounding then give up
+			 * (splitting is tried for the word without compound
+			 * flag). */
+			if (((unsigned)flags >> 24) == 0
+				|| sp->ts_twordlen - sp->ts_splitoff
 						       < slang->sl_compminlen)
-			break;
-		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
-		    compflags[sp->ts_complen + 1] = NUL;
-		    vim_strncpy(preword + sp->ts_prewordlen,
-			    tword + sp->ts_splitoff,
-			    sp->ts_twordlen - sp->ts_splitoff);
-		    p = preword;
-		    while (*skiptowhite(p) != NUL)
-			p = skipwhite(skiptowhite(p));
-		    if (fword_ends && !can_compound(slang, p,
+			    break;
+			compflags[sp->ts_complen] = ((unsigned)flags >> 24);
+			compflags[sp->ts_complen + 1] = NUL;
+			vim_strncpy(preword + sp->ts_prewordlen,
+				tword + sp->ts_splitoff,
+				sp->ts_twordlen - sp->ts_splitoff);
+			p = preword;
+			while (*skiptowhite(p) != NUL)
+			    p = skipwhite(skiptowhite(p));
+			if (fword_ends && !can_compound(slang, p,
 						compflags + sp->ts_compsplit))
-			break;
+			    break;
 
-		    /* Get pointer to last char of previous word. */
-		    p = preword + sp->ts_prewordlen;
-		    mb_ptr_back(preword, p);
+			/* Get pointer to last char of previous word. */
+			p = preword + sp->ts_prewordlen;
+			mb_ptr_back(preword, p);
+		    }
 		}
 		else
 		    p = NULL;
@@ -8753,6 +8910,8 @@ suggest_try_change(su)
 		     *    If the word allows compounding try that.  Otherwise
 		     *    try a split by inserting a space.  For both check
 		     *    that a valid words starts at fword[sp->ts_fidx].
+		     *    For NOBREAK do like compounding to be able to check
+		     *    if the next word is valid.
 		     * 2. The badword does end, but it was due to a change
 		     *    (e.g., a swap).  No need to split, but do check that
 		     *    the following word is valid.
@@ -8775,10 +8934,15 @@ suggest_try_change(su)
 			compflags[sp->ts_complen + 1] = NUL;
 		    }
 
+		    /* For NOBREAK we never try splitting, it won't make any
+		     * word valid. */
+		    if (slang->sl_nobreak)
+			try_compound = TRUE;
+
 		    /* If we could add a compound word, and it's also possible
 		     * to split at this point, do the split first and set
 		     * TSF_DIDSPLIT to avoid doing it again. */
-		    if (!fword_ends
+		    else if (!fword_ends
 			    && try_compound
 			    && (sp->ts_flags & TSF_DIDSPLIT) == 0)
 		    {
@@ -8818,6 +8982,7 @@ suggest_try_change(su)
 			    STRCAT(preword, " ");
 			sp->ts_prewordlen = STRLEN(preword);
 			sp->ts_splitoff = sp->ts_twordlen;
+			sp->ts_splitfidx = sp->ts_fidx;
 
 			/* If the badword has a non-word character at this
 			 * position skip it.  That means replacing the
author	Bram Moolenaar <Bram@vim.org>	2005-08-23 21:00:13 +0000
committer	Bram Moolenaar <Bram@vim.org>	2005-08-23 21:00:13 +0000
commit	7862282f2edc76533c5c0dcf49b57bf747a30ebc (patch)
tree	8feddb7d932bf1d473752be7460746d706935df1
parent	a6c840d7d4d51af2a15f64db5e5b908457b572cf (diff)