patch 8.0.0243: tolower() does not work if the byte count changesv8.0.0243

Problem: When making a character lower case with tolower() changes the byte cound, it is not made lower case. Solution: Add strlow_save(). (Dominique Pelle, closes #1406)
author: Bram Moolenaar <Bram@vim.org> 2017-01-26 22:51:56 +0100
committer: Bram Moolenaar <Bram@vim.org> 2017-01-26 22:51:56 +0100
commit: cc5b22b3bfdc0e9e835cf7871166badda31447bd (patch)
tree: 11e117ab82deca899a492b823dd0b0cb1c0ed37b
parent: 65c836e6004647196ae0bc18e409a9e7b79207c0 (diff)
5 files changed, 216 insertions, 33 deletions
diff --git a/src/evalfunc.c b/src/evalfunc.c
index 9ebd2df41d..99d01c42de 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -12503,39 +12503,8 @@ f_timer_stopall(typval_T *argvars UNUSED, typval_T *rettv UNUSED)
     static void
 f_tolower(typval_T *argvars, typval_T *rettv)
 {
-    char_u	*p;
-
-    p = vim_strsave(get_tv_string(&argvars[0]));
     rettv->v_type = VAR_STRING;
-    rettv->vval.v_string = p;
-
-    if (p != NULL)
-	while (*p != NUL)
-	{
-#ifdef FEAT_MBYTE
-	    int		l;
-
-	    if (enc_utf8)
-	    {
-		int c, lc;
-
-		c = utf_ptr2char(p);
-		lc = utf_tolower(c);
-		l = utf_ptr2len(p);
-		/* TODO: reallocate string when byte count changes. */
-		if (utf_char2len(lc) == l)
-		    utf_char2bytes(lc, p);
-		p += l;
-	    }
-	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-		p += l;		/* skip multi-byte character */
-	    else
-#endif
-	    {
-		*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
-		++p;
-	    }
-	}
+    rettv->vval.v_string = strlow_save(get_tv_string(&argvars[0]));
 }
 
 /*
diff --git a/src/misc2.c b/src/misc2.c
index dd0e694643..9c39d40394 100644
--- a/src/misc2.c
+++ b/src/misc2.c
@@ -1602,7 +1602,10 @@ strup_save(char_u *orig)
 		{
 		    s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
 		    if (s == NULL)
-			break;
+		    {
+			vim_free(res);
+			return NULL;
+		    }
 		    mch_memmove(s, res, p - res);
 		    STRCPY(s + (p - res) + newl, p + l);
 		    p = s + (p - res);
@@ -1625,6 +1628,69 @@ strup_save(char_u *orig)
 
     return res;
 }
+
+/*
+ * Make string "s" all lower-case and return it in allocated memory.
+ * Handles multi-byte characters as well as possible.
+ * Returns NULL when out of memory.
+ */
+    char_u *
+strlow_save(char_u *orig)
+{
+    char_u	*p;
+    char_u	*res;
+
+    res = p = vim_strsave(orig);
+
+    if (res != NULL)
+	while (*p != NUL)
+	{
+# ifdef FEAT_MBYTE
+	    int		l;
+
+	    if (enc_utf8)
+	    {
+		int	c, lc;
+		int	newl;
+		char_u	*s;
+
+		c = utf_ptr2char(p);
+		lc = utf_tolower(c);
+
+		/* Reallocate string when byte count changes.  This is rare,
+		 * thus it's OK to do another malloc()/free(). */
+		l = utf_ptr2len(p);
+		newl = utf_char2len(lc);
+		if (newl != l)
+		{
+		    s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
+		    if (s == NULL)
+		    {
+			vim_free(res);
+			return NULL;
+		    }
+		    mch_memmove(s, res, p - res);
+		    STRCPY(s + (p - res) + newl, p + l);
+		    p = s + (p - res);
+		    vim_free(res);
+		    res = s;
+		}
+
+		utf_char2bytes(lc, p);
+		p += newl;
+	    }
+	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
+		p += l;		/* skip multi-byte character */
+	    else
+# endif
+	    {
+		*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
+		p++;
+	    }
+	}
+
+    return res;
+}
 #endif
 
 /*
diff --git a/src/proto/misc2.pro b/src/proto/misc2.pro
index 144324f6ed..83384c028c 100644
--- a/src/proto/misc2.pro
+++ b/src/proto/misc2.pro
@@ -40,6 +40,7 @@ char_u *vim_strsave_up(char_u *string);
 char_u *vim_strnsave_up(char_u *string, int len);
 void vim_strup(char_u *p);
 char_u *strup_save(char_u *orig);
+char_u *strlow_save(char_u *orig);
 void del_trailing_spaces(char_u *ptr);
 void vim_strncpy(char_u *to, char_u *from, size_t len);
 void vim_strcat(char_u *to, char_u *from, size_t tosize);
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index ec816d1414..869c75fe6b 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -16,3 +16,148 @@ func Test_str2nr()
   call assert_equal(123456789, str2nr('123456789'))
   call assert_equal(-123456789, str2nr('-123456789'))
 endfunc
+
+func Test_tolower()
+  call assert_equal("", tolower(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
+          \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few uppercase diacritics.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("bḃḇ", tolower("BḂḆ"))
+  call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
+  call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("fḟ ", tolower("FḞ "))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
+  call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("jĵ", tolower("JĴ"))
+  call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
+  call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
+  call assert_equal("mḿṁ", tolower("MḾṀ"))
+  call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("pṕṗ", tolower("PṔṖ"))
+  call assert_equal("q", tolower("Q"))
+  call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
+  call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
+  call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("vṽ", tolower("VṼ"))
+  call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
+  call assert_equal("xẋẍ", tolower("XẊẌ"))
+  call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
+  call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
+
+  " Test with a few lowercase diacritics, which should remain unchanged.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("bḃḇ", tolower("bḃḇ"))
+  call assert_equal("cçćĉċč", tolower("cçćĉċč"))
+  call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
+  call assert_equal("fḟ", tolower("fḟ"))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
+  call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
+  call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
+  call assert_equal("jĵǰ", tolower("jĵǰ"))
+  call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
+  call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
+  call assert_equal("mḿṁ ", tolower("mḿṁ "))
+  call assert_equal("nñńņňŉṅṉ", tolower("nñńņňŉṅṉ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("pṕṗ", tolower("pṕṗ"))
+  call assert_equal("q", tolower("q"))
+  call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
+  call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
+  call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("vṽ", tolower("vṽ"))
+  call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
+  call assert_equal("ẋẍ", tolower("ẋẍ"))
+  call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
+
+  " According to https://twitter.com/jifa/status/625776454479970304
+  " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
+  " in length (2 to 3 bytes) when lowercased. So let's test them.
+  call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+func Test_toupper()
+  call assert_equal("", toupper(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
+          \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few lowercase diacritics.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("BḂḆ", toupper("bḃḇ"))
+  call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
+  call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
+  call assert_equal("FḞ", toupper("fḟ"))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
+  call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
+  call assert_equal("JĴǰ", toupper("jĵǰ"))
+  call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
+  call assert_equal("MḾṀ ", toupper("mḿṁ "))
+  call assert_equal("NÑŃŅŇŉṄṈ", toupper("nñńņňŉṅṉ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("PṔṖ", toupper("pṕṗ"))
+  call assert_equal("Q", toupper("q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
+  call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("VṼ", toupper("vṽ"))
+  call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
+  call assert_equal("ẊẌ", toupper("ẋẍ"))
+  call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
+
+  " Test that uppercase diacritics, which should remain unchanged.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("BḂḆ", toupper("BḂḆ"))
+  call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
+  call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("FḞ ", toupper("FḞ "))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("JĴ", toupper("JĴ"))
+  call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
+  call assert_equal("MḾṀ", toupper("MḾṀ"))
+  call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("PṔṖ", toupper("PṔṖ"))
+  call assert_equal("Q", toupper("Q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
+  call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("VṼ", toupper("VṼ"))
+  call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
+  call assert_equal("XẊẌ", toupper("XẊẌ"))
+  call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
+
+  call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+
diff --git a/src/version.c b/src/version.c
index df3ff07e70..d86705f241 100644
--- a/src/version.c
+++ b/src/version.c
@@ -765,6 +765,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    243,
+/**/
     242,
 /**/
     241,
author	Bram Moolenaar <Bram@vim.org>	2017-01-26 22:51:56 +0100
committer	Bram Moolenaar <Bram@vim.org>	2017-01-26 22:51:56 +0100
commit	cc5b22b3bfdc0e9e835cf7871166badda31447bd (patch)
tree	11e117ab82deca899a492b823dd0b0cb1c0ed37b
parent	65c836e6004647196ae0bc18e409a9e7b79207c0 (diff)