summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bram Moolenaar <Bram@vim.org> 2011-07-15 21:16:59 +0200
committer Bram Moolenaar <Bram@vim.org> 2011-07-15 21:16:59 +0200
commit 35ee452de05a61ad4797166c3d37113a540f17f9 (patch)
tree 01a5f9201e2d248e62fec64100115466ec619c03
parent 01b2a23b95e9d2ca2fee5ad421756f3b1ca53910 (diff)
updated for version 7.3.253 (tag: v7.3.253)
Problem: "echo 'abc' > ''" returns 0 or 1, depending on 'ignorecase'. Checks in mb_strnicmp() for illegal and truncated bytes are wrong. Should not assume that byte length is equal before case folding.
Solution: Add utf_safe_read_char_adv() and utf_strnicmp(). Add a test for this. (Ivan Krasilnikov)
-rw-r--r-- src/mbyte.c 186
-rw-r--r-- src/testdir/Make_amiga.mak 3
-rw-r--r-- src/testdir/Make_dos.mak 2
-rw-r--r-- src/testdir/Make_ming.mak 2
-rw-r--r-- src/testdir/Make_os2.mak 2
-rw-r--r-- src/testdir/Make_vms.mms 5
-rw-r--r-- src/testdir/Makefile 2
-rw-r--r-- src/version.c 2
8 files changed, 158 insertions, 46 deletions
diff --git a/src/mbyte.c b/src/mbyte.c
index 6e0dbf6549..88599264b0 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -132,6 +132,7 @@ static int utf_ptr2cells_len __ARGS((char_u *p, int size));
static int dbcs_char2cells __ARGS((int c));
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
static int dbcs_ptr2char __ARGS((char_u *p));
+static int utf_safe_read_char_adv __ARGS((char_u **s, size_t *n));
/*
* Lookup table to quickly get the length in bytes of a UTF-8 character from
@@ -1701,6 +1702,66 @@ utf_ptr2char(p)
}
/*
+ * Convert a UTF-8 byte sequence to a wide character.
+ * String is assumed to be terminated by NUL or after "n" bytes, whichever
+ * comes first.
+ * The function is safe in the sense that it never accesses memory beyond the
+ * first "n" bytes of "s".
+ *
+ * On success, returns decoded codepoint, advances "s" to the beginning of
+ * next character and decreases "n" accordingly.
+ *
+ * If end of string was reached, returns 0 and, if "n" > 0, advances "s" past
+ * NUL byte.
+ *
+ * If byte sequence is illegal or incomplete, returns -1 and does not advance
+ * "s".
+ */
+ static int
+utf_safe_read_char_adv(s, n)
+ char_u **s;
+ size_t *n;
+{
+ int c, k;
+
+ if (*n == 0) /* end of buffer */
+ return 0;
+
+ k = utf8len_tab_zero[**s];
+
+ if (k == 1)
+ {
+ /* ASCII character or NUL */
+ (*n)--;
+ return *(*s)++;
+ }
+
+ if ((size_t)k <= *n)
+ {
+ /* We have a multibyte sequence and it isn't truncated by buffer
+ * limits so utf_ptr2char() is safe to use. Or the first byte is
+ * illegal (k=0), and it's also safe to use utf_ptr2char(). */
+ c = utf_ptr2char(*s);
+
+ /* On failure, utf_ptr2char() returns the first byte, so here we
+ * check equality with the first byte. The only non-ASCII character
+ * which equals the first byte of its own UTF-8 representation is
+ * U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too.
+ * It's safe even if n=1, else we would have k=2 > n. */
+ if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83))
+ {
+ /* byte sequence was successfully decoded */
+ *s += k;
+ *n -= k;
+ return c;
+ }
+ }
+
+ /* byte sequence is incomplete or illegal */
+ return -1;
+}
+
+/*
* Get character at **pp and advance *pp to the next character.
* Note: composing characters are skipped!
*/
@@ -2667,7 +2728,8 @@ static convertStruct foldCase[] =
{0x10400,0x10427,1,40}
};
-static int utf_convert(int a, convertStruct table[], int tableSize);
+static int utf_convert __ARGS((int a, convertStruct table[], int tableSize));
+static int utf_strnicmp __ARGS((char_u *s1, char_u *s2, size_t n1, size_t n2));
/*
* Generic conversion function for case operations.
@@ -3079,6 +3141,80 @@ utf_isupper(a)
return (utf_tolower(a) != a);
}
+ static int
+utf_strnicmp(s1, s2, n1, n2)
+ char_u *s1, *s2;
+ size_t n1, n2;
+{
+ int c1, c2, cdiff;
+ char_u buffer[6];
+
+ for (;;)
+ {
+ c1 = utf_safe_read_char_adv(&s1, &n1);
+ c2 = utf_safe_read_char_adv(&s2, &n2);
+
+ if (c1 <= 0 || c2 <= 0)
+ break;
+
+ if (c1 == c2)
+ continue;
+
+ cdiff = utf_fold(c1) - utf_fold(c2);
+ if (cdiff != 0)
+ return cdiff;
+ }
+
+ /* some string ended or has an incomplete/illegal character sequence */
+
+ if (c1 == 0 || c2 == 0)
+ {
+ /* some string ended. shorter string is smaller */
+ if (c1 == 0 && c2 == 0)
+ return 0;
+ return c1 == 0 ? -1 : 1;
+ }
+
+ /* Continue with bytewise comparison to produce some result that
+ * would make comparison operations involving this function transitive.
+ *
+ * If only one string had an error, comparison should be made with
+ * folded version of the other string. In this case it is enough
+ * to fold just one character to determine the result of comparison. */
+
+ if (c1 != -1 && c2 == -1)
+ {
+ n1 = utf_char2bytes(utf_fold(c1), buffer);
+ s1 = buffer;
+ }
+ else if (c2 != -1 && c1 == -1)
+ {
+ n2 = utf_char2bytes(utf_fold(c2), buffer);
+ s2 = buffer;
+ }
+
+ while (n1 > 0 && n2 > 0 && *s1 != NUL && *s2 != NUL)
+ {
+ cdiff = (int)(*s1) - (int)(*s2);
+ if (cdiff != 0)
+ return cdiff;
+
+ s1++;
+ s2++;
+ n1--;
+ n2--;
+ }
+
+ if (n1 > 0 && *s1 == NUL)
+ n1 = 0;
+ if (n2 > 0 && *s2 == NUL)
+ n2 = 0;
+
+ if (n1 == 0 && n2 == 0)
+ return 0;
+ return n1 == 0 ? -1 : 1;
+}
+
/*
* Version of strnicmp() that handles multi-byte characters.
* Needed for Big5, Sjift-JIS and UTF-8 encoding. Other DBCS encodings can
@@ -3092,49 +3228,21 @@ mb_strnicmp(s1, s2, nn)
char_u *s1, *s2;
size_t nn;
{
- int i, j, l;
+ int i, l;
int cdiff;
- int incomplete = FALSE;
int n = (int)nn;
- for (i = 0; i < n; i += l)
+ if (enc_utf8)
{
- if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
- return 0;
- if (enc_utf8)
- {
- l = utf_byte2len(s1[i]);
- if (l > n - i)
- {
- l = n - i; /* incomplete character */
- incomplete = TRUE;
- }
- /* Check directly first, it's faster. */
- for (j = 0; j < l; ++j)
- {
- if (s1[i + j] != s2[i + j])
- break;
- if (s1[i + j] == 0)
- /* Both stings have the same bytes but are incomplete or
- * have illegal bytes, accept them as equal. */
- l = j;
- }
- if (j < l)
- {
- /* If one of the two characters is incomplete return -1. */
- if (incomplete || i + utf_byte2len(s2[i]) > n)
- return -1;
- /* Don't case-fold illegal bytes or truncated characters. */
- if (utf_ptr2len(s1 + i) < l || utf_ptr2len(s2 + i) < l)
- return -1;
- cdiff = utf_fold(utf_ptr2char(s1 + i))
- - utf_fold(utf_ptr2char(s2 + i));
- if (cdiff != 0)
- return cdiff;
- }
- }
- else
+ return utf_strnicmp(s1, s2, nn, nn);
+ }
+ else
+ {
+ for (i = 0; i < n; i += l)
{
+ if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
+ return 0;
+
l = (*mb_ptr2len)(s1 + i);
if (l <= 1)
{
diff --git a/src/testdir/Make_amiga.mak b/src/testdir/Make_amiga.mak
index f3035b18fc..55d13e48f3 100644
--- a/src/testdir/Make_amiga.mak
+++ b/src/testdir/Make_amiga.mak
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test66.out test67.out test68.out test69.out test70.out \
test71.out test72.out test73.out test74.out test75.out \
test76.out test77.out test78.out test79.out test80.out \
- test81.out
+ test81.out test82.out
.SUFFIXES: .in .out
@@ -130,3 +130,4 @@ test78.out: test78.in
test79.out: test79.in
test80.out: test80.in
test81.out: test81.in
+test82.out: test82.in
diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak
index 9ece4b5179..ba77d22682 100644
--- a/src/testdir/Make_dos.mak
+++ b/src/testdir/Make_dos.mak
@@ -29,7 +29,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
test42.out test52.out test65.out test66.out test67.out \
test68.out test69.out test71.out test72.out test73.out \
test74.out test75.out test76.out test77.out test78.out \
- test79.out test80.out test81.out
+ test79.out test80.out test81.out test82.out
SCRIPTS32 = test50.out test70.out
diff --git a/src/testdir/Make_ming.mak b/src/testdir/Make_ming.mak
index 2ff20ad1b2..d844c3926f 100644
--- a/src/testdir/Make_ming.mak
+++ b/src/testdir/Make_ming.mak
@@ -49,7 +49,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
test42.out test52.out test65.out test66.out test67.out \
test68.out test69.out test71.out test72.out test73.out \
test74.out test75.out test76.out test77.out test78.out \
- test79.out test80.out test81.out
+ test79.out test80.out test81.out test82.out
SCRIPTS32 = test50.out test70.out
diff --git a/src/testdir/Make_os2.mak b/src/testdir/Make_os2.mak
index 4442a41de0..bd11d7f263 100644
--- a/src/testdir/Make_os2.mak
+++ b/src/testdir/Make_os2.mak
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test66.out test67.out test68.out test69.out test70.out \
test71.out test72.out test73.out test74.out test75.out \
test76.out test77.out test78.out test79.out test80.out \
- test81.out
+ test81.out test82.out
.SUFFIXES: .in .out
diff --git a/src/testdir/Make_vms.mms b/src/testdir/Make_vms.mms
index d4101ed959..fe062c30ab 100644
--- a/src/testdir/Make_vms.mms
+++ b/src/testdir/Make_vms.mms
@@ -4,7 +4,7 @@
# Authors: Zoltan Arpadffy, <arpadffy@polarhome.com>
# Sandor Kopanyi, <sandor.kopanyi@mailbox.hu>
#
-# Last change: 2011 Jun 26
+# Last change: 2011 Jul 15
#
# This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
# Edit the lines in the Configuration section below to select.
@@ -75,7 +75,8 @@ SCRIPT = test1.out test2.out test3.out test4.out test5.out \
test61.out test62.out test63.out test64.out test65.out \
test66.out test67.out test68.out test69.out \
test71.out test72.out test74.out test75.out test76.out \
- test77.out test78.out test79.out test80.out test81.out
+ test77.out test78.out test79.out test80.out test81.out \
+ test82.out
# Known problems:
# Test 30: a problem around mac format - unknown reason
diff --git a/src/testdir/Makefile b/src/testdir/Makefile
index 2eea86e265..c776c15f95 100644
--- a/src/testdir/Makefile
+++ b/src/testdir/Makefile
@@ -26,7 +26,7 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
test64.out test65.out test66.out test67.out test68.out \
test69.out test70.out test71.out test72.out test73.out \
test74.out test75.out test76.out test77.out test78.out \
- test79.out test80.out test81.out
+ test79.out test80.out test81.out test82.out
SCRIPTS_GUI = test16.out
diff --git a/src/version.c b/src/version.c
index f206664f58..94d71aedef 100644
--- a/src/version.c
+++ b/src/version.c
@@ -710,6 +710,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 253,
+/**/
252,
/**/
251,