summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Christian Brabandt <cb@256bit.org> 2023-08-20 22:26:15 +0200
committer: Christian Brabandt <cb@256bit.org> 2023-08-20 22:28:28 +0200
commit: be07caa071ea93c07b1b2204a17237133f38b2bd (patch)
tree: 034bd9653476bcaaf1e9a22ac90c86a9f0465c39
parent: 6d626c41842e2c3ab698338bbe5fcfcf0557ecd8 (diff)
patch 9.0.1777: patch 9.0.1771 causes problems (tag: v9.0.1777)
Problem: patch 9.0.1771 causes problems
Solution: revert it

Revert "patch 9.0.1771: regex: combining chars in collections not handled"

This reverts commit ca22fc36a4e8a315f199893ee8ff6253573f5fbe.

Signed-off-by: Christian Brabandt <cb@256bit.org>
-rw-r--r-- src/regexp_bt.c 39
-rw-r--r-- src/regexp_nfa.c 104
-rw-r--r-- src/testdir/test_regexp_utf8.vim 11
-rw-r--r-- src/version.c 2
4 files changed, 10 insertions, 146 deletions
diff --git a/src/regexp_bt.c b/src/regexp_bt.c
index 198946e0dc..522cf37e2d 100644
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -3743,38 +3743,13 @@ regmatch(
case ANYOF:
case ANYBUT:
- {
- char_u *q = OPERAND(scan);
-
- if (c == NUL)
- status = RA_NOMATCH;
- else if ((cstrchr(q, c) == NULL) == (op == ANYOF))
- status = RA_NOMATCH;
- else
- {
- // Check following combining characters
- int len = 0;
- int i;
-
- if (enc_utf8)
- len = utfc_ptr2len(q) - utf_ptr2len(q);
-
- MB_CPTR_ADV(rex.input);
- MB_CPTR_ADV(q);
-
- if (!enc_utf8 || len == 0)
- break;
-
- for (i = 0; i < len; ++i)
- if (q[i] != rex.input[i])
- {
- status = RA_NOMATCH;
- break;
- }
- rex.input += len;
- }
- break;
- }
+ if (c == NUL)
+ status = RA_NOMATCH;
+ else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
+ status = RA_NOMATCH;
+ else
+ ADVANCE_REGINPUT();
+ break;
case MULTIBYTECODE:
if (has_mbyte)
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 60cd29cf53..d724d527b6 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1764,7 +1764,6 @@ collection:
endp = skip_anyof(p);
if (*endp == ']')
{
- int plen;
/*
* Try to reverse engineer character classes. For example,
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -2036,34 +2035,11 @@ collection:
if (got_coll_char == TRUE && startc == 0)
EMIT(0x0a);
else
- {
EMIT(startc);
- if (!(enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))))
- {
- EMIT(NFA_CONCAT);
- }
- }
- }
- }
-
- if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
- {
- int i = utf_ptr2len(regparse);
-
- c = utf_ptr2char(regparse + i);
-
- // Add composing characters
- for (;;)
- {
- EMIT(c);
EMIT(NFA_CONCAT);
- if ((i += utf_char2len(c)) >= plen)
- break;
- c = utf_ptr2char(regparse + i);
}
- EMIT(NFA_COMPOSING);
- EMIT(NFA_CONCAT);
}
+
MB_PTR_ADV(regparse);
} // while (p < endp)
@@ -6442,84 +6418,6 @@ nfa_regmatch(
result_if_matched = (t->state->c == NFA_START_COLL);
for (;;)
{
- if (state->c == NFA_COMPOSING)
- {
- int mc = curc;
- int len = 0;
- nfa_state_T *end;
- nfa_state_T *sta;
- int cchars[MAX_MCO];
- int ccount = 0;
- int j;
-
- sta = t->state->out->out;
- len = 0;
- if (utf_iscomposing(sta->c))
- {
- // Only match composing character(s), ignore base
- // character. Used for ".{composing}" and "{composing}"
- // (no preceding character).
- len += mb_char2len(mc);
- }
- if (rex.reg_icombine && len == 0)
- {
- // If \Z was present, then ignore composing characters.
- // When ignoring the base character this always matches.
- if (sta->c != curc)
- result = FAIL;
- else
- result = OK;
- while (sta->c != NFA_END_COMPOSING)
- sta = sta->out;
- }
- // Check base character matches first, unless ignored.
- else if (len > 0 || mc == sta->c)
-// if (len > 0 || mc == sta->c)
- {
- if (len == 0)
- {
- len += mb_char2len(mc);
- sta = sta->out;
- }
-
- // We don't care about the order of composing characters.
- // Get them into cchars[] first.
- while (len < clen)
- {
- mc = mb_ptr2char(rex.input + len);
- cchars[ccount++] = mc;
- len += mb_char2len(mc);
- if (ccount == MAX_MCO)
- break;
- }
-
- // Check that each composing char in the pattern matches a
- // composing char in the text. We do not check if all
- // composing chars are matched.
- result = OK;
- while (sta->c != NFA_END_COMPOSING)
- {
- for (j = 0; j < ccount; ++j)
- if (cchars[j] == sta->c)
- break;
- if (j == ccount)
- {
- result = FAIL;
- break;
- }
- sta = sta->out;
- }
- }
- else
- result = FAIL;
-
- if (t->state->out->out1->c == NFA_END_COMPOSING)
- {
- end = t->state->out->out1;
- ADD_STATE_IF_MATCH(end);
- }
- break;
- }
if (state->c == NFA_END_COLL)
{
result = !result_if_matched;
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index 6669dee57e..b591aedbb7 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -575,16 +575,5 @@ func Test_match_too_complicated()
set regexpengine=0
endfunc
-func Test_combining_chars_in_collection()
- new
- for i in range(0,2)
- exe "set re=".i
- put =['ɔ̃', 'ɔ', '̃ ã', 'abcd']
- :%s/[ɔ̃]//
- call assert_equal(['', '', 'ɔ', '̃ ã', 'abcd'], getline(1,'$'))
- %d
- endfor
- bw!
-endfunc
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index 9a6f1622a6..43626ab65c 100644
--- a/src/version.c
+++ b/src/version.c
@@ -700,6 +700,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 1777,
+/**/
1776,
/**/
1775,