summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bram Moolenaar <Bram@vim.org> 2021-01-02 17:43:49 +0100
committer: Bram Moolenaar <Bram@vim.org> 2021-01-02 17:43:49 +0100
commit: 66c50c565321d4d49d8d5620912e5e8fe4825644 (patch)
tree: 73c2ddf3479db220eaf11cb9608df66825844612
parent: 9281c6cae4e1cec2c661487d761d407bad7c6ad6 (diff)
patch 8.2.2278: falling back to old regexp engine can break some patterns (tag: v8.2.2278)
Problem: Falling back to old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572)
-rw-r--r-- src/regexp.c 9
-rw-r--r-- src/regexp_nfa.c 19
-rw-r--r-- src/testdir/test_regexp_utf8.vim 46
-rw-r--r-- src/version.c 2
4 files changed, 73 insertions, 3 deletions
diff --git a/src/regexp.c b/src/regexp.c
index f7f04ea876..0fd6de61ec 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -294,6 +294,7 @@ init_class_tab(void)
static char_u *regparse; // Input-scan pointer.
static int regnpar; // () count.
+static int wants_nfa; // regex should use NFA engine
#ifdef FEAT_SYN_HL
static int regnzpar; // \z() count.
static int re_has_z; // \z item detected
@@ -381,6 +382,9 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n);
static char_u *cstrchr(char_u *, int);
static int re_mult_next(char *what);
static int reg_iswordc(int);
+#ifdef FEAT_EVAL
+static void report_re_switch(char_u *pat);
+#endif
static regengine_T bt_regengine;
static regengine_T nfa_regengine;
@@ -2662,7 +2666,7 @@ vim_regcomp(char_u *expr_arg, int re_flags)
if (prog == NULL)
{
#ifdef BT_REGEXP_DEBUG_LOG
- if (regexp_engine != BACKTRACKING_ENGINE) // debugging log for NFA
+ if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
{
FILE *f;
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
@@ -2686,6 +2690,9 @@ vim_regcomp(char_u *expr_arg, int re_flags)
&& called_emsg == called_emsg_before)
{
regexp_engine = BACKTRACKING_ENGINE;
+#ifdef FEAT_EVAL
+ report_re_switch(expr);
+#endif
prog = bt_regengine.regcomp(expr, re_flags);
}
}
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index ad47142d6a..064d90a033 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags passed to nfa_regcomp()
static int *post_start; // holds the postfix form of r.e.
static int *post_end;
static int *post_ptr;
+
+// Set when the pattern should use the NFA engine.
+// E.g. [[:upper:]] only allows 8bit characters for BT engine,
+// while NFA engine handles multibyte characters correctly.
+static int wants_nfa;
+
static int nstate; // Number of states in the NFA.
static int istate; // Index in the state vector, used in alloc_state()
@@ -306,6 +312,7 @@ nfa_regcomp_start(
return FAIL;
post_ptr = post_start;
post_end = post_start + nstate_max;
+ wants_nfa = FALSE;
rex.nfa_has_zend = FALSE;
rex.nfa_has_backref = FALSE;
@@ -1707,6 +1714,7 @@ collection:
EMIT(NFA_CLASS_GRAPH);
break;
case CLASS_LOWER:
+ wants_nfa = TRUE;
EMIT(NFA_CLASS_LOWER);
break;
case CLASS_PRINT:
@@ -1719,6 +1727,7 @@ collection:
EMIT(NFA_CLASS_SPACE);
break;
case CLASS_UPPER:
+ wants_nfa = TRUE;
EMIT(NFA_CLASS_UPPER);
break;
case CLASS_XDIGIT:
@@ -2137,9 +2146,15 @@ nfa_regpiece(void)
// The engine is very inefficient (uses too many states) when the
// maximum is much larger than the minimum and when the maximum is
- // large. Bail out if we can use the other engine.
+ // large. However, when maxval is MAX_LIMIT, it is okay, as this
+ // will emit NFA_STAR.
+ // Bail out if we can use the other engine, but only when the
+ // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
+ // does not work with characters > 8 bit with the BT engine).
if ((nfa_re_flags & RE_AUTO)
- && (maxval > 500 || maxval > minval + 200))
+ && (maxval > 500 || maxval > minval + 200)
+ && (maxval != MAX_LIMIT && minval < 200)
+ && !wants_nfa)
return FAIL;
// Ignore previous call to nfa_regatom()
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index 78702407c4..a7d1020b7d 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -510,6 +510,52 @@ func Test_match_start_of_line_combining()
bwipe!
endfunc
+" Check that [[:upper:]] matches for automatic engine
+func Test_match_char_class_upper()
+ new
+ let _engine=&regexpengine
+
+ " Test 1: [[:upper:]]\{2,\}
+ set regexpengine=0
+ call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
+ call cursor(1,1)
+ let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 1')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 1')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 1')
+
+ " Test 2: [[:upper:]].\+
+ let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
+ set regexpengine=0
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 2')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(1, searchcount().total, 'TEST 2')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 2')
+
+ " Test 3: [[:lower:]]\+
+ let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
+ set regexpengine=0
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 3 lower')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 3 lower')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 3 lower')
+ " clean up
+ let &regexpengine=_engine
+ bwipe!
+endfunc
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index 7d720f7ece..af74bcbbad 100644
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 2278,
+/**/
2277,
/**/
2276,