summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bram Moolenaar <Bram@vim.org> 2013-05-19 19:40:29 +0200
committer: Bram Moolenaar <Bram@vim.org> 2013-05-19 19:40:29 +0200
commit: fbc0d2ea1e13fb55c267b72d64046e5ef984b97f (patch)
tree: 8c539274e29070536bb28615e32d236983361246
parent: 6fa41fb3746e5ab2f793de713879afc9b1e25647 (diff)
updated for version 7.3.970 (v7.3.970)
Problem: Syntax highlighting can be slow. Solution: Include the NFA regexp engine. Add the 'regexpengine' option to select which one is used. (various authors, including Ken Takata, Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
-rw-r--r-- Filelist | 1
-rw-r--r-- runtime/doc/pattern.txt | 37
-rw-r--r-- runtime/doc/tags | 19
-rw-r--r-- src/Make_cyg.mak | 3
-rw-r--r-- src/Make_ming.mak | 3
-rw-r--r-- src/Make_mvc.mak | 2
-rw-r--r-- src/Makefile | 12
-rw-r--r-- src/option.c | 8
-rw-r--r-- src/option.h | 1
-rw-r--r-- src/regexp.c | 445
-rw-r--r-- src/regexp.h | 78
-rw-r--r-- src/regexp_nfa.c | 3819
-rw-r--r-- src/structs.h | 13
-rw-r--r-- src/testdir/Make_amiga.mak | 3
-rw-r--r-- src/testdir/Make_dos.mak | 2
-rw-r--r-- src/testdir/Make_ming.mak | 2
-rw-r--r-- src/testdir/Make_os2.mak | 2
-rw-r--r-- src/testdir/Make_vms.mms | 5
-rw-r--r-- src/testdir/Makefile | 7
-rw-r--r-- src/testdir/test64.in | 180
-rw-r--r-- src/testdir/test64.ok | 330
-rw-r--r-- src/testdir/test95.in | 63
-rw-r--r-- src/testdir/test95.ok | 6
-rw-r--r-- src/version.c | 2
24 files changed, 4808 insertions, 235 deletions
diff --git a/Filelist b/Filelist
index 6503dbcc31..ee5ad8f34b 100644
--- a/Filelist
+++ b/Filelist
@@ -57,6 +57,7 @@ SRC_ALL = \
src/popupmnu.c \
src/quickfix.c \
src/regexp.c \
+ src/regexp_nfa.c \
src/regexp.h \
src/screen.c \
src/search.c \
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
index 332dbee6d8..5f47889fd1 100644
--- a/runtime/doc/pattern.txt
+++ b/runtime/doc/pattern.txt
@@ -1,4 +1,4 @@
-*pattern.txt* For Vim version 7.3. Last change: 2013 Apr 20
+*pattern.txt* For Vim version 7.3. Last change: 2013 May 17
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -350,6 +350,27 @@ For starters, read chapter 27 of the user manual |usr_27.txt|.
or \z( pattern \) |/\z(|
+ */\%#=* *two-engines*
+Vim includes two regexp engines:
+1. An old, backtracking engine that supports everything.
+2. A new, NFA engine that works much faster on some patterns, but does not
+ support everything.
+
+Vim will automatically select the right engine for you. However, if you run
+into a problem or want to specifically select one engine or the other, you can
+prepend one of the following to the pattern:
+
+ \%#=0 Force automatic selection. Only has an effect when
+ 'regexpengine' has been set to a non-zero value.
+ \%#=1 Force using the old engine.
+ \%#=2 Force using the NFA engine.
+
+You can also use the 'regexpengine' option to change the default.
+
+ *E864* *E868* *E874* *E875* *E876* *E877* *E878*
+If the NFA engine is selected and runs into something that is not implemented,
+the pattern will not match. This is only useful when debugging Vim.
+
==============================================================================
3. Magic */magic*
@@ -396,9 +417,10 @@ pattern.
==============================================================================
4. Overview of pattern items *pattern-overview*
+ *E865* *E866* *E867* *E869*
Overview of multi items. */multi* *E61* *E62*
-More explanation and examples below, follow the links. *E64*
+More explanation and examples below, follow the links. *E64* *E871*
multi ~
'magic' 'nomagic' matches of the preceding atom ~
@@ -508,12 +530,14 @@ Character classes {not in Vi}: */character-classes*
|/\c| \c \c ignore case, do not use the 'ignorecase' option
|/\C| \C \C match case, do not use the 'ignorecase' option
+|/\Z| \Z \Z ignore differences in Unicode "combining characters".
+ Useful when searching voweled Hebrew or Arabic text.
+
|/\m| \m \m 'magic' on for the following chars in the pattern
|/\M| \M \M 'magic' off for the following chars in the pattern
|/\v| \v \v the following chars in the pattern are "very magic"
|/\V| \V \V the following chars in the pattern are "very nomagic"
-|/\Z| \Z \Z ignore differences in Unicode "combining characters".
- Useful when searching voweled Hebrew or Arabic text.
+|/\%#=| \%#=1 \%#=1 select regexp engine |/zero-width|
|/\%d| \%d \%d match specified decimal character (eg \%d123)
|/\%x| \%x \%x match specified hex character (eg \%x2a)
@@ -581,7 +605,7 @@ overview.
\? Just like \=. Cannot be used when searching backwards with the "?"
command. {not in Vi}
- */\{* *E58* *E60* *E554*
+ */\{* *E58* *E60* *E554* *E870*
\{n,m} Matches n to m of the preceding atom, as many as possible
\{n} Matches n of the preceding atom
\{n,} Matches at least n of the preceding atom, as many as possible
@@ -962,7 +986,8 @@ match ASCII characters, as indicated by the range.
~ matches the last given substitute string */~* */\~*
\(\) A pattern enclosed by escaped parentheses. */\(* */\(\)* */\)*
- E.g., "\(^a\)" matches 'a' at the start of a line. *E51* *E54* *E55*
+ E.g., "\(^a\)" matches 'a' at the start of a line.
+ *E51* *E54* *E55* *E872* *E873*
\1 Matches the same string that was matched by */\1* *E65*
the first sub-expression in \( and \). {not in Vi}
diff --git a/runtime/doc/tags b/runtime/doc/tags
index 9af196ae46..2f40a4cb91 100644
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -736,9 +736,11 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME*
'quote motion.txt /*'quote*
'quoteescape' options.txt /*'quoteescape'*
'rdt' options.txt /*'rdt'*
+'re' options.txt /*'re'*
'readonly' options.txt /*'readonly'*
'redraw' vi_diff.txt /*'redraw'*
'redrawtime' options.txt /*'redrawtime'*
+'regexpengine'' options.txt /*'regexpengine''*
'relativenumber' options.txt /*'relativenumber'*
'remap' options.txt /*'remap'*
'report' options.txt /*'report'*
@@ -1389,6 +1391,7 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME*
/\ pattern.txt /*\/\\*
/\$ pattern.txt /*\/\\$*
/\%# pattern.txt /*\/\\%#*
+/\%#= pattern.txt /*\/\\%#=*
/\%$ pattern.txt /*\/\\%$*
/\%'m pattern.txt /*\/\\%'m*
/\%( pattern.txt /*\/\\%(*
@@ -4261,7 +4264,22 @@ E860 eval.txt /*E860*
E861 eval.txt /*E861*
E862 eval.txt /*E862*
E863 if_pyth.txt /*E863*
+E864 pattern.txt /*E864*
+E865 pattern.txt /*E865*
+E866 pattern.txt /*E866*
+E867 pattern.txt /*E867*
+E868 pattern.txt /*E868*
+E869 pattern.txt /*E869*
E87 windows.txt /*E87*
+E870 pattern.txt /*E870*
+E871 pattern.txt /*E871*
+E872 pattern.txt /*E872*
+E873 pattern.txt /*E873*
+E874 pattern.txt /*E874*
+E875 pattern.txt /*E875*
+E876 pattern.txt /*E876*
+E877 pattern.txt /*E877*
+E878 pattern.txt /*E878*
E88 windows.txt /*E88*
E89 message.txt /*E89*
E90 message.txt /*E90*
@@ -8172,6 +8190,7 @@ try-nested eval.txt /*try-nested*
try-nesting eval.txt /*try-nesting*
tutor usr_01.txt /*tutor*
twice if_cscop.txt /*twice*
+two-engines pattern.txt /*two-engines*
type() eval.txt /*type()*
type-mistakes tips.txt /*type-mistakes*
typecorr-settings usr_41.txt /*typecorr-settings*
diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak
index b00ae3b71b..d72282aa86 100644
--- a/src/Make_cyg.mak
+++ b/src/Make_cyg.mak
@@ -672,6 +672,9 @@ endif
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
+$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
+ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
diff --git a/src/Make_ming.mak b/src/Make_ming.mak
index b8d7b200e8..3670e7102e 100644
--- a/src/Make_ming.mak
+++ b/src/Make_ming.mak
@@ -765,6 +765,9 @@ if_perl.c: if_perl.xs typemap
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
+$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
+ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak
index 2a4a3e8c45..acd1346ab9 100644
--- a/src/Make_mvc.mak
+++ b/src/Make_mvc.mak
@@ -1166,7 +1166,7 @@ $(OUTDIR)/popupmnu.obj: $(OUTDIR) popupmnu.c $(INCL)
$(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL)
-$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c $(INCL)
+$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c regexp_nfa.c $(INCL)
$(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL)
diff --git a/src/Makefile b/src/Makefile
index fb821ca8c7..67a53bb0ec 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -454,7 +454,7 @@ CClink = $(CC)
# MULTIBYTE - To edit multi-byte characters.
# Uncomment this when you want to edit a multibyte language.
-# It's automatically enabled with big features or IME support.
+# It's automatically enabled with normal features, GTK or IME support.
# Note: Compile on a machine where setlocale() actually works, otherwise the
# configure tests may fail.
#CONF_OPT_MULTIBYTE = --enable-multibyte
@@ -2664,7 +2664,7 @@ objects/popupmnu.o: popupmnu.c
objects/quickfix.o: quickfix.c
$(CCC) -o $@ quickfix.c
-objects/regexp.o: regexp.c
+objects/regexp.o: regexp.c regexp_nfa.c
$(CCC) -o $@ regexp.c
objects/screen.o: screen.c
@@ -2938,10 +2938,10 @@ objects/quickfix.o: quickfix.c vim.h auto/config.h feature.h os_unix.h \
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
globals.h farsi.h arabic.h
-objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
- ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
- gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
- arabic.h
+objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \
+ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
+ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
+ globals.h farsi.h arabic.h
objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
diff --git a/src/option.c b/src/option.c
index cfe464c5b0..325f061d95 100644
--- a/src/option.c
+++ b/src/option.c
@@ -2077,6 +2077,9 @@ static struct vimoption
(char_u *)NULL, PV_NONE,
#endif
{(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT},
+ {"regexpengine", "re", P_NUM|P_VI_DEF,
+ (char_u *)&p_re, PV_NONE,
+ {(char_u *)0L, (char_u *)0L} SCRIPTID_INIT},
{"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN,
(char_u *)VAR_WIN, PV_RNU,
{(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT},
@@ -8604,6 +8607,11 @@ set_num_option(opt_idx, varp, value, errbuf, errbuflen, opt_flags)
errmsg = e_positive;
p_hi = 0;
}
+ if (p_re < 0 || p_re > 2)
+ {
+ errmsg = e_invarg;
+ p_re = 0;
+ }
if (p_report < 0)
{
errmsg = e_positive;
diff --git a/src/option.h b/src/option.h
index 8b982f580e..b11316faa4 100644
--- a/src/option.h
+++ b/src/option.h
@@ -653,6 +653,7 @@ EXTERN char_u *p_cdpath; /* 'cdpath' */
EXTERN long p_rdt; /* 'redrawtime' */
#endif
EXTERN int p_remap; /* 'remap' */
+EXTERN long p_re; /* 'regexpengine' */
EXTERN long p_report; /* 'report' */
#if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX)
EXTERN long p_pvh; /* 'previewheight' */
diff --git a/src/regexp.c b/src/regexp.c
index e456b5d5f3..a1f71ab979 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -38,9 +38,20 @@
* Named character class support added by Walter Briscoe (1998 Jul 01)
*/
+/* Uncomment the first if you do not want to see debugging logs or files
+ * related to regular expressions, even when compiling with -DDEBUG.
+ * Uncomment the second to get the regexp debugging. */
+/* #undef DEBUG */
+/* #define DEBUG */
+
#include "vim.h"
-#undef DEBUG
+#ifdef DEBUG
+/* show/save debugging data when BT engine is used */
+# define BT_REGEXP_DUMP
+/* save the debugging data to a file instead of displaying it */
+# define BT_REGEXP_LOG
+#endif
/*
* The "internal use only" fields in regexp.h are present to pass info from
@@ -326,9 +337,10 @@ toggle_Magic(x)
/* Used for an error (down from) vim_regcomp(): give the error message, set
* rc_did_emsg and return NULL */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
-#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
-#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
+#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
+#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
+#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
#define MAX_LIMIT (32767L << 16L)
@@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int));
static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
static char_u *cstrchr __ARGS((char_u *, int));
+#ifdef BT_REGEXP_DUMP
+static void regdump __ARGS((char_u *, bt_regprog_T *));
+#endif
#ifdef DEBUG
-static void regdump __ARGS((char_u *, regprog_T *));
static char_u *regprop __ARGS((char_u *));
#endif
+static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
+static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
+static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
+static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
+
#define NOT_MULTI 0
#define MULTI_ONE 1
#define MULTI_MULT 2
@@ -630,7 +649,13 @@ static char_u META_flags[] = {
};
#endif
-static int curchr;
+static int curchr; /* currently parsed character */
+/* Previous character. Note: prevchr is sometimes -1 when we are not at the
+ * start, eg in /[ ^I]^ the pattern was never found even if it existed,
+ * because ^ was taken to be magic -- webb */
+static int prevchr;
+static int prevprevchr; /* previous-previous character */
+static int nextchr; /* used for ungetchr() */
/* arguments for reg() */
#define REG_NOPAREN 0 /* toplevel reg() */
@@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *));
static void regtail __ARGS((char_u *, char_u *));
static void regoptail __ARGS((char_u *, char_u *));
+static regengine_T bt_regengine;
+static regengine_T nfa_regengine;
+
/*
* Return TRUE if compiled regular expression "prog" can match a line break.
*/
@@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = {
/*
* Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8.
+ * NOTE: When changing this function, also change nfa_emit_equi_class()
*/
static void
reg_equi_class(c)
@@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp)
return p;
}
+static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
+
/*
- * vim_regcomp() - compile a regular expression into internal code
+ * bt_regcomp() - compile a regular expression into internal code for the
+ * traditional backtracking matcher.
* Returns the program in allocated space. Returns NULL for an error.
*
* We can't allocate space until we know how big the compiled form will be,
@@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp)
* of the structure of the compiled regexp.
* "re_flags": RE_MAGIC and/or RE_STRING.
*/
- regprog_T *
-vim_regcomp(expr, re_flags)
+ static regprog_T *
+bt_regcomp(expr, re_flags)
char_u *expr;
int re_flags;
{
- regprog_T *r;
+ bt_regprog_T *r;
char_u *scan;
char_u *longest;
int len;
@@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags)
#endif
/* Allocate space. */
- r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
+ r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
if (r == NULL)
return NULL;
@@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags)
r->regmlen = len;
}
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
regdump(expr, r);
#endif
- return r;
+ r->engine = &bt_regengine;
+ return (regprog_T *)r;
}
/*
@@ -1436,7 +1469,7 @@ vim_regcomp_had_eol()
#endif
/*
- * reg - regular expression, i.e. main body or parenthesized thing
+ * Parse regular expression, i.e. main body or parenthesized thing.
*
* Caller must absorb opening parenthesis.
*
@@ -1473,7 +1506,7 @@ reg(paren, flagp)
{
/* Make a MOPEN node. */
if (regnpar >= NSUBEXP)
- EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar;
++regnpar;
ret = regnode(MOPEN + parno);
@@ -1534,14 +1567,14 @@ reg(paren, flagp)
else
#endif
if (paren == REG_NPAREN)
- EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
else
- EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
}
else if (paren == REG_NOPAREN && peekchr() != NUL)
{
if (curchr == Magic(')'))
- EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
else
EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
/* NOTREACHED */
@@ -1556,7 +1589,7 @@ reg(paren, flagp)
}
/*
- * Handle one alternative of an | operator.
+ * Parse one alternative of an | operator.
* Implements the & operator.
*/
static char_u *
@@ -1599,7 +1632,7 @@ regbranch(flagp)
}
/*
- * Handle one alternative of an | or & operator.
+ * Parse one alternative of an | or & operator.
* Implements the concatenation operator.
*/
static char_u *
@@ -1679,7 +1712,7 @@ regconcat(flagp)
}
/*
- * regpiece - something followed by possible [*+=]
+ * Parse something followed by possible [*+=].
*
* Note that the branching code sequences used for = and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as
@@ -1759,7 +1792,7 @@ regpiece(flagp)
}
}
if (lop == END)
- EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
+ EMSG2_RET_NULL(_("E59: invalid character after %s@"),
reg_magic == MAGIC_ALL);
/* Look behind must match with behind_pos. */
if (lop == BEHIND || lop == NOBEHIND)
@@ -1793,7 +1826,7 @@ regpiece(flagp)
else
{
if (num_complex_braces >= 10)
- EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
+ EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
reg_magic == MAGIC_ALL);
reginsert(BRACE_COMPLEX + num_complex_braces, ret);
regoptail(ret, regnode(BACK));
@@ -1820,8 +1853,20 @@ regpiece(flagp)
return ret;
}
+/* When making changes to classchars also change nfa_classcodes. */
+static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
+static int classcodes[] = {
+ ANY, IDENT, SIDENT, KWORD, SKWORD,
+ FNAME, SFNAME, PRINT, SPRINT,
+ WHITE, NWHITE, DIGIT, NDIGIT,
+ HEX, NHEX, OCTAL, NOCTAL,
+ WORD, NWORD, HEAD, NHEAD,
+ ALPHA, NALPHA, LOWER, NLOWER,
+ UPPER, NUPPER
+};
+
/*
- * regatom - the lowest level
+ * Parse the lowest level.
*
* Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and
@@ -1836,15 +1881,6 @@ regatom(flagp)
int cpo_lit; /* 'cpoptions' contains 'l' flag */
int cpo_bsl; /* 'cpoptions' contains '\' flag */
int c;
- static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
- static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
- FNAME, SFNAME, PRINT, SPRINT,
- WHITE, NWHITE, DIGIT, NDIGIT,
- HEX, NHEX, OCTAL, NOCTAL,
- WORD, NWORD, HEAD, NHEAD,
- ALPHA, NALPHA, LOWER, NLOWER,
- UPPER, NUPPER
- };
char_u *p;
int extra = 0;
@@ -2140,7 +2176,7 @@ regatom(flagp)
while ((c = getchr()) != ']')
{
if (c == NUL)
- EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
+ EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
reg_magic == MAGIC_ALL);
br = regnode(BRANCH);
if (ret == NULL)
@@ -2156,7 +2192,7 @@ regatom(flagp)
return NULL;
}
if (ret == NULL)
- EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
+ EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
reg_magic == MAGIC_ALL);
lastbranch = regnode(BRANCH);
br = regnode(NOTHING);
@@ -2200,7 +2236,7 @@ regatom(flagp)
}
if (i < 0)
- EMSG_M_RET_NULL(
+ EMSG2_RET_NULL(
_("E678: Invalid character after %s%%[dxouU]"),
reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE
@@ -2272,7 +2308,7 @@ regatom(flagp)
}
}
- EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
+ EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
reg_magic == MAGIC_ALL);
}
}
@@ -2567,8 +2603,7 @@ collection:
break;
}
else if (reg_strict)
- EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
- reg_magic > MAGIC_OFF);
+ EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
}
/* FALLTHROUGH */
@@ -2659,7 +2694,7 @@ use_multibytecode(c)
#endif
/*
- * emit a node
+ * Emit a node.
* Return pointer to generated code.
*/
static char_u *
@@ -2711,7 +2746,7 @@ regmbc(c)
#endif
/*
- * reginsert - insert an operator in front of already-emitted operand
+ * Insert an operator in front of already-emitted operand.
*
* Means relocating the operand.
*/
@@ -2742,7 +2777,7 @@ reginsert(op, opnd)
}
/*
- * reginsert_limits - insert an operator in front of already-emitted operand.
+ * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer.
*
* Means relocating the operand.
@@ -2794,7 +2829,7 @@ re_put_long(p, val)
}
/*
- * regtail - set the next-pointer at the end of a node chain
+ * Set the next-pointer at the end of a node chain.
*/
static void
regtail(p, val)
@@ -2835,7 +2870,7 @@ regtail(p, val)
}
/*
- * regoptail - regtail on item after a BRANCH; nop if none
+ * Like regtail, on item after a BRANCH; nop if none.
*/
static void
regoptail(p, val)
@@ -2851,22 +2886,15 @@ regoptail(p, val)
}
/*
- * getchr() - get the next character from the pattern. We know about
- * magic and such, so therefore we need a lexical analyzer.
+ * Functions for getting characters from the regexp input.
*/
-/* static int curchr; */
-static int prevprevchr;
-static int prevchr;
-static int nextchr; /* used for ungetchr() */
-/*
- * Note: prevchr is sometimes -1 when we are not at the start,
- * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
- * taken to be magic -- webb
- */
static int at_start; /* True when on the first character */
static int prev_at_start; /* True when on the second character */
+/*
+ * Start parsing at "str".
+ */
static void
initchr(str)
char_u *str;
@@ -2878,6 +2906,9 @@ initchr(str)
prev_at_start = FALSE;
}
+/*
+ * Get the next character without advancing.
+ */
static int
peekchr()
{
@@ -3086,6 +3117,10 @@ skipchr_keepstart()
prevprevchr = prpr;
}
+/*
+ * Get the next character from the pattern. We know about magic and such, so
+ * therefore we need a lexical analyzer.
+ */
static int
getchr()
{
@@ -3340,8 +3375,8 @@ typedef struct regbehind_S
} regbehind_T;
static char_u *reg_getline __ARGS((linenr_T lnum));
-static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
-static long regtry __ARGS((regprog_T *prog, colnr_T col));
+static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
+static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr __ARGS((void));
@@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol;
/*
* Sometimes need to save a copy of a line. Since alloc()/free() is very
* slow, we keep one allocated piece of memory and only re-allocate it when
- * it's too small. It's freed in vim_regexec_both() when finished.
+ * it's too small. It's freed in bt_regexec_both() when finished.
*/
static char_u *reg_tofree = NULL;
static unsigned reg_tofreelen;
@@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
/* TRUE if using multi-line regexp. */
#define REG_MULTI (reg_match == NULL)
+static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
*
* Return TRUE if there is a match, FALSE if not.
*/
- int
-vim_regexec(rmp, line, col)
+ static int
+bt_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+
+static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/
- int
-vim_regexec_nl(rmp, line, col)
+ static int
+bt_regexec_nl(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#endif
+static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+
/*
* Match a regexp against multiple lines.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col)
* Return zero if there is no match. Return number of lines contained in the
* match otherwise.
*/
- long
-vim_regexec_multi(rmp, win, buf, lnum, col, tm)
+ static long
+bt_regexec_multi(rmp, win, buf, lnum, col, tm)
regmmatch_T *rmp;
win_T *win; /* window in which to search or NULL */
buf_T *buf; /* buffer in which to search */
@@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
#endif
ireg_maxcol = rmp->rmm_maxcol;
- r = vim_regexec_both(NULL, col, tm);
+ r = bt_regexec_both(NULL, col, tm);
return r;
}
@@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
* lines ("line" is NULL, use reg_getline()).
*/
static long
-vim_regexec_both(line, col, tm)
+bt_regexec_both(line, col, tm)
char_u *line;
colnr_T col; /* column to start looking for match */
proftime_T *tm UNUSED; /* timeout limit or NULL */
{
- regprog_T *prog;
+ bt_regprog_T *prog;
char_u *s;
long retval = 0L;
@@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm)
if (REG_MULTI)
{
- prog = reg_mmatch->regprog;
+ prog = (bt_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0);
reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos;
}
else
{
- prog = reg_match->regprog;
+ prog = (bt_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp;
reg_endp = reg_match->endp;
}
@@ -3931,7 +3973,7 @@ unref_extmatch(em)
*/
static long
regtry(prog, col)
- regprog_T *prog;
+ bt_regprog_T *prog;
colnr_T col;
{
reginput = regline + col;
@@ -4063,7 +4105,7 @@ regmatch(scan)
#define RA_NOMATCH 5 /* didn't match */
/* Make "regstack" and "backpos" empty. They are allocated and freed in
- * vim_regexec_both() to reduce malloc()/free() calls. */
+ * bt_regexec_both() to reduce malloc()/free() calls. */
regstack.ga_len = 0;
backpos.ga_len = 0;
@@ -4072,14 +4114,14 @@ regmatch(scan)
*/
for (;;)
{
- /* Some patterns my cause a long time to match, even though they are not
+ /* Some patterns may cause a long time to match, even though they are not
* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
fast_breakcheck();
#ifdef DEBUG
if (scan != NULL && regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("(\n");
}
#endif
@@ -4100,7 +4142,7 @@ regmatch(scan)
#ifdef DEBUG
if (regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("...\n");
# ifdef FEAT_SYN_HL
if (re_extmatch_in != NULL)
@@ -4112,7 +4154,7 @@ regmatch(scan)
{
mch_errmsg(" \"");
if (re_extmatch_in->matches[i] != NULL)
- mch_errmsg(re_extmatch_in->matches[i]);
+ mch_errmsg((char *)re_extmatch_in->matches[i]);
mch_errmsg("\"\n");
}
}
@@ -6091,9 +6133,14 @@ regnext(p)
static int
prog_magic_wrong()
{
- if (UCHARAT(REG_MULTI
- ? reg_mmatch->regprog->program
- : reg_match->regprog->program) != REGMAGIC)
+ regprog_T *prog;
+
+ prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
+ if (prog->engine == &nfa_regengine)
+ /* For NFA matcher we don't check the magic */
+ return FALSE;
+
+ if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
{
EMSG(_(e_re_corr));
return TRUE;
@@ -6318,7 +6365,7 @@ re_num_cmp(val, scan)
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
/*