summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2013-05-19 19:40:29 +0200
committerBram Moolenaar <Bram@vim.org>2013-05-19 19:40:29 +0200
commitfbc0d2ea1e13fb55c267b72d64046e5ef984b97f (patch)
tree8c539274e29070536bb28615e32d236983361246 /src
parent6fa41fb3746e5ab2f793de713879afc9b1e25647 (diff)
updated for version 7.3.970v7.3.970
Problem: Syntax highlighting can be slow. Solution: Include the NFA regexp engine. Add the 'regexpengine' option to select which one is used. (various authors, including Ken Takata, Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
Diffstat (limited to 'src')
-rw-r--r--src/Make_cyg.mak3
-rw-r--r--src/Make_ming.mak3
-rw-r--r--src/Make_mvc.mak2
-rw-r--r--src/Makefile12
-rw-r--r--src/option.c8
-rw-r--r--src/option.h1
-rw-r--r--src/regexp.c445
-rw-r--r--src/regexp.h78
-rw-r--r--src/regexp_nfa.c3819
-rw-r--r--src/structs.h13
-rw-r--r--src/testdir/Make_amiga.mak3
-rw-r--r--src/testdir/Make_dos.mak2
-rw-r--r--src/testdir/Make_ming.mak2
-rw-r--r--src/testdir/Make_os2.mak2
-rw-r--r--src/testdir/Make_vms.mms5
-rw-r--r--src/testdir/Makefile7
-rw-r--r--src/testdir/test64.in180
-rw-r--r--src/testdir/test64.ok330
-rw-r--r--src/testdir/test95.in63
-rw-r--r--src/testdir/test95.ok6
-rw-r--r--src/version.c2
21 files changed, 4757 insertions, 229 deletions
diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak
index b00ae3b71b..d72282aa86 100644
--- a/src/Make_cyg.mak
+++ b/src/Make_cyg.mak
@@ -672,6 +672,9 @@ endif
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
+$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
+ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
diff --git a/src/Make_ming.mak b/src/Make_ming.mak
index b8d7b200e8..3670e7102e 100644
--- a/src/Make_ming.mak
+++ b/src/Make_ming.mak
@@ -765,6 +765,9 @@ if_perl.c: if_perl.xs typemap
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
+$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
+ $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak
index 2a4a3e8c45..acd1346ab9 100644
--- a/src/Make_mvc.mak
+++ b/src/Make_mvc.mak
@@ -1166,7 +1166,7 @@ $(OUTDIR)/popupmnu.obj: $(OUTDIR) popupmnu.c $(INCL)
$(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL)
-$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c $(INCL)
+$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c regexp_nfa.c $(INCL)
$(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL)
diff --git a/src/Makefile b/src/Makefile
index fb821ca8c7..67a53bb0ec 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -454,7 +454,7 @@ CClink = $(CC)
# MULTIBYTE - To edit multi-byte characters.
# Uncomment this when you want to edit a multibyte language.
-# It's automatically enabled with big features or IME support.
+# It's automatically enabled with normal features, GTK or IME support.
# Note: Compile on a machine where setlocale() actually works, otherwise the
# configure tests may fail.
#CONF_OPT_MULTIBYTE = --enable-multibyte
@@ -2664,7 +2664,7 @@ objects/popupmnu.o: popupmnu.c
objects/quickfix.o: quickfix.c
$(CCC) -o $@ quickfix.c
-objects/regexp.o: regexp.c
+objects/regexp.o: regexp.c regexp_nfa.c
$(CCC) -o $@ regexp.c
objects/screen.o: screen.c
@@ -2938,10 +2938,10 @@ objects/quickfix.o: quickfix.c vim.h auto/config.h feature.h os_unix.h \
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
globals.h farsi.h arabic.h
-objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
- ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
- gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
- arabic.h
+objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \
+ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
+ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
+ globals.h farsi.h arabic.h
objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
diff --git a/src/option.c b/src/option.c
index cfe464c5b0..325f061d95 100644
--- a/src/option.c
+++ b/src/option.c
@@ -2077,6 +2077,9 @@ static struct vimoption
(char_u *)NULL, PV_NONE,
#endif
{(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT},
+ {"regexpengine", "re", P_NUM|P_VI_DEF,
+ (char_u *)&p_re, PV_NONE,
+ {(char_u *)0L, (char_u *)0L} SCRIPTID_INIT},
{"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN,
(char_u *)VAR_WIN, PV_RNU,
{(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT},
@@ -8604,6 +8607,11 @@ set_num_option(opt_idx, varp, value, errbuf, errbuflen, opt_flags)
errmsg = e_positive;
p_hi = 0;
}
+ if (p_re < 0 || p_re > 2)
+ {
+ errmsg = e_invarg;
+ p_re = 0;
+ }
if (p_report < 0)
{
errmsg = e_positive;
diff --git a/src/option.h b/src/option.h
index 8b982f580e..b11316faa4 100644
--- a/src/option.h
+++ b/src/option.h
@@ -653,6 +653,7 @@ EXTERN char_u *p_cdpath; /* 'cdpath' */
EXTERN long p_rdt; /* 'redrawtime' */
#endif
EXTERN int p_remap; /* 'remap' */
+EXTERN long p_re; /* 'regexpengine' */
EXTERN long p_report; /* 'report' */
#if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX)
EXTERN long p_pvh; /* 'previewheight' */
diff --git a/src/regexp.c b/src/regexp.c
index e456b5d5f3..a1f71ab979 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -38,9 +38,20 @@
* Named character class support added by Walter Briscoe (1998 Jul 01)
*/
+/* Uncomment the first if you do not want to see debugging logs or files
+ * related to regular expressions, even when compiling with -DDEBUG.
+ * Uncomment the second to get the regexp debugging. */
+/* #undef DEBUG */
+/* #define DEBUG */
+
#include "vim.h"
-#undef DEBUG
+#ifdef DEBUG
+/* show/save debugging data when BT engine is used */
+# define BT_REGEXP_DUMP
+/* save the debugging data to a file instead of displaying it */
+# define BT_REGEXP_LOG
+#endif
/*
* The "internal use only" fields in regexp.h are present to pass info from
@@ -326,9 +337,10 @@ toggle_Magic(x)
/* Used for an error (down from) vim_regcomp(): give the error message, set
* rc_did_emsg and return NULL */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
-#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
-#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
+#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
+#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
+#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
#define MAX_LIMIT (32767L << 16L)
@@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int));
static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
static char_u *cstrchr __ARGS((char_u *, int));
+#ifdef BT_REGEXP_DUMP
+static void regdump __ARGS((char_u *, bt_regprog_T *));
+#endif
#ifdef DEBUG
-static void regdump __ARGS((char_u *, regprog_T *));
static char_u *regprop __ARGS((char_u *));
#endif
+static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
+static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
+static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
+static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
+
#define NOT_MULTI 0
#define MULTI_ONE 1
#define MULTI_MULT 2
@@ -630,7 +649,13 @@ static char_u META_flags[] = {
};
#endif
-static int curchr;
+static int curchr; /* currently parsed character */
+/* Previous character. Note: prevchr is sometimes -1 when we are not at the
+ * start, eg in /[ ^I]^ the pattern was never found even if it existed,
+ * because ^ was taken to be magic -- webb */
+static int prevchr;
+static int prevprevchr; /* previous-previous character */
+static int nextchr; /* used for ungetchr() */
/* arguments for reg() */
#define REG_NOPAREN 0 /* toplevel reg() */
@@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *));
static void regtail __ARGS((char_u *, char_u *));
static void regoptail __ARGS((char_u *, char_u *));
+static regengine_T bt_regengine;
+static regengine_T nfa_regengine;
+
/*
* Return TRUE if compiled regular expression "prog" can match a line break.
*/
@@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = {
/*
* Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8.
+ * NOTE: When changing this function, also change nfa_emit_equi_class()
*/
static void
reg_equi_class(c)
@@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp)
return p;
}
+static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
+
/*
- * vim_regcomp() - compile a regular expression into internal code
+ * bt_regcomp() - compile a regular expression into internal code for the
+ * traditional back track matcher.
* Returns the program in allocated space. Returns NULL for an error.
*
* We can't allocate space until we know how big the compiled form will be,
@@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp)
* of the structure of the compiled regexp.
* "re_flags": RE_MAGIC and/or RE_STRING.
*/
- regprog_T *
-vim_regcomp(expr, re_flags)
+ static regprog_T *
+bt_regcomp(expr, re_flags)
char_u *expr;
int re_flags;
{
- regprog_T *r;
+ bt_regprog_T *r;
char_u *scan;
char_u *longest;
int len;
@@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags)
#endif
/* Allocate space. */
- r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
+ r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
if (r == NULL)
return NULL;
@@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags)
r->regmlen = len;
}
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
regdump(expr, r);
#endif
- return r;
+ r->engine = &bt_regengine;
+ return (regprog_T *)r;
}
/*
@@ -1436,7 +1469,7 @@ vim_regcomp_had_eol()
#endif
/*
- * reg - regular expression, i.e. main body or parenthesized thing
+ * Parse regular expression, i.e. main body or parenthesized thing.
*
* Caller must absorb opening parenthesis.
*
@@ -1473,7 +1506,7 @@ reg(paren, flagp)
{
/* Make a MOPEN node. */
if (regnpar >= NSUBEXP)
- EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar;
++regnpar;
ret = regnode(MOPEN + parno);
@@ -1534,14 +1567,14 @@ reg(paren, flagp)
else
#endif
if (paren == REG_NPAREN)
- EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
else
- EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
}
else if (paren == REG_NOPAREN && peekchr() != NUL)
{
if (curchr == Magic(')'))
- EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
else
EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
/* NOTREACHED */
@@ -1556,7 +1589,7 @@ reg(paren, flagp)
}
/*
- * Handle one alternative of an | operator.
+ * Parse one alternative of an | operator.
* Implements the & operator.
*/
static char_u *
@@ -1599,7 +1632,7 @@ regbranch(flagp)
}
/*
- * Handle one alternative of an | or & operator.
+ * Parse one alternative of an | or & operator.
* Implements the concatenation operator.
*/
static char_u *
@@ -1679,7 +1712,7 @@ regconcat(flagp)
}
/*
- * regpiece - something followed by possible [*+=]
+ * Parse something followed by possible [*+=].
*
* Note that the branching code sequences used for = and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as
@@ -1759,7 +1792,7 @@ regpiece(flagp)
}
}
if (lop == END)
- EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
+ EMSG2_RET_NULL(_("E59: invalid character after %s@"),
reg_magic == MAGIC_ALL);
/* Look behind must match with behind_pos. */
if (lop == BEHIND || lop == NOBEHIND)
@@ -1793,7 +1826,7 @@ regpiece(flagp)
else
{
if (num_complex_braces >= 10)
- EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
+ EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
reg_magic == MAGIC_ALL);
reginsert(BRACE_COMPLEX + num_complex_braces, ret);
regoptail(ret, regnode(BACK));
@@ -1820,8 +1853,20 @@ regpiece(flagp)
return ret;
}
+/* When making changes to classchars also change nfa_classcodes. */
+static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
+static int classcodes[] = {
+ ANY, IDENT, SIDENT, KWORD, SKWORD,
+ FNAME, SFNAME, PRINT, SPRINT,
+ WHITE, NWHITE, DIGIT, NDIGIT,
+ HEX, NHEX, OCTAL, NOCTAL,
+ WORD, NWORD, HEAD, NHEAD,
+ ALPHA, NALPHA, LOWER, NLOWER,
+ UPPER, NUPPER
+};
+
/*
- * regatom - the lowest level
+ * Parse the lowest level.
*
* Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and
@@ -1836,15 +1881,6 @@ regatom(flagp)
int cpo_lit; /* 'cpoptions' contains 'l' flag */
int cpo_bsl; /* 'cpoptions' contains '\' flag */
int c;
- static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
- static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
- FNAME, SFNAME, PRINT, SPRINT,
- WHITE, NWHITE, DIGIT, NDIGIT,
- HEX, NHEX, OCTAL, NOCTAL,
- WORD, NWORD, HEAD, NHEAD,
- ALPHA, NALPHA, LOWER, NLOWER,
- UPPER, NUPPER
- };
char_u *p;
int extra = 0;
@@ -2140,7 +2176,7 @@ regatom(flagp)
while ((c = getchr()) != ']')
{
if (c == NUL)
- EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
+ EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
reg_magic == MAGIC_ALL);
br = regnode(BRANCH);
if (ret == NULL)
@@ -2156,7 +2192,7 @@ regatom(flagp)
return NULL;
}
if (ret == NULL)
- EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
+ EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
reg_magic == MAGIC_ALL);
lastbranch = regnode(BRANCH);
br = regnode(NOTHING);
@@ -2200,7 +2236,7 @@ regatom(flagp)
}
if (i < 0)
- EMSG_M_RET_NULL(
+ EMSG2_RET_NULL(
_("E678: Invalid character after %s%%[dxouU]"),
reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE
@@ -2272,7 +2308,7 @@ regatom(flagp)
}
}
- EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
+ EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
reg_magic == MAGIC_ALL);
}
}
@@ -2567,8 +2603,7 @@ collection:
break;
}
else if (reg_strict)
- EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
- reg_magic > MAGIC_OFF);
+ EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
}
/* FALLTHROUGH */
@@ -2659,7 +2694,7 @@ use_multibytecode(c)
#endif
/*
- * emit a node
+ * Emit a node.
* Return pointer to generated code.
*/
static char_u *
@@ -2711,7 +2746,7 @@ regmbc(c)
#endif
/*
- * reginsert - insert an operator in front of already-emitted operand
+ * Insert an operator in front of already-emitted operand
*
* Means relocating the operand.
*/
@@ -2742,7 +2777,7 @@ reginsert(op, opnd)
}
/*
- * reginsert_limits - insert an operator in front of already-emitted operand.
+ * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer.
*
* Means relocating the operand.
@@ -2794,7 +2829,7 @@ re_put_long(p, val)
}
/*
- * regtail - set the next-pointer at the end of a node chain
+ * Set the next-pointer at the end of a node chain.
*/
static void
regtail(p, val)
@@ -2835,7 +2870,7 @@ regtail(p, val)
}
/*
- * regoptail - regtail on item after a BRANCH; nop if none
+ * Like regtail, on item after a BRANCH; nop if none.
*/
static void
regoptail(p, val)
@@ -2851,22 +2886,15 @@ regoptail(p, val)
}
/*
- * getchr() - get the next character from the pattern. We know about
- * magic and such, so therefore we need a lexical analyzer.
+ * Functions for getting characters from the regexp input.
*/
-/* static int curchr; */
-static int prevprevchr;
-static int prevchr;
-static int nextchr; /* used for ungetchr() */
-/*
- * Note: prevchr is sometimes -1 when we are not at the start,
- * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
- * taken to be magic -- webb
- */
static int at_start; /* True when on the first character */
static int prev_at_start; /* True when on the second character */
+/*
+ * Start parsing at "str".
+ */
static void
initchr(str)
char_u *str;
@@ -2878,6 +2906,9 @@ initchr(str)
prev_at_start = FALSE;
}
+/*
+ * Get the next character without advancing.
+ */
static int
peekchr()
{
@@ -3086,6 +3117,10 @@ skipchr_keepstart()
prevprevchr = prpr;
}
+/*
+ * Get the next character from the pattern. We know about magic and such, so
+ * therefore we need a lexical analyzer.
+ */
static int
getchr()
{
@@ -3340,8 +3375,8 @@ typedef struct regbehind_S
} regbehind_T;
static char_u *reg_getline __ARGS((linenr_T lnum));
-static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
-static long regtry __ARGS((regprog_T *prog, colnr_T col));
+static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
+static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr __ARGS((void));
@@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol;
/*
* Sometimes need to save a copy of a line. Since alloc()/free() is very
* slow, we keep one allocated piece of memory and only re-allocate it when
- * it's too small. It's freed in vim_regexec_both() when finished.
+ * it's too small. It's freed in bt_regexec_both() when finished.
*/
static char_u *reg_tofree = NULL;
static unsigned reg_tofreelen;
@@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
/* TRUE if using multi-line regexp. */
#define REG_MULTI (reg_match == NULL)
+static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
*
* Return TRUE if there is a match, FALSE if not.
*/
- int
-vim_regexec(rmp, line, col)
+ static int
+bt_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+
+static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/
- int
-vim_regexec_nl(rmp, line, col)
+ static int
+bt_regexec_nl(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#endif
+static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+
/*
* Match a regexp against multiple lines.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col)
* Return zero if there is no match. Return number of lines contained in the
* match otherwise.
*/
- long
-vim_regexec_multi(rmp, win, buf, lnum, col, tm)
+ static long
+bt_regexec_multi(rmp, win, buf, lnum, col, tm)
regmmatch_T *rmp;
win_T *win; /* window in which to search or NULL */
buf_T *buf; /* buffer in which to search */
@@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
#endif
ireg_maxcol = rmp->rmm_maxcol;
- r = vim_regexec_both(NULL, col, tm);
+ r = bt_regexec_both(NULL, col, tm);
return r;
}
@@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
* lines ("line" is NULL, use reg_getline()).
*/
static long
-vim_regexec_both(line, col, tm)
+bt_regexec_both(line, col, tm)
char_u *line;
colnr_T col; /* column to start looking for match */
proftime_T *tm UNUSED; /* timeout limit or NULL */
{
- regprog_T *prog;
+ bt_regprog_T *prog;
char_u *s;
long retval = 0L;
@@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm)
if (REG_MULTI)
{
- prog = reg_mmatch->regprog;
+ prog = (bt_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0);
reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos;
}
else
{
- prog = reg_match->regprog;
+ prog = (bt_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp;
reg_endp = reg_match->endp;
}
@@ -3931,7 +3973,7 @@ unref_extmatch(em)
*/
static long
regtry(prog, col)
- regprog_T *prog;
+ bt_regprog_T *prog;
colnr_T col;
{
reginput = regline + col;
@@ -4063,7 +4105,7 @@ regmatch(scan)
#define RA_NOMATCH 5 /* didn't match */
/* Make "regstack" and "backpos" empty. They are allocated and freed in
- * vim_regexec_both() to reduce malloc()/free() calls. */
+ * bt_regexec_both() to reduce malloc()/free() calls. */
regstack.ga_len = 0;
backpos.ga_len = 0;
@@ -4072,14 +4114,14 @@ regmatch(scan)
*/
for (;;)
{
- /* Some patterns my cause a long time to match, even though they are not
+ /* Some patterns may cause a long time to match, even though they are not
* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
fast_breakcheck();
#ifdef DEBUG
if (scan != NULL && regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("(\n");
}
#endif
@@ -4100,7 +4142,7 @@ regmatch(scan)
#ifdef DEBUG
if (regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("...\n");
# ifdef FEAT_SYN_HL
if (re_extmatch_in != NULL)
@@ -4112,7 +4154,7 @@ regmatch(scan)
{
mch_errmsg(" \"");
if (re_extmatch_in->matches[i] != NULL)
- mch_errmsg(re_extmatch_in->matches[i]);
+ mch_errmsg((char *)re_extmatch_in->matches[i]);
mch_errmsg("\"\n");
}
}
@@ -6091,9 +6133,14 @@ regnext(p)
static int
prog_magic_wrong()
{
- if (UCHARAT(REG_MULTI
- ? reg_mmatch->regprog->program
- : reg_match->regprog->program) != REGMAGIC)
+ regprog_T *prog;
+
+ prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
+ if (prog->engine == &nfa_regengine)
+ /* For NFA matcher we don't check the magic */
+ return FALSE;
+
+ if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
{
EMSG(_(e_re_corr));
return TRUE;
@@ -6318,7 +6365,7 @@ re_num_cmp(val, scan)
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
/*
* regdump - dump a regexp onto stdout in vaguely comprehensible form
@@ -6326,14 +6373,22 @@ re_num_cmp(val, scan)
static void
regdump(pattern, r)
char_u *pattern;
- regprog_T *r;
+ bt_regprog_T *r;
{
char_u *s;
int op = EXACTLY; /* Arbitrary non-END op. */
char_u *next;
char_u *end = NULL;
+ FILE *f;
- printf("\r\nregcomp(%s):\r\n", pattern);
+#ifdef BT_REGEXP_LOG
+ f = fopen("bt_regexp_log.log", "a");
+#else
+ f = stdout;
+#endif
+ if (f == NULL)
+ return;
+ fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
s = r->program + 1;
/*
@@ -6343,18 +6398,18 @@ regdump(pattern, r)
while (op != END || s <= end)
{
op = OP(s);
- printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
+ fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
next = regnext(s);
if (next == NULL) /* Next ptr. */
- printf("(0)");
+ fprintf(f, "(0)");
else
- printf("(%d)", (int)((s - r->program) + (next - s)));
+ fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
if (end < next)
end = next;
if (op == BRACE_LIMITS)
{
/* Two short ints */
- printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
+ fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
s += 8;
}
s += 3;
@@ -6363,25 +6418,33 @@ regdump(pattern, r)
|| op == EXACTLY)
{
/* Literal string, where present. */
+ fprintf(f, "\nxxxxxxxxx\n");
while (*s != NUL)
- printf("%c", *s++);
+ fprintf(f, "%c", *s++);
+ fprintf(f, "\nxxxxxxxxx\n");
s++;
}
- printf("\r\n");
+ fprintf(f, "\r\n");
}
/* Header fields of interest. */
if (r->regstart != NUL)
- printf("start `%s' 0x%x; ", r->regstart < 256
+ fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
? (char *)transchar(r->regstart)
: "multibyte", r->regstart);
if (r->reganch)
- printf("anchored; ");
+ fprintf(f, "anchored; ");
if (r->regmust != NULL)
- printf("must have \"%s\"", r->regmust);
- printf("\r\n");
+ fprintf(f, "must have \"%s\"", r->regmust);
+ fprintf(f, "\r\n");
+
+#ifdef BT_REGEXP_LOG
+ fclose(f);
+#endif
}
+#endif /* BT_REGEXP_DUMP */
+#ifdef DEBUG
/*
* regprop - printable representation of opcode
*/
@@ -6389,12 +6452,12 @@ regdump(pattern, r)
regprop(op)
char_u *op;
{
- char_u *p;
- static char_u buf[50];
+ char *p;
+ static char buf[50];
- (void) strcpy(buf, ":");
+ STRCPY(buf, ":");
- switch (OP(op))
+ switch ((int) OP(op))
{
case BOL:
p = "BOL";
@@ -6761,10 +6824,10 @@ regprop(op)
break;
}
if (p != NULL)
- (void) strcat(buf, p);
- return buf;
+ STRCAT(buf, p);
+ return (char_u *)buf;
}
-#endif
+#endif /* DEBUG */
#ifdef FEAT_MBYTE
static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
@@ -7667,3 +7730,187 @@ reg_submatch(no)
return retval;
}
#endif
+
+static regengine_T bt_regengine =
+{
+ bt_regcomp,
+ bt_regexec,
+#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
+ || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+ bt_regexec_nl,
+#endif
+ bt_regexec_multi
+#ifdef DEBUG
+ ,(char_u *)""
+#endif
+};
+
+
+#include "regexp_nfa.c"
+
+static regengine_T nfa_regengine =
+{
+ nfa_regcomp,
+ nfa_regexec,
+#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
+ || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+ nfa_regexec_nl,
+#endif
+ nfa_regexec_multi
+#ifdef DEBUG
+ ,(char_u *)""
+#endif
+};
+
+/* Which regexp engine to use? Needed for vim_regcomp().
+ * Must match with 'regexpengine'. */
+static int regexp_engine = 0;
+#define AUTOMATIC_ENGINE 0
+#define BACKTRACKING_ENGINE 1
+#define NFA_ENGINE 2
+#ifdef DEBUG
+static char_u regname[][30] = {
+ "AUTOMATIC Regexp Engine",
+ "BACKTACKING Regexp Engine",
+ "NFA Regexp Engine"
+ };
+#endif
+
+/*
+ * Compile a regular expression into internal code.
+ * Returns the program in allocated memory. Returns NULL for an error.
+ */
+ regprog_T *
+vim_regcomp(expr_arg, re_flags)
+ char_u *expr_arg;
+ int re_flags;
+{
+ regprog_T *prog = NULL;
+ char_u *expr = expr_arg;
+
+ syntax_error = FALSE;
+ regexp_engine = p_re;
+
+ /* Check for prefix "\%#=", that sets the regexp engine */
+ if (STRNCMP(expr, "\\%#=", 4) == 0)
+ {
+ int newengine = expr[4] - '0';
+
+ if (newengine == AUTOMATIC_ENGINE
+ || newengine == BACKTRACKING_ENGINE
+ || newengine == NFA_ENGINE)
+ {
+ regexp_engine = expr[4] - '0';
+ expr += 5;
+#ifdef DEBUG
+ EMSG3("New regexp mode selected (%d): %s", regexp_engine,
+ regname[newengine]);
+#endif
+ }
+ else
+ {
+ EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
+ regexp_engine = AUTOMATIC_ENGINE;
+ }
+ }
+#ifdef DEBUG
+ bt_regengine.expr = expr;
+ nfa_regengine.expr = expr;
+#endif
+
+ /*
+ * First try the NFA engine, unless backtracking was requested.
+ */
+ if (regexp_engine != BACKTRACKING_ENGINE)
+ prog = nfa_regengine.regcomp(expr, re_flags);
+ else
+ prog = bt_regengine.regcomp(expr, re_flags);
+
+ if (prog == NULL) /* error compiling regexp with initial engine */
+ {
+#ifdef DEBUG