updated for version 7.3.970v7.3.970

Problem: Syntax highlighting can be slow. Solution: Include the NFA regexp engine. Add the 'regexpengine' option to select which one is used. (various authors, including Ken Takata, Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
author: Bram Moolenaar <Bram@vim.org> 2013-05-19 19:40:29 +0200
committer: Bram Moolenaar <Bram@vim.org> 2013-05-19 19:40:29 +0200
commit: fbc0d2ea1e13fb55c267b72d64046e5ef984b97f (patch)
tree: 8c539274e29070536bb28615e32d236983361246
parent: 6fa41fb3746e5ab2f793de713879afc9b1e25647 (diff)
24 files changed, 4808 insertions, 235 deletions
diff --git a/Filelist b/Filelist
index 6503dbcc31..ee5ad8f34b 100644
--- a/Filelist
+++ b/Filelist
@@ -57,6 +57,7 @@ SRC_ALL =	\
 		src/popupmnu.c \
 		src/quickfix.c \
 		src/regexp.c \
+		src/regexp_nfa.c \
 		src/regexp.h \
 		src/screen.c \
 		src/search.c \
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
index 332dbee6d8..5f47889fd1 100644
--- a/runtime/doc/pattern.txt
+++ b/runtime/doc/pattern.txt
@@ -1,4 +1,4 @@
-*pattern.txt*   For Vim version 7.3.  Last change: 2013 Apr 20
+*pattern.txt*   For Vim version 7.3.  Last change: 2013 May 17
 
 
 		  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -350,6 +350,27 @@ For starters, read chapter 27 of the user manual |usr_27.txt|.
 		or  \z( pattern \)		|/\z(|
 
 
+				*/\%#=* *two-engines*
+Vim includes two regexp engines:
+1. An old, backtracking engine that supports everything.
+2. A new, NFA engine that works much faster on some patterns, but does not
+   support everything.
+
+Vim will automatically select the right engine for you.  However, if you run
+into a problem or want to specifically select one engine or the other, you can
+prepend one of the following to the pattern:
+
+	\%#=0	Force automatic selection.  Only has an effect when
+	        'regexpengine' has been set to a non-zero value.
+	\%#=1	Force using the old engine.
+	\%#=2	Force using the NFA engine.
+
+You can also use the 'regexpengine' option to change the default.
+
+			 *E864* *E868* *E874* *E875* *E876* *E877* *E878*
+If selecting the NFA engine and it runs into something that is not implemented
+the pattern will not match.  This is only useful when debugging Vim.
+
 ==============================================================================
 3. Magic							*/magic*
 
@@ -396,9 +417,10 @@ pattern.
 
 ==============================================================================
 4. Overview of pattern items				*pattern-overview*
+						*E865* *E866* *E867* *E869*
 
 Overview of multi items.				*/multi* *E61* *E62*
-More explanation and examples below, follow the links.			*E64*
+More explanation and examples below, follow the links.		*E64* *E871*
 
 	  multi ~
      'magic' 'nomagic'	matches of the preceding atom ~
@@ -508,12 +530,14 @@ Character classes {not in Vi}:				*/character-classes*
 
 |/\c|	\c	\c	ignore case, do not use the 'ignorecase' option
 |/\C|	\C	\C	match case, do not use the 'ignorecase' option
+|/\Z|	\Z	\Z	ignore differences in Unicode "combining characters".
+			Useful when searching voweled Hebrew or Arabic text.
+
 |/\m|	\m	\m	'magic' on for the following chars in the pattern
 |/\M|	\M	\M	'magic' off for the following chars in the pattern
 |/\v|	\v	\v	the following chars in the pattern are "very magic"
 |/\V|	\V	\V	the following chars in the pattern are "very nomagic"
-|/\Z|	\Z	\Z	ignore differences in Unicode "combining characters".
-			Useful when searching voweled Hebrew or Arabic text.
+|/\%#=|   \%#=1   \%#=1   select regexp engine |/zero-width|
 
 |/\%d|	\%d	\%d	match specified decimal character (eg \%d123)
 |/\%x|	\%x	\%x	match specified hex character (eg \%x2a)
@@ -581,7 +605,7 @@ overview.
 \?	Just like \=.  Cannot be used when searching backwards with the "?"
 	command. {not in Vi}
 
-						*/\{* *E58* *E60* *E554*
+					*/\{* *E58* *E60* *E554* *E870*
 \{n,m}	Matches n to m of the preceding atom, as many as possible
 \{n}	Matches n of the preceding atom
 \{n,}	Matches at least n of the preceding atom, as many as possible
@@ -962,7 +986,8 @@ match ASCII characters, as indicated by the range.
 ~	matches the last given substitute string	*/~* */\~*
 
 \(\)	A pattern enclosed by escaped parentheses.	*/\(* */\(\)* */\)*
-	E.g., "\(^a\)" matches 'a' at the start of a line.  *E51* *E54* *E55*
+	E.g., "\(^a\)" matches 'a' at the start of a line.
+	*E51* *E54* *E55* *E872* *E873*
 
 \1      Matches the same string that was matched by	*/\1* *E65*
 	the first sub-expression in \( and \). {not in Vi}
diff --git a/runtime/doc/tags b/runtime/doc/tags
index 9af196ae46..2f40a4cb91 100644
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -736,9 +736,11 @@ $VIMRUNTIME	starting.txt	/*$VIMRUNTIME*
 'quote	motion.txt	/*'quote*
 'quoteescape'	options.txt	/*'quoteescape'*
 'rdt'	options.txt	/*'rdt'*
+'re'	options.txt	/*'re'*
 'readonly'	options.txt	/*'readonly'*
 'redraw'	vi_diff.txt	/*'redraw'*
 'redrawtime'	options.txt	/*'redrawtime'*
+'regexpengine''	options.txt	/*'regexpengine''*
 'relativenumber'	options.txt	/*'relativenumber'*
 'remap'	options.txt	/*'remap'*
 'report'	options.txt	/*'report'*
@@ -1389,6 +1391,7 @@ $VIMRUNTIME	starting.txt	/*$VIMRUNTIME*
 /\	pattern.txt	/*\/\\*
 /\$	pattern.txt	/*\/\\$*
 /\%#	pattern.txt	/*\/\\%#*
+/\%#=	pattern.txt	/*\/\\%#=*
 /\%$	pattern.txt	/*\/\\%$*
 /\%'m	pattern.txt	/*\/\\%'m*
 /\%(	pattern.txt	/*\/\\%(*
@@ -4261,7 +4264,22 @@ E860	eval.txt	/*E860*
 E861	eval.txt	/*E861*
 E862	eval.txt	/*E862*
 E863	if_pyth.txt	/*E863*
+E864	pattern.txt	/*E864*
+E865	pattern.txt	/*E865*
+E866	pattern.txt	/*E866*
+E867	pattern.txt	/*E867*
+E868	pattern.txt	/*E868*
+E869	pattern.txt	/*E869*
 E87	windows.txt	/*E87*
+E870	pattern.txt	/*E870*
+E871	pattern.txt	/*E871*
+E872	pattern.txt	/*E872*
+E873	pattern.txt	/*E873*
+E874	pattern.txt	/*E874*
+E875	pattern.txt	/*E875*
+E876	pattern.txt	/*E876*
+E877	pattern.txt	/*E877*
+E878	pattern.txt	/*E878*
 E88	windows.txt	/*E88*
 E89	message.txt	/*E89*
 E90	message.txt	/*E90*
@@ -8172,6 +8190,7 @@ try-nested	eval.txt	/*try-nested*
 try-nesting	eval.txt	/*try-nesting*
 tutor	usr_01.txt	/*tutor*
 twice	if_cscop.txt	/*twice*
+two-engines	pattern.txt	/*two-engines*
 type()	eval.txt	/*type()*
 type-mistakes	tips.txt	/*type-mistakes*
 typecorr-settings	usr_41.txt	/*typecorr-settings*
diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak
index b00ae3b71b..d72282aa86 100644
--- a/src/Make_cyg.mak
+++ b/src/Make_cyg.mak
@@ -672,6 +672,9 @@ endif
 $(OUTDIR)/netbeans.o:	netbeans.c $(INCL) $(NBDEBUG_DEP)
 	$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
 
+$(OUTDIR)/regexp.o:		regexp.c regexp_nfa.c $(INCL)
+	$(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
 $(OUTDIR)/if_mzsch.o:	if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
 	$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
 
diff --git a/src/Make_ming.mak b/src/Make_ming.mak
index b8d7b200e8..3670e7102e 100644
--- a/src/Make_ming.mak
+++ b/src/Make_ming.mak
@@ -765,6 +765,9 @@ if_perl.c: if_perl.xs typemap
 $(OUTDIR)/netbeans.o:	netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC)
 	$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
 
+$(OUTDIR)/regexp.o:		regexp.c regexp_nfa.c $(INCL)
+	$(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
+
 $(OUTDIR)/if_mzsch.o:	if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
 	$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o
 
diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak
index 2a4a3e8c45..acd1346ab9 100644
--- a/src/Make_mvc.mak
+++ b/src/Make_mvc.mak
@@ -1166,7 +1166,7 @@ $(OUTDIR)/popupmnu.obj:	$(OUTDIR) popupmnu.c  $(INCL)
 
 $(OUTDIR)/quickfix.obj:	$(OUTDIR) quickfix.c  $(INCL)
 
-$(OUTDIR)/regexp.obj:	$(OUTDIR) regexp.c  $(INCL)
+$(OUTDIR)/regexp.obj:	$(OUTDIR) regexp.c regexp_nfa.c  $(INCL)
 
 $(OUTDIR)/screen.obj:	$(OUTDIR) screen.c  $(INCL)
 
diff --git a/src/Makefile b/src/Makefile
index fb821ca8c7..67a53bb0ec 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -454,7 +454,7 @@ CClink = $(CC)
 
 # MULTIBYTE - To edit multi-byte characters.
 # Uncomment this when you want to edit a multibyte language.
-# It's automatically enabled with big features or IME support.
+# It's automatically enabled with normal features, GTK or IME support.
 # Note: Compile on a machine where setlocale() actually works, otherwise the
 # configure tests may fail.
 #CONF_OPT_MULTIBYTE = --enable-multibyte
@@ -2664,7 +2664,7 @@ objects/popupmnu.o: popupmnu.c
 objects/quickfix.o: quickfix.c
 	$(CCC) -o $@ quickfix.c
 
-objects/regexp.o: regexp.c
+objects/regexp.o: regexp.c regexp_nfa.c
 	$(CCC) -o $@ regexp.c
 
 objects/screen.o: screen.c
@@ -2938,10 +2938,10 @@ objects/quickfix.o: quickfix.c vim.h auto/config.h feature.h os_unix.h \
  auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
  regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
  globals.h farsi.h arabic.h
-objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
- ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
- gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
- arabic.h
+objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \
+ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
+ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
+ globals.h farsi.h arabic.h
 objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
  ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
  gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \
diff --git a/src/option.c b/src/option.c
index cfe464c5b0..325f061d95 100644
--- a/src/option.c
+++ b/src/option.c
@@ -2077,6 +2077,9 @@ static struct vimoption
 			    (char_u *)NULL, PV_NONE,
 #endif
 			    {(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT},
+    {"regexpengine", "re",  P_NUM|P_VI_DEF,
+			    (char_u *)&p_re, PV_NONE,
+			    {(char_u *)0L, (char_u *)0L} SCRIPTID_INIT},
     {"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN,
 			    (char_u *)VAR_WIN, PV_RNU,
 			    {(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT},
@@ -8604,6 +8607,11 @@ set_num_option(opt_idx, varp, value, errbuf, errbuflen, opt_flags)
 	errmsg = e_positive;
 	p_hi = 0;
     }
+    if (p_re < 0 || p_re > 2)
+    {
+	errmsg = e_invarg;
+	p_re = 0;
+    }
     if (p_report < 0)
     {
 	errmsg = e_positive;
diff --git a/src/option.h b/src/option.h
index 8b982f580e..b11316faa4 100644
--- a/src/option.h
+++ b/src/option.h
@@ -653,6 +653,7 @@ EXTERN char_u	*p_cdpath;	/* 'cdpath' */
 EXTERN long	p_rdt;		/* 'redrawtime' */
 #endif
 EXTERN int	p_remap;	/* 'remap' */
+EXTERN long	p_re;		/* 'regexpengine' */
 EXTERN long	p_report;	/* 'report' */
 #if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX)
 EXTERN long	p_pvh;		/* 'previewheight' */
diff --git a/src/regexp.c b/src/regexp.c
index e456b5d5f3..a1f71ab979 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -38,9 +38,20 @@
  * Named character class support added by Walter Briscoe (1998 Jul 01)
  */
 
+/* Uncomment the first if you do not want to see debugging logs or files
+ * related to regular expressions, even when compiling with -DDEBUG.
+ * Uncomment the second to get the regexp debugging. */
+/* #undef DEBUG */
+/* #define DEBUG */
+
 #include "vim.h"
 
-#undef DEBUG
+#ifdef DEBUG
+/* show/save debugging data when BT engine is used */
+# define BT_REGEXP_DUMP
+/* save the debugging data to a file instead of displaying it */
+# define BT_REGEXP_LOG
+#endif
 
 /*
  * The "internal use only" fields in regexp.h are present to pass info from
@@ -326,9 +337,10 @@ toggle_Magic(x)
 /* Used for an error (down from) vim_regcomp(): give the error message, set
  * rc_did_emsg and return NULL */
 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
-#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
-#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
+#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
+#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
+#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 
 #define MAX_LIMIT	(32767L << 16L)
 
@@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int));
 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 static char_u *cstrchr __ARGS((char_u *, int));
 
+#ifdef BT_REGEXP_DUMP
+static void	regdump __ARGS((char_u *, bt_regprog_T *));
+#endif
 #ifdef DEBUG
-static void	regdump __ARGS((char_u *, regprog_T *));
 static char_u	*regprop __ARGS((char_u *));
 #endif
 
+static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
+static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
+static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
+static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
+
 #define NOT_MULTI	0
 #define MULTI_ONE	1
 #define MULTI_MULT	2
@@ -630,7 +649,13 @@ static char_u META_flags[] = {
 };
 #endif
 
-static int	curchr;
+static int	curchr;		/* currently parsed character */
+/* Previous character.  Note: prevchr is sometimes -1 when we are not at the
+ * start, eg in /[ ^I]^ the pattern was never found even if it existed,
+ * because ^ was taken to be magic -- webb */
+static int	prevchr;
+static int	prevprevchr;	/* previous-previous character */
+static int	nextchr;	/* used for ungetchr() */
 
 /* arguments for reg() */
 #define REG_NOPAREN	0	/* toplevel reg() */
@@ -680,6 +705,9 @@ static int	read_limits __ARGS((long *, long *));
 static void	regtail __ARGS((char_u *, char_u *));
 static void	regoptail __ARGS((char_u *, char_u *));
 
+static regengine_T bt_regengine;
+static regengine_T nfa_regengine;
+
 /*
  * Return TRUE if compiled regular expression "prog" can match a line break.
  */
@@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = {
 /*
  * Produce the bytes for equivalence class "c".
  * Currently only handles latin1, latin9 and utf-8.
+ * NOTE: When changing this function, also change nfa_emit_equi_class()
  */
     static void
 reg_equi_class(c)
@@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp)
     return p;
 }
 
+static regprog_T    *bt_regcomp __ARGS((char_u *expr, int re_flags));
+
 /*
- * vim_regcomp() - compile a regular expression into internal code
+ * bt_regcomp() - compile a regular expression into internal code for the
+ * traditional back track matcher.
  * Returns the program in allocated space.  Returns NULL for an error.
  *
  * We can't allocate space until we know how big the compiled form will be,
@@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp)
  * of the structure of the compiled regexp.
  * "re_flags": RE_MAGIC and/or RE_STRING.
  */
-    regprog_T *
-vim_regcomp(expr, re_flags)
+    static regprog_T *
+bt_regcomp(expr, re_flags)
     char_u	*expr;
     int		re_flags;
 {
-    regprog_T	*r;
+    bt_regprog_T    *r;
     char_u	*scan;
     char_u	*longest;
     int		len;
@@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags)
 #endif
 
     /* Allocate space. */
-    r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
+    r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
     if (r == NULL)
 	return NULL;
 
@@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags)
 	    r->regmlen = len;
 	}
     }
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
     regdump(expr, r);
 #endif
-    return r;
+    r->engine = &bt_regengine;
+    return (regprog_T *)r;
 }
 
 /*
@@ -1436,7 +1469,7 @@ vim_regcomp_had_eol()
 #endif
 
 /*
- * reg - regular expression, i.e. main body or parenthesized thing
+ * Parse regular expression, i.e. main body or parenthesized thing.
  *
  * Caller must absorb opening parenthesis.
  *
@@ -1473,7 +1506,7 @@ reg(paren, flagp)
     {
 	/* Make a MOPEN node. */
 	if (regnpar >= NSUBEXP)
-	    EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
+	    EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
 	parno = regnpar;
 	++regnpar;
 	ret = regnode(MOPEN + parno);
@@ -1534,14 +1567,14 @@ reg(paren, flagp)
 	else
 #endif
 	    if (paren == REG_NPAREN)
-	    EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
+	    EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
 	else
-	    EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
+	    EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
     }
     else if (paren == REG_NOPAREN && peekchr() != NUL)
     {
 	if (curchr == Magic(')'))
-	    EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
+	    EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
 	else
 	    EMSG_RET_NULL(_(e_trailing));	/* "Can't happen". */
 	/* NOTREACHED */
@@ -1556,7 +1589,7 @@ reg(paren, flagp)
 }
 
 /*
- * Handle one alternative of an | operator.
+ * Parse one alternative of an | operator.
  * Implements the & operator.
  */
     static char_u *
@@ -1599,7 +1632,7 @@ regbranch(flagp)
 }
 
 /*
- * Handle one alternative of an | or & operator.
+ * Parse one alternative of an | or & operator.
  * Implements the concatenation operator.
  */
     static char_u *
@@ -1679,7 +1712,7 @@ regconcat(flagp)
 }
 
 /*
- * regpiece - something followed by possible [*+=]
+ * Parse something followed by possible [*+=].
  *
  * Note that the branching code sequences used for = and the general cases
  * of * and + are somewhat optimized:  they use the same NOTHING node as
@@ -1759,7 +1792,7 @@ regpiece(flagp)
 			      }
 		}
 		if (lop == END)
-		    EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
+		    EMSG2_RET_NULL(_("E59: invalid character after %s@"),
 						      reg_magic == MAGIC_ALL);
 		/* Look behind must match with behind_pos. */
 		if (lop == BEHIND || lop == NOBEHIND)
@@ -1793,7 +1826,7 @@ regpiece(flagp)
 	    else
 	    {
 		if (num_complex_braces >= 10)
-		    EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
+		    EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
 						      reg_magic == MAGIC_ALL);
 		reginsert(BRACE_COMPLEX + num_complex_braces, ret);
 		regoptail(ret, regnode(BACK));
@@ -1820,8 +1853,20 @@ regpiece(flagp)
     return ret;
 }
 
+/* When making changes to classchars also change nfa_classcodes. */
+static char_u	*classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
+static int	classcodes[] = {
+    ANY, IDENT, SIDENT, KWORD, SKWORD,
+    FNAME, SFNAME, PRINT, SPRINT,
+    WHITE, NWHITE, DIGIT, NDIGIT,
+    HEX, NHEX, OCTAL, NOCTAL,
+    WORD, NWORD, HEAD, NHEAD,
+    ALPHA, NALPHA, LOWER, NLOWER,
+    UPPER, NUPPER
+};
+
 /*
- * regatom - the lowest level
+ * Parse the lowest level.
  *
  * Optimization:  gobbles an entire sequence of ordinary characters so that
  * it can turn them into a single node, which is smaller to store and
@@ -1836,15 +1881,6 @@ regatom(flagp)
     int		    cpo_lit;	    /* 'cpoptions' contains 'l' flag */
     int		    cpo_bsl;	    /* 'cpoptions' contains '\' flag */
     int		    c;
-    static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
-    static int	    classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
-				    FNAME, SFNAME, PRINT, SPRINT,
-				    WHITE, NWHITE, DIGIT, NDIGIT,
-				    HEX, NHEX, OCTAL, NOCTAL,
-				    WORD, NWORD, HEAD, NHEAD,
-				    ALPHA, NALPHA, LOWER, NLOWER,
-				    UPPER, NUPPER
-				    };
     char_u	    *p;
     int		    extra = 0;
 
@@ -2140,7 +2176,7 @@ regatom(flagp)
 			      while ((c = getchr()) != ']')
 			      {
 				  if (c == NUL)
-				      EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
+				      EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
 						      reg_magic == MAGIC_ALL);
 				  br = regnode(BRANCH);
 				  if (ret == NULL)
@@ -2156,7 +2192,7 @@ regatom(flagp)
 				      return NULL;
 			      }
 			      if (ret == NULL)
-				  EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
+				  EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
 						      reg_magic == MAGIC_ALL);
 			      lastbranch = regnode(BRANCH);
 			      br = regnode(NOTHING);
@@ -2200,7 +2236,7 @@ regatom(flagp)
 			      }
 
 			      if (i < 0)
-				  EMSG_M_RET_NULL(
+				  EMSG2_RET_NULL(
 					_("E678: Invalid character after %s%%[dxouU]"),
 					reg_magic == MAGIC_ALL);
 #ifdef FEAT_MBYTE
@@ -2272,7 +2308,7 @@ regatom(flagp)
 			      }
 			  }
 
-			  EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
+			  EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
 						      reg_magic == MAGIC_ALL);
 	    }
 	}
@@ -2567,8 +2603,7 @@ collection:
 		break;
 	    }
 	    else if (reg_strict)
-		EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
-						       reg_magic > MAGIC_OFF);
+		EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
 	}
 	/* FALLTHROUGH */
 
@@ -2659,7 +2694,7 @@ use_multibytecode(c)
 #endif
 
 /*
- * emit a node
+ * Emit a node.
  * Return pointer to generated code.
  */
     static char_u *
@@ -2711,7 +2746,7 @@ regmbc(c)
 #endif
 
 /*
- * reginsert - insert an operator in front of already-emitted operand
+ * Insert an operator in front of already-emitted operand
  *
  * Means relocating the operand.
  */
@@ -2742,7 +2777,7 @@ reginsert(op, opnd)
 }
 
 /*
- * reginsert_limits - insert an operator in front of already-emitted operand.
+ * Insert an operator in front of already-emitted operand.
  * The operator has the given limit values as operands.  Also set next pointer.
  *
  * Means relocating the operand.
@@ -2794,7 +2829,7 @@ re_put_long(p, val)
 }
 
 /*
- * regtail - set the next-pointer at the end of a node chain
+ * Set the next-pointer at the end of a node chain.
  */
     static void
 regtail(p, val)
@@ -2835,7 +2870,7 @@ regtail(p, val)
 }
 
 /*
- * regoptail - regtail on item after a BRANCH; nop if none
+ * Like regtail, on item after a BRANCH; nop if none.
  */
     static void
 regoptail(p, val)
@@ -2851,22 +2886,15 @@ regoptail(p, val)
 }
 
 /*
- * getchr() - get the next character from the pattern. We know about
- * magic and such, so therefore we need a lexical analyzer.
+ * Functions for getting characters from the regexp input.
  */
 
-/* static int	    curchr; */
-static int	prevprevchr;
-static int	prevchr;
-static int	nextchr;    /* used for ungetchr() */
-/*
- * Note: prevchr is sometimes -1 when we are not at the start,
- * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
- * taken to be magic -- webb
- */
 static int	at_start;	/* True when on the first character */
 static int	prev_at_start;  /* True when on the second character */
 
+/*
+ * Start parsing at "str".
+ */
     static void
 initchr(str)
     char_u *str;
@@ -2878,6 +2906,9 @@ initchr(str)
     prev_at_start = FALSE;
 }
 
+/*
+ * Get the next character without advancing.
+ */
     static int
 peekchr()
 {
@@ -3086,6 +3117,10 @@ skipchr_keepstart()
     prevprevchr = prpr;
 }
 
+/*
+ * Get the next character from the pattern. We know about magic and such, so
+ * therefore we need a lexical analyzer.
+ */
     static int
 getchr()
 {
@@ -3340,8 +3375,8 @@ typedef struct regbehind_S
 } regbehind_T;
 
 static char_u	*reg_getline __ARGS((linenr_T lnum));
-static long	vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
-static long	regtry __ARGS((regprog_T *prog, colnr_T col));
+static long	bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
+static long	regtry __ARGS((bt_regprog_T *prog, colnr_T col));
 static void	cleanup_subexpr __ARGS((void));
 #ifdef FEAT_SYN_HL
 static void	cleanup_zsubexpr __ARGS((void));
@@ -3398,7 +3433,7 @@ static colnr_T	ireg_maxcol;
 /*
  * Sometimes need to save a copy of a line.  Since alloc()/free() is very
  * slow, we keep one allocated piece of memory and only re-allocate it when
- * it's too small.  It's freed in vim_regexec_both() when finished.
+ * it's too small.  It's freed in bt_regexec_both() when finished.
  */
 static char_u	*reg_tofree = NULL;
 static unsigned	reg_tofreelen;
@@ -3556,6 +3591,8 @@ static lpos_T	reg_endzpos[NSUBEXP];	/* idem, end pos */
 /* TRUE if using multi-line regexp. */
 #define REG_MULTI	(reg_match == NULL)
 
+static int  bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
 /*
  * Match a regexp against a string.
  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3563,8 +3600,8 @@ static lpos_T	reg_endzpos[NSUBEXP];	/* idem, end pos */
  *
  * Return TRUE if there is a match, FALSE if not.
  */
-    int
-vim_regexec(rmp, line, col)
+    static int
+bt_regexec(rmp, line, col)
     regmatch_T	*rmp;
     char_u	*line;	/* string to match against */
     colnr_T	col;	/* column to start looking for match */
@@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col)
     ireg_icombine = FALSE;
 #endif
     ireg_maxcol = 0;
-    return (vim_regexec_both(line, col, NULL) != 0);
+    return (bt_regexec_both(line, col, NULL) != 0);
 }
 
 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
 	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+
+static int  bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
 /*
  * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
  */
-    int
-vim_regexec_nl(rmp, line, col)
+    static int
+bt_regexec_nl(rmp, line, col)
     regmatch_T	*rmp;
     char_u	*line;	/* string to match against */
     colnr_T	col;	/* column to start looking for match */
@@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col)
     ireg_icombine = FALSE;
 #endif
     ireg_maxcol = 0;
-    return (vim_regexec_both(line, col, NULL) != 0);
+    return (bt_regexec_both(line, col, NULL) != 0);
 }
 #endif
 
+static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+
 /*
  * Match a regexp against multiple lines.
  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col)
  * Return zero if there is no match.  Return number of lines contained in the
  * match otherwise.
  */
-    long
-vim_regexec_multi(rmp, win, buf, lnum, col, tm)
+    static long
+bt_regexec_multi(rmp, win, buf, lnum, col, tm)
     regmmatch_T	*rmp;
     win_T	*win;		/* window in which to search or NULL */
     buf_T	*buf;		/* buffer in which to search */
@@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
 #endif
     ireg_maxcol = rmp->rmm_maxcol;
 
-    r = vim_regexec_both(NULL, col, tm);
+    r = bt_regexec_both(NULL, col, tm);
 
     return r;
 }
@@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
  * lines ("line" is NULL, use reg_getline()).
  */
     static long
-vim_regexec_both(line, col, tm)
+bt_regexec_both(line, col, tm)
     char_u	*line;
     colnr_T	col;		/* column to start looking for match */
     proftime_T	*tm UNUSED;	/* timeout limit or NULL */
 {
-    regprog_T	*prog;
+    bt_regprog_T	*prog;
     char_u	*s;
     long	retval = 0L;
 
@@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm)
 
     if (REG_MULTI)
     {
-	prog = reg_mmatch->regprog;
+	prog = (bt_regprog_T *)reg_mmatch->regprog;
 	line = reg_getline((linenr_T)0);
 	reg_startpos = reg_mmatch->startpos;
 	reg_endpos = reg_mmatch->endpos;
     }
     else
     {
-	prog = reg_match->regprog;
+	prog = (bt_regprog_T *)reg_match->regprog;
 	reg_startp = reg_match->startp;
 	reg_endp = reg_match->endp;
     }
@@ -3931,7 +3973,7 @@ unref_extmatch(em)
  */
     static long
 regtry(prog, col)
-    regprog_T	*prog;
+    bt_regprog_T    *prog;
     colnr_T	col;
 {
     reginput = regline + col;
@@ -4063,7 +4105,7 @@ regmatch(scan)
 #define RA_NOMATCH	5	/* didn't match */
 
   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
-   * vim_regexec_both() to reduce malloc()/free() calls. */
+   * bt_regexec_both() to reduce malloc()/free() calls. */
   regstack.ga_len = 0;
   backpos.ga_len = 0;
 
@@ -4072,14 +4114,14 @@ regmatch(scan)
    */
   for (;;)
   {
-    /* Some patterns my cause a long time to match, even though they are not
+    /* Some patterns may cause a long time to match, even though they are not
      * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
     fast_breakcheck();
 
 #ifdef DEBUG
     if (scan != NULL && regnarrate)
     {
-	mch_errmsg(regprop(scan));
+	mch_errmsg((char *)regprop(scan));
 	mch_errmsg("(\n");
     }
 #endif
@@ -4100,7 +4142,7 @@ regmatch(scan)
 #ifdef DEBUG
 	if (regnarrate)
 	{
-	    mch_errmsg(regprop(scan));
+	    mch_errmsg((char *)regprop(scan));
 	    mch_errmsg("...\n");
 # ifdef FEAT_SYN_HL
 	    if (re_extmatch_in != NULL)
@@ -4112,7 +4154,7 @@ regmatch(scan)
 		{
 		    mch_errmsg("    \"");
 		    if (re_extmatch_in->matches[i] != NULL)
-			mch_errmsg(re_extmatch_in->matches[i]);
+			mch_errmsg((char *)re_extmatch_in->matches[i]);
 		    mch_errmsg("\"\n");
 		}
 	    }
@@ -6091,9 +6133,14 @@ regnext(p)
     static int
 prog_magic_wrong()
 {
-    if (UCHARAT(REG_MULTI
-		? reg_mmatch->regprog->program
-		: reg_match->regprog->program) != REGMAGIC)
+    regprog_T	*prog;
+
+    prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
+    if (prog->engine == &nfa_regengine)
+	/* For NFA matcher we don't check the magic */
+	return FALSE;
+
+    if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
     {
 	EMSG(_(e_re_corr));
 	return TRUE;
@@ -6318,7 +6365,7 @@ re_num_cmp(val, scan)
 }
 
 
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
 
 /*
author	Bram Moolenaar <Bram@vim.org>	2013-05-19 19:40:29 +0200
committer	Bram Moolenaar <Bram@vim.org>	2013-05-19 19:40:29 +0200
commit	fbc0d2ea1e13fb55c267b72d64046e5ef984b97f (patch)
tree	8c539274e29070536bb28615e32d236983361246
parent	6fa41fb3746e5ab2f793de713879afc9b1e25647 (diff)