diff options
author | Bram Moolenaar <Bram@vim.org> | 2013-05-19 19:40:29 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2013-05-19 19:40:29 +0200 |
commit | fbc0d2ea1e13fb55c267b72d64046e5ef984b97f (patch) | |
tree | 8c539274e29070536bb28615e32d236983361246 | |
parent | 6fa41fb3746e5ab2f793de713879afc9b1e25647 (diff) |
updated for version 7.3.970 (tag: v7.3.970)
Problem: Syntax highlighting can be slow.
Solution: Include the NFA regexp engine. Add the 'regexpengine' option to
select which one is used. (various authors, including Ken Takata,
Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
-rw-r--r-- | Filelist | 1 | ||||
-rw-r--r-- | runtime/doc/pattern.txt | 37 | ||||
-rw-r--r-- | runtime/doc/tags | 19 | ||||
-rw-r--r-- | src/Make_cyg.mak | 3 | ||||
-rw-r--r-- | src/Make_ming.mak | 3 | ||||
-rw-r--r-- | src/Make_mvc.mak | 2 | ||||
-rw-r--r-- | src/Makefile | 12 | ||||
-rw-r--r-- | src/option.c | 8 | ||||
-rw-r--r-- | src/option.h | 1 | ||||
-rw-r--r-- | src/regexp.c | 445 | ||||
-rw-r--r-- | src/regexp.h | 78 | ||||
-rw-r--r-- | src/regexp_nfa.c | 3819 | ||||
-rw-r--r-- | src/structs.h | 13 | ||||
-rw-r--r-- | src/testdir/Make_amiga.mak | 3 | ||||
-rw-r--r-- | src/testdir/Make_dos.mak | 2 | ||||
-rw-r--r-- | src/testdir/Make_ming.mak | 2 | ||||
-rw-r--r-- | src/testdir/Make_os2.mak | 2 | ||||
-rw-r--r-- | src/testdir/Make_vms.mms | 5 | ||||
-rw-r--r-- | src/testdir/Makefile | 7 | ||||
-rw-r--r-- | src/testdir/test64.in | 180 | ||||
-rw-r--r-- | src/testdir/test64.ok | 330 | ||||
-rw-r--r-- | src/testdir/test95.in | 63 | ||||
-rw-r--r-- | src/testdir/test95.ok | 6 | ||||
-rw-r--r-- | src/version.c | 2 |
24 files changed, 4808 insertions, 235 deletions
@@ -57,6 +57,7 @@ SRC_ALL = \ src/popupmnu.c \ src/quickfix.c \ src/regexp.c \ + src/regexp_nfa.c \ src/regexp.h \ src/screen.c \ src/search.c \ diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 332dbee6d8..5f47889fd1 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1,4 +1,4 @@ -*pattern.txt* For Vim version 7.3. Last change: 2013 Apr 20 +*pattern.txt* For Vim version 7.3. Last change: 2013 May 17 VIM REFERENCE MANUAL by Bram Moolenaar @@ -350,6 +350,27 @@ For starters, read chapter 27 of the user manual |usr_27.txt|. or \z( pattern \) |/\z(| + */\%#=* *two-engines* +Vim includes two regexp engines: +1. An old, backtracking engine that supports everything. +2. A new, NFA engine that works much faster on some patterns, but does not + support everything. + +Vim will automatically select the right engine for you. However, if you run +into a problem or want to specifically select one engine or the other, you can +prepend one of the following to the pattern: + + \%#=0 Force automatic selection. Only has an effect when + 'regexpengine' has been set to a non-zero value. + \%#=1 Force using the old engine. + \%#=2 Force using the NFA engine. + +You can also use the 'regexpengine' option to change the default. + + *E864* *E868* *E874* *E875* *E876* *E877* *E878* +If selecting the NFA engine and it runs into something that is not implemented +the pattern will not match. This is only useful when debugging Vim. + ============================================================================== 3. Magic */magic* @@ -396,9 +417,10 @@ pattern. ============================================================================== 4. Overview of pattern items *pattern-overview* + *E865* *E866* *E867* *E869* Overview of multi items. */multi* *E61* *E62* -More explanation and examples below, follow the links. *E64* +More explanation and examples below, follow the links. 
*E64* *E871* multi ~ 'magic' 'nomagic' matches of the preceding atom ~ @@ -508,12 +530,14 @@ Character classes {not in Vi}: */character-classes* |/\c| \c \c ignore case, do not use the 'ignorecase' option |/\C| \C \C match case, do not use the 'ignorecase' option +|/\Z| \Z \Z ignore differences in Unicode "combining characters". + Useful when searching voweled Hebrew or Arabic text. + |/\m| \m \m 'magic' on for the following chars in the pattern |/\M| \M \M 'magic' off for the following chars in the pattern |/\v| \v \v the following chars in the pattern are "very magic" |/\V| \V \V the following chars in the pattern are "very nomagic" -|/\Z| \Z \Z ignore differences in Unicode "combining characters". - Useful when searching voweled Hebrew or Arabic text. +|/\%#=| \%#=1 \%#=1 select regexp engine |/zero-width| |/\%d| \%d \%d match specified decimal character (eg \%d123) |/\%x| \%x \%x match specified hex character (eg \%x2a) @@ -581,7 +605,7 @@ overview. \? Just like \=. Cannot be used when searching backwards with the "?" command. {not in Vi} - */\{* *E58* *E60* *E554* + */\{* *E58* *E60* *E554* *E870* \{n,m} Matches n to m of the preceding atom, as many as possible \{n} Matches n of the preceding atom \{n,} Matches at least n of the preceding atom, as many as possible @@ -962,7 +986,8 @@ match ASCII characters, as indicated by the range. ~ matches the last given substitute string */~* */\~* \(\) A pattern enclosed by escaped parentheses. */\(* */\(\)* */\)* - E.g., "\(^a\)" matches 'a' at the start of a line. *E51* *E54* *E55* + E.g., "\(^a\)" matches 'a' at the start of a line. + *E51* *E54* *E55* *E872* *E873* \1 Matches the same string that was matched by */\1* *E65* the first sub-expression in \( and \). 
{not in Vi} diff --git a/runtime/doc/tags b/runtime/doc/tags index 9af196ae46..2f40a4cb91 100644 --- a/runtime/doc/tags +++ b/runtime/doc/tags @@ -736,9 +736,11 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME* 'quote motion.txt /*'quote* 'quoteescape' options.txt /*'quoteescape'* 'rdt' options.txt /*'rdt'* +'re' options.txt /*'re'* 'readonly' options.txt /*'readonly'* 'redraw' vi_diff.txt /*'redraw'* 'redrawtime' options.txt /*'redrawtime'* +'regexpengine'' options.txt /*'regexpengine''* 'relativenumber' options.txt /*'relativenumber'* 'remap' options.txt /*'remap'* 'report' options.txt /*'report'* @@ -1389,6 +1391,7 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME* /\ pattern.txt /*\/\\* /\$ pattern.txt /*\/\\$* /\%# pattern.txt /*\/\\%#* +/\%#= pattern.txt /*\/\\%#=* /\%$ pattern.txt /*\/\\%$* /\%'m pattern.txt /*\/\\%'m* /\%( pattern.txt /*\/\\%(* @@ -4261,7 +4264,22 @@ E860 eval.txt /*E860* E861 eval.txt /*E861* E862 eval.txt /*E862* E863 if_pyth.txt /*E863* +E864 pattern.txt /*E864* +E865 pattern.txt /*E865* +E866 pattern.txt /*E866* +E867 pattern.txt /*E867* +E868 pattern.txt /*E868* +E869 pattern.txt /*E869* E87 windows.txt /*E87* +E870 pattern.txt /*E870* +E871 pattern.txt /*E871* +E872 pattern.txt /*E872* +E873 pattern.txt /*E873* +E874 pattern.txt /*E874* +E875 pattern.txt /*E875* +E876 pattern.txt /*E876* +E877 pattern.txt /*E877* +E878 pattern.txt /*E878* E88 windows.txt /*E88* E89 message.txt /*E89* E90 message.txt /*E90* @@ -8172,6 +8190,7 @@ try-nested eval.txt /*try-nested* try-nesting eval.txt /*try-nesting* tutor usr_01.txt /*tutor* twice if_cscop.txt /*twice* +two-engines pattern.txt /*two-engines* type() eval.txt /*type()* type-mistakes tips.txt /*type-mistakes* typecorr-settings usr_41.txt /*typecorr-settings* diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak index b00ae3b71b..d72282aa86 100644 --- a/src/Make_cyg.mak +++ b/src/Make_cyg.mak @@ -672,6 +672,9 @@ endif $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP) $(CC) -c $(CFLAGS) netbeans.c -o 
$(OUTDIR)/netbeans.o +$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL) + $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o + $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o diff --git a/src/Make_ming.mak b/src/Make_ming.mak index b8d7b200e8..3670e7102e 100644 --- a/src/Make_ming.mak +++ b/src/Make_ming.mak @@ -765,6 +765,9 @@ if_perl.c: if_perl.xs typemap $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC) $(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o +$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL) + $(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o + $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak index 2a4a3e8c45..acd1346ab9 100644 --- a/src/Make_mvc.mak +++ b/src/Make_mvc.mak @@ -1166,7 +1166,7 @@ $(OUTDIR)/popupmnu.obj: $(OUTDIR) popupmnu.c $(INCL) $(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL) -$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c $(INCL) +$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c regexp_nfa.c $(INCL) $(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL) diff --git a/src/Makefile b/src/Makefile index fb821ca8c7..67a53bb0ec 100644 --- a/src/Makefile +++ b/src/Makefile @@ -454,7 +454,7 @@ CClink = $(CC) # MULTIBYTE - To edit multi-byte characters. # Uncomment this when you want to edit a multibyte language. -# It's automatically enabled with big features or IME support. +# It's automatically enabled with normal features, GTK or IME support. # Note: Compile on a machine where setlocale() actually works, otherwise the # configure tests may fail. 
#CONF_OPT_MULTIBYTE = --enable-multibyte @@ -2664,7 +2664,7 @@ objects/popupmnu.o: popupmnu.c objects/quickfix.o: quickfix.c $(CCC) -o $@ quickfix.c -objects/regexp.o: regexp.c +objects/regexp.o: regexp.c regexp_nfa.c $(CCC) -o $@ regexp.c objects/screen.o: screen.c @@ -2938,10 +2938,10 @@ objects/quickfix.o: quickfix.c vim.h auto/config.h feature.h os_unix.h \ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ globals.h farsi.h arabic.h -objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ - ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ - gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ - arabic.h +objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \ + auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ + regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ + globals.h farsi.h arabic.h objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ diff --git a/src/option.c b/src/option.c index cfe464c5b0..325f061d95 100644 --- a/src/option.c +++ b/src/option.c @@ -2077,6 +2077,9 @@ static struct vimoption (char_u *)NULL, PV_NONE, #endif {(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT}, + {"regexpengine", "re", P_NUM|P_VI_DEF, + (char_u *)&p_re, PV_NONE, + {(char_u *)0L, (char_u *)0L} SCRIPTID_INIT}, {"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN, (char_u *)VAR_WIN, PV_RNU, {(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT}, @@ -8604,6 +8607,11 @@ set_num_option(opt_idx, varp, value, errbuf, errbuflen, opt_flags) errmsg = e_positive; p_hi = 0; } + if (p_re < 0 || p_re > 2) + { + errmsg = e_invarg; + p_re = 0; + } if (p_report < 0) { errmsg = e_positive; diff --git a/src/option.h b/src/option.h index 
8b982f580e..b11316faa4 100644 --- a/src/option.h +++ b/src/option.h @@ -653,6 +653,7 @@ EXTERN char_u *p_cdpath; /* 'cdpath' */ EXTERN long p_rdt; /* 'redrawtime' */ #endif EXTERN int p_remap; /* 'remap' */ +EXTERN long p_re; /* 'regexpengine' */ EXTERN long p_report; /* 'report' */ #if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX) EXTERN long p_pvh; /* 'previewheight' */ diff --git a/src/regexp.c b/src/regexp.c index e456b5d5f3..a1f71ab979 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -38,9 +38,20 @@ * Named character class support added by Walter Briscoe (1998 Jul 01) */ +/* Uncomment the first if you do not want to see debugging logs or files + * related to regular expressions, even when compiling with -DDEBUG. + * Uncomment the second to get the regexp debugging. */ +/* #undef DEBUG */ +/* #define DEBUG */ + #include "vim.h" -#undef DEBUG +#ifdef DEBUG +/* show/save debugging data when BT engine is used */ +# define BT_REGEXP_DUMP +/* save the debugging data to a file instead of displaying it */ +# define BT_REGEXP_LOG +#endif /* * The "internal use only" fields in regexp.h are present to pass info from @@ -326,9 +337,10 @@ toggle_Magic(x) /* Used for an error (down from) vim_regcomp(): give the error message, set * rc_did_emsg and return NULL */ #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) -#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) -#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) +#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) +#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? 
"" : "\\"), rc_did_emsg = TRUE, FAIL) +#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) #define MAX_LIMIT (32767L << 16L) @@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int)); static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); static char_u *cstrchr __ARGS((char_u *, int)); +#ifdef BT_REGEXP_DUMP +static void regdump __ARGS((char_u *, bt_regprog_T *)); +#endif #ifdef DEBUG -static void regdump __ARGS((char_u *, regprog_T *)); static char_u *regprop __ARGS((char_u *)); #endif +static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); +static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); +static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); +static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); + #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 @@ -630,7 +649,13 @@ static char_u META_flags[] = { }; #endif -static int curchr; +static int curchr; /* currently parsed character */ +/* Previous character. Note: prevchr is sometimes -1 when we are not at the + * start, eg in /[ ^I]^ the pattern was never found even if it existed, + * because ^ was taken to be magic -- webb */ +static int prevchr; +static int prevprevchr; /* previous-previous character */ +static int nextchr; /* used for ungetchr() */ /* arguments for reg() */ #define REG_NOPAREN 0 /* toplevel reg() */ @@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *)); static void regtail __ARGS((char_u *, char_u *)); static void regoptail __ARGS((char_u *, char_u *)); +static regengine_T bt_regengine; +static regengine_T nfa_regengine; + /* * Return TRUE if compiled regular expression "prog" can match a line break. */ @@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = { /* * Produce the bytes for equivalence class "c". * Currently only handles latin1, latin9 and utf-8. 
+ * NOTE: When changing this function, also change nfa_emit_equi_class() */ static void reg_equi_class(c) @@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp) return p; } +static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); + /* - * vim_regcomp() - compile a regular expression into internal code + * bt_regcomp() - compile a regular expression into internal code for the + * traditional back track matcher. * Returns the program in allocated space. Returns NULL for an error. * * We can't allocate space until we know how big the compiled form will be, @@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp) * of the structure of the compiled regexp. * "re_flags": RE_MAGIC and/or RE_STRING. */ - regprog_T * -vim_regcomp(expr, re_flags) + static regprog_T * +bt_regcomp(expr, re_flags) char_u *expr; int re_flags; { - regprog_T *r; + bt_regprog_T *r; char_u *scan; char_u *longest; int len; @@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags) #endif /* Allocate space. */ - r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE); + r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); if (r == NULL) return NULL; @@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags) r->regmlen = len; } } -#ifdef DEBUG +#ifdef BT_REGEXP_DUMP regdump(expr, r); #endif - return r; + r->engine = &bt_regengine; + return (regprog_T *)r; } /* @@ -1436,7 +1469,7 @@ vim_regcomp_had_eol() #endif /* - * reg - regular expression, i.e. main body or parenthesized thing + * Parse regular expression, i.e. main body or parenthesized thing. * * Caller must absorb opening parenthesis. * @@ -1473,7 +1506,7 @@ reg(paren, flagp) { /* Make a MOPEN node. 
*/ if (regnpar >= NSUBEXP) - EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); parno = regnpar; ++regnpar; ret = regnode(MOPEN + parno); @@ -1534,14 +1567,14 @@ reg(paren, flagp) else #endif if (paren == REG_NPAREN) - EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); else - EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); } else if (paren == REG_NOPAREN && peekchr() != NUL) { if (curchr == Magic(')')) - EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); else EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ /* NOTREACHED */ @@ -1556,7 +1589,7 @@ reg(paren, flagp) } /* - * Handle one alternative of an | operator. + * Parse one alternative of an | operator. * Implements the & operator. */ static char_u * @@ -1599,7 +1632,7 @@ regbranch(flagp) } /* - * Handle one alternative of an | or & operator. + * Parse one alternative of an | or & operator. * Implements the concatenation operator. */ static char_u * @@ -1679,7 +1712,7 @@ regconcat(flagp) } /* - * regpiece - something followed by possible [*+=] + * Parse something followed by possible [*+=]. * * Note that the branching code sequences used for = and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as @@ -1759,7 +1792,7 @@ regpiece(flagp) } } if (lop == END) - EMSG_M_RET_NULL(_("E59: invalid character after %s@"), + EMSG2_RET_NULL(_("E59: invalid character after %s@"), reg_magic == MAGIC_ALL); /* Look behind must match with behind_pos. 
*/ if (lop == BEHIND || lop == NOBEHIND) @@ -1793,7 +1826,7 @@ regpiece(flagp) else { if (num_complex_braces >= 10) - EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"), + EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), reg_magic == MAGIC_ALL); reginsert(BRACE_COMPLEX + num_complex_braces, ret); regoptail(ret, regnode(BACK)); @@ -1820,8 +1853,20 @@ regpiece(flagp) return ret; } +/* When making changes to classchars also change nfa_classcodes. */ +static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; +static int classcodes[] = { + ANY, IDENT, SIDENT, KWORD, SKWORD, + FNAME, SFNAME, PRINT, SPRINT, + WHITE, NWHITE, DIGIT, NDIGIT, + HEX, NHEX, OCTAL, NOCTAL, + WORD, NWORD, HEAD, NHEAD, + ALPHA, NALPHA, LOWER, NLOWER, + UPPER, NUPPER +}; + /* - * regatom - the lowest level + * Parse the lowest level. * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and @@ -1836,15 +1881,6 @@ regatom(flagp) int cpo_lit; /* 'cpoptions' contains 'l' flag */ int cpo_bsl; /* 'cpoptions' contains '\' flag */ int c; - static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; - static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, - FNAME, SFNAME, PRINT, SPRINT, - WHITE, NWHITE, DIGIT, NDIGIT, - HEX, NHEX, OCTAL, NOCTAL, - WORD, NWORD, HEAD, NHEAD, - ALPHA, NALPHA, LOWER, NLOWER, - UPPER, NUPPER - }; char_u *p; int extra = 0; @@ -2140,7 +2176,7 @@ regatom(flagp) while ((c = getchr()) != ']') { if (c == NUL) - EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["), + EMSG2_RET_NULL(_("E69: Missing ] after %s%%["), reg_magic == MAGIC_ALL); br = regnode(BRANCH); if (ret == NULL) @@ -2156,7 +2192,7 @@ regatom(flagp) return NULL; } if (ret == NULL) - EMSG_M_RET_NULL(_("E70: Empty %s%%[]"), + EMSG2_RET_NULL(_("E70: Empty %s%%[]"), reg_magic == MAGIC_ALL); lastbranch = regnode(BRANCH); br = regnode(NOTHING); @@ -2200,7 +2236,7 @@ regatom(flagp) } if (i < 0) - EMSG_M_RET_NULL( + 
EMSG2_RET_NULL( _("E678: Invalid character after %s%%[dxouU]"), reg_magic == MAGIC_ALL); #ifdef FEAT_MBYTE @@ -2272,7 +2308,7 @@ regatom(flagp) } } - EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"), + EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), reg_magic == MAGIC_ALL); } } @@ -2567,8 +2603,7 @@ collection: break; } else if (reg_strict) - EMSG_M_RET_NULL(_("E769: Missing ] after %s["), - reg_magic > MAGIC_OFF); + EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); } /* FALLTHROUGH */ @@ -2659,7 +2694,7 @@ use_multibytecode(c) #endif /* - * emit a node + * Emit a node. * Return pointer to generated code. */ static char_u * @@ -2711,7 +2746,7 @@ regmbc(c) #endif /* - * reginsert - insert an operator in front of already-emitted operand + * Insert an operator in front of already-emitted operand * * Means relocating the operand. */ @@ -2742,7 +2777,7 @@ reginsert(op, opnd) } /* - * reginsert_limits - insert an operator in front of already-emitted operand. + * Insert an operator in front of already-emitted operand. * The operator has the given limit values as operands. Also set next pointer. * * Means relocating the operand. @@ -2794,7 +2829,7 @@ re_put_long(p, val) } /* - * regtail - set the next-pointer at the end of a node chain + * Set the next-pointer at the end of a node chain. */ static void regtail(p, val) @@ -2835,7 +2870,7 @@ regtail(p, val) } /* - * regoptail - regtail on item after a BRANCH; nop if none + * Like regtail, on item after a BRANCH; nop if none. */ static void regoptail(p, val) @@ -2851,22 +2886,15 @@ regoptail(p, val) } /* - * getchr() - get the next character from the pattern. We know about - * magic and such, so therefore we need a lexical analyzer. + * Functions for getting characters from the regexp input. 
*/ -/* static int curchr; */ -static int prevprevchr; -static int prevchr; -static int nextchr; /* used for ungetchr() */ -/* - * Note: prevchr is sometimes -1 when we are not at the start, - * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was - * taken to be magic -- webb - */ static int at_start; /* True when on the first character */ static int prev_at_start; /* True when on the second character */ +/* + * Start parsing at "str". + */ static void initchr(str) char_u *str; @@ -2878,6 +2906,9 @@ initchr(str) prev_at_start = FALSE; } +/* + * Get the next character without advancing. + */ static int peekchr() { @@ -3086,6 +3117,10 @@ skipchr_keepstart() prevprevchr = prpr; } +/* + * Get the next character from the pattern. We know about magic and such, so + * therefore we need a lexical analyzer. + */ static int getchr() { @@ -3340,8 +3375,8 @@ typedef struct regbehind_S } regbehind_T; static char_u *reg_getline __ARGS((linenr_T lnum)); -static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); -static long regtry __ARGS((regprog_T *prog, colnr_T col)); +static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); +static long regtry __ARGS((bt_regprog_T *prog, colnr_T col)); static void cleanup_subexpr __ARGS((void)); #ifdef FEAT_SYN_HL static void cleanup_zsubexpr __ARGS((void)); @@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol; /* * Sometimes need to save a copy of a line. Since alloc()/free() is very * slow, we keep one allocated piece of memory and only re-allocate it when - * it's too small. It's freed in vim_regexec_both() when finished. + * it's too small. It's freed in bt_regexec_both() when finished. */ static char_u *reg_tofree = NULL; static unsigned reg_tofreelen; @@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ /* TRUE if using multi-line regexp. 
*/ #define REG_MULTI (reg_match == NULL) +static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); + /* * Match a regexp against a string. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). @@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ * * Return TRUE if there is a match, FALSE if not. */ - int -vim_regexec(rmp, line, col) + static int +bt_regexec(rmp, line, col) regmatch_T *rmp; char_u *line; /* string to match against */ colnr_T col; /* column to start looking for match */ @@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col) ireg_icombine = FALSE; #endif ireg_maxcol = 0; - return (vim_regexec_both(line, col, NULL) != 0); + return (bt_regexec_both(line, col, NULL) != 0); } #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) + +static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); + /* * Like vim_regexec(), but consider a "\n" in "line" to be a line break. */ - int -vim_regexec_nl(rmp, line, col) + static int +bt_regexec_nl(rmp, line, col) regmatch_T *rmp; char_u *line; /* string to match against */ colnr_T col; /* column to start looking for match */ @@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col) ireg_icombine = FALSE; #endif ireg_maxcol = 0; - return (vim_regexec_both(line, col, NULL) != 0); + return (bt_regexec_both(line, col, NULL) != 0); } #endif +static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); + /* * Match a regexp against multiple lines. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). @@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col) * Return zero if there is no match. Return number of lines contained in the * match otherwise. 
*/ - long -vim_regexec_multi(rmp, win, buf, lnum, col, tm) + static long +bt_regexec_multi(rmp, win, buf, lnum, col, tm) regmmatch_T *rmp; win_T *win; /* window in which to search or NULL */ buf_T *buf; /* buffer in which to search */ @@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm) #endif ireg_maxcol = rmp->rmm_maxcol; - r = vim_regexec_both(NULL, col, tm); + r = bt_regexec_both(NULL, col, tm); return r; } @@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm) * lines ("line" is NULL, use reg_getline()). */ static long -vim_regexec_both(line, col, tm) +bt_regexec_both(line, col, tm) char_u *line; colnr_T col; /* column to start looking for match */ proftime_T *tm UNUSED; /* timeout limit or NULL */ { - regprog_T *prog; + bt_regprog_T *prog; char_u *s; long retval = 0L; @@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm) if (REG_MULTI) { - prog = reg_mmatch->regprog; + prog = (bt_regprog_T *)reg_mmatch->regprog; line = reg_getline((linenr_T)0); reg_startpos = reg_mmatch->startpos; reg_endpos = reg_mmatch->endpos; } else { - prog = reg_match->regprog; + prog = (bt_regprog_T *)reg_match->regprog; reg_startp = reg_match->startp; reg_endp = reg_match->endp; } @@ -3931,7 +3973,7 @@ unref_extmatch(em) */ static long regtry(prog, col) - regprog_T *prog; + bt_regprog_T *prog; colnr_T col; { reginput = regline + col; @@ -4063,7 +4105,7 @@ regmatch(scan) #define RA_NOMATCH 5 /* didn't match */ /* Make "regstack" and "backpos" empty. They are allocated and freed in - * vim_regexec_both() to reduce malloc()/free() calls. */ + * bt_regexec_both() to reduce malloc()/free() calls. */ regstack.ga_len = 0; backpos.ga_len = 0; @@ -4072,14 +4114,14 @@ regmatch(scan) */ for (;;) { - /* Some patterns my cause a long time to match, even though they are not + /* Some patterns may cause a long time to match, even though they are not * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. 
*/ fast_breakcheck(); #ifdef DEBUG if (scan != NULL && regnarrate) { - mch_errmsg(regprop(scan)); + mch_errmsg((char *)regprop(scan)); mch_errmsg("(\n"); } #endif @@ -4100,7 +4142,7 @@ regmatch(scan) #ifdef DEBUG if (regnarrate) { - mch_errmsg(regprop(scan)); + mch_errmsg((char *)regprop(scan)); mch_errmsg("...\n"); # ifdef FEAT_SYN_HL if (re_extmatch_in != NULL) @@ -4112,7 +4154,7 @@ regmatch(scan) { mch_errmsg(" \""); if (re_extmatch_in->matches[i] != NULL) - mch_errmsg(re_extmatch_in->matches[i]); + mch_errmsg((char *)re_extmatch_in->matches[i]); mch_errmsg("\"\n"); } } @@ -6091,9 +6133,14 @@ regnext(p) static int prog_magic_wrong() { - if (UCHARAT(REG_MULTI - ? reg_mmatch->regprog->program - : reg_match->regprog->program) != REGMAGIC) + regprog_T *prog; + + prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog; + if (prog->engine == &nfa_regengine) + /* For NFA matcher we don't check the magic */ + return FALSE; + + if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) { EMSG(_(e_re_corr)); return TRUE; @@ -6318,7 +6365,7 @@ re_num_cmp(val, scan) } -#ifdef DEBUG +#ifdef BT_REGEXP_DUMP /* |