1 files changed, 45 insertions, 118 deletions
diff --git a/benchsuite/benchsuite b/benchsuite/benchsuite
index 9353cf49..f8cf6ea8 100755
--- a/benchsuite/benchsuite
+++ b/benchsuite/benchsuite
@@ -71,15 +71,8 @@ def bench_linux_literal_default(suite_dir):
     return Benchmark(pattern=pat, commands=[
         mkcmd('rg', ['rg', pat]),
         mkcmd('ag', ['ag', pat]),
-        # ucg reports the exact same matches as ag and rg even though it
-        # doesn't read gitignore files. Instead, it has a file whitelist
-        # that happens to match up exactly with the gitignores for this search.
-        mkcmd('ucg', ['ucg', pat]),
         # I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
         # default, but I'd guess it to be on most desktop systems.
-        mkcmd('pt', ['pt', pat]),
-        # sift reports an extra line here for a binary file matched.
-        mkcmd('sift', ['sift', pat]),
         mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
     ])
 
@@ -102,16 +95,12 @@ def bench_linux_literal(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', pat]),
-        mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
-        mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
-        mkcmd('pt (ignore)', ['pt', pat]),
-        mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
-        mkcmd('git grep (ignore)', [
+        mkcmd('rg', ['rg', '-n', pat]),
+        mkcmd('rg (mmap)', ['rg', '-n', '--mmap', pat]),
+        mkcmd('ag (mmap)', ['ag', '-s', pat]),
+        mkcmd('git grep', [
             'git', 'grep', '-I', '-n', pat,
         ], env={'LC_ALL': 'C'}),
-        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
-        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
     ])
 
 
@@ -131,31 +120,22 @@ def bench_linux_literal_casei(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
-        mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
-        mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
-        mkcmd('pt (ignore)', ['pt', '-i', pat]),
-        mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
+        mkcmd('rg', ['rg', '-n', '-i', pat]),
+        mkcmd('rg (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
+        mkcmd('ag (mmap)', ['ag', '-i', pat]),
         # It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
         # since that is certainly what ripgrep is doing, but this is for an
         # ASCII literal, so we should give `git grep` all the opportunity to
         # do its best.
-        mkcmd('git grep (ignore)', [
+        mkcmd('git grep', [
             'git', 'grep', '-I', '-n', '-i', pat,
         ], env={'LC_ALL': 'C'}),
-        mkcmd('rg (whitelist)', [
-            'rg', '-n', '-i', '--no-ignore', '-tall', pat,
-        ]),
-        mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
     ])
 
 
 def bench_linux_re_literal_suffix(suite_dir):
     '''
     Benchmark the speed of a literal inside a regex.
-
-    This, for example, inhibits a prefix byte optimization used
-    inside of Go's regex engine (relevant for sift and pt).
     '''
     require(suite_dir, 'linux')
     cwd = path.join(suite_dir, LINUX_DIR)
@@ -166,26 +146,19 @@ def bench_linux_re_literal_suffix(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', pat]),
-        mkcmd('ag (ignore)', ['ag', '-s', pat]),
-        mkcmd('pt (ignore)', ['pt', '-e', pat]),
-        mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
+        mkcmd('rg', ['rg', '-n', pat]),
+        mkcmd('ag', ['ag', '-s', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
-        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
     ])
 
 
 def bench_linux_word(suite_dir):
     '''
     Benchmark use of the -w ("match word") flag in each tool.
-
-    sift has a lot of trouble with this because it forces it into Go's
-    regex engine by surrounding the pattern with \b assertions.
     '''
     require(suite_dir, 'linux')
     cwd = path.join(suite_dir, LINUX_DIR)
@@ -196,28 +169,19 @@ def bench_linux_word(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
-        mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
-        mkcmd('pt (ignore)', ['pt', '-w', pat]),
-        mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
+        mkcmd('rg', ['rg', '-n', '-w', pat]),
+        mkcmd('ag', ['ag', '-s', '-w', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', '-w', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', [
-            'rg', '-n', '-w', '--no-ignore', '-tall', pat,
-        ]),
-        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
     ])
 
 
 def bench_linux_unicode_greek(suite_dir):
     '''
     Benchmark matching of a Unicode category.
-
-    Only three tools (ripgrep, sift and pt) support this. We omit
-    pt because it is too slow.
     '''
     require(suite_dir, 'linux')
     cwd = path.join(suite_dir, LINUX_DIR)
@@ -229,8 +193,6 @@ def bench_linux_unicode_greek(suite_dir):
 
     return Benchmark(pattern=pat, commands=[
         mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('pt', ['pt', '-e', pat]),
-        mkcmd('sift', SIFT + ['-n', '--git', pat]),
     ])
 
 
@@ -250,8 +212,6 @@ def bench_linux_unicode_greek_casei(suite_dir):
 
     return Benchmark(pattern=pat, commands=[
         mkcmd('rg', ['rg', '-n', '-i', pat]),
-        mkcmd('pt', ['pt', '-i', '-e', pat]),
-        mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
     ])
 
 
@@ -272,26 +232,19 @@ def bench_linux_unicode_word(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', pat]),
-        mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
-        mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
-        mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
-        mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
+        mkcmd('rg', ['rg', '-n', pat]),
+        mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+        mkcmd('ag (ASCII)', ['ag', '-s', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'en_US.UTF-8'},
         ),
         mkcmd(
-            'git grep (ignore) (ASCII)',
+            'git grep (ASCII)',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
-        mkcmd('rg (whitelist) (ASCII)', [
-            'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
-        ]),
-        mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
     ])
 
 
@@ -313,26 +266,19 @@ def bench_linux_no_literal(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', pat]),
-        mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
-        mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
-        mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
-        mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
+        mkcmd('rg', ['rg', '-n', pat]),
+        mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+        mkcmd('ag (ASCII)', ['ag', '-s', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'en_US.UTF-8'},
         ),
         mkcmd(
-            'git grep (ignore) (ASCII)',
+            'git grep (ASCII)',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
-        mkcmd('rg (whitelist) (ASCII)', [
-            'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
-        ]),
-        mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
     ])
 
 
@@ -354,15 +300,13 @@ def bench_linux_alternates(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', pat]),
-        mkcmd('ag (ignore)', ['ag', '-s', pat]),
+        mkcmd('rg', ['rg', '-n', pat]),
+        mkcmd('ag', ['ag', '-s', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
-        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
     ])
 
 
@@ -377,15 +321,13 @@ def bench_linux_alternates_casei(suite_dir):
         return Command(*args, **kwargs)
 
     return Benchmark(pattern=pat, commands=[
-        mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
-        mkcmd('ag (ignore)', ['ag', '-i', pat]),
+        mkcmd('rg', ['rg', '-n', '-i', pat]),
+        mkcmd('ag', ['ag', '-i', pat]),
         mkcmd(
-            'git grep (ignore)',
+            'git grep',
             ['git', 'grep', '-E', '-I', '-n', '-i', pat],
             env={'LC_ALL': 'C'},
         ),
-        mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
-        mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
     ])
 
 
@@ -400,15 +342,10 @@ def bench_subtitles_en_literal(suite_dir):
     return Benchmark(pattern=pat, commands=[
         Command('rg', ['rg', pat, en]),
         Command('rg (no mmap)', ['rg', '--no-mmap', pat, en]),
-        Command('pt', ['pt', '-N', pat, en]),
-        Command('sift', ['sift', pat, en]),
-        Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
+        Command('grep', ['grep', pat, en], env=GREP_ASCII),
         Command('rg (lines)', ['rg', '-n', pat, en]),
         Command('ag (lines)', ['ag', '-s', pat, en]),
-        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
-        Command('pt (lines)', ['pt', pat, en]),
-        Command('sift (lines)', ['sift', '-n', pat, en]),
-        Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
+        Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII),
     ])
 
 
@@ -428,7 +365,6 @@ def bench_subtitles_en_literal_casei(suite_dir):
         ], env=GREP_ASCII),
         Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
         Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
-        Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
     ])
 
 
@@ -445,7 +381,6 @@ def bench_subtitles_en_literal_word(suite_dir):
             'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
         ]),
         Command('ag (ASCII)', ['ag', '-sw', pat, en]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
         Command('grep (ASCII)', [
             'grep', '-anw', pat, en,
         ], env=GREP_ASCII),
@@ -471,7 +406,6 @@ def bench_subtitles_en_alternate(suite_dir):
     return Benchmark(pattern=pat, commands=[
         Command('rg (lines)', ['rg', '-n', pat, en]),
         Command('ag (lines)', ['ag', '-s', pat, en]),
-        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
         Command('grep (lines)', [
             'grep', '-E', '-an', pat, en,
         ], env=GREP_ASCII),
@@ -498,7 +432,6 @@ def bench_subtitles_en_alternate_casei(suite_dir):
 
     return Benchmark(pattern=pat, commands=[
         Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
-        Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
         Command('grep (ASCII)', [
             'grep', '-E', '-ani', pat, en,
         ], env=GREP_ASCII),
@@ -520,7 +453,6 @@ def bench_subtitles_en_surrounding_words(suite_dir):
         Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
         Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
         Command('ag (ASCII)', ['ag', '-s', pat, en]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
         Command('grep (ASCII)', [
             'grep', '-E', '-an', pat, en,
         ], env=GREP_ASCII),
@@ -544,7 +476,6 @@ def bench_subtitles_en_no_literal(suite_dir):
         Command('rg', ['rg', '-n', pat, en]),
         Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
         Command('ag (ASCII)', ['ag', '-s', pat, en]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
         Command('grep (ASCII)', [
             'grep', '-E', '-an', pat, en,
         ], env=GREP_ASCII),
@@ -562,14 +493,9 @@ def bench_subtitles_ru_literal(suite_dir):
     return Benchmark(pattern=pat, commands=[
         Command('rg', ['rg', pat, ru]),
         Command('rg (no mmap)', ['rg', '--no-mmap', pat, ru]),
-        Command('pt', ['pt', '-N', pat, ru]),
-        Command('sift', ['sift', pat, ru]),
         Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
         Command('rg (lines)', ['rg', '-n', pat, ru]),
         Command('ag (lines)', ['ag', '-s', pat, ru]),
-        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
-        Command('pt (lines)', ['pt', pat, ru]),
-        Command('sift (lines)', ['sift', '-n', pat, ru]),
         Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
     ])
 
@@ -590,7 +516,6 @@ def bench_subtitles_ru_literal_casei(suite_dir):
         ], env=GREP_ASCII),
         Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
         Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
-        Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
     ])
 
 
@@ -607,7 +532,6 @@ def bench_subtitles_ru_literal_word(suite_dir):
             'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
         ]),
         Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
         Command('grep (ASCII)', [
             'grep', '-anw', pat, ru,
         ], env=GREP_ASCII),
@@ -633,7 +557,6 @@ def bench_subtitles_ru_alternate(suite_dir):
     return Benchmark(pattern=pat, commands=[
         Command('rg (lines)', ['rg', '-n', pat, ru]),
         Command('ag (lines)', ['ag', '-s', pat, ru]),
-        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
         Command('grep (lines)', [
             'grep', '-E', '-an', pat, ru,
         ], env=GREP_ASCII),
@@ -660,7 +583,6 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
 
     return Benchmark(pattern=pat, commands=[
         Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
-        Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
         Command('grep (ASCII)', [
             'grep', '-E', '-ani', pat, ru,
         ], env=GREP_ASCII),
@@ -681,7 +603,6 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
         Command('rg', ['rg', '-n', pat, ru]),
         Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
         Command('ag (ASCII)', ['ag', '-s', pat, ru]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
         Command('grep (ASCII)', [
             'grep', '-E', '-an', pat, ru,
         ], env=GREP_ASCII),
@@ -705,7 +626,6 @@ def bench_subtitles_ru_no_literal(suite_dir):
         Command('rg', ['rg', '-n', pat, ru]),
         Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
         Command('ag (ASCII)', ['ag', '-s', pat, ru]),
-        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
         Command('grep (ASCII)', [
             'grep', '-E', '-an', pat, ru,
         ], env=GREP_ASCII),
@@ -758,7 +678,7 @@ class Benchmark(object):
     def __init__(self, name=None, pattern=None, commands=None,
                  warmup_count=1, count=3, line_count=True,
                  allow_missing_commands=False,
-                 disabled_cmds=None):
+                 disabled_cmds=None, order=0):
         '''
         Create a single benchmark.
 
@@ -794,6 +714,8 @@ class Benchmark(object):
             will simply skip it.
         :param list(str) disabled_cmds:
             A list of commands to skip.
+        :param int order:
+            Sort key for this benchmark; lower values are ordered first.
         '''
         self.name = name
         self.pattern = pattern
@@ -803,6 +725,7 @@ class Benchmark(object):
         self.line_count = line_count
         self.allow_missing_commands = allow_missing_commands
         self.disabled_cmds = set(disabled_cmds or [])
+        self.order = order
 
     def raise_if_missing(self):
         '''
@@ -1165,19 +1088,22 @@ def collect_benchmarks(suite_dir, filter_pat=None,
         requires corpora that are missing, then a log message is
         emitted to stderr and it is not yielded.
     '''
-    for fun in sorted(globals()):
-        if not fun.startswith('bench_'):
+    benchmarks = []
+    for global_name in globals():
+        if not global_name.startswith('bench_'):
             continue
-        name = re.sub('^bench_', '', fun)
+        name = re.sub('^bench_', '', global_name)
         if filter_pat is not None and not re.search(filter_pat, name):
             continue
         try:
-            benchmark = globals()[fun](suite_dir)
+            fun = globals()[global_name]
+            benchmark = fun(suite_dir)
             benchmark.name = name
             benchmark.warmup_count = warmup_iter
             benchmark.count = bench_iter
             benchmark.allow_missing_commands = allow_missing_commands
             benchmark.disabled_cmds = disabled_cmds
+            benchmark.order = fun.__code__.co_firstlineno
             benchmark.raise_if_missing()
         except MissingDependencies as e:
             eprint(
@@ -1192,7 +1118,8 @@ def collect_benchmarks(suite_dir, filter_pat=None,
                   '(run with --allow-missing to run incomplete benchmarks)'
             eprint(fmt % (', '.join(e.missing_names), name))
             continue
-        yield benchmark
+        benchmarks.append(benchmark)
+    return sorted(benchmarks, key=lambda b: b.order)
 
 
 def main():