summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x benchsuite/benchsuite 163
1 files changed, 45 insertions, 118 deletions
diff --git a/benchsuite/benchsuite b/benchsuite/benchsuite
index 9353cf49..f8cf6ea8 100755
--- a/benchsuite/benchsuite
+++ b/benchsuite/benchsuite
@@ -71,15 +71,8 @@ def bench_linux_literal_default(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', pat]),
mkcmd('ag', ['ag', pat]),
- # ucg reports the exact same matches as ag and rg even though it
- # doesn't read gitignore files. Instead, it has a file whitelist
- # that happens to match up exactly with the gitignores for this search.
- mkcmd('ucg', ['ucg', pat]),
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems.
- mkcmd('pt', ['pt', pat]),
- # sift reports an extra line here for a binary file matched.
- mkcmd('sift', ['sift', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
])
@@ -102,16 +95,12 @@ def bench_linux_literal(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', pat]),
- mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
- mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
- mkcmd('pt (ignore)', ['pt', pat]),
- mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
- mkcmd('git grep (ignore)', [
+ mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('rg (mmap)', ['rg', '-n', '--mmap', pat]),
+ mkcmd('ag (mmap)', ['ag', '-s', pat]),
+ mkcmd('git grep', [
'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}),
- mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
- mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
@@ -131,31 +120,22 @@ def bench_linux_literal_casei(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
- mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
- mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
- mkcmd('pt (ignore)', ['pt', '-i', pat]),
- mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
+ mkcmd('rg', ['rg', '-n', '-i', pat]),
+ mkcmd('rg (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
+ mkcmd('ag (mmap)', ['ag', '-i', pat]),
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
# since that is certainly what ripgrep is doing, but this is for an
# ASCII literal, so we should give `git grep` all the opportunity to
# do its best.
- mkcmd('git grep (ignore)', [
+ mkcmd('git grep', [
'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}),
- mkcmd('rg (whitelist)', [
- 'rg', '-n', '-i', '--no-ignore', '-tall', pat,
- ]),
- mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
])
def bench_linux_re_literal_suffix(suite_dir):
'''
Benchmark the speed of a literal inside a regex.
-
- This, for example, inhibits a prefix byte optimization used
- inside of Go's regex engine (relevant for sift and pt).
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -166,26 +146,19 @@ def bench_linux_re_literal_suffix(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', pat]),
- mkcmd('ag (ignore)', ['ag', '-s', pat]),
- mkcmd('pt (ignore)', ['pt', '-e', pat]),
- mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
+ mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('ag', ['ag', '-s', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
- mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
def bench_linux_word(suite_dir):
'''
Benchmark use of the -w ("match word") flag in each tool.
-
- sift has a lot of trouble with this because it forces it into Go's
- regex engine by surrounding the pattern with \b assertions.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -196,28 +169,19 @@ def bench_linux_word(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
- mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
- mkcmd('pt (ignore)', ['pt', '-w', pat]),
- mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
+ mkcmd('rg', ['rg', '-n', '-w', pat]),
+ mkcmd('ag', ['ag', '-s', '-w', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', [
- 'rg', '-n', '-w', '--no-ignore', '-tall', pat,
- ]),
- mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
])
def bench_linux_unicode_greek(suite_dir):
'''
Benchmark matching of a Unicode category.
-
- Only three tools (ripgrep, sift and pt) support this. We omit
- pt because it is too slow.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -229,8 +193,6 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
- mkcmd('pt', ['pt', '-e', pat]),
- mkcmd('sift', SIFT + ['-n', '--git', pat]),
])
@@ -250,8 +212,6 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
- mkcmd('pt', ['pt', '-i', '-e', pat]),
- mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
])
@@ -272,26 +232,19 @@ def bench_linux_unicode_word(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', pat]),
- mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
- mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
- mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
- mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
+ mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+ mkcmd('ag (ASCII)', ['ag', '-s', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
- 'git grep (ignore) (ASCII)',
+ 'git grep (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
- mkcmd('rg (whitelist) (ASCII)', [
- 'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
- ]),
- mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
])
@@ -313,26 +266,19 @@ def bench_linux_no_literal(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', pat]),
- mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
- mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
- mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
- mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
+ mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('rg (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+ mkcmd('ag (ASCII)', ['ag', '-s', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
- 'git grep (ignore) (ASCII)',
+ 'git grep (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
- mkcmd('rg (whitelist) (ASCII)', [
- 'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
- ]),
- mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
])
@@ -354,15 +300,13 @@ def bench_linux_alternates(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', pat]),
- mkcmd('ag (ignore)', ['ag', '-s', pat]),
+ mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('ag', ['ag', '-s', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
- mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
@@ -377,15 +321,13 @@ def bench_linux_alternates_casei(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
- mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
- mkcmd('ag (ignore)', ['ag', '-i', pat]),
+ mkcmd('rg', ['rg', '-n', '-i', pat]),
+ mkcmd('ag', ['ag', '-i', pat]),
mkcmd(
- 'git grep (ignore)',
+ 'git grep',
['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'},
),
- mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
- mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
])
@@ -400,15 +342,10 @@ def bench_subtitles_en_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', pat, en]),
Command('rg (no mmap)', ['rg', '--no-mmap', pat, en]),
- Command('pt', ['pt', '-N', pat, en]),
- Command('sift', ['sift', pat, en]),
- Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
+ Command('grep', ['grep', pat, en], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
- Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
- Command('pt (lines)', ['pt', pat, en]),
- Command('sift (lines)', ['sift', '-n', pat, en]),
- Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
+ Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII),
])
@@ -428,7 +365,6 @@ def bench_subtitles_en_literal_casei(suite_dir):
], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
- Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
])
@@ -445,7 +381,6 @@ def bench_subtitles_en_literal_word(suite_dir):
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-anw', pat, en,
], env=GREP_ASCII),
@@ -471,7 +406,6 @@ def bench_subtitles_en_alternate(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
- Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (lines)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
@@ -498,7 +432,6 @@ def bench_subtitles_en_alternate_casei(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
- Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, en,
], env=GREP_ASCII),
@@ -520,7 +453,6 @@ def bench_subtitles_en_surrounding_words(suite_dir):
Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
@@ -544,7 +476,6 @@ def bench_subtitles_en_no_literal(suite_dir):
Command('rg', ['rg', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
@@ -562,14 +493,9 @@ def bench_subtitles_ru_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', pat, ru]),
Command('rg (no mmap)', ['rg', '--no-mmap', pat, ru]),
- Command('pt', ['pt', '-N', pat, ru]),
- Command('sift', ['sift', pat, ru]),
Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
- Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
- Command('pt (lines)', ['pt', pat, ru]),
- Command('sift (lines)', ['sift', '-n', pat, ru]),
Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
])
@@ -590,7 +516,6 @@ def bench_subtitles_ru_literal_casei(suite_dir):
], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
- Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
])
@@ -607,7 +532,6 @@ def bench_subtitles_ru_literal_word(suite_dir):
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-anw', pat, ru,
], env=GREP_ASCII),
@@ -633,7 +557,6 @@ def bench_subtitles_ru_alternate(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
- Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (lines)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
@@ -660,7 +583,6 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
- Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, ru,
], env=GREP_ASCII),
@@ -681,7 +603,6 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
@@ -705,7 +626,6 @@ def bench_subtitles_ru_no_literal(suite_dir):
Command('rg', ['rg', '-n', pat, ru]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
- Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
@@ -758,7 +678,7 @@ class Benchmark(object):
def __init__(self, name=None, pattern=None, commands=None,
warmup_count=1, count=3, line_count=True,
allow_missing_commands=False,
- disabled_cmds=None):
+ disabled_cmds=None, order=0):
'''
Create a single benchmark.
@@ -794,6 +714,8 @@ class Benchmark(object):
will simply skip it.
:param list(str) disabled_cmds:
A list of commands to skip.
+ :param int order:
+ An integer indicating the sequence number of this benchmark.
'''
self.name = name
self.pattern = pattern
@@ -803,6 +725,7 @@ class Benchmark(object):
self.line_count = line_count
self.allow_missing_commands = allow_missing_commands
self.disabled_cmds = set(disabled_cmds or [])
+ self.order = order
def raise_if_missing(self):
'''
@@ -1165,19 +1088,22 @@ def collect_benchmarks(suite_dir, filter_pat=None,
requires corpora that are missing, then a log message is
emitted to stderr and it is not yielded.
'''
- for fun in sorted(globals()):
- if not fun.startswith('bench_'):
+ benchmarks = []
+ for global_name in globals():
+ if not global_name.startswith('bench_'):
continue
- name = re.sub('^bench_', '', fun)
+ name = re.sub('^bench_', '', global_name)
if filter_pat is not None and not re.search(filter_pat, name):
continue
try:
- benchmark = globals()[fun](suite_dir)
+ fun = globals()[global_name]
+ benchmark = fun(suite_dir)
benchmark.name = name
benchmark.warmup_count = warmup_iter
benchmark.count = bench_iter
benchmark.allow_missing_commands = allow_missing_commands
benchmark.disabled_cmds = disabled_cmds
+ benchmark.order = fun.__code__.co_firstlineno
benchmark.raise_if_missing()
except MissingDependencies as e:
eprint(
@@ -1192,7 +1118,8 @@ def collect_benchmarks(suite_dir, filter_pat=None,
'(run with --allow-missing to run incomplete benchmarks)'
eprint(fmt % (', '.join(e.missing_names), name))
continue
- yield benchmark
+ benchmarks.append(benchmark)
+ return sorted(benchmarks, key=lambda b: b.order)
def main():