summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2020-10-14 17:00:35 -0400
committer Andrew Gallant <jamslam@gmail.com> 2020-10-14 17:00:35 -0400
commit de0c24f31c6a1218c1ff78ae8a6920650e226949 (patch)
tree 9549bf0af3705dd7e23912235ecec3abf72efc30
parent c55e7af675f392f2829c69bb50a9334d1f7d967a (diff)
benchsuite: add ugrep commands to benchmarks
-rwxr-xr-x benchsuite/benchsuite 82
1 files changed, 78 insertions, 4 deletions
diff --git a/benchsuite/benchsuite b/benchsuite/benchsuite
index 5a67503d..a70eb540 100755
--- a/benchsuite/benchsuite
+++ b/benchsuite/benchsuite
@@ -57,8 +57,10 @@ def bench_linux_literal_default(suite_dir):
Benchmark the speed of a literal using *default* settings.
This is a purposefully unfair benchmark for use in performance
- analysis, but it is pedagogically useful to demonstrate how
- default behaviors differ.
+ analysis, but it is pedagogically useful to demonstrate how default
+ behaviors differ. For example, ugrep and grep don't do any smart
+ filtering by default, so they will invariably search more files
+ than ripgrep, ag or git grep.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -73,7 +75,9 @@ def bench_linux_literal_default(suite_dir):
mkcmd('ag', ['ag', pat]),
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems.
- mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
+ mkcmd('git grep', ['git', 'grep', pat], env=GREP_UNICODE),
+ mkcmd('ugrep', ['ugrep', '-r', pat, './']),
+ mkcmd('grep', ['grep', '-r', pat, './'], env=GREP_UNICODE),
])
@@ -101,6 +105,10 @@ def bench_linux_literal(suite_dir):
mkcmd('git grep', [
'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ])
])
@@ -130,6 +138,10 @@ def bench_linux_literal_casei(suite_dir):
mkcmd('git grep', [
'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-i', pat, './',
+ ])
])
@@ -153,6 +165,10 @@ def bench_linux_re_literal_suffix(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ])
])
@@ -176,6 +192,10 @@ def bench_linux_word(suite_dir):
['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-w', pat, './',
+ ])
])
@@ -193,6 +213,10 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ])
])
@@ -212,6 +236,10 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-i', pat, './',
+ ])
])
@@ -245,6 +273,14 @@ def bench_linux_unicode_word(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ]),
+ mkcmd('ugrep (ASCII)', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-U', pat, './',
+ ]),
])
@@ -279,6 +315,14 @@ def bench_linux_no_literal(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ]),
+ mkcmd('ugrep (ASCII)', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-U', pat, './',
+ ]),
])
@@ -307,6 +351,10 @@ def bench_linux_alternates(suite_dir):
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', pat, './',
+ ])
])
@@ -328,6 +376,10 @@ def bench_linux_alternates_casei(suite_dir):
['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'},
),
+ mkcmd('ugrep', [
+ 'ugrep', '-r', '--ignore-files', '--no-hidden', '-I',
+ '-n', '-i', pat, './',
+ ])
])
@@ -346,6 +398,7 @@ def bench_subtitles_en_literal(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
Command('grep (lines)', ['grep', '-n', pat, en], env=GREP_ASCII),
+ Command('ugrep (lines)', ['ugrep', '-n', pat, en])
])
@@ -363,6 +416,7 @@ def bench_subtitles_en_literal_casei(suite_dir):
Command('grep (ASCII)', ['grep', '-E', '-i', pat, en], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
+ Command('ugrep (lines)', ['ugrep', '-n', '-i', pat, en])
])
@@ -380,6 +434,7 @@ def bench_subtitles_en_literal_word(suite_dir):
]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]),
Command('grep (ASCII)', ['grep', '-nw', pat, en], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-nw', pat, en]),
Command('rg', ['rg', '-nw', pat, en]),
Command('grep', ['grep', '-nw', pat, en], env=GREP_UNICODE),
])
@@ -403,6 +458,7 @@ def bench_subtitles_en_alternate(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
Command('grep (lines)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
+ Command('ugrep (lines)', ['ugrep', '-n', pat, en]),
Command('rg', ['rg', pat, en]),
Command('grep', ['grep', '-E', pat, en], env=GREP_ASCII),
])
@@ -427,6 +483,7 @@ def bench_subtitles_en_alternate_casei(suite_dir):
Command('grep (ASCII)', [
'grep', '-E', '-ni', pat, en,
], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, en]),
Command('rg', ['rg', '-n', '-i', pat, en]),
Command('grep', ['grep', '-E', '-ni', pat, en], env=GREP_UNICODE),
])
@@ -443,9 +500,11 @@ def bench_subtitles_en_surrounding_words(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
Command('grep', ['grep', '-E', '-n', pat, en], env=GREP_UNICODE),
+ Command('ugrep', ['ugrep', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, en])
])
@@ -464,9 +523,11 @@ def bench_subtitles_en_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
+ Command('ugrep', ['ugrep', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, en], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, en])
])
@@ -485,6 +546,7 @@ def bench_subtitles_ru_literal(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII),
+ Command('ugrep (lines)', ['ugrep', '-n', pat, ru])
])
@@ -502,6 +564,7 @@ def bench_subtitles_ru_literal_casei(suite_dir):
Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
+ Command('ugrep (lines) (ASCII)', ['ugrep', '-n', '-i', pat, ru])
])
@@ -515,12 +578,17 @@ def bench_subtitles_ru_literal_word(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg (ASCII)', [
- 'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
+ # You might think we'd use \b here for word boundaries, but both
+ # GNU grep and ripgrep implement -w with the formulation below.
+ # Since we can't use Unicode in a pattern and disable Unicode word
+ # boundaries, we just hand-jam this ourselves.
+ 'rg', '-n', r'(?-u:^|\W)' + pat + r'(?-u:$|\W)', ru,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
Command('grep (ASCII)', [
'grep', '-nw', pat, ru,
], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-nw', pat, ru]),
Command('rg', ['rg', '-nw', pat, ru]),
Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE),
])
@@ -544,6 +612,7 @@ def bench_subtitles_ru_alternate(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
+ Command('ugrep (lines)', ['ugrep', '-n', pat, ru]),
Command('rg', ['rg', pat, ru]),
Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII),
])
@@ -568,6 +637,7 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
Command('grep (ASCII)', [
'grep', '-E', '-ni', pat, ru,
], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, ru]),
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE),
])
@@ -584,8 +654,10 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE),
+ Command('ugrep', ['ugrep', '-n', pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru]),
])
@@ -604,9 +676,11 @@ def bench_subtitles_ru_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
+ Command('ugrep', ['ugrep', '-n', pat, ru]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
+ Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru])
])