summary | refs | log | tree | commit | diff | stats
path: root/benchsuite/benchsuite
diff options
context:
space:
mode:
Diffstat (limited to 'benchsuite/benchsuite')
-rwxr-xr-x benchsuite/benchsuite 29
1 file changed, 20 insertions, 9 deletions
diff --git a/benchsuite/benchsuite b/benchsuite/benchsuite
index c6a87220..8ab233a8 100755
--- a/benchsuite/benchsuite
+++ b/benchsuite/benchsuite
@@ -544,7 +544,11 @@ def bench_subtitles_ru_literal(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII),
- Command('ugrep (lines)', ['ugrep', '-n', pat, ru])
+ # ugrep incorrectly identifies this corpus as binary, but it is
+ # entirely valid UTF-8. So we tell ugrep to always treat the corpus
+ # as text even though this technically gives it an edge over other
+ # tools. (With '-a', ugrep no longer needs to check for binary data.)
+ Command('ugrep (lines)', ['ugrep', '-a', '-n', pat, ru])
])
@@ -562,7 +566,8 @@ def bench_subtitles_ru_literal_casei(suite_dir):
Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
- Command('ugrep (lines) (ASCII)', ['ugrep', '-n', '-i', pat, ru])
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (lines) (ASCII)', ['ugrep', '-a', '-n', '-i', pat, ru])
])
@@ -586,7 +591,8 @@ def bench_subtitles_ru_literal_word(suite_dir):
Command('grep (ASCII)', [
'grep', '-nw', pat, ru,
], env=GREP_ASCII),
- Command('ugrep (ASCII)', ['ugrep', '-nw', pat, ru]),
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (ASCII)', ['ugrep', '-anw', pat, ru]),
Command('rg', ['rg', '-nw', pat, ru]),
Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE),
])
@@ -610,7 +616,8 @@ def bench_subtitles_ru_alternate(suite_dir):
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
- Command('ugrep (lines)', ['ugrep', '-n', pat, ru]),
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (lines)', ['ugrep', '-an', pat, ru]),
Command('rg', ['rg', pat, ru]),
Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII),
])
@@ -635,7 +642,8 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
Command('grep (ASCII)', [
'grep', '-E', '-ni', pat, ru,
], env=GREP_ASCII),
- Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, ru]),
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (ASCII)', ['ugrep', '-ani', pat, ru]),
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE),
])
@@ -652,10 +660,11 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE),
- Command('ugrep', ['ugrep', '-n', pat, ru]),
+ Command('ugrep', ['ugrep', '-an', pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
- Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru]),
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (ASCII)', ['ugrep', '-a', '-n', '-U', pat, ru]),
])
@@ -674,11 +683,13 @@ def bench_subtitles_ru_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
- Command('ugrep', ['ugrep', '-n', pat, ru]),
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep', ['ugrep', '-an', pat, ru]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
- Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru])
+ # See bench_subtitles_ru_literal for why we use '-a' here.
+ Command('ugrep (ASCII)', ['ugrep', '-anU', pat, ru])
])