From 7fbf2f014ce27a1194dcb8bf53ff51d6517a5a98 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 16 Sep 2016 18:22:35 -0400 Subject: Reorganize some files. --- Makefile | 14 - benches/README.md | 5 + benchsuite | 1083 ----------------------------------------- benchsuite.raw.csv | 343 ------------- benchsuite.summary | 164 ------- benchsuite/benchsuite | 1083 +++++++++++++++++++++++++++++++++++++++++ benchsuite/benchsuite.raw.csv | 343 +++++++++++++ benchsuite/benchsuite.summary | 164 +++++++ ctags.rust | 11 - session.vim | 1 - 10 files changed, 1595 insertions(+), 1616 deletions(-) delete mode 100644 Makefile create mode 100644 benches/README.md delete mode 100755 benchsuite delete mode 100644 benchsuite.raw.csv delete mode 100644 benchsuite.summary create mode 100755 benchsuite/benchsuite create mode 100644 benchsuite/benchsuite.raw.csv create mode 100644 benchsuite/benchsuite.summary delete mode 100644 ctags.rust delete mode 100644 session.vim diff --git a/Makefile b/Makefile deleted file mode 100644 index 290ac68a..00000000 --- a/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -all: - echo Nothing to do... - -ctags: - ctags --options=ctags.rust --languages=Rust src/*.rs src/*/*.rs - -docs: - cargo doc - in-dir ./target/doc fix-perms - rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/ - -push: - git push origin master - git push github master diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 00000000..18cf9131 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,5 @@ +These are internal microbenchmarks for tracking the peformance of individual +components inside of ripgrep. At the moment, they aren't heavily used. + +For performance benchmarks of ripgrep proper, see the sibling `benchsuite` +directory. diff --git a/benchsuite b/benchsuite deleted file mode 100755 index 82bb31df..00000000 --- a/benchsuite +++ /dev/null @@ -1,1083 +0,0 @@ -#!/usr/bin/env python - -''' -benchsuite is a benchmark runner for comparing command line search tools. -''' - -import argparse -import csv -import os -import os.path as path -from multiprocessing import cpu_count -import re -import statistics -import subprocess -import sys -import time - -# Some constants for identifying the corpora we use to run tests. -# We establish two very different kinds of corpora: a small number of large -# files and a large number of small files. These are vastly different use cases -# not only because of their performance characteristics, but also the -# strategies used to increase the relevance of results returned. - -SUBTITLES_DIR = 'subtitles' -SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en' -SUBTITLES_EN_NAME_SAMPLE = 'OpenSubtitles2016.raw.sample.en' -SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME -SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz' -SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru' -SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME -SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz' - -LINUX_DIR = 'linux' -LINUX_CLONE = 'git://github.com/BurntSushi/linux' - -# Grep takes locale settings from the environment. There is a *substantial* -# performance impact for enabling Unicode, so we need to handle this explicitly -# in our benchmarks. -GREP_ASCII = {'LC_ALL': 'C'} -GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'} - - -def bench_linux_literal_default(suite_dir): - ''' - Benchmark the speed of a literal using *default* settings. - - This is a purposefully unfair benchmark for use in performance - analysis, but it is pedagogically useful. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'PM_RESUME' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - # N.B. This is a purposefully unfair benchmark for illustrative purposes - # of how the default modes for each search tool differ. - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', pat]), - mkcmd('ag', ['ag', pat]), - # ucg reports the exact same matches as ag and rg even though it - # doesn't read gitignore files. Instead, it has a file whitelist - # that happens to match up exactly with the gitignores for this search. - mkcmd('ucg', ['ucg', pat]), - # I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the - # default, but I'd guess it to be on most desktop systems. - mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}), - mkcmd('pt', ['pt', pat]), - # sift reports an extra line here for a binary file matched. - mkcmd('sift', ['sift', pat]), - ]) - - -def bench_linux_literal(suite_dir): - ''' - Benchmark the speed of a literal, attempting to be fair. - - This tries to use the minimum set of options available in all tools - to test how fast they are. For example, it makes sure there is no - case insensitive matching and that line numbers are computed. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'PM_RESUME' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - mkcmd('rg (mmap)', ['rg', '-n', '--mmap', pat]), - mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]), - mkcmd('ag (mmap)', ['ag', '-s', pat]), - mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]), - mkcmd('git grep', [ - 'git', 'grep', '-I', '-n', pat, - ], env={'LC_ALL': 'C'}), - mkcmd('pt', ['pt', pat]), - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, - ]), - ]) - - -def bench_linux_literal_casei(suite_dir): - ''' - Benchmark the speed of a case insensitive literal search. - - This is like the linux_literal benchmark, except we ask the - search tools to do case insensitive search. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'PM_RESUME' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', '-i', pat]), - mkcmd('rg (mmap)', ['rg', '-n', '-i', pat]), - mkcmd('rg (whitelist)', [ - 'rg', '-n', '-i', '--no-ignore', '-tall', pat, - ]), - mkcmd('ag (mmap)', ['ag', '-i', pat]), - mkcmd('ucg', ['ucg', '-i', pat]), - # It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here, - # since that is certainly what ripgrep is doing, but this is for an - # ASCII literal, so we should give `git grep` all the opportunity to - # do its best. - mkcmd('git grep', [ - 'git', 'grep', '-I', '-n', '-i', pat, - ], env={'LC_ALL': 'C'}), - # sift yields more matches than it should here. Specifically, it gets - # matches in Module.symvers and System.map in the repo root. Both of - # those files show up in the repo root's .gitignore file. - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat, - ]), - ]) - - -def bench_linux_re_literal_suffix(suite_dir): - ''' - Benchmark the speed of a literal inside a regex. - - This, for example, inhibits a prefix byte optimization used - inside of Go's regex engine (relevant for sift and pt). - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = '[A-Z]+_RESUME' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), - mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]), - mkcmd('ag', ['ag', '-s', pat]), - mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]), - mkcmd('ucg', ['ucg', '--nosmart-case', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'C'}, - ), - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, - ]), - ]) - - -def bench_linux_word(suite_dir): - ''' - Benchmark use of the -w ("match word") flag in each tool. - - sift has a lot of trouble with this because it forces it into Go's - regex engine by surrounding the pattern with \b assertions. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'PM_RESUME' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', '-w', pat]), - mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]), - mkcmd('rg-novcs-mmap', [ - 'rg', '--mmap', '--no-ignore', '-n', '-w', pat, - ]), - mkcmd('ag', ['ag', '-s', '-w', pat]), - mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]), - mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', '-w', pat], - env={'LC_ALL': 'C'}, - ), - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat, - ]), - ]) - - -def bench_linux_unicode_greek(suite_dir): - ''' - Benchmark matching of a Unicode category. - - Only three tools (ripgrep, sift and pt) support this. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = r'\p{Greek}' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - # sift tries to search a bunch of PDF files and clutters up the - # results, even though --binary-skip is provided. They are excluded - # here explicitly, but don't have a measurable impact on performance. - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', - '--exclude-files', '.*', - '--exclude-files', '*.pdf', - pat, - ]), - ]) - - -def bench_linux_unicode_greek_casei(suite_dir): - ''' - Benchmark matching of a Unicode category, case insensitively. - - Only ripgrep gets this right (and it's still fast). - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = r'\p{Greek}' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', '-i', pat]), - # sift tries to search a bunch of PDF files and clutters up the - # results, even though --binary-skip is provided. They are excluded - # here explicitly, but don't have a measurable impact on performance. - mkcmd('sift', [ - 'sift', '-n', '--binary-skip', - '--exclude-files', '.*', - '--exclude-files', '*.pdf', - pat, - ]), - ]) - - -def bench_linux_unicode_word(suite_dir): - ''' - Benchmark Unicode aware \w character class. - - Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) actually get - this right. Everything else uses the standard ASCII interpretation - of \w. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = r'\wAh' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]), - mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), - mkcmd('rg-novcs-mmap', [ - 'rg', '--mmap', '--no-ignore', '-n', pat, - ]), - mkcmd('ag (no Unicode)', ['ag', '-s', pat]), - mkcmd('ag-novcs (no Unicode)', [ - 'ag', '--skip-vcs-ignores', '-s', pat, - ]), - mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'en_US.UTF-8'}, - ), - mkcmd( - 'git grep (no Unicode)', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'C'}, - ), - mkcmd('sift (no Unicode)', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, - ]), - ]) - - -def bench_linux_no_literal(suite_dir): - ''' - Benchmark a regex that defeats all literal optimizations. - - Most search patterns have some kind of literal in them, which - typically permits searches to take some shortcuts. Therefore, the - applicability of this benchmark is somewhat suspicious, but the - suite wouldn't feel complete without it. - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - mkcmd('rg-whitelist', ['rg', '-tall', '--no-ignore', '-n', pat]), - mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]), - mkcmd('rg-whitelist (no Unicode)', [ - 'rg', '-tall', '--no-ignore', '-n', '(?-u)' + pat, - ]), - mkcmd('ag (no Unicode)', ['ag', '-s', pat]), - mkcmd('ag-novcs (no Unicode)', [ - 'ag', '--skip-vcs-ignores', '-s', pat, - ]), - mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'en_US.UTF-8'}, - ), - mkcmd( - 'git grep (no Unicode)', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'C'}, - ), - mkcmd('sift (no Unicode)', [ - 'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, - ]), - ]) - - -def bench_linux_alternates(suite_dir): - ''' - Benchmark a small alternation of literals. - - sift doesn't make the cut. It's more than 10x slower than the next - fastest result. The slowdown is likely because the Go regexp engine - doesn't do any literal optimizations for this case (there is no - common leading byte). - ''' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', pat]), - mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), - mkcmd('rg-novcs-mmap', [ - 'rg', '--mmap', '--no-ignore', '-n', pat, - ]), - mkcmd('ag', ['ag', '-s', pat]), - mkcmd('ag-novcs', [ - 'ag', '--skip-vcs-ignores', '-s', pat, - ]), - mkcmd('ucg', ['ucg', '--nosmart-case', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', pat], - env={'LC_ALL': 'C'}, - ), - ]) - - -def bench_linux_alternates_casei(suite_dir): - 'Benchmark a small alternation of literals case insensitively.' - require(suite_dir, 'linux') - cwd = path.join(suite_dir, LINUX_DIR) - pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT' - - def mkcmd(*args, **kwargs): - kwargs['cwd'] = cwd - return Command(*args, **kwargs) - - return Benchmark(pattern=pat, commands=[ - mkcmd('rg', ['rg', '-n', '-i', pat]), - mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]), - mkcmd('rg-novcs-mmap', [ - 'rg', '--mmap', '--no-ignore', '-n', '-i', pat, - ]), - mkcmd('ag', ['ag', '-i', pat]), - mkcmd('ag-novcs', [ - 'ag', '--skip-vcs-ignores', '-i', pat, - ]), - mkcmd('ucg', ['ucg', '-i', pat]), - mkcmd( - 'git grep', - ['git', 'grep', '-E', '-I', '-n', '-i', pat], - env={'LC_ALL': 'C'}, - ), - ]) - - -def bench_subtitles_en_literal(suite_dir): - ''' - Benchmark the speed of an ASCII string literal. - ''' - require(suite_dir, 'subtitles-en') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE) - pat = 'Sherlock Holmes' - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', pat, ru]), - Command('rg (no line numbers)', ['rg', pat, ru]), - Command('ag', ['ag', '-s', pat, ru]), - Command('ucg', ['ucg', '--nosmart-case', pat, ru]), - Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII), - Command('grep (no line numbers)', [ - 'grep', '-a', pat, ru, - ], env=GREP_ASCII), - Command('pt', ['pt', pat, ru]), - Command('pt (no line numbers)', ['pt', '-N', pat, ru]), - Command('sift', ['sift', '-n', pat, ru]), - Command('sift (no line numbers)', ['sift', pat, ru]), - ]) - - -def bench_subtitles_ru_literal(suite_dir): - ''' - Benchmark the speed of a Unicode-y string literal. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = 'Шерлок Холмс' # Sherlock Holmes - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', pat, ru]), - Command('rg (no line numbers)', ['rg', pat, ru]), - Command('ag', ['ag', '-s', pat, ru]), - Command('ucg', ['ucg', '--nosmart-case', pat, ru]), - Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII), - Command('grep (no line numbers)', [ - 'grep', '-a', pat, ru, - ], env=GREP_ASCII), - Command('pt', ['pt', pat, ru]), - Command('pt (no line numbers)', ['pt', '-N', pat, ru]), - Command('sift', ['sift', '-n', pat, ru]), - Command('sift (no line numbers)', ['sift', pat, ru]), - ]) - - -def bench_subtitles_ru_literal_casei(suite_dir): - ''' - Benchmark the speed of a Unicode-y string case insensitively. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = 'Шерлок Холмс' # Sherlock Holmes - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', '-i', pat, ru]), - Command('ag (not Unicode)', ['ag', '-i', pat, ru]), - Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]), - Command('grep', ['grep', '-ani', pat, ru], env=GREP_UNICODE), - Command('grep (not Unicode)', [ - 'grep', '-E', '-ani', pat, ru, - ], env=GREP_ASCII), - ]) - - -def bench_subtitles_ru_literal_word(suite_dir): - ''' - Benchmark the speed of finding a literal inside word boundaries. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = 'Шерлок Холмс' # Sherlock Holmes - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-nw', pat, ru]), - Command('rg (not Unicode)', [ - 'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru, - ]), - Command('ag (not Unicode)', ['ag', '-sw', pat, ru]), - Command('ucg (not Unicode)', ['ucg', '--nosmart-case', pat, ru]), - Command('grep (not Unicode)', [ - 'grep', '-anw', pat, ru, - ], env=GREP_ASCII), - Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE), - ]) - - -def bench_subtitles_ru_alternate(suite_dir): - ''' - Benchmark the speed of a set of alternate literals. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = '|'.join([ - 'Шерлок Холмс', # Sherlock Holmes - 'Джон Уотсон', # John Watson - 'Ирен Адлер', # Irene Adler - 'инспектор Лестрейд', # Inspector Lestrade - 'профессор Мориарти', # Professor Moriarty - ]) - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', pat, ru]), - Command('rg (no line numbers)', ['rg', pat, ru]), - Command('ucg', ['ucg', '--nosmart-case', pat, ru]), - Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_ASCII), - Command('grep (no line numbers)', [ - 'grep', '-E', '-a', pat, ru, - ], env=GREP_ASCII), - ]) - - -def bench_subtitles_ru_alternate_casei(suite_dir): - ''' - Benchmark the speed of a set of alternate literals. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = '|'.join([ - 'Шерлок Холмс', # Sherlock Holmes - 'Джон Уотсон', # John Watson - 'Ирен Адлер', # Irene Adler - 'инспектор Лестрейд', # Inspector Lestrade - 'профессор Мориарти', # Professor Moriarty - ]) - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', '-i', pat, ru]), - Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]), - Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE), - Command('grep (not Unicode)', [ - 'grep', '-E', '-ani', pat, ru, - ], env=GREP_ASCII), - ]) - - -def bench_subtitles_ru_no_literal(suite_dir): - ''' - Benchmark the speed of a regex with no literals. - - Note that we don't even try to run grep with Unicode support - on this one. While it should eventually get the right answer, - I killed it after it had already been running for two minutes - and showed no signs of finishing soon. - ''' - require(suite_dir, 'subtitles-ru') - ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME) - pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}' - - return Benchmark(pattern=pat, commands=[ - Command('rg', ['rg', '-n', pat, ru]), - Command('rg (no line numbers)', ['rg', pat, ru]), - Command('ucg (no Unicode)', ['ucg', '--nosmart-case', pat, ru]), - Command('grep (no Unicode)', [ - 'grep', '-E', '-an', pat, ru, - ], env=GREP_ASCII), - ]) - - -class MissingDependencies(Exception): - ''' - A missing dependency exception. - - This exception occurs when running a benchmark that requires a - particular corpus that isn't available. - - :ivar list(str) missing_names: - A list of missing dependency names. These names correspond to - names that can be used with the --download flag. - ''' - def __init__(self, missing_names): - self.missing_names = missing_names - - def __str__(self): - return 'MissingDependency(%s)' % repr(self.missing_names) - - -class Benchmark(object): - ''' - A single benchmark corresponding to a grouping of commands. - - The main purpose of a benchmark is to compare the performance - characteristics of a group of commands. - ''' - - def __init__(self, name=None, pattern=None, commands=None, - warmup_count=1, count=3, line_count=True): - ''' - Create a single benchmark. - - A single benchmark is composed of a set of commands that are - benchmarked and compared against one another. A benchmark may - have multiple commands that use the same search tool (but - probably should have something differentiating them). - - The grouping of commands is a purely human driven process. - - By default, the output of every command is sent to /dev/null. - Other types of behavior are available via the methods defined - on this benchmark. - - :param str name: - A human readable string denoting the name of this - benchmark. - :param str pattern: - The pattern that is used in search. - :param list(Command) commands: - A list of commands to initialize this benchmark with. More - commands may be added before running the benchmark. - :param int warmup_count: - The number of times to run each command before recording - samples. - :param int count: - The number of samples to collect from each command. - :param bool line_count: - When set, the lines of each search are counted and included - in the samples produced. - ''' - self.name = name - self.pattern = pattern - self.commands = commands or [] - self.warmup_count = warmup_count - self.count = count - self.line_count = line_count - - def run(self): - ''' - Runs this benchmark and returns the results. - - :rtype: Result - ''' - result = Result(self) - for cmd in self.commands: - # Do a warmup first. - for _ in range(self.warmup_count): - self.run_one(cmd) - for _ in range(self.count): - result.add(cmd, **self.run_one(cmd)) - return result - - def run_one(self, cmd): - ''' - Runs the given command exactly once. - - Returns an object that includes the time taken by the command. - If this benchmark was configured to count the number of lines - returned, then the line count is also returned. - - :param Command cmd: The command to run. - :returns: - A dict with two fields, duration and line_count. - The duration is in seconds, with fractional milliseconds, - and is guaranteed to be available. The line_count is set - to None unless line counting is enabled, in which case, - it is the number of lines in the search output. - :rtype: int - ''' - cmd.kwargs['stderr'] = subprocess.DEVNULL - if self.line_count: - cmd.kwargs['stdout'] = subprocess.PIPE - else: - cmd.kwargs['stdout'] = subprocess.DEVNULL - - start = time.time() - completed = cmd.run() - end = time.time() - - line_count = None - if self.line_count: - line_count = completed.stdout.count(b'\n') - return { - 'duration': end - start, - 'line_count': line_count, - } - - -class Result(object): - ''' - The result of running a benchmark. - - Benchmark results consist of a set of samples, where each sample - corresponds to a single run of a single command in the benchmark. - Various statistics can be computed from these samples such as mean - and standard deviation. - ''' - def __init__(self, benchmark): - ''' - Create a new set of results, initially empty. - - :param Benchmarl benchmark: - The benchmark that produced these results. - ''' - self.benchmark = benchmark - self.samples = [] - - def add(self, cmd, duration, line_count=None): - ''' - Add a new sample to this result set. - - :param Command cmd: - The command that produced this sample. - :param int duration: - The duration, in milliseconds, that the command took to - run. - :param int line_count: - The number of lines in the search output. This is optional. - ''' - self.samples.append({ - 'cmd': cmd, - 'duration': duration, - 'line_count': line_count, - }) - - def fastest_sample(self): - ''' - Returns the fastest recorded sample. - ''' - return min(self.samples, key=lambda s: s['duration']) - - def fastest_cmd(self): - ''' - Returns the fastest command according to distribution. - ''' - means = [] - for cmd in self.benchmark.commands: - mean, _ = self.distribution_for(cmd) - means.append((cmd, mean)) - return min(means, key=lambda tup: tup[1])[0] - - def samples_for(self, cmd): - 'Returns an iterable of samples for cmd' - yield from (s for s in self.samples if s['cmd'].name == cmd.name) - - def line_counts_for(self, cmd): - ''' - Returns the line counts recorded for each command. - - :returns: - A dictionary from command name to a set of line - counts recorded. - ''' - return {s['line_count'] for s in self.samples_for(cmd) - if s['line_count'] is not None} - - def distribution_for(self, cmd): - ''' - Returns the distribution (mean +/- std) of the given command. - - :rtype: (float, float) - :returns: - A tuple containing the mean and standard deviation, in that - order. - ''' - mean = statistics.mean( - s['duration'] for s in self.samples_for(cmd)) - stdev = statistics.stdev( - s['duration'] for s in self.samples_for(cmd)) - return mean, stdev - - -class Command(object): - def __init__(self, name, cmd, *args, **kwargs): - ''' - Create a new command that is run as part of a benchmark. - - *args and **kwargs are passed directly to ``subprocess.run``. - An exception to this is stdin/stdout/stderr. Output - redirection is completely controlled by the benchmark harness. - Trying to set them here will trigger an assert. - - :param str name: - The human readable name of this command. This is - particularly useful if the same search tool is used - multiple times in the same benchmark with different - arguments. - :param list(str) cmd: - The command to run as a list of arguments (including the - command name itself). - ''' - assert 'stdin' not in kwargs - assert 'stdout' not in kwargs - assert 'stderr' not in kwargs - self.name = name - self.cmd = cmd - self.args = args - self.kwargs = kwargs - - def run(self): - ''' - Runs this command and returns its status. - - :rtype: subprocess.CompletedProcess - ''' - return subprocess.run(self.cmd, *self.args, **self.kwargs) - - -def eprint(*args, **kwargs): - 'Like print, but to stderr.' - kwargs['file'] = sys.stderr - print(*args, **kwargs) - - -def run_cmd(cmd, *args, **kwargs): - ''' - Print the command to stderr and run it. - - If the command fails, throw a traceback. - ''' - eprint('# %s' % ' '.join(cmd)) - kwargs['check'] = True - return subprocess.run(cmd, *args, **kwargs) - - -def require(suite_dir, *names): - ''' - Declare a dependency on the given names for a benchmark. - - If any dependency doesn't exist, then fail with an error message. - ''' - errs = [] - for name in names: - fun_name = name.replace('-', '_') - if not globals()['has_%s' % fun_name](suite_dir): - errs.append(name) - if len(errs) > 0: - raise MissingDependencies(errs) - - -def download_linux(suite_dir): - 'Download and build the Linux kernel.' - checkout_dir = path.join(suite_dir, LINUX_DIR) - if not os.path.isdir(checkout_dir): - # Clone from my fork so that we always get the same corpus *and* still - # do a shallow clone. Shallow clones are much much cheaper than full - # clones. - run_cmd(['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir]) - # We want to build the kernel because the process of building it produces - # a lot of junk in the repository that a search tool probably shouldn't - # touch. - if not os.path.exists(path.join(checkout_dir, 'vmlinux')): - eprint('# Building Linux kernel...') - run_cmd(['make', 'defconfig'], cwd=checkout_dir) - run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir) - - -def has_linux(suite_dir): - 'Returns true if we believe the Linux kernel is built.' - checkout_dir = path.join(suite_dir, LINUX_DIR) - return path.exists(path.join(checkout_dir, 'vmlinux')) - - -def download_subtitles_en(suite_dir): - 'Download and decompress English subtitles.' - subtitle_dir = path.join(suite_dir, SUBTITLES_DIR) - en_path_gz = path.join(subtitle_dir, SUBTITLES_EN_NAME_GZ) - en_path = path.join(subtitle_dir, SUBTITLES_EN_NAME) - en_path_sample = path.join(subtitle_dir, SUBTITLES_EN_NAME_SAMPLE) - - if not os.path.isdir(subtitle_dir): - os.makedirs(subtitle_dir) - if not os.path.exists(en_path): - if not os.path.exists(en_path_gz): - run_cmd(['curl', '-LO', SUBTITLES_EN_URL], cwd=subtitle_dir) - run_cmd(['gunzip', en_path_gz], cwd=subtitle_dir) - if not os.path.exists(en_path_sample): - # Get a sample roughly the same size as the Russian corpus so that - # benchmarks finish in a reasonable time. - with open(path.join(subtitle_dir, en_path_sample), 'wb+') as f: - run_cmd( - ['head', '-n', '32722372', en_path], - cwd=subtitle_dir, stdout=f) - - -def has_subtitles_en(suite_dir): - 'Returns true if English subtitles have been downloaded.' - subtitle_dir = path.join(suite_dir, SUBTITLES_DIR) - return path.exists(path.join(subtitle_dir, SUBTITLES_EN_NAME_SAMPLE)) - - -def download_subtitles_ru(suite_dir): - 'Download and decompress Russian subtitles.' - subtitle_dir = path.join(suite_dir, SUBTITLES_DIR) - ru_path_gz = path.join(subtitle_dir, SUBTITLES_RU_NAME_GZ) - ru_path = path.join(subtitle_dir, SUBTITLES_RU_NAME) - - if not os.path.isdir(subtitle_dir): - os.makedirs(subtitle_dir) - if not os.path.exists(ru_path): - if not os.path.exists(ru_path_gz): - run_cmd(['curl', '-LO', SUBTITLES_RU_URL], cwd=subtitle_dir) - run_cmd(['gunzip', ru_path_gz], cwd=subtitle_dir) - - -def has_subtitles_ru(suite_dir): - 'Returns true if Russian subtitles have been downloaded.' - subtitle_dir = path.join(suite_dir, SUBTITLES_DIR) - return path.exists(path.join(subtitle_dir, SUBTITLES_RU_NAME)) - - -def download(suite_dir, choices): - ''' - Download choices into suite_dir. - - Specifically, choices specifies a list of corpora to fetch. - - :param str suite_dir: - The directory in which to download corpora. - :param list(str) choices: - A list of corpora to download. Available choices are: - all, linux, subtitles-en, subtitles-ru. - ''' - for choice in choices: - if choice == 'linux': - download_linux(suite_dir) - elif choice == 'subtitles-en': - download_subtitles_en(suite_dir) - elif choice == 'subtitles-ru': - download_subtitles_ru(suite_dir) - elif choice == 'all': - download_linux(suite_dir) - download_subtitles_en(suite_dir) - download_subtitles_ru(suite_dir) - else: - eprint('Unrecognized download choice: %s' % choice) - sys.exit(1) - - -def collect_benchmarks(suite_dir, filter_pat=None): - ''' - Return an iterable of all runnable benchmarks. - - :param str suite_dir: - The directory containing corpora. - :param str filter_pat: - A single regular expression that is used to filter benchmarks - by their name. When not specified, all benchmarks are run. - :returns: - An iterable over all runnable benchmarks. If a benchmark - requires corpora that are missing, then a log message is - emitted to stderr and it is not yielded. - ''' - for fun in sorted(globals()): - if not fun.startswith('bench_'): - continue - name = re.sub('^bench_', '', fun) - if filter_pat is not None and not re.search(filter_pat, name): - continue - try: - benchmark = globals()[fun](suite_dir) - except MissingDependencies as e: - eprint( - 'missing: %s, skipping benchmark %s (try running with: %s)' % ( - ', '.join(e.missing_names), - name, - ' '.join(['--download %s' % n for n in e.missing_names]), - )) - continue - benchmark.name = name - yield benchmark - - -def main(): - p = argparse.ArgumentParser('Command line search tool benchmark suite.') - p.add_argument( - '--dir', metavar='PATH', default=os.getcwd(), - help='The directory in which to download data and perform searches.') - p.add_argument( - '--download', metavar='CORPUS', action='append', - choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'], - help='Download and prepare corpus data, then exit without running ' - 'any benchmarks. Note that this command is intended to be ' - 'idempotent. WARNING: This downloads over a gigabyte of data, ' - 'and also includes building the Linux kernel. If "all" is used ' - 'then the total uncompressed size is around 13 GB.') - p.add_argument( - '-f', '--force', action='store_true', - help='Overwrite existing files if there is a conflict.') - p.add_argument( - '--list', action='store_true', - help='List available benchmarks by name.') - p.add_argument( - '--raw', metavar='PATH', - help='Dump raw data (all samples collected) in CSV format to the ' - 'file path provided.') - p.add_argument( - 'bench', metavar='PAT', nargs='?', - help='A regex pattern that will only run benchmarks that match.') - args = p.parse_args() - - if args.download is not None and len(args.download) > 0: - download(args.dir, args.download) - sys.exit(0) - - if not path.isdir(args.dir): - os.makedirs(args.dir) - if args.raw is not None and path.exists(args.raw) and not args.force: - eprint('File %s already exists (delete it or use --force)' % args.raw) - sys.exit(1) - raw_handle, raw_csv_wtr = None, None - if args.raw is not None: - fields = [ - 'benchmark', 'warmup_iter', 'iter', - 'name', 'command', 'duration', 'lines', 'env', - ] - raw_handle = open(args.raw, 'w+') - raw_csv_wtr = csv.DictWriter(raw_handle, fields) - raw_csv_wtr.writerow({x: x for x in fields}) - - benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench) - for i, b in enumerate(benchmarks): - result = b.run() - fastest_cmd = result.fastest_cmd() - fastest_sample = result.fastest_sample() - max_name_len = max(len(cmd.name) for cmd in b.commands) - - if i > 0: - print() - header = '%s (pattern: %s)' % (b.name, b.pattern) - print('%s\n%s' % (header, '-' * len(header))) - for cmd in b.commands: - name = cmd.name - mean, stdev = result.distribution_for(cmd) - line_counts = result.line_counts_for(cmd) - show_fast_cmd, show_line_counts = '', '' - if fastest_cmd.name == cmd.name: - show_fast_cmd = '*' - if fastest_sample['cmd'].name == cmd.name: - name += '*' - if len(line_counts) > 0: - counts = map(str, line_counts) - show_line_counts = ' (lines: %s)' % ', '.join(counts) - fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}' - print(fmt.format( - name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd, - mean=mean, stdev=stdev, lines=show_line_counts)) - sys.stdout.flush() - - if raw_csv_wtr is not None: - for sample in result.samples: - cmd, duration = sample['cmd'], sample['duration'] - env = ' '.join(['%s=%s' % (k, v) - for k, v in cmd.kwargs.get('env', {}).items()]) - raw_csv_wtr.writerow({ - 'benchmark': b.name, - 'warmup_iter': b.warmup_count, - 'iter': b.count, - 'name': sample['cmd'].name, - 'command': ' '.join(cmd.cmd), - 'duration': duration, - 'lines': sample['line_count'] or '', - 'env': env, - }) - raw_handle.flush() - - -if __name__ == '__main__': - main() diff --git a/benchsuite.raw.csv b/benchsuite.raw.csv deleted file mode 100644 index c99acb98..00000000 --- a/benchsuite.raw.csv +++ /dev/null @@ -1,343 +0,0 @@ -benchmark,warmup_iter,iter,name,command,duration,lines,env -linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.24781584739685059,68, -linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23398137092590332,68, -linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23497819900512695,68, -linux_alternates,1,3,rg-novcs,rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.11090731620788574,68, -linux_alternates,1,3,rg-novcs,rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.14237189292907715,68, -linux_alternates,1,3,rg-novcs,rg --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.11315393447875977,68, -linux_alternates,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.39145565032958984,68, -linux_alternates,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.39217114448547363,68, -linux_alternates,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.39917516708374023,68, -linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4898416996002197,68, -linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5078432559967041,68, -linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4942047595977783,68, -linux_alternates,1,3,ag-novcs,ag --skip-vcs-ignores -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4839494228363037,68, -linux_alternates,1,3,ag-novcs,ag --skip-vcs-ignores -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4806060791015625,68, -linux_alternates,1,3,ag-novcs,ag --skip-vcs-ignores -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6986649036407471,68, -linux_alternates,1,3,ucg,ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1494297981262207,68, -linux_alternates,1,3,ucg,ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.15127253532409668,68, -linux_alternates,1,3,ucg,ucg --nosmart-case ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.1573657989501953,68, -linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.25092005729675293,68,LC_ALL=C -linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2663850784301758,68,LC_ALL=C -linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.24485993385314941,68,LC_ALL=C -linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.24762463569641113,160, -linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.21982502937316895,160, -linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2219092845916748,160, -linux_alternates_casei,1,3,rg-novcs,rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12809348106384277,160, -linux_alternates_casei,1,3,rg-novcs,rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12208032608032227,160, -linux_alternates_casei,1,3,rg-novcs,rg --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12648415565490723,160, -linux_alternates_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.39945435523986816,160, -linux_alternates_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.39914917945861816,160, -linux_alternates_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.3931403160095215,160, -linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,1.26180100440979,160, -linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6076450347900391,160, -linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6092875003814697,160, -linux_alternates_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5668354034423828,160, -linux_alternates_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5638954639434814,160, -linux_alternates_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.6463086605072021,160, -linux_alternates_casei,1,3,ucg,ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23877739906311035,160, -linux_alternates_casei,1,3,ucg,ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.2357316017150879,160, -linux_alternates_casei,1,3,ucg,ucg -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.23951983451843262,160, -linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.8604335784912109,160,LC_ALL=C -linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9313437938690186,160,LC_ALL=C -linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.9674036502838135,160,LC_ALL=C -linux_literal,1,3,rg,rg -n PM_RESUME,0.2203836441040039,16, -linux_literal,1,3,rg,rg -n PM_RESUME,0.21490192413330078,16, -linux_literal,1,3,rg,rg -n PM_RESUME,0.21895813941955566,16, -linux_literal,1,3,rg-novcs,rg --no-ignore -n PM_RESUME,0.10370588302612305,16, -linux_literal,1,3,rg-novcs,rg --no-ignore -n PM_RESUME,0.12161660194396973,16, -linux_literal,1,3,rg-novcs,rg --no-ignore -n PM_RESUME,0.10118246078491211,16, -linux_literal,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n PM_RESUME,0.3846700191497803,16, -linux_literal,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n PM_RESUME,0.3972609043121338,16, -linux_literal,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n PM_RESUME,0.3864939212799072,16, -linux_literal,1,3,ag,ag -s PM_RESUME,0.5170495510101318,16, -linux_literal,1,3,ag,ag -s PM_RESUME,0.5066406726837158,16, -linux_literal,1,3,ag,ag -s PM_RESUME,0.5110535621643066,16, -linux_literal,1,3,ag-novcs,ag --skip-vcs-ignores -s PM_RESUME,0.5622231960296631,16, -linux_literal,1,3,ag-novcs,ag --skip-vcs-ignores -s PM_RESUME,0.7810573577880859,16, -linux_literal,1,3,ag-novcs,ag --skip-vcs-ignores -s PM_RESUME,1.2847375869750977,16, -linux_literal,1,3,ucg,ucg --nosmart-case PM_RESUME,0.16497445106506348,16, -linux_literal,1,3,ucg,ucg --nosmart-case PM_RESUME,0.16525840759277344,16, -linux_literal,1,3,ucg,ucg --nosmart-case PM_RESUME,0.1590101718902588,16, -linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.1928420066833496,16,LC_ALL=C -linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.19345307350158691,16,LC_ALL=C -linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.1954176425933838,16,LC_ALL=C -linux_literal,1,3,pt,pt PM_RESUME,0.20350170135498047,16, -linux_literal,1,3,pt,pt PM_RESUME,0.17547011375427246,16, -linux_literal,1,3,pt,pt PM_RESUME,0.21206402778625488,16, -linux_literal,1,3,sift,sift -n --binary-skip --exclude-files .* PM_RESUME,0.14285612106323242,16, -linux_literal,1,3,sift,sift -n --binary-skip --exclude-files .* PM_RESUME,0.14221596717834473,16, -linux_literal,1,3,sift,sift -n --binary-skip --exclude-files .* PM_RESUME,0.1395282745361328,16, -linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.32401490211486816,370, -linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.26114439964294434,370, -linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.21575093269348145,370, -linux_literal_casei,1,3,rg-novcs,rg --no-ignore -n -i PM_RESUME,0.13971185684204102,399, -linux_literal_casei,1,3,rg-novcs,rg --no-ignore -n -i PM_RESUME,0.11648797988891602,399, -linux_literal_casei,1,3,rg-novcs,rg --no-ignore -n -i PM_RESUME,0.11060571670532227,399, -linux_literal_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i PM_RESUME,0.41420912742614746,399, -linux_literal_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i PM_RESUME,0.3933844566345215,399, -linux_literal_casei,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -i PM_RESUME,0.4015076160430908,399, -linux_literal_casei,1,3,ag,ag -i PM_RESUME,0.45723628997802734,370, -linux_literal_casei,1,3,ag,ag -i PM_RESUME,0.41663575172424316,370, -linux_literal_casei,1,3,ag,ag -i PM_RESUME,0.4088137149810791,370, -linux_literal_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i PM_RESUME,0.44587063789367676,399, -linux_literal_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i PM_RESUME,0.45557403564453125,399, -linux_literal_casei,1,3,ag-novcs,ag --skip-vcs-ignores -i PM_RESUME,0.41840505599975586,399, -linux_literal_casei,1,3,ucg,ucg -i PM_RESUME,0.1598954200744629,370, -linux_literal_casei,1,3,ucg,ucg -i PM_RESUME,0.15562868118286133,370, -linux_literal_casei,1,3,ucg,ucg -i PM_RESUME,0.15644288063049316,370, -linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.1857764720916748,370,LC_ALL=C -linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.17730069160461426,370,LC_ALL=C -linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.18560075759887695,370,LC_ALL=C -linux_literal_casei,1,3,sift,sift -n --binary-skip --exclude-files .* -i PM_RESUME,0.20816397666931152,399, -linux_literal_casei,1,3,sift,sift -n --binary-skip --exclude-files .* -i PM_RESUME,0.1995244026184082,399, -linux_literal_casei,1,3,sift,sift -n --binary-skip --exclude-files .* -i PM_RESUME,0.2000412940979004,399, -linux_literal_default,1,3,rg,rg PM_RESUME,0.21781229972839355,16, -linux_literal_default,1,3,rg,rg PM_RESUME,0.2195730209350586,16, -linux_literal_default,1,3,rg,rg PM_RESUME,0.259692907333374,16, -linux_literal_default,1,3,ag,ag PM_RESUME,0.39571118354797363,16, -linux_literal_default,1,3,ag,ag PM_RESUME,0.48991870880126953,16, -linux_literal_default,1,3,ag,ag PM_RESUME,0.49497532844543457,16, -linux_literal_default,1,3,ucg,ucg PM_RESUME,0.166459321975708,16, -linux_literal_default,1,3,ucg,ucg PM_RESUME,0.1644885540008545,16, -linux_literal_default,1,3,ucg,ucg PM_RESUME,0.16440844535827637,16, -linux_literal_default,1,3,git grep,git grep PM_RESUME,0.1860334873199463,16,LC_ALL=C -linux_literal_default,1,3,git grep,git grep PM_RESUME,0.16703486442565918,16,LC_ALL=C -linux_literal_default,1,3,git grep,git grep PM_RESUME,0.20740580558776855,16,LC_ALL=C -linux_literal_default,1,3,pt,pt PM_RESUME,0.15500974655151367,16, -linux_literal_default,1,3,pt,pt PM_RESUME,0.15694642066955566,16, -linux_literal_default,1,3,pt,pt PM_RESUME,0.15679144859313965,16, -linux_literal_default,1,3,sift,sift PM_RESUME,0.11694097518920898,16, -linux_literal_default,1,3,sift,sift PM_RESUME,0.11726593971252441,16, -linux_literal_default,1,3,sift,sift PM_RESUME,0.11739015579223633,16, -linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3873450756072998,490, -linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.38909482955932617,490, -linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.4019286632537842,490, -linux_no_literal,1,3,rg-whitelist,rg -tall --no-ignore -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32698678970336914,419, -linux_no_literal,1,3,rg-whitelist,rg -tall --no-ignore -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3220486640930176,419, -linux_no_literal,1,3,rg-whitelist,rg -tall --no-ignore -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.32451391220092773,419, -linux_no_literal,1,3,rg (no Unicode),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.2920851707458496,490, -linux_no_literal,1,3,rg (no Unicode),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.3335237503051758,490, -linux_no_literal,1,3,rg (no Unicode),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.28688979148864746,490, -linux_no_literal,1,3,rg-whitelist (no Unicode),rg -tall --no-ignore -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.222395658493042,419, -linux_no_literal,1,3,rg-whitelist (no Unicode),rg -tall --no-ignore -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.22271037101745605,419, -linux_no_literal,1,3,rg-whitelist (no Unicode),rg -tall --no-ignore -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.23732328414916992,419, -linux_no_literal,1,3,ag (no Unicode),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.7565908432006836,766, -linux_no_literal,1,3,ag (no Unicode),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.9270203113555908,766, -linux_no_literal,1,3,ag (no Unicode),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.7432494163513184,766, -linux_no_literal,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.6899797916412354,767, -linux_no_literal,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.7057938575744629,767, -linux_no_literal,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.7126791477203369,767, -linux_no_literal,1,3,ucg (no Unicode),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.44383835792541504,416, -linux_no_literal,1,3,ucg (no Unicode),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.4448375701904297,416, -linux_no_literal,1,3,ucg (no Unicode),ucg --nosmart-case \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.4412264823913574,416, -linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.985018730163574,490,LC_ALL=en_US.UTF-8 -linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.671714782714844,490,LC_ALL=en_US.UTF-8 -linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},9.0708589553833,490,LC_ALL=en_US.UTF-8 -linux_no_literal,1,3,git grep (no Unicode),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.9452569484710693,490,LC_ALL=C -linux_no_literal,1,3,git grep (no Unicode),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.3153109550476074,490,LC_ALL=C -linux_no_literal,1,3,git grep (no Unicode),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},1.876504898071289,490,LC_ALL=C -linux_no_literal,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.800794124603271,491, -linux_no_literal,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.91090989112854,491, -linux_no_literal,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},8.970277547836304,491, -linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.21518707275390625,1652, -linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.2159252166748047,1652, -linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.2178945541381836,1652, -linux_re_literal_suffix,1,3,rg-novcs,rg --no-ignore -n [A-Z]+_RESUME,0.10395693778991699,1653, -linux_re_literal_suffix,1,3,rg-novcs,rg --no-ignore -n [A-Z]+_RESUME,0.101318359375,1653, -linux_re_literal_suffix,1,3,rg-novcs,rg --no-ignore -n [A-Z]+_RESUME,0.09963226318359375,1653, -linux_re_literal_suffix,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n [A-Z]+_RESUME,0.40993452072143555,1653, -linux_re_literal_suffix,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n [A-Z]+_RESUME,0.4102144241333008,1653, -linux_re_literal_suffix,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n [A-Z]+_RESUME,0.3837263584136963,1653, -linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,1.7373137474060059,1652, -linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,1.7011380195617676,1652, -linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.7572276592254639,1652, -linux_re_literal_suffix,1,3,ag-novcs,ag --skip-vcs-ignores -s [A-Z]+_RESUME,0.5061535835266113,1653, -linux_re_literal_suffix,1,3,ag-novcs,ag --skip-vcs-ignores -s [A-Z]+_RESUME,0.5377681255340576,1653, -linux_re_literal_suffix,1,3,ag-novcs,ag --skip-vcs-ignores -s [A-Z]+_RESUME,0.5237703323364258,1653, -linux_re_literal_suffix,1,3,ucg,ucg --nosmart-case [A-Z]+_RESUME,0.13993382453918457,1630, -linux_re_literal_suffix,1,3,ucg,ucg --nosmart-case [A-Z]+_RESUME,0.14264798164367676,1630, -linux_re_literal_suffix,1,3,ucg,ucg --nosmart-case [A-Z]+_RESUME,0.1370248794555664,1630, -linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.5916049480438232,1652,LC_ALL=C -linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.5460302829742432,1652,LC_ALL=C -linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.5453126430511475,1652,LC_ALL=C -linux_re_literal_suffix,1,3,sift,sift -n --binary-skip --exclude-files .* [A-Z]+_RESUME,4.272618055343628,1653, -linux_re_literal_suffix,1,3,sift,sift -n --binary-skip --exclude-files .* [A-Z]+_RESUME,3.953784704208374,1653, -linux_re_literal_suffix,1,3,sift,sift -n --binary-skip --exclude-files .* [A-Z]+_RESUME,4.050055742263794,1653, -linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.2921295166015625,23, -linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.2845008373260498,23, -linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.295884370803833,23, -linux_unicode_greek,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.8051228523254395,23, -linux_unicode_greek,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.824496269226074,23, -linux_unicode_greek,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.8370847702026367,23, -linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.2810385227203369,103, -linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.3430476188659668,103, -linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.34683680534362793,103, -linux_unicode_greek_casei,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.8701119422912598,23, -linux_unicode_greek_casei,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.9307808876037598,23, -linux_unicode_greek_casei,1,3,sift,sift -n --binary-skip --exclude-files .* --exclude-files *.pdf \p{Greek},2.9754345417022705,23, -linux_unicode_word,1,3,rg,rg -n \wAh,0.21484971046447754,186, -linux_unicode_word,1,3,rg,rg -n \wAh,0.2730236053466797,186, -linux_unicode_word,1,3,rg,rg -n \wAh,0.21688318252563477,186, -linux_unicode_word,1,3,rg (no Unicode),rg -n (?-u)\wAh,0.23591041564941406,174, -linux_unicode_word,1,3,rg (no Unicode),rg -n (?-u)\wAh,0.23375535011291504,174, -linux_unicode_word,1,3,rg (no Unicode),rg -n (?-u)\wAh,0.23137831687927246,174, -linux_unicode_word,1,3,rg-novcs,rg --no-ignore -n \wAh,0.11421418190002441,186, -linux_unicode_word,1,3,rg-novcs,rg --no-ignore -n \wAh,0.11203289031982422,186, -linux_unicode_word,1,3,rg-novcs,rg --no-ignore -n \wAh,0.10834765434265137,186, -linux_unicode_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n \wAh,0.39345431327819824,186, -linux_unicode_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n \wAh,0.40348386764526367,186, -linux_unicode_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n \wAh,0.40167927742004395,186, -linux_unicode_word,1,3,ag (no Unicode),ag -s \wAh,0.9391078948974609,174, -linux_unicode_word,1,3,ag (no Unicode),ag -s \wAh,0.9322304725646973,174, -linux_unicode_word,1,3,ag (no Unicode),ag -s \wAh,0.9393062591552734,174, -linux_unicode_word,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \wAh,0.9509954452514648,174, -linux_unicode_word,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \wAh,0.9229059219360352,174, -linux_unicode_word,1,3,ag-novcs (no Unicode),ag --skip-vcs-ignores -s \wAh,0.8915724754333496,174, -linux_unicode_word,1,3,ucg (no Unicode),ucg --nosmart-case \wAh,0.1719198226928711,168, -linux_unicode_word,1,3,ucg (no Unicode),ucg --nosmart-case \wAh,0.18027615547180176,168, -linux_unicode_word,1,3,ucg (no Unicode),ucg --nosmart-case \wAh,0.17251205444335938,168, -linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,4.980919122695923,186,LC_ALL=en_US.UTF-8 -linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,4.995086193084717,186,LC_ALL=en_US.UTF-8 -linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,4.941043376922607,186,LC_ALL=en_US.UTF-8 -linux_unicode_word,1,3,git grep (no Unicode),git grep -E -I -n \wAh,1.5811383724212646,174,LC_ALL=C -linux_unicode_word,1,3,git grep (no Unicode),git grep -E -I -n \wAh,1.5947043895721436,174,LC_ALL=C -linux_unicode_word,1,3,git grep (no Unicode),git grep -E -I -n \wAh,1.522637128829956,174,LC_ALL=C -linux_unicode_word,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \wAh,4.356529951095581,174, -linux_unicode_word,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \wAh,4.129682540893555,174, -linux_unicode_word,1,3,sift (no Unicode),sift -n --binary-skip --exclude-files .* \wAh,4.098994731903076,174, -linux_word,1,3,rg,rg -n -w PM_RESUME,0.21953463554382324,6, -linux_word,1,3,rg,rg -n -w PM_RESUME,0.2401576042175293,6, -linux_word,1,3,rg,rg -n -w PM_RESUME,0.21302008628845215,6, -linux_word,1,3,rg-novcs,rg --no-ignore -n -w PM_RESUME,0.14854192733764648,6, -linux_word,1,3,rg-novcs,rg --no-ignore -n -w PM_RESUME,0.09938955307006836,6, -linux_word,1,3,rg-novcs,rg --no-ignore -n -w PM_RESUME,0.1005239486694336,6, -linux_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -w PM_RESUME,0.3981668949127197,6, -linux_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -w PM_RESUME,0.3999497890472412,6, -linux_word,1,3,rg-novcs-mmap,rg --mmap --no-ignore -n -w PM_RESUME,0.38619542121887207,6, -linux_word,1,3,ag,ag -s -w PM_RESUME,0.6837906837463379,6, -linux_word,1,3,ag,ag -s -w PM_RESUME,0.6158981323242188,6, -linux_word,1,3,ag,ag -s -w PM_RESUME,0.41248440742492676,6, -linux_word,1,3,ag-novcs,ag --skip-vcs-ignores -s -w PM_RESUME,0.42545604705810547,6, -linux_word,1,3,ag-novcs,ag --skip-vcs-ignores -s -w PM_RESUME,0.4142575263977051,6, -linux_word,1,3,ag-novcs,ag --skip-vcs-ignores -s -w PM_RESUME,0.4717123508453369,6, -linux_word,1,3,ucg,ucg --nosmart-case -w PM_RESUME,0.1639394760131836,6, -linux_word,1,3,ucg,ucg --nosmart-case -w PM_RESUME,0.16333961486816406,6, -linux_word,1,3,ucg,ucg --nosmart-case -w PM_RESUME,0.16097497940063477,6, -linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.17300987243652344,6,LC_ALL=C -linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.171494722366333,6,LC_ALL=C -linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.19692277908325195,6,LC_ALL=C -linux_word,1,3,sift,sift -n --binary-skip --exclude-files .* -w PM_RESUME,3.190856695175171,6, -linux_word,1,3,sift,sift -n --binary-skip --exclude-files .* -w PM_RESUME,3.1970269680023193,6, -linux_word,1,3,sift,sift -n --binary-skip --exclude-files .* -w PM_RESUME,3.094048500061035,6, -subtitles_en_literal,1,3,rg,rg -n Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.41990160942077637,629, -subtitles_en_literal,1,3,rg,rg -n Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5151379108428955,629, -subtitles_en_literal,1,3,rg,rg -n Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.5245285034179688,629, -subtitles_en_literal,1,3,rg (no line numbers),rg Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2494678497314453,629, -subtitles_en_literal,1,3,rg (no line numbers),rg Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.2570071220397949,629, -subtitles_en_literal,1,3,rg (no line numbers),rg Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,0.20493078231811523,629, -subtitles_en_literal,1,3,ag,ag -s Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSubtitles2016.raw.sample.en,2.0035574436187744,629, -subtitles_en_literal,1,3,ag,ag -s Sherlock Holmes /home/andrew/tmp/benchsuite/subtitles/OpenSu