summaryrefslogtreecommitdiffstats
path: root/benches
diff options
context:
space:
mode:
authorCanop <cano.petrole@gmail.com>2019-12-01 20:54:21 +0100
committerCanop <cano.petrole@gmail.com>2019-12-01 20:54:21 +0100
commit429c7f15d2c1f3b8705d59646a4fbbbc57f32b30 (patch)
tree570d00b33456cf2f09776813fd7be1b5e6c4f7cb /benches
parentd46304ba98991a658c0a344b13b4454d45257a7a (diff)
faster fuzzy searches
The `find` function do a lot of computations whose purpose is to find the exact position in chars of the best match. I've made another function, `score_of`, which only returns the score of the best match. This function is about 3 times faster. In order to manage the code duplication this brought, I've added a few test units. The code has also been modified to be benchmarkable and a bench case measures fuzzy searches.
Diffstat (limited to 'benches')
-rw-r--r--benches/fuzzy.rs57
1 files changed, 57 insertions, 0 deletions
diff --git a/benches/fuzzy.rs b/benches/fuzzy.rs
new file mode 100644
index 0000000..0bcffd5
--- /dev/null
+++ b/benches/fuzzy.rs
@@ -0,0 +1,57 @@
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use broot::fuzzy_patterns::FuzzyPattern;
+
+static PATTERNS: &[&str] = &["réveil", "broot", "AB", "é"];
+// this list contains 100 names, which makes it easier to estimate the duration
+// of a pattern matching per file name.
+static NAMES: &[&str] = &[
+ " brr ooT", "Réveillon", "dys", "test", " tetsesstteststt ",
+ "a rbrroot", "Ab", "test again", "des réveils", "pi",
+ "a quite longuer name", "compliqué - 这个大象有多重", "brrooT", "1", "another name.jpeg",
+ "aaaaaab", "a ab abba aab", "abcdrtodota", "palimpsestes désordonnés", "a",
+ "π", "normal.dot", "ùmeé9$njfbaù rz&é", "FactoryFactoryFactoryFactory.java", "leftPad.js",
+ "Cargo.toml", "Cargo.lock", "main.rs", ".gitignore", "lib.rs",
+ " un réveil", "aaaaaaaaaaaaaaaaabbbbbbb", "BABABC B AB", "réveils", "paem",
+ "poëme", "mjrzemrjzm mrjz mrzr rb root", "&cq", "..a", "~~~~~",
+ "ba", "bar", "bar ro ot", "& aé &a é", "mùrz*jfzùenfzeùrjmùe",
+ "krz", "q", "mjrfzm e", "dystroy.org", "www",
+ "termimad", "minimad", "regex", "lazy_regex", "jaquerie",
+ "Tillon", "Tellini", "Garo", "Portequoi", "Terdi",
+ "Ploplo", "le dragon", "l'ours", "la tortue géante", "le chamois",
+ "dystroy", "un petit peu n'importe quoi", "dans", "cette", "liste",
+ "Broot", " broot", " broot ", "b-root", "biroute",
+ "Miaou", "meow", "et", "surtout", "La Grande Roulette",
+ "this list is", "very obviously", "tailored at stressing", "the engine", "and the reader",
+ "C++", "javascript", "SQL", "C#", "Haskell",
+ "Lisp", "Pascal", "and", "Fortran", "are just missing from this codebase",
+ "denys", "seguret", "is", "the", "author",
+];
+
+fn score_of_benchmark(c: &mut Criterion) {
+ assert_eq!(NAMES.len(), 100);
+ for pattern in PATTERNS {
+ let task = format!("FuzzyPattern({:?})::score_of", pattern);
+ c.bench_function(&task, |b| {
+ let fp = FuzzyPattern::from(pattern);
+ b.iter(|| {
+ for name in NAMES {
+ black_box(fp.score_of(name));
+ }
+ });
+ });
+ let task = format!("FuzzyPattern({:?})::find", pattern);
+ c.bench_function(&task, |b| {
+ let fp = FuzzyPattern::from(pattern);
+ b.iter(|| {
+ for name in NAMES {
+ black_box(fp.find(name));
+ }
+ });
+ });
+ }
+}
+
+criterion_group!(benches, score_of_benchmark);
+criterion_main!(benches);