faster fuzzy searches

The `find` function do a lot of computations whose purpose is to find the exact position in chars of the best match. I've made another function, `score_of`, which only returns the score of the best match. This function is about 3 times faster. In order to manage the code duplication this brought, I've added a few test units. The code has also been modified to be benchmarkable and a bench case measures fuzzy searches.
author: Canop <cano.petrole@gmail.com> 2019-12-01 20:54:21 +0100
committer: Canop <cano.petrole@gmail.com> 2019-12-01 20:54:21 +0100
commit: 429c7f15d2c1f3b8705d59646a4fbbbc57f32b30 (patch)
tree: 570d00b33456cf2f09776813fd7be1b5e6c4f7cb /benches
parent: d46304ba98991a658c0a344b13b4454d45257a7a (diff)
1 files changed, 57 insertions, 0 deletions
diff --git a/benches/fuzzy.rs b/benches/fuzzy.rs
new file mode 100644
index 0000000..0bcffd5
--- /dev/null
+++ b/benches/fuzzy.rs
@@ -0,0 +1,57 @@
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use broot::fuzzy_patterns::FuzzyPattern;
+
+static PATTERNS: &[&str] = &["réveil", "broot", "AB", "é"];
+// this list contains 100 names, which makes it easier to estimate the duration
+// of a pattern matching per file name.
+static NAMES: &[&str] = &[
+    " brr ooT", "Réveillon", "dys", "test", " tetsesstteststt ",
+    "a rbrroot", "Ab", "test again", "des réveils", "pi",
+    "a quite longuer name", "compliqué - 这个大象有多重", "brrooT", "1", "another name.jpeg",
+    "aaaaaab", "a ab abba aab", "abcdrtodota", "palimpsestes désordonnés", "a",
+    "π", "normal.dot", "ùmeé9$njfbaù rz&é", "FactoryFactoryFactoryFactory.java", "leftPad.js",
+    "Cargo.toml", "Cargo.lock", "main.rs", ".gitignore", "lib.rs",
+    " un réveil", "aaaaaaaaaaaaaaaaabbbbbbb", "BABABC B AB", "réveils", "paem",
+    "poëme", "mjrzemrjzm mrjz mrzr rb root", "&cq", "..a", "~~~~~",
+    "ba", "bar", "bar ro ot", "& aé &a é", "mùrz*jfzùenfzeùrjmùe",
+    "krz", "q", "mjrfzm e", "dystroy.org", "www",
+    "termimad", "minimad", "regex", "lazy_regex", "jaquerie",
+    "Tillon", "Tellini", "Garo", "Portequoi", "Terdi",
+    "Ploplo", "le dragon", "l'ours", "la tortue géante", "le chamois",
+    "dystroy", "un petit peu n'importe quoi", "dans", "cette", "liste",
+    "Broot", " broot", " broot ", "b-root", "biroute",
+    "Miaou", "meow", "et", "surtout", "La Grande Roulette",
+    "this list is", "very obviously", "tailored at stressing", "the engine", "and the reader",
+    "C++", "javascript", "SQL", "C#", "Haskell",
+    "Lisp", "Pascal", "and", "Fortran", "are just missing from this codebase",
+    "denys", "seguret", "is", "the", "author",
+];
+
+fn score_of_benchmark(c: &mut Criterion) {
+    assert_eq!(NAMES.len(), 100);
+    for pattern in PATTERNS {
+        let task = format!("FuzzyPattern({:?})::score_of", pattern);
+        c.bench_function(&task, |b| {
+            let fp = FuzzyPattern::from(pattern);
+            b.iter(|| {
+                for name in NAMES {
+                    black_box(fp.score_of(name));
+                }
+            });
+        });
+        let task = format!("FuzzyPattern({:?})::find", pattern);
+        c.bench_function(&task, |b| {
+            let fp = FuzzyPattern::from(pattern);
+            b.iter(|| {
+                for name in NAMES {
+                    black_box(fp.find(name));
+                }
+            });
+        });
+    }
+}
+
+criterion_group!(benches, score_of_benchmark);
+criterion_main!(benches);
author	Canop <cano.petrole@gmail.com>	2019-12-01 20:54:21 +0100
committer	Canop <cano.petrole@gmail.com>	2019-12-01 20:54:21 +0100
commit	429c7f15d2c1f3b8705d59646a4fbbbc57f32b30 (patch)
tree	570d00b33456cf2f09776813fd7be1b5e6c4f7cb /benches
parent	d46304ba98991a658c0a344b13b4454d45257a7a (diff)