1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/*!
This module benchmarks the glob implementation. For benchmarks on the ripgrep
tool itself, see the benchsuite directory.
*/
#![feature(test)]
extern crate glob;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate test;
const SHORT: &'static str = "some/needle.txt";
const SHORT_PAT: &'static str = "some/**/needle.txt";
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
const LONG_PAT: &'static str = "some/**/needle.txt";
#[allow(dead_code, unused_variables)]
#[path = "../src/glob.rs"]
mod reglob;
fn new_glob(pat: &str) -> glob::Pattern {
glob::Pattern::new(pat).unwrap()
}
fn new_reglob(pat: &str) -> reglob::Set {
let mut builder = reglob::SetBuilder::new();
builder.add(pat).unwrap();
builder.build().unwrap()
}
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
let mut builder = reglob::SetBuilder::new();
for pat in pats {
builder.add(pat).unwrap();
}
builder.build().unwrap()
}
#[bench]
fn short_glob(b: &mut test::Bencher) {
let pat = new_glob(SHORT_PAT);
b.iter(|| assert!(pat.matches(SHORT)));
}
#[bench]
fn short_regex(b: &mut test::Bencher) {
let set = new_reglob(SHORT_PAT);
b.iter(|| assert!(set.is_match(SHORT)));
}
#[bench]
fn long_glob(b: &mut test::Bencher) {
let pat = new_glob(LONG_PAT);
b.iter(|| assert!(pat.matches(LONG)));
}
#[bench]
fn long_regex(b: &mut test::Bencher) {
let set = new_reglob(LONG_PAT);
b.iter(|| assert!(set.is_match(LONG)));
}
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
// Taken from a random .gitignore on my system.
".*.swp",
"tags",
"target",
"*.lock",
"tmp",
"*.csv",
"*.fst",
"*-got",
"*.csv.idx",
"words",
"98m*",
"dict",
"test",
"months",
];
const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx";
#[bench]
fn many_short_glob(b: &mut test::Bencher) {
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect();
b.iter(|| {
let mut count = 0;
for pat in &pats {
if pat.matches(MANY_SHORT_SEARCH) {
count += 1;
}
}
assert_eq!(2, count);
})
}
#[bench]
fn many_short_regex_set(b: &mut test::Bencher) {
let set = new_reglob_many(MANY_SHORT_GLOBS);
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
}
// This is the fastest on my system (beating many_glob by about 2x). This
// suggests that a RegexSet needs quite a few regexes (or a larger haystack)
// in order for it to scale.
//
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
// or a longer haystack.
#[bench]
fn many_short_regex_pattern(b: &mut test::Bencher) {
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| {
let pat = reglob::Pattern::new(s).unwrap();
regex::Regex::new(&pat.to_regex()).unwrap()
}).collect();
b.iter(|| {
let mut count = 0;
for pat in &pats {
if pat.is_match(MANY_SHORT_SEARCH) {
count += 1;
}
}
assert_eq!(2, count);
})
}
|