summaryrefslogtreecommitdiffstats
path: root/src/pattern
diff options
context:
space:
mode:
authorCanop <cano.petrole@gmail.com>2021-05-05 17:41:29 +0200
committerCanop <cano.petrole@gmail.com>2021-05-05 17:41:29 +0200
commitf77b763ab09a2f45438793e656ad334c0e609669 (patch)
tree6695650d458a7771dd760a557beb61f56fd88659 /src/pattern
parentcaaf12eeb3ca0cf8f212753a056b8ee10e09b72c (diff)
small perf improvement in tokens patternsunordered-tokens
Diffstat (limited to 'src/pattern')
-rw-r--r--src/pattern/composite_pattern.rs6
-rw-r--r--src/pattern/content_pattern.rs4
-rw-r--r--src/pattern/content_regex_pattern.rs4
-rw-r--r--src/pattern/exact_pattern.rs4
-rw-r--r--src/pattern/fuzzy_pattern.rs6
-rw-r--r--src/pattern/input_pattern.rs2
-rw-r--r--src/pattern/pattern.rs21
-rw-r--r--src/pattern/regex_pattern.rs4
-rw-r--r--src/pattern/tok_pattern.rs83
9 files changed, 95 insertions, 39 deletions
diff --git a/src/pattern/composite_pattern.rs b/src/pattern/composite_pattern.rs
index 7eec3ea..4d093f5 100644
--- a/src/pattern/composite_pattern.rs
+++ b/src/pattern/composite_pattern.rs
@@ -145,4 +145,10 @@ impl CompositePattern {
})
}
+ pub fn is_empty(&self) -> bool {
+ let is_not_empty = self.expr.iter_atoms()
+ .any(|p| p.is_some());
+ !is_not_empty
+ }
+
}
diff --git a/src/pattern/content_pattern.rs b/src/pattern/content_pattern.rs
index 7aaf798..be7014a 100644
--- a/src/pattern/content_pattern.rs
+++ b/src/pattern/content_pattern.rs
@@ -34,6 +34,10 @@ impl ContentExactPattern {
self.needle.as_str()
}
+ pub fn is_empty(&self) -> bool {
+ self.needle.is_empty()
+ }
+
pub fn to_regex_parts(&self) -> (String, String) {
(self.as_str().to_string(), "".to_string())
}
diff --git a/src/pattern/content_regex_pattern.rs b/src/pattern/content_regex_pattern.rs
index 7bf7639..9ca9858 100644
--- a/src/pattern/content_regex_pattern.rs
+++ b/src/pattern/content_regex_pattern.rs
@@ -35,6 +35,10 @@ impl ContentRegexPattern {
})
}
+ pub fn is_empty(&self) -> bool {
+ self.rex.as_str().is_empty()
+ }
+
pub fn to_regex_parts(&self) -> (String, String) {
(self.rex.to_string(), self.flags.clone())
}
diff --git a/src/pattern/exact_pattern.rs b/src/pattern/exact_pattern.rs
index 23c6e3e..ae2fae1 100644
--- a/src/pattern/exact_pattern.rs
+++ b/src/pattern/exact_pattern.rs
@@ -44,6 +44,10 @@ impl ExactPattern {
}
}
+ pub fn is_empty(&self) -> bool {
+ self.chars_count == 0
+ }
+
fn score(&self, start: usize, candidate: &str) -> i32 {
// start is the byte index
let mut score = BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32;
diff --git a/src/pattern/fuzzy_pattern.rs b/src/pattern/fuzzy_pattern.rs
index b0be711..d0adee0 100644
--- a/src/pattern/fuzzy_pattern.rs
+++ b/src/pattern/fuzzy_pattern.rs
@@ -72,6 +72,12 @@ impl FuzzyPattern {
}
}
+ /// an "empty" pattern is one which accepts everything because
+ /// it has no discriminant
+ pub fn is_empty(&self) -> bool {
+ self.chars.is_empty()
+ }
+
fn tight_match_from_index(
&self,
cand_chars: &CandChars,
diff --git a/src/pattern/input_pattern.rs b/src/pattern/input_pattern.rs
index 1ab90db..42b8b67 100644
--- a/src/pattern/input_pattern.rs
+++ b/src/pattern/input_pattern.rs
@@ -34,7 +34,7 @@ impl InputPattern {
Ok(Self { raw, pattern })
}
pub fn is_none(&self) -> bool {
- self.pattern.is_none()
+ self.pattern.is_empty()
}
pub fn is_some(&self) -> bool {
self.pattern.is_some()
diff --git a/src/pattern/pattern.rs b/src/pattern/pattern.rs
index 5624193..d479995 100644
--- a/src/pattern/pattern.rs
+++ b/src/pattern/pattern.rs
@@ -177,11 +177,26 @@ impl Pattern {
}
pub fn is_some(&self) -> bool {
- !matches!(&self, Pattern::None)
+ !self.is_empty()
}
- pub fn is_none(&self) -> bool {
- matches!(&self, Pattern::None)
+ /// an empty pattern is one which doesn't discriminate
+ /// (it accepts everything)
+ pub fn is_empty(&self) -> bool {
+ match self {
+ Self::NameExact(ep) => ep.is_empty(),
+ Self::NameFuzzy(fp) => fp.is_empty(),
+ Self::NameRegex(rp) => rp.is_empty(),
+ Self::NameTokens(tp) => tp.is_empty(),
+ Self::PathExact(ep) => ep.is_empty(),
+ Self::PathFuzzy(fp) => fp.is_empty(),
+ Self::PathRegex(rp) => rp.is_empty(),
+ Self::PathTokens(tp) => tp.is_empty(),
+ Self::ContentExact(ep) => ep.is_empty(),
+ Self::ContentRegex(rp) => rp.is_empty(),
+ Self::Composite(cp) => cp.is_empty(),
+ Self::None => true,
+ }
}
/// whether the scores are more than just 0 or 1.
diff --git a/src/pattern/regex_pattern.rs b/src/pattern/regex_pattern.rs
index f78f410..0090498 100644
--- a/src/pattern/regex_pattern.rs
+++ b/src/pattern/regex_pattern.rs
@@ -39,4 +39,8 @@ impl RegexPattern {
super::NameMatch { score: 1, pos }
})
}
+ pub fn is_empty(&self) -> bool {
+ self.rex.as_str().is_empty()
+ }
+
}
diff --git a/src/pattern/tok_pattern.rs b/src/pattern/tok_pattern.rs
index 0b31127..a3c743f 100644
--- a/src/pattern/tok_pattern.rs
+++ b/src/pattern/tok_pattern.rs
@@ -31,15 +31,6 @@ pub struct TokPattern {
sum_len: usize,
}
-// optimizations to test:
-// - score_of doesn't need to compute pos, just the ranges
-// - try the first tok before creating the matching_ranges vec
-// (the first one is also impler to test)
-// - if there's no room for any of the other toks before the
-// first matching tok, start other loops after the first
-// matching range
-// But until I add some scoring and multiple positions tests
-// there should not be any perf concerns
// scoring basis ?
// - number of parts of the candidats (separated by / for example)
// that are touched by a tok ?
@@ -82,45 +73,65 @@ impl TokPattern {
}
}
+ /// an "empty" pattern is one which accepts everything because
+ /// it has no discriminant
+ pub fn is_empty(&self) -> bool {
+ self.sum_len == 0
+ }
+
/// return either None (no match) or a vec whose size is the number
/// of tokens
- pub fn find_ranges(&self, candidate: &str) -> Option<Vec<Range<usize>>> {
- if candidate.len() < self.sum_len {
+ fn find_ranges(&self, candidate: &str) -> Option<Vec<Range<usize>>> {
+ if candidate.len() < self.sum_len || self.sum_len == 0 {
return None;
}
let mut cand_chars: CandChars = SmallVec::with_capacity(candidate.len());
cand_chars.extend(candidate.chars().map(secular::lower_lay_char));
- let mut matching_ranges: Vec<Range<usize>> = Vec::with_capacity(self.toks.len());
- for tok in &self.toks {
- let l = tok.len();
- let matching_range = (0..cand_chars.len()+1-l)
- .map(|idx| idx..idx+l)
- .filter(|r| {
- &cand_chars[r.start..r.end] == tok.as_ref()
- })
- .filter(|r| {
- // check we're not intersecting a previous range
- for pr in &matching_ranges {
- if pr.contains(&r.start) || pr.contains(&(r.end-1)) {
- return false;
- }
+ // we first look for the first tok, it's simpler
+ let first_tok = &self.toks[0];
+ let l = first_tok.len();
+ let first_matching_range = (0..cand_chars.len()+1-l)
+ .map(|idx| idx..idx+l)
+ .filter(|r| {
+ &cand_chars[r.start..r.end] == first_tok.as_ref()
+ })
+ .next();
+ // we initialize the vec only when the first tok is found
+ first_matching_range
+ .and_then(|first_matching_range| {
+ let mut matching_ranges = vec![first_matching_range];
+ for tok in self.toks.iter().skip(1) {
+ let l = tok.len();
+ let matching_range = (0..cand_chars.len()+1-l)
+ .map(|idx| idx..idx+l)
+ .filter(|r| {
+ &cand_chars[r.start..r.end] == tok.as_ref()
+ })
+ .filter(|r| {
+ // check we're not intersecting a previous range
+ for pr in &matching_ranges {
+ if pr.contains(&r.start) || pr.contains(&(r.end-1)) {
+ return false;
+ }
+ }
+ true
+ })
+ .next();
+ if let Some(r) = matching_range {
+ matching_ranges.push(r);
+ } else {
+ return None;
}
- true
- })
- .next();
- if let Some(r) = matching_range {
- matching_ranges.push(r);
- } else {
- return None;
- }
- }
- Some(matching_ranges)
+ }
+ Some(matching_ranges)
+ })
}
fn score_of_matching(&self, candidate: &str) -> i32 {
BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32
}
+ /// note that it should not be called on empty patterns
pub fn find(&self, candidate: &str) -> Option<NameMatch> {
self.find_ranges(candidate)
.map(|matching_ranges| {
@@ -139,6 +150,7 @@ impl TokPattern {
}
/// compute the score of the best match
+ /// Note that it should not be called on empty patterns
pub fn score_of(&self, candidate: &str) -> Option<i32> {
self.find_ranges(candidate)
.map(|_| self.score_of_matching(candidate))
@@ -155,6 +167,7 @@ mod tok_pattern_tests {
/// check position of the match of the pattern in name
fn check_pos(pattern: &str, name: &str, pos: &str) {
+ println!("checking pattern={:?} name={:?}", pattern, name);
let pat = TokPattern::new(pattern);
let match_pos = pat.find(name).unwrap().pos;
let target_pos: Pos = pos.chars()