diff options
author | Canop <cano.petrole@gmail.com> | 2021-05-05 17:41:29 +0200 |
---|---|---|
committer | Canop <cano.petrole@gmail.com> | 2021-05-05 17:41:29 +0200 |
commit | f77b763ab09a2f45438793e656ad334c0e609669 (patch) | |
tree | 6695650d458a7771dd760a557beb61f56fd88659 /src/pattern | |
parent | caaf12eeb3ca0cf8f212753a056b8ee10e09b72c (diff) |
small perf improvement in tokens patternsunordered-tokens
Diffstat (limited to 'src/pattern')
-rw-r--r-- | src/pattern/composite_pattern.rs | 6 | ||||
-rw-r--r-- | src/pattern/content_pattern.rs | 4 | ||||
-rw-r--r-- | src/pattern/content_regex_pattern.rs | 4 | ||||
-rw-r--r-- | src/pattern/exact_pattern.rs | 4 | ||||
-rw-r--r-- | src/pattern/fuzzy_pattern.rs | 6 | ||||
-rw-r--r-- | src/pattern/input_pattern.rs | 2 | ||||
-rw-r--r-- | src/pattern/pattern.rs | 21 | ||||
-rw-r--r-- | src/pattern/regex_pattern.rs | 4 | ||||
-rw-r--r-- | src/pattern/tok_pattern.rs | 83 |
9 files changed, 95 insertions, 39 deletions
diff --git a/src/pattern/composite_pattern.rs b/src/pattern/composite_pattern.rs index 7eec3ea..4d093f5 100644 --- a/src/pattern/composite_pattern.rs +++ b/src/pattern/composite_pattern.rs @@ -145,4 +145,10 @@ impl CompositePattern { }) } + pub fn is_empty(&self) -> bool { + let is_not_empty = self.expr.iter_atoms() + .any(|p| p.is_some()); + !is_not_empty + } + } diff --git a/src/pattern/content_pattern.rs b/src/pattern/content_pattern.rs index 7aaf798..be7014a 100644 --- a/src/pattern/content_pattern.rs +++ b/src/pattern/content_pattern.rs @@ -34,6 +34,10 @@ impl ContentExactPattern { self.needle.as_str() } + pub fn is_empty(&self) -> bool { + self.needle.is_empty() + } + pub fn to_regex_parts(&self) -> (String, String) { (self.as_str().to_string(), "".to_string()) } diff --git a/src/pattern/content_regex_pattern.rs b/src/pattern/content_regex_pattern.rs index 7bf7639..9ca9858 100644 --- a/src/pattern/content_regex_pattern.rs +++ b/src/pattern/content_regex_pattern.rs @@ -35,6 +35,10 @@ impl ContentRegexPattern { }) } + pub fn is_empty(&self) -> bool { + self.rex.as_str().is_empty() + } + pub fn to_regex_parts(&self) -> (String, String) { (self.rex.to_string(), self.flags.clone()) } diff --git a/src/pattern/exact_pattern.rs b/src/pattern/exact_pattern.rs index 23c6e3e..ae2fae1 100644 --- a/src/pattern/exact_pattern.rs +++ b/src/pattern/exact_pattern.rs @@ -44,6 +44,10 @@ impl ExactPattern { } } + pub fn is_empty(&self) -> bool { + self.chars_count == 0 + } + fn score(&self, start: usize, candidate: &str) -> i32 { // start is the byte index let mut score = BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32; diff --git a/src/pattern/fuzzy_pattern.rs b/src/pattern/fuzzy_pattern.rs index b0be711..d0adee0 100644 --- a/src/pattern/fuzzy_pattern.rs +++ b/src/pattern/fuzzy_pattern.rs @@ -72,6 +72,12 @@ impl FuzzyPattern { } } + /// an "empty" pattern is one which accepts everything because + /// it has no discriminant + pub fn is_empty(&self) -> bool { + self.chars.is_empty() + } + fn tight_match_from_index( &self, cand_chars: &CandChars, diff --git a/src/pattern/input_pattern.rs b/src/pattern/input_pattern.rs index 1ab90db..42b8b67 100644 --- a/src/pattern/input_pattern.rs +++ b/src/pattern/input_pattern.rs @@ -34,7 +34,7 @@ impl InputPattern { Ok(Self { raw, pattern }) } pub fn is_none(&self) -> bool { - self.pattern.is_none() + self.pattern.is_empty() } pub fn is_some(&self) -> bool { self.pattern.is_some() diff --git a/src/pattern/pattern.rs b/src/pattern/pattern.rs index 5624193..d479995 100644 --- a/src/pattern/pattern.rs +++ b/src/pattern/pattern.rs @@ -177,11 +177,26 @@ impl Pattern { } pub fn is_some(&self) -> bool { - !matches!(&self, Pattern::None) + !self.is_empty() } - pub fn is_none(&self) -> bool { - matches!(&self, Pattern::None) + /// an empty pattern is one which doesn't discriminate + /// (it accepts everything) + pub fn is_empty(&self) -> bool { + match self { + Self::NameExact(ep) => ep.is_empty(), + Self::NameFuzzy(fp) => fp.is_empty(), + Self::NameRegex(rp) => rp.is_empty(), + Self::NameTokens(tp) => tp.is_empty(), + Self::PathExact(ep) => ep.is_empty(), + Self::PathFuzzy(fp) => fp.is_empty(), + Self::PathRegex(rp) => rp.is_empty(), + Self::PathTokens(tp) => tp.is_empty(), + Self::ContentExact(ep) => ep.is_empty(), + Self::ContentRegex(rp) => rp.is_empty(), + Self::Composite(cp) => cp.is_empty(), + Self::None => true, + } } /// whether the scores are more than just 0 or 1. diff --git a/src/pattern/regex_pattern.rs b/src/pattern/regex_pattern.rs index f78f410..0090498 100644 --- a/src/pattern/regex_pattern.rs +++ b/src/pattern/regex_pattern.rs @@ -39,4 +39,8 @@ impl RegexPattern { super::NameMatch { score: 1, pos } }) } + pub fn is_empty(&self) -> bool { + self.rex.as_str().is_empty() + } + } diff --git a/src/pattern/tok_pattern.rs b/src/pattern/tok_pattern.rs index 0b31127..a3c743f 100644 --- a/src/pattern/tok_pattern.rs +++ b/src/pattern/tok_pattern.rs @@ -31,15 +31,6 @@ pub struct TokPattern { sum_len: usize, } -// optimizations to test: -// - score_of doesn't need to compute pos, just the ranges -// - try the first tok before creating the matching_ranges vec -// (the first one is also impler to test) -// - if there's no room for any of the other toks before the -// first matching tok, start other loops after the first -// matching range -// But until I add some scoring and multiple positions tests -// there should not be any perf concerns // scoring basis ? // - number of parts of the candidats (separated by / for example) // that are touched by a tok ? @@ -82,45 +73,65 @@ impl TokPattern { } } + /// an "empty" pattern is one which accepts everything because + /// it has no discriminant + pub fn is_empty(&self) -> bool { + self.sum_len == 0 + } + /// return either None (no match) or a vec whose size is the number /// of tokens - pub fn find_ranges(&self, candidate: &str) -> Option<Vec<Range<usize>>> { - if candidate.len() < self.sum_len { + fn find_ranges(&self, candidate: &str) -> Option<Vec<Range<usize>>> { + if candidate.len() < self.sum_len || self.sum_len == 0 { return None; } let mut cand_chars: CandChars = SmallVec::with_capacity(candidate.len()); cand_chars.extend(candidate.chars().map(secular::lower_lay_char)); - let mut matching_ranges: Vec<Range<usize>> = Vec::with_capacity(self.toks.len()); - for tok in &self.toks { - let l = tok.len(); - let matching_range = (0..cand_chars.len()+1-l) - .map(|idx| idx..idx+l) - .filter(|r| { - &cand_chars[r.start..r.end] == tok.as_ref() - }) - .filter(|r| { - // check we're not intersecting a previous range - for pr in &matching_ranges { - if pr.contains(&r.start) || pr.contains(&(r.end-1)) { - return false; - } + // we first look for the first tok, it's simpler + let first_tok = &self.toks[0]; + let l = first_tok.len(); + let first_matching_range = (0..cand_chars.len()+1-l) + .map(|idx| idx..idx+l) + .filter(|r| { + &cand_chars[r.start..r.end] == first_tok.as_ref() + }) + .next(); + // we initialize the vec only when the first tok is found + first_matching_range + .and_then(|first_matching_range| { + let mut matching_ranges = vec![first_matching_range]; + for tok in self.toks.iter().skip(1) { + let l = tok.len(); + let matching_range = (0..cand_chars.len()+1-l) + .map(|idx| idx..idx+l) + .filter(|r| { + &cand_chars[r.start..r.end] == tok.as_ref() + }) + .filter(|r| { + // check we're not intersecting a previous range + for pr in &matching_ranges { + if pr.contains(&r.start) || pr.contains(&(r.end-1)) { + return false; + } + } + true + }) + .next(); + if let Some(r) = matching_range { + matching_ranges.push(r); + } else { + return None; } - true - }) - .next(); - if let Some(r) = matching_range { - matching_ranges.push(r); - } else { - return None; - } - } - Some(matching_ranges) + } + Some(matching_ranges) + }) } fn score_of_matching(&self, candidate: &str) -> i32 { BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32 } + /// note that it should not be called on empty patterns pub fn find(&self, candidate: &str) -> Option<NameMatch> { self.find_ranges(candidate) .map(|matching_ranges| { @@ -139,6 +150,7 @@ impl TokPattern { } /// compute the score of the best match + /// Note that it should not be called on empty patterns pub fn score_of(&self, candidate: &str) -> Option<i32> { self.find_ranges(candidate) .map(|_| self.score_of_matching(candidate)) @@ -155,6 +167,7 @@ mod tok_pattern_tests { /// check position of the match of the pattern in name fn check_pos(pattern: &str, name: &str, pos: &str) { + println!("checking pattern={:?} name={:?}", pattern, name); let pat = TokPattern::new(pattern); let match_pos = pat.find(name).unwrap().pos; let target_pos: Pos = pos.chars() |