diff options
author | Canop <cano.petrole@gmail.com> | 2021-05-05 10:27:33 +0200 |
---|---|---|
committer | Canop <cano.petrole@gmail.com> | 2021-05-05 10:27:33 +0200 |
commit | e943d7c88fe0e0b8a8d799ceec3439c16c5150f2 (patch) | |
tree | 1aedc7d568b75909453516ac4ac0c787c9bc9f74 /src/pattern | |
parent | d97b05a9536b08cd48dec4d3e244e14d4ed080ef (diff) |
Tokens Patterns support both ',' and ';' as separator
Diffstat (limited to 'src/pattern')
-rw-r--r-- | src/pattern/pattern.rs | 4 | ||||
-rw-r--r-- | src/pattern/tok_pattern.rs | 67 |
2 files changed, 54 insertions, 17 deletions
diff --git a/src/pattern/pattern.rs b/src/pattern/pattern.rs index 157c0ac..5624193 100644 --- a/src/pattern/pattern.rs +++ b/src/pattern/pattern.rs @@ -54,7 +54,7 @@ impl Pattern { RegexPattern::from(core, flags.unwrap_or(""))? ), SearchMode::NameTokens => Self::NameTokens( - TokPattern::new(core, ',') + TokPattern::new(core) ), SearchMode::PathExact => Self::PathExact( ExactPattern::from(core) @@ -66,7 +66,7 @@ impl Pattern { RegexPattern::from(core, flags.unwrap_or(""))? ), SearchMode::PathTokens => Self::PathTokens( - TokPattern::new(core, ',') + TokPattern::new(core) ), SearchMode::ContentExact => Self::ContentExact( ContentExactPattern::from(core) diff --git a/src/pattern/tok_pattern.rs b/src/pattern/tok_pattern.rs index fc98d68..eb23fd6 100644 --- a/src/pattern/tok_pattern.rs +++ b/src/pattern/tok_pattern.rs @@ -10,9 +10,22 @@ use { type CandChars = SmallVec<[char; 32]>; +static SEPARATORS: &[char] = &[',', ';']; + +// weights used in match score computing +const BONUS_MATCH: i32 = 50_000; +const BONUS_CANDIDATE_LENGTH: i32 = -1; // per char + +pub fn norm_chars(s: &str) -> Box<[char]> { + secular::normalized_lower_lay_string(s) + .chars() + .collect::<Vec<char>>() + .into_boxed_slice() +} + /// a list of tokens we want to find, non overlapping /// and in any order, in strings -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct TokPattern { toks: Vec<Box<[char]>>, sum_len: usize, @@ -35,16 +48,27 @@ pub struct TokPattern { // - bonus for order ? impl TokPattern { - pub fn new(pattern: &str, sep: char) -> Self { - let mut toks: Vec<Box<[char]>> = pattern.split(sep) - .filter(|s| s.len() > 0) - .map(|s| { - secular::normalized_lower_lay_string(s) - .chars() - .collect::<Vec<char>>() - .into_boxed_slice() - }) - .collect(); + pub fn new(pattern: &str) -> Self { + // we accept several separators. The first one + // we encounter among the possible ones is the + // separator of the whole. This allows using the + // other char: In ";ab,er", the comma isn't seen + // as a separator but as part of a tok + let sep = pattern.chars() + .filter(|c| SEPARATORS.contains(c)) + .next(); + let mut toks: Vec<Box<[char]>> = if let Some(sep) = sep { + pattern.split(sep) + .filter(|s| s.len() > 0) + .map(norm_chars) + .collect() + } else { + if pattern.is_empty() { + Vec::new() + } else { + vec![norm_chars(pattern)] + } + }; // we sort the tokens from biggest to smallest // because the current algorithm stops at the // first match for any tok. Thus it would fail @@ -97,7 +121,7 @@ impl TokPattern { } } pos.sort(); - let score = 42; // maybe find a better scoring + let score = BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32; Some(NameMatch { score, pos }) } @@ -118,7 +142,7 @@ mod tok_pattern_tests { /// check position of the match of the pattern in name fn check_pos(pattern: &str, name: &str, pos: &str) { - let pat = TokPattern::new(pattern, ','); + let pat = TokPattern::new(pattern); let match_pos = pat.find(name).unwrap().pos; let target_pos: Pos = pos.chars() .enumerate() @@ -141,7 +165,7 @@ mod tok_pattern_tests { " ^^^", ); check_pos( - "ba", + ";ba", "babababaaa", "^^ ", ); @@ -164,12 +188,25 @@ mod tok_pattern_tests { fn check_match(pattern: &str, name: &str, do_match: bool) { assert_eq!( - TokPattern::new(pattern, ',').find(name).is_some(), + TokPattern::new(pattern).find(name).is_some(), do_match, ); } #[test] + fn test_separators() { + let a = TokPattern::new("ab;cd;ef"); + let b = TokPattern::new("ab,cd,ef"); + assert_eq!(a, b); + let a = TokPattern::new(",ab;cd;ef"); + assert_eq!(a.toks.len(), 1); + assert_eq!(a.toks[0].len(), 8); + let a = TokPattern::new(";ab,cd,ef;"); + assert_eq!(a.toks.len(), 1); + assert_eq!(a.toks[0].len(), 8); + } + + #[test] fn test_match() { check_match("mia", "android/phonegap", false); check_match("mi", "a", false); |