summary refs log tree commit diff stats
path: root/src/pattern
diff options
context:
space:
mode:
author Canop <cano.petrole@gmail.com> 2021-05-05 10:27:33 +0200
committer Canop <cano.petrole@gmail.com> 2021-05-05 10:27:33 +0200
commit e943d7c88fe0e0b8a8d799ceec3439c16c5150f2 (patch)
tree 1aedc7d568b75909453516ac4ac0c787c9bc9f74 /src/pattern
parent d97b05a9536b08cd48dec4d3e244e14d4ed080ef (diff)
Tokens Patterns support both ',' and ';' as separator
Diffstat (limited to 'src/pattern')
-rw-r--r-- src/pattern/pattern.rs 4
-rw-r--r-- src/pattern/tok_pattern.rs 67
2 files changed, 54 insertions, 17 deletions
diff --git a/src/pattern/pattern.rs b/src/pattern/pattern.rs
index 157c0ac..5624193 100644
--- a/src/pattern/pattern.rs
+++ b/src/pattern/pattern.rs
@@ -54,7 +54,7 @@ impl Pattern {
RegexPattern::from(core, flags.unwrap_or(""))?
),
SearchMode::NameTokens => Self::NameTokens(
- TokPattern::new(core, ',')
+ TokPattern::new(core)
),
SearchMode::PathExact => Self::PathExact(
ExactPattern::from(core)
@@ -66,7 +66,7 @@ impl Pattern {
RegexPattern::from(core, flags.unwrap_or(""))?
),
SearchMode::PathTokens => Self::PathTokens(
- TokPattern::new(core, ',')
+ TokPattern::new(core)
),
SearchMode::ContentExact => Self::ContentExact(
ContentExactPattern::from(core)
diff --git a/src/pattern/tok_pattern.rs b/src/pattern/tok_pattern.rs
index fc98d68..eb23fd6 100644
--- a/src/pattern/tok_pattern.rs
+++ b/src/pattern/tok_pattern.rs
@@ -10,9 +10,22 @@ use {
type CandChars = SmallVec<[char; 32]>;
+static SEPARATORS: &[char] = &[',', ';'];
+
+// weights used in match score computing
+const BONUS_MATCH: i32 = 50_000;
+const BONUS_CANDIDATE_LENGTH: i32 = -1; // per char
+
+pub fn norm_chars(s: &str) -> Box<[char]> {
+ secular::normalized_lower_lay_string(s)
+ .chars()
+ .collect::<Vec<char>>()
+ .into_boxed_slice()
+}
+
/// a list of tokens we want to find, non overlapping
/// and in any order, in strings
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
pub struct TokPattern {
toks: Vec<Box<[char]>>,
sum_len: usize,
@@ -35,16 +48,27 @@ pub struct TokPattern {
// - bonus for order ?
impl TokPattern {
- pub fn new(pattern: &str, sep: char) -> Self {
- let mut toks: Vec<Box<[char]>> = pattern.split(sep)
- .filter(|s| s.len() > 0)
- .map(|s| {
- secular::normalized_lower_lay_string(s)
- .chars()
- .collect::<Vec<char>>()
- .into_boxed_slice()
- })
- .collect();
+ pub fn new(pattern: &str) -> Self {
+ // we accept several separators. The first one
+ // we encounter among the possible ones is the
+ // separator of the whole. This allows using the
+ // other char: In ";ab,er", the comma isn't seen
+ // as a separator but as part of a tok
+ let sep = pattern.chars()
+ .filter(|c| SEPARATORS.contains(c))
+ .next();
+ let mut toks: Vec<Box<[char]>> = if let Some(sep) = sep {
+ pattern.split(sep)
+ .filter(|s| s.len() > 0)
+ .map(norm_chars)
+ .collect()
+ } else {
+ if pattern.is_empty() {
+ Vec::new()
+ } else {
+ vec![norm_chars(pattern)]
+ }
+ };
// we sort the tokens from biggest to smallest
// because the current algorithm stops at the
// first match for any tok. Thus it would fail
@@ -97,7 +121,7 @@ impl TokPattern {
}
}
pos.sort();
- let score = 42; // maybe find a better scoring
+ let score = BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32;
Some(NameMatch { score, pos })
}
@@ -118,7 +142,7 @@ mod tok_pattern_tests {
/// check position of the match of the pattern in name
fn check_pos(pattern: &str, name: &str, pos: &str) {
- let pat = TokPattern::new(pattern, ',');
+ let pat = TokPattern::new(pattern);
let match_pos = pat.find(name).unwrap().pos;
let target_pos: Pos = pos.chars()
.enumerate()
@@ -141,7 +165,7 @@ mod tok_pattern_tests {
" ^^^",
);
check_pos(
- "ba",
+ ";ba",
"babababaaa",
"^^ ",
);
@@ -164,12 +188,25 @@ mod tok_pattern_tests {
fn check_match(pattern: &str, name: &str, do_match: bool) {
assert_eq!(
- TokPattern::new(pattern, ',').find(name).is_some(),
+ TokPattern::new(pattern).find(name).is_some(),
do_match,
);
}
#[test]
+ fn test_separators() {
+ let a = TokPattern::new("ab;cd;ef");
+ let b = TokPattern::new("ab,cd,ef");
+ assert_eq!(a, b);
+ let a = TokPattern::new(",ab;cd;ef");
+ assert_eq!(a.toks.len(), 1);
+ assert_eq!(a.toks[0].len(), 8);
+ let a = TokPattern::new(";ab,cd,ef;");
+ assert_eq!(a.toks.len(), 1);
+ assert_eq!(a.toks[0].len(), 8);
+ }
+
+ #[test]
fn test_match() {
check_match("mia", "android/phonegap", false);
check_match("mi", "a", false);