summaryrefslogtreecommitdiffstats
path: root/src/align.rs
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2019-08-07 08:47:33 -0700
committerDan Davison <dandavison7@gmail.com>2019-08-07 09:20:27 -0700
commit64001cfbd1543fe0bc8de45e7670ddc46c7d8385 (patch)
treeb7655e6dc127f7b2ec3490c3491b5f6ce654cdc5 /src/align.rs
parent402eace86e579e9666eb879f3637f0c5d09d6c05 (diff)
Move run-length encoding
Diffstat (limited to 'src/align.rs')
-rw-r--r--src/align.rs43
1 files changed, 43 insertions, 0 deletions
diff --git a/src/align.rs b/src/align.rs
index aa1f189f..9d4aaf97 100644
--- a/src/align.rs
+++ b/src/align.rs
@@ -115,6 +115,10 @@ impl<'a> Alignment<'a> {
Vec::from(ops)
}
+ pub fn coalesced_operations(&self) -> Vec<(Operation, usize)> {
+ run_length_encode(self.operations())
+ }
+
/// Compute custom distance metric from the filled table. The distance metric is
///
/// (total length of edits) / (total length of longer string)
@@ -184,6 +188,34 @@ impl<'a> Alignment<'a> {
}
}
+fn run_length_encode<T>(sequence: Vec<T>) -> Vec<(T, usize)>
+where
+ T: Copy,
+ T: PartialEq,
+{
+ let mut encoded = Vec::with_capacity(sequence.len());
+
+ if sequence.len() == 0 {
+ return encoded;
+ }
+
+ let end = sequence.len();
+ let (mut i, mut j) = (0, 1);
+ let mut curr = &sequence[i];
+ loop {
+ if j == end || sequence[j] != *curr {
+ encoded.push((*curr, j - i));
+ if j == end {
+ return encoded;
+ } else {
+ curr = &sequence[j];
+ i = j;
+ }
+ }
+ j += 1;
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -191,6 +223,17 @@ mod tests {
use unicode_segmentation::UnicodeSegmentation;
#[test]
+ fn test_run_length_encode() {
+ assert_eq!(run_length_encode::<usize>(vec![]), vec![]);
+ assert_eq!(run_length_encode(vec![0]), vec![(0, 1)]);
+ assert_eq!(run_length_encode(vec!["0", "0"]), vec![("0", 2)]);
+ assert_eq!(
+ run_length_encode(vec![0, 0, 1, 2, 2, 2, 3, 4, 4, 4]),
+ vec![(0, 2), (1, 1), (2, 3), (3, 1), (4, 3)]
+ );
+ }
+
+ #[test]
fn test_0() {
let (before, after) = ("aaa", "aba");
assert_string_distance_parts(before, after, (1, 3));