diff options
author | Dan Davison <dandavison7@gmail.com> | 2019-08-07 08:47:33 -0700 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2019-08-07 09:20:27 -0700 |
commit | 64001cfbd1543fe0bc8de45e7670ddc46c7d8385 (patch) | |
tree | b7655e6dc127f7b2ec3490c3491b5f6ce654cdc5 | |
parent | 402eace86e579e9666eb879f3637f0c5d09d6c05 (diff) |
Move run-length encoding
-rw-r--r-- | src/align.rs | 43 | ||||
-rw-r--r-- | src/edits.rs | 41 |
2 files changed, 44 insertions, 40 deletions
diff --git a/src/align.rs b/src/align.rs index aa1f189f..9d4aaf97 100644 --- a/src/align.rs +++ b/src/align.rs @@ -115,6 +115,10 @@ impl<'a> Alignment<'a> { Vec::from(ops) } + pub fn coalesced_operations(&self) -> Vec<(Operation, usize)> { + run_length_encode(self.operations()) + } + /// Compute custom distance metric from the filled table. The distance metric is /// /// (total length of edits) / (total length of longer string) @@ -184,6 +188,34 @@ impl<'a> Alignment<'a> { } } +fn run_length_encode<T>(sequence: Vec<T>) -> Vec<(T, usize)> +where + T: Copy, + T: PartialEq, +{ + let mut encoded = Vec::with_capacity(sequence.len()); + + if sequence.len() == 0 { + return encoded; + } + + let end = sequence.len(); + let (mut i, mut j) = (0, 1); + let mut curr = &sequence[i]; + loop { + if j == end || sequence[j] != *curr { + encoded.push((*curr, j - i)); + if j == end { + return encoded; + } else { + curr = &sequence[j]; + i = j; + } + } + j += 1; + } +} + #[cfg(test)] mod tests { use super::*; @@ -191,6 +223,17 @@ mod tests { use unicode_segmentation::UnicodeSegmentation; #[test] + fn test_run_length_encode() { + assert_eq!(run_length_encode::<usize>(vec![]), vec![]); + assert_eq!(run_length_encode(vec![0]), vec![(0, 1)]); + assert_eq!(run_length_encode(vec!["0", "0"]), vec![("0", 2)]); + assert_eq!( + run_length_encode(vec![0, 0, 1, 2, 2, 2, 3, 4, 4, 4]), + vec![(0, 2), (1, 1), (2, 3), (3, 1), (4, 3)] + ); + } + + #[test] fn test_0() { let (before, after) = ("aaa", "aba"); assert_string_distance_parts(before, after, (1, 3)); diff --git a/src/edits.rs b/src/edits.rs index e315f20a..d028b45f 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -144,7 +144,7 @@ where ) }; - for (op, n) in run_length_encode(alignment.operations()) { + for (op, n) in alignment.coalesced_operations() { match op { align::Operation::Deletion => { annotated_minus_line.push((deletion, minus_section(n))); @@ -165,34 +165,6 @@ where (annotated_minus_line, annotated_plus_line) } -fn run_length_encode<T>(sequence: Vec<T>) -> Vec<(T, usize)> -where - T: Copy, - T: PartialEq, -{ - let mut encoded = Vec::with_capacity(sequence.len()); - - if sequence.len() == 0 { - return encoded; - } - - let end = sequence.len(); - let (mut i, mut j) = (0, 1); - let mut curr = &sequence[i]; - loop { - if j == end || sequence[j] != *curr { - encoded.push((*curr, j - i)); - if j == end { - return encoded; - } else { - curr = &sequence[j]; - i = j; - } - } - j += 1; - } -} - #[cfg(test)] mod tests { use super::*; @@ -274,17 +246,6 @@ mod tests { } #[test] - fn test_run_length_encode() { - assert_eq!(run_length_encode::<usize>(vec![]), vec![]); - assert_eq!(run_length_encode(vec![0]), vec![(0, 1)]); - assert_eq!(run_length_encode(vec!["0", "0"]), vec![("0", 2)]); - assert_eq!( - run_length_encode(vec![0, 0, 1, 2, 2, 2, 3, 4, 4, 4]), - vec![(0, 2), (1, 1), (2, 3), (3, 1), (4, 3)] - ); - } - - #[test] fn test_infer_edits_1() { assert_paired_edits( vec!["aaa"], |