summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2019-08-07 08:47:33 -0700
committerDan Davison <dandavison7@gmail.com>2019-08-07 09:20:27 -0700
commit64001cfbd1543fe0bc8de45e7670ddc46c7d8385 (patch)
treeb7655e6dc127f7b2ec3490c3491b5f6ce654cdc5
parent402eace86e579e9666eb879f3637f0c5d09d6c05 (diff)
Move run-length encoding
-rw-r--r--src/align.rs43
-rw-r--r--src/edits.rs41
2 files changed, 44 insertions, 40 deletions
diff --git a/src/align.rs b/src/align.rs
index aa1f189f..9d4aaf97 100644
--- a/src/align.rs
+++ b/src/align.rs
@@ -115,6 +115,10 @@ impl<'a> Alignment<'a> {
Vec::from(ops)
}
+ pub fn coalesced_operations(&self) -> Vec<(Operation, usize)> {
+ run_length_encode(self.operations())
+ }
+
/// Compute custom distance metric from the filled table. The distance metric is
///
/// (total length of edits) / (total length of longer string)
@@ -184,6 +188,34 @@ impl<'a> Alignment<'a> {
}
}
+fn run_length_encode<T>(sequence: Vec<T>) -> Vec<(T, usize)>
+where
+ T: Copy,
+ T: PartialEq,
+{
+ let mut encoded = Vec::with_capacity(sequence.len());
+
+ if sequence.len() == 0 {
+ return encoded;
+ }
+
+ let end = sequence.len();
+ let (mut i, mut j) = (0, 1);
+ let mut curr = &sequence[i];
+ loop {
+ if j == end || sequence[j] != *curr {
+ encoded.push((*curr, j - i));
+ if j == end {
+ return encoded;
+ } else {
+ curr = &sequence[j];
+ i = j;
+ }
+ }
+ j += 1;
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -191,6 +223,17 @@ mod tests {
use unicode_segmentation::UnicodeSegmentation;
#[test]
+ fn test_run_length_encode() {
+ assert_eq!(run_length_encode::<usize>(vec![]), vec![]);
+ assert_eq!(run_length_encode(vec![0]), vec![(0, 1)]);
+ assert_eq!(run_length_encode(vec!["0", "0"]), vec![("0", 2)]);
+ assert_eq!(
+ run_length_encode(vec![0, 0, 1, 2, 2, 2, 3, 4, 4, 4]),
+ vec![(0, 2), (1, 1), (2, 3), (3, 1), (4, 3)]
+ );
+ }
+
+ #[test]
fn test_0() {
let (before, after) = ("aaa", "aba");
assert_string_distance_parts(before, after, (1, 3));
diff --git a/src/edits.rs b/src/edits.rs
index e315f20a..d028b45f 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -144,7 +144,7 @@ where
)
};
- for (op, n) in run_length_encode(alignment.operations()) {
+ for (op, n) in alignment.coalesced_operations() {
match op {
align::Operation::Deletion => {
annotated_minus_line.push((deletion, minus_section(n)));
@@ -165,34 +165,6 @@ where
(annotated_minus_line, annotated_plus_line)
}
-fn run_length_encode<T>(sequence: Vec<T>) -> Vec<(T, usize)>
-where
- T: Copy,
- T: PartialEq,
-{
- let mut encoded = Vec::with_capacity(sequence.len());
-
- if sequence.len() == 0 {
- return encoded;
- }
-
- let end = sequence.len();
- let (mut i, mut j) = (0, 1);
- let mut curr = &sequence[i];
- loop {
- if j == end || sequence[j] != *curr {
- encoded.push((*curr, j - i));
- if j == end {
- return encoded;
- } else {
- curr = &sequence[j];
- i = j;
- }
- }
- j += 1;
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -274,17 +246,6 @@ mod tests {
}
#[test]
- fn test_run_length_encode() {
- assert_eq!(run_length_encode::<usize>(vec![]), vec![]);
- assert_eq!(run_length_encode(vec![0]), vec![(0, 1)]);
- assert_eq!(run_length_encode(vec!["0", "0"]), vec![("0", 2)]);
- assert_eq!(
- run_length_encode(vec![0, 0, 1, 2, 2, 2, 3, 4, 4, 4]),
- vec![(0, 2), (1, 1), (2, 3), (3, 1), (4, 3)]
- );
- }
-
- #[test]
fn test_infer_edits_1() {
assert_paired_edits(
vec!["aaa"],