diff options
author | Dan Davison <dandavison7@gmail.com> | 2019-07-28 13:31:10 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2019-07-28 21:49:57 -0400 |
commit | 0be75d3746915bd559b2e4185d090d4f11281470 (patch) | |
tree | 8787c4106bc46969da4b56e866aa1bc171327efb | |
parent | a0b7c42599eebbb4250962fd3819c55b320cb4c2 (diff) |
Revert interleavings algorithm
-rw-r--r-- | src/edits.rs | 117 | ||||
-rw-r--r-- | src/interleavings.rs | 88 | ||||
-rw-r--r-- | src/main.rs | 1 |
3 files changed, 0 insertions, 206 deletions
diff --git a/src/edits.rs b/src/edits.rs index a0dd4626..2eb1e0d3 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -1,5 +1,4 @@ use crate::edits::line_pair::LinePair; -use crate::interleavings; /// Infer the edit operations responsible for the differences between a collection of old and new /// lines. @@ -46,125 +45,9 @@ where let mut minus_line_sections = Vec::new(); let mut plus_line_sections = Vec::new(); - for assignment in LineAssignments::infer(minus_lines, plus_lines).assignments { - match assignment { - LineAssignment::LinePair(LinePair { - minus_line, - plus_line, - minus_edit, - plus_edit, - distance, - }) => { - if distance < distance_threshold { - minus_line_sections.push(vec![ - (non_deletion, &minus_line[0..minus_edit.start]), - (deletion, &minus_line[minus_edit.start..minus_edit.end]), - (non_deletion, &minus_line[minus_edit.end..]), - ]); - plus_line_sections.push(vec![ - (non_insertion, &plus_line[0..plus_edit.start]), - (insertion, &plus_line[plus_edit.start..plus_edit.end]), - (non_insertion, &plus_line[plus_edit.end..]), - ]); - } else { - minus_line_sections.push(vec![(non_deletion, minus_line)]); - plus_line_sections.push(vec![(non_insertion, plus_line)]); - } - } - LineAssignment::Line(Minus(line)) => { - minus_line_sections.push(vec![(non_deletion, line)]) - } - LineAssignment::Line(Plus(line)) => { - plus_line_sections.push(vec![(non_insertion, line)]) - } - } - } (minus_line_sections, plus_line_sections) } -pub enum PolarizedLine<'a> { - Minus(&'a str), - Plus(&'a str), -} - -use PolarizedLine::*; - -enum LineAssignment<'a> { - LinePair(LinePair<'a>), - Line(PolarizedLine<'a>), -} - -struct LineAssignments<'a> { - assignments: Vec<LineAssignment<'a>>, -} - -impl<'a> LineAssignments<'a> { - fn infer(minus_lines: &'a Vec<String>, plus_lines: &'a Vec<String>) -> Self { - interleavings::interleavings( - &minus_lines - .iter() - .map(|line| Minus(line)) - .collect::<Vec<PolarizedLine>>(), - &plus_lines - .iter() - .map(|line| Plus(line)) - .collect::<Vec<PolarizedLine>>(), - ) - .iter() - .map(LineAssignments::from_interleaving) - .min_by(|las_1, las_2| las_1.distance().partial_cmp(&las_2.distance()).unwrap()) - .unwrap() - } - - // In the interleaving, a Minus followed by a Plus is a pair of lines to be scored for homology - // and thus possibly painted according to their inferred edits. All other lines in the - // interleaving should be emitted as normal. - // For example, suppose the interleaving is - // m p m m p - // This should be emitted as - // [(m,p), m, (m,p)] - // To do so we iterate over windows of size two: - // [mp, pm, mm, mp] - // 2, -, 1, 2 - // Where the annotations mean - // 2: emit a LinePair containing this window of 2 lines - // 1: emit the first line of the window, alone - // -: skip this window of lines - pub fn from_interleaving(interleaving: &Vec<&PolarizedLine<'a>>) -> Self { - let mut assignments = Vec::with_capacity(interleaving.len()); - let windows = interleaving.windows(2); - let mut skip = false; - for window in windows { - if skip { - skip = false; - continue; - } - match window { - [Minus(minus), Plus(plus)] => { - assignments.push(LineAssignment::LinePair(LinePair::new(minus, plus))); - // We've consumed both lines in this window, so we've consumed the first line of - // the next window, so we must skip it. - skip = true; - } - [Minus(minus), _] => assignments.push(LineAssignment::Line(Minus(minus))), - [Plus(plus), _] => assignments.push(LineAssignment::Line(Plus(plus))), - _ => panic!("Impossible"), - } - } - Self { assignments } - } - - pub fn distance(&self) -> f64 { - self.assignments - .iter() - .map(|la| match la { - LineAssignment::LinePair(lp) => lp.distance, - _ => 1f64, - }) - .sum() - } -} - mod line_pair { use std::cmp::{max, min}; diff --git a/src/interleavings.rs b/src/interleavings.rs deleted file mode 100644 index 25dcc871..00000000 --- a/src/interleavings.rs +++ /dev/null @@ -1,88 +0,0 @@ -/// Return the set of all interleavings of two sequences. -pub fn interleavings<'a, T>(s1: &'a [T], s2: &'a [T]) -> Vec<Vec<&'a T>> { - let mut _interleavings = Vec::with_capacity(number_of_interleavings(s1, s2)); - if s1.len() > 0 || s2.len() > 0 { - if s1.len() == 0 { - _interleavings.push(s2.iter().collect()); - } else if s2.len() == 0 { - _interleavings.push(s1.iter().collect()); - } else { - extend_interleavings(s1, s2, &mut _interleavings); - extend_interleavings(s2, s1, &mut _interleavings); - } - } - _interleavings -} - -fn extend_interleavings<'a, T>(s1: &'a [T], s2: &'a [T], _interleavings: &mut Vec<Vec<&'a T>>) { - let (first, rest) = s1.split_first().unwrap(); - for child_interleaving in interleavings(rest, s2) { - let mut interleaving = Vec::new(); - interleaving.push(first); - interleaving.extend(child_interleaving); - _interleavings.push(interleaving); - } -} - -/// The number of possible interleavings of two sequences. -// Let s1.len() == n1 and s2.len() == n2. Then, since the subsequences -// retain their order, the number of possible interleavings is equal -// to the number of ways of choosing n1 slots from (n1 + n2) slots. -fn number_of_interleavings<T>(s1: &[T], s2: &[T]) -> usize { - number_of_subsets(s1.len() + s2.len(), s1.len()) -} - -/// The number of subsets of size k that can be formed from a set of size n, i.e. n choose k. -fn number_of_subsets(n: usize, k: usize) -> usize { - number_of_tuples(n, k) / number_of_tuples(k, k) -} - -/// The number of k-tuples that can be formed from a set of size n, i.e. the falling factorial. -/// prod_{i=0}^{k-1} (n - i) -fn number_of_tuples(n: usize, k: usize) -> usize { - (0..k).map(|i| n - i).product() -} - -mod tests { - use super::*; - - #[test] - fn test_number_of_tuples() { - assert_eq!(number_of_tuples(5, 3), 60); - } - - #[test] - fn test_number_of_subsets() { - assert_eq!(number_of_subsets(5, 3), 10); - } - - #[test] - fn test_number_of_interleavings() { - let s1 = vec![1, 2, 3, 4, 5, 6]; - let s2 = vec![7, 8, 9, 10, 11]; - assert_eq!(number_of_interleavings(&s1, &s2), number_of_subsets(11, 6)); - assert_eq!(interleavings(&s1, &s2).len(), number_of_subsets(11, 6)); - } - - #[test] - fn test_interleavings_0() { - assert_eq!( - interleavings(&Vec::<u8>::new(), &Vec::<u8>::new()), - Vec::<Vec::<&u8>>::new() - ); - } - - #[test] - fn test_interleavings_1() { - assert_eq!(interleavings(&vec![1], &Vec::<u8>::new()), vec![vec![&1]]); - } - - #[test] - fn test_interleavings_2() { - assert_eq!( - interleavings(&vec![1], &vec![2, 3]), - vec![vec![&1, &2, &3], vec![&2, &3, &1], vec![&2, &1, &3]] - ); - } - -} diff --git a/src/main.rs b/src/main.rs index b7838633..cb13b118 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,6 @@ mod config; mod delta; mod draw; mod edits; -mod interleavings; mod paint; mod parse; mod style; |