summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2019-07-28 13:31:10 -0400
committerDan Davison <dandavison7@gmail.com>2019-07-28 21:49:57 -0400
commit0be75d3746915bd559b2e4185d090d4f11281470 (patch)
tree8787c4106bc46969da4b56e866aa1bc171327efb
parenta0b7c42599eebbb4250962fd3819c55b320cb4c2 (diff)
Revert interleavings algorithm
-rw-r--r--src/edits.rs117
-rw-r--r--src/interleavings.rs88
-rw-r--r--src/main.rs1
3 files changed, 0 insertions, 206 deletions
diff --git a/src/edits.rs b/src/edits.rs
index a0dd4626..2eb1e0d3 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -1,5 +1,4 @@
use crate::edits::line_pair::LinePair;
-use crate::interleavings;
/// Infer the edit operations responsible for the differences between a collection of old and new
/// lines.
@@ -46,125 +45,9 @@ where
let mut minus_line_sections = Vec::new();
let mut plus_line_sections = Vec::new();
- for assignment in LineAssignments::infer(minus_lines, plus_lines).assignments {
- match assignment {
- LineAssignment::LinePair(LinePair {
- minus_line,
- plus_line,
- minus_edit,
- plus_edit,
- distance,
- }) => {
- if distance < distance_threshold {
- minus_line_sections.push(vec![
- (non_deletion, &minus_line[0..minus_edit.start]),
- (deletion, &minus_line[minus_edit.start..minus_edit.end]),
- (non_deletion, &minus_line[minus_edit.end..]),
- ]);
- plus_line_sections.push(vec![
- (non_insertion, &plus_line[0..plus_edit.start]),
- (insertion, &plus_line[plus_edit.start..plus_edit.end]),
- (non_insertion, &plus_line[plus_edit.end..]),
- ]);
- } else {
- minus_line_sections.push(vec![(non_deletion, minus_line)]);
- plus_line_sections.push(vec![(non_insertion, plus_line)]);
- }
- }
- LineAssignment::Line(Minus(line)) => {
- minus_line_sections.push(vec![(non_deletion, line)])
- }
- LineAssignment::Line(Plus(line)) => {
- plus_line_sections.push(vec![(non_insertion, line)])
- }
- }
- }
(minus_line_sections, plus_line_sections)
}
-pub enum PolarizedLine<'a> {
- Minus(&'a str),
- Plus(&'a str),
-}
-
-use PolarizedLine::*;
-
-enum LineAssignment<'a> {
- LinePair(LinePair<'a>),
- Line(PolarizedLine<'a>),
-}
-
-struct LineAssignments<'a> {
- assignments: Vec<LineAssignment<'a>>,
-}
-
-impl<'a> LineAssignments<'a> {
- fn infer(minus_lines: &'a Vec<String>, plus_lines: &'a Vec<String>) -> Self {
- interleavings::interleavings(
- &minus_lines
- .iter()
- .map(|line| Minus(line))
- .collect::<Vec<PolarizedLine>>(),
- &plus_lines
- .iter()
- .map(|line| Plus(line))
- .collect::<Vec<PolarizedLine>>(),
- )
- .iter()
- .map(LineAssignments::from_interleaving)
- .min_by(|las_1, las_2| las_1.distance().partial_cmp(&las_2.distance()).unwrap())
- .unwrap()
- }
-
- // In the interleaving, a Minus followed by a Plus is a pair of lines to be scored for homology
- // and thus possibly painted according to their inferred edits. All other lines in the
- // interleaving should be emitted as normal.
- // For example, suppose the interleaving is
- // m p m m p
- // This should be emitted as
- // [(m,p), m, (m,p)]
- // To do so we iterate over windows of size two:
- // [mp, pm, mm, mp]
- // 2, -, 1, 2
- // Where the annotations mean
- // 2: emit a LinePair containing this window of 2 lines
- // 1: emit the first line of the window, alone
- // -: skip this window of lines
- pub fn from_interleaving(interleaving: &Vec<&PolarizedLine<'a>>) -> Self {
- let mut assignments = Vec::with_capacity(interleaving.len());
- let windows = interleaving.windows(2);
- let mut skip = false;
- for window in windows {
- if skip {
- skip = false;
- continue;
- }
- match window {
- [Minus(minus), Plus(plus)] => {
- assignments.push(LineAssignment::LinePair(LinePair::new(minus, plus)));
- // We've consumed both lines in this window, so we've consumed the first line of
- // the next window, so we must skip it.
- skip = true;
- }
- [Minus(minus), _] => assignments.push(LineAssignment::Line(Minus(minus))),
- [Plus(plus), _] => assignments.push(LineAssignment::Line(Plus(plus))),
- _ => panic!("Impossible"),
- }
- }
- Self { assignments }
- }
-
- pub fn distance(&self) -> f64 {
- self.assignments
- .iter()
- .map(|la| match la {
- LineAssignment::LinePair(lp) => lp.distance,
- _ => 1f64,
- })
- .sum()
- }
-}
-
mod line_pair {
use std::cmp::{max, min};
diff --git a/src/interleavings.rs b/src/interleavings.rs
deleted file mode 100644
index 25dcc871..00000000
--- a/src/interleavings.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-/// Return the set of all interleavings of two sequences.
-pub fn interleavings<'a, T>(s1: &'a [T], s2: &'a [T]) -> Vec<Vec<&'a T>> {
- let mut _interleavings = Vec::with_capacity(number_of_interleavings(s1, s2));
- if s1.len() > 0 || s2.len() > 0 {
- if s1.len() == 0 {
- _interleavings.push(s2.iter().collect());
- } else if s2.len() == 0 {
- _interleavings.push(s1.iter().collect());
- } else {
- extend_interleavings(s1, s2, &mut _interleavings);
- extend_interleavings(s2, s1, &mut _interleavings);
- }
- }
- _interleavings
-}
-
-fn extend_interleavings<'a, T>(s1: &'a [T], s2: &'a [T], _interleavings: &mut Vec<Vec<&'a T>>) {
- let (first, rest) = s1.split_first().unwrap();
- for child_interleaving in interleavings(rest, s2) {
- let mut interleaving = Vec::new();
- interleaving.push(first);
- interleaving.extend(child_interleaving);
- _interleavings.push(interleaving);
- }
-}
-
-/// The number of possible interleavings of two sequences.
-// Let s1.len() == n1 and s2.len() == n2. Then, since the subsequences
-// retain their order, the number of possible interleavings is equal
-// to the number of ways of choosing n1 slots from (n1 + n2) slots.
-fn number_of_interleavings<T>(s1: &[T], s2: &[T]) -> usize {
- number_of_subsets(s1.len() + s2.len(), s1.len())
-}
-
-/// The number of subsets of size k that can be formed from a set of size n, i.e. n choose k.
-fn number_of_subsets(n: usize, k: usize) -> usize {
- number_of_tuples(n, k) / number_of_tuples(k, k)
-}
-
-/// The number of k-tuples that can be formed from a set of size n, i.e. the falling factorial.
-/// prod_{i=0}^{k-1} (n - i)
-fn number_of_tuples(n: usize, k: usize) -> usize {
- (0..k).map(|i| n - i).product()
-}
-
-mod tests {
- use super::*;
-
- #[test]
- fn test_number_of_tuples() {
- assert_eq!(number_of_tuples(5, 3), 60);
- }
-
- #[test]
- fn test_number_of_subsets() {
- assert_eq!(number_of_subsets(5, 3), 10);
- }
-
- #[test]
- fn test_number_of_interleavings() {
- let s1 = vec![1, 2, 3, 4, 5, 6];
- let s2 = vec![7, 8, 9, 10, 11];
- assert_eq!(number_of_interleavings(&s1, &s2), number_of_subsets(11, 6));
- assert_eq!(interleavings(&s1, &s2).len(), number_of_subsets(11, 6));
- }
-
- #[test]
- fn test_interleavings_0() {
- assert_eq!(
- interleavings(&Vec::<u8>::new(), &Vec::<u8>::new()),
- Vec::<Vec::<&u8>>::new()
- );
- }
-
- #[test]
- fn test_interleavings_1() {
- assert_eq!(interleavings(&vec![1], &Vec::<u8>::new()), vec![vec![&1]]);
- }
-
- #[test]
- fn test_interleavings_2() {
- assert_eq!(
- interleavings(&vec![1], &vec![2, 3]),
- vec![vec![&1, &2, &3], vec![&2, &3, &1], vec![&2, &1, &3]]
- );
- }
-
-}
diff --git a/src/main.rs b/src/main.rs
index b7838633..cb13b118 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,7 +7,6 @@ mod config;
mod delta;
mod draw;
mod edits;
-mod interleavings;
mod paint;
mod parse;
mod style;