4 files changed, 353 insertions, 302 deletions
diff --git a/src/align.rs b/src/align.rs
new file mode 100644
index 00000000..9a3978cc
--- /dev/null
+++ b/src/align.rs
@@ -0,0 +1,219 @@
+use std::cmp::{max, min};
+
+/// Needleman-Wunsch / Wagner-Fischer table for computation of edit distance and associated
+/// alignment.
+pub struct Alignment<'a> {
+    // Inputs to be aligned are vecs derived from grapheme_indices().
+    pub xx: Vec<(usize, &'a str)>,
+    pub yy: Vec<(usize, &'a str)>,
+    table: Vec<usize>,
+    path: Vec<usize>,
+    dim: [usize; 2],
+}
+
+impl<'a> Alignment<'a> {
+    /// Fill table for Levenshtein distance / alignment computation
+    pub fn new(xx: Vec<(usize, &'a str)>, yy: Vec<(usize, &'a str)>) -> Self {
+        let dim = [yy.len() + 1, xx.len() + 1];
+        let table = vec![0; dim[0] * dim[1]];
+        let path = vec![0; dim[0] * dim[1]];
+        let mut alignment = Self {
+            xx,
+            yy,
+            table,
+            path,
+            dim,
+        };
+        alignment.fill();
+        alignment
+    }
+
+    // Row-major storage of 2D array.
+    fn index(&self, i: usize, j: usize) -> usize {
+        j * self.dim[1] + i
+    }
+
+    fn reverse_index(&self, n: usize) -> (usize, usize) {
+        (n % self.dim[1], n / self.dim[1])
+    }
+
+    fn cell(&self, i: usize, j: usize) -> usize {
+        self.table[self.index(i, j)]
+    }
+
+    #[allow(dead_code)]
+    fn print(&self) {
+        for i in 0..self.dim[0] {
+            for j in 0..self.dim[1] {
+                print!("{} ", self.cell(j, i));
+            }
+            println!("");
+        }
+    }
+
+    /// Fill table for Levenshtein distance / alignment computation
+    pub fn fill(&mut self) {
+        // xx is written along the top of the table; yy is written down the left side of the
+        // table. Also, we insert a 0 in cell (0, 0) of the table, so xx and yy are shifted by one
+        // position. Therefore, the element corresponding to (xx[i], yy[j]) is in column (i + 1) and row
+        // (j + 1); the index of this element is given by index(i, j).
+        for i in 1..self.dim[1] {
+            self.table[i] = i;
+        }
+        for j in 1..self.dim[0] {
+            self.table[j * self.dim[1]] = j;
+        }
+        let (xx, yy) = (&self.xx, &self.yy);
+        for (i, (_, x)) in (1..=xx.len()).zip(xx) {
+            for (j, (_, y)) in (1..=yy.len()).zip(yy) {
+                let substitution_cost = self.cell(i - 1, j - 1) + if x == y { 0 } else { 1 };
+                let deletion_cost = self.cell(i - 1, j) + 1;
+                let insertion_cost = self.cell(i, j - 1) + 1;
+                let index = self.index(i, j);
+                self.table[index] = min(substitution_cost, min(deletion_cost, insertion_cost));
+                if self.table[index] == substitution_cost {
+                    self.path[index] = self.index(i - 1, j - 1)
+                } else if self.table[index] == deletion_cost {
+                    self.path[index] = self.index(i - 1, j)
+                } else {
+                    self.path[index] = self.index(i, j - 1)
+                }
+            }
+        }
+    }
+
+    /// Read edit distance from the table.
+    pub fn edit_distance(&self) -> usize {
+        self.table[self.table.len() - 1]
+    }
+
+    pub fn normalized_edit_distance(&self) -> f64 {
+        self.edit_distance() as f64 / max(self.xx.len(), self.yy.len()) as f64
+    }
+
+    /// Read edit operations from the table.
+    pub fn edit_operations<EditOperation>(
+        &self,
+        noop: EditOperation,
+        substitution: EditOperation,
+        deletion: EditOperation,
+        insertion: EditOperation,
+        forwards: bool,
+    ) -> Vec<(EditOperation, (usize, &'a str))>
+    where
+        EditOperation: Copy,
+        EditOperation: PartialEq,
+    {
+        let mut ops = Vec::new();
+        let (mut i, mut j) = (self.xx.len(), self.yy.len());
+
+        while i > 0 && j > 0 {
+            let x = self.xx[i - 1];
+            let y = self.yy[j - 1];
+            let (_i, _j) = self.reverse_index(self.path[self.index(i, j)]);
+
+            let op = if (_i, _j) == (i - 1, j) {
+                deletion
+            } else if x.1 == y.1 && (_i, _j) == (i - 1, j - 1) {
+                noop
+            } else if x.1 != y.1 && (_i, _j) == (i - 1, j - 1) {
+                substitution
+            } else {
+                insertion
+            };
+            ops.push((op, if forwards { x } else { y }));
+            i = _i;
+            j = _j;
+        }
+        ops.into_iter().rev().collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use unicode_segmentation::UnicodeSegmentation;
+
+    #[derive(Copy, Clone, Debug, PartialEq)]
+    enum EditOperation {
+        NoOp,
+        Substitution,
+        Deletion,
+        Insertion,
+    }
+
+    use EditOperation::*;
+
+    #[test]
+    fn test_0() {
+        assert_eq!(levenshtein_distance("aaa", "aba"), 1);
+        assert_eq!(
+            edit_operations("aaa", "aba"),
+            vec![NoOp, Substitution, NoOp,]
+        );
+    }
+
+    #[test]
+    fn test_0_nonascii() {
+        assert_eq!(levenshtein_distance("áaa", "ááb"), 2);
+        assert_eq!(
+            edit_operations("áaa", "ááb"),
+            vec![NoOp, Substitution, Substitution,]
+        );
+    }
+
+    #[test]
+    fn test_1() {
+        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
+        assert_eq!(
+            edit_operations("kitten", "sitting"),
+            vec![
+                Substitution, // K S
+                NoOp,         // I I
+                NoOp,         // T T
+                NoOp,         // T T
+                Substitution, // E I
+                NoOp,         // N N
+                Insertion     // - G
+            ]
+        );
+    }
+
+    #[test]
+    fn test_2() {
+        assert_eq!(levenshtein_distance("saturday", "sunday"), 3);
+        assert_eq!(
+            edit_operations("saturday", "sunday"),
+            vec![
+                NoOp,         // S S
+                Deletion,     // A -
+                Deletion,     // T -
+                NoOp,         // U U
+                Substitution, // R N
+                NoOp,         // D D
+                NoOp,         // A A
+                NoOp          // Y Y
+            ]
+        );
+    }
+
+    fn levenshtein_distance(x: &str, y: &str) -> usize {
+        let (x, y) = (
+            x.grapheme_indices(true).collect::<Vec<(usize, &str)>>(),
+            y.grapheme_indices(true).collect::<Vec<(usize, &str)>>(),
+        );
+        Alignment::new(x, y).edit_distance()
+    }
+
+    fn edit_operations<'a>(x: &'a str, y: &'a str) -> Vec<EditOperation> {
+        let (x, y) = (
+            x.grapheme_indices(true).collect::<Vec<(usize, &str)>>(),
+            y.grapheme_indices(true).collect::<Vec<(usize, &str)>>(),
+        );
+        Alignment::new(x, y)
+            .edit_operations(NoOp, Substitution, Deletion, Insertion, true)
+            .iter()
+            .map(|(op, _)| *op)
+            .collect()
+    }
+}
diff --git a/src/edits.rs b/src/edits.rs
index 060c2851..95c7191e 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -1,31 +1,11 @@
-use crate::edits::line_pair::LinePair;
+use unicode_segmentation::UnicodeSegmentation;
 
-/*
-  Consider minus line m and paired plus line p, respectively.  The following cases exist:
-
-  1. Whitespace deleted at line beginning.
-     => The deleted section is highlighted in m; p is unstyled.
-
-  2. Whitespace inserted at line beginning.
-     => The inserted section is highlighted in p; m is unstyled.
-
-  3. An internal section of the line containing a non-whitespace character has been deleted.
-     => The deleted section is highlighted in m; p is unstyled.
-
-  4. An internal section of the line containing a non-whitespace character has been changed.
-     => The original section is highlighted in m; the replacement is highlighted in p.
-
-  5. An internal section of the line containing a non-whitespace character has been inserted.
-     => The inserted section is highlighted in p; m is unstyled.
-
-  Note that whitespace can be neither deleted nor inserted at the end of the line: the line by
-  definition has no trailing whitespace.
-*/
-
-type AnnotatedLine<'a, EditOperation> = Vec<(EditOperation, &'a str)>;
+use crate::align;
 
 /// Infer the edit operations responsible for the differences between a collection of old and new
-/// lines. Return the input minus and plus lines, in annotated form.
+/// lines. A "line" is a string. An annotated line is a Vec of (op, &str) pairs, where the &str
+/// slices are slices of the line, and their concatenation equals the line. Return the input minus
+/// and plus lines, in annotated form.
 pub fn infer_edits<'a, EditOperation>(
     minus_lines: &'a Vec<String>,
     plus_lines: &'a Vec<String>,
@@ -35,42 +15,58 @@ pub fn infer_edits<'a, EditOperation>(
     insertion: EditOperation,
     distance_threshold: f64,
 ) -> (
-    Vec<AnnotatedLine<'a, EditOperation>>,
-    Vec<AnnotatedLine<'a, EditOperation>>,
+    Vec<Vec<(EditOperation, &'a str)>>, // annotated minus lines
+    Vec<Vec<(EditOperation, &'a str)>>, // annotated plus lines
 )
 where
     EditOperation: Copy,
+    EditOperation: PartialEq,
 {
-    let mut annotated_minus_lines = Vec::<AnnotatedLine<EditOperation>>::new();
-    let mut annotated_plus_lines = Vec::<AnnotatedLine<EditOperation>>::new();
+    let mut annotated_minus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
+    let mut annotated_plus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
 
     let mut emitted = 0; // plus lines emitted so far
 
     'minus_lines_loop: for minus_line in minus_lines {
         let mut considered = 0; // plus lines considered so far as match for minus_line
+        let minus_line = minus_line.trim_end();
         for plus_line in &plus_lines[emitted..] {
-            let line_pair = LinePair::new(minus_line, plus_line);
-            if line_pair.distance < distance_threshold {
+            let plus_line = plus_line.trim_end();
+            let alignment = align::Alignment::new(
+                minus_line
+                    .grapheme_indices(true)
+                    .collect::<Vec<(usize, &str)>>(),
+                plus_line
+                    .grapheme_indices(true)
+                    .collect::<Vec<(usize, &str)>>(),
+            );
+
+            if alignment.normalized_edit_distance() < distance_threshold {
                 // minus_line and plus_line are inferred to be a homologous pair.
 
                 // Emit as unpaired the plus lines already considered and rejected
                 for plus_line in &plus_lines[emitted..(emitted + considered)] {
-                    annotated_plus_lines.push(vec![(non_insertion, plus_line)]);
+                    annotated_plus_lines.push(vec![(non_insertion, plus_line.trim_end())]);
                 }
                 emitted += considered;
 
                 // Emit the homologous pair.
-                let (minus_edit, plus_edit) = (line_pair.minus_edit, line_pair.plus_edit);
-                annotated_minus_lines.push(vec![
-                    (non_deletion, &minus_line[0..minus_edit.start]),
-                    (deletion, &minus_line[minus_edit.start..minus_edit.end]),
-                    (non_deletion, &minus_line[minus_edit.end..]),
-                ]);
-                annotated_plus_lines.push(vec![
-                    (non_insertion, &plus_line[0..plus_edit.start]),
-                    (insertion, &plus_line[plus_edit.start..plus_edit.end]),
-                    (non_insertion, &plus_line[plus_edit.end..]),
-                ]);
+                annotated_minus_lines.push(coalesce_minus_edits(
+                    &alignment,
+                    minus_line,
+                    non_deletion,
+                    deletion,
+                    non_insertion,
+                    insertion,
+                ));
+                annotated_plus_lines.push(coalesce_plus_edits(
+                    &alignment,
+                    plus_line,
+                    non_deletion,
+                    deletion,
+                    non_insertion,
+                    insertion,
+                ));
                 emitted += 1;
 
                 // Move on to the next minus line.
@@ -80,260 +76,85 @@ where
             }
         }
         // No homolog was found for minus i; emit as unpaired.
-        annotated_minus_lines.push(vec![(non_deletion, minus_line)]);
+        annotated_minus_lines.push(vec![(non_deletion, minus_line.trim_end())]);
     }
     // Emit any remaining plus lines
     for plus_line in &plus_lines[emitted..] {
-        annotated_plus_lines.push(vec![(non_insertion, plus_line)]);
+        annotated_plus_lines.push(vec![(non_insertion, plus_line.trim_end())]);
     }
 
     (annotated_minus_lines, annotated_plus_lines)
 }
 
-mod line_pair {
-    use std::cmp::{max, min};
-
-    use itertools::{Itertools, PeekingNext};
-    use unicode_segmentation::UnicodeSegmentation;
+pub fn coalesce_minus_edits<'a, EditOperation>(
+    alignment: &align::Alignment<'a>,
+    line: &'a str,
+    non_deletion: EditOperation,
+    deletion: EditOperation,
+    _non_insertion: EditOperation,
+    insertion: EditOperation,
+) -> Vec<(EditOperation, &'a str)>
+where
+    EditOperation: Copy,
+    EditOperation: PartialEq,
+{
+    coalesce_edits(
+        alignment.edit_operations(non_deletion, deletion, deletion, insertion, true),
+        line,
+        insertion,
+    )
+}
 
-    /// A pair of right-trimmed strings.
-    pub struct LinePair<'a> {
-        pub minus_line: &'a str,
-        pub plus_line: &'a str,
-        pub minus_edit: Edit,
-        pub plus_edit: Edit,
-        pub distance: f64,
-    }
+pub fn coalesce_plus_edits<'a, EditOperation>(
+    alignment: &align::Alignment<'a>,
+    line: &'a str,
+    _non_deletion: EditOperation,
+    deletion: EditOperation,
+    non_insertion: EditOperation,
+    insertion: EditOperation,
+) -> Vec<(EditOperation, &'a str)>
+where
+    EditOperation: Copy,
+    EditOperation: PartialEq,
+{
+    coalesce_edits(
+        alignment.edit_operations(non_insertion, insertion, deletion, insertion, false),
+        line,
+        deletion,
+    )
+}
 
-    #[derive(Debug)]
-    pub struct Edit {
-        pub start: usize,
-        pub end: usize,
-        string_length: usize,
+fn coalesce_edits<'a, 'b, EditOperation>(
+    operations: Vec<(EditOperation, (usize, &'b str))>,
+    line: &'a str,
+    irrelevant: EditOperation,
+) -> Vec<(EditOperation, &'a str)>
+where
+    EditOperation: Copy,
+    EditOperation: PartialEq,
+{
+    let mut edits = Vec::new(); // TODO capacity
+    let mut operations = operations.iter().filter(|(op, _)| *op != irrelevant);
+    let next = operations.next();
+    if next.is_none() {
+        return edits;
     }
-
-    impl Edit {
-        fn distance(&self) -> f64 {
-            (self.end - self.start) as f64 / self.string_length as f64
+    let (mut last_op, (mut last_offset, _)) = next.unwrap();
+    let mut curr_op = last_op;
+    let mut curr_offset;
+    for (op, (offset, _)) in operations {
+        curr_op = *op;
+        curr_offset = *offset;
+        if curr_op != last_op {
+            edits.push((last_op, &line[last_offset..*offset]));
+            last_offset = curr_offset;
+            last_op = curr_op;
         }
     }
-
-    impl<'a> LinePair<'a> {
-        pub fn new(s0: &'a str, s1: &'a str) -> Self {
-            let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
-            let (prefix_length, leading_whitespace) = LinePair::prefix_data(g0, g1);
-            let common_prefix_length = [
-                leading_whitespace[0] + prefix_length,
-                leading_whitespace[1] + prefix_length,
-            ];
-            // TODO: Don't compute grapheme segmentation twice?
-            let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
-            let (common_suffix_length, trailing_whitespace) = LinePair::suffix_data(g0, g1);
-            let lengths = [
-                s0.len() - trailing_whitespace[0],
-                s1.len() - trailing_whitespace[1],
-            ];
-
-            // We require that (right-trimmed length) >= (common prefix length). Consider:
-            // minus = "a    "
-            // plus  = "a b  "
-            // Here, the right-trimmed length of minus is 1, yet the common prefix length is 2. We
-            // resolve this by taking the following maxima:
-            let minus_length = max(lengths[0], common_prefix_length[0]);
-            let plus_length = max(lengths[1], common_prefix_length[1]);
-
-            // Work backwards from the end of the strings. The end of the change region is equal to
-            // the start of their common suffix. To find the start of the change region, start with
-            // the end of their common prefix, and then move leftwards until it is before the start
-            // of the common suffix in both strings.
-            let minus_change_end = minus_length - common_suffix_length;
-            let plus_change_end = plus_length - common_suffix_length;
-            let change_begin = [
-                min(
-                    common_prefix_length[0],
-                    min(minus_change_end, plus_change_end),
-                ),
-                min(
-                    common_prefix_length[1],
-                    min(minus_change_end, plus_change_end),
-                ),
-            ];
-
-            let minus_edit = Edit {
-                start: change_begin[0],
-                end: minus_change_end,
-                string_length: minus_length - leading_whitespace[0],
-            };
-            let plus_edit = Edit {
-                start: change_begin[1],
-                end: plus_change_end,
-                string_length: plus_length - leading_whitespace[1],
-            };
-            let distance = minus_edit.distance() + plus_edit.distance();
-            LinePair {
-                minus_line: s0,
-                plus_line: s1,
-                minus_edit,
-                plus_edit,
-                distance,
-            }
-        }
-
-        #[allow(dead_code)]
-        pub fn format(&self) -> String {
-            format!(
-                "LinePair\n \
-                 \t{} {} {}\n \
-                 \t{} {} {}\n \
-                 \t{}",
-                self.minus_line.trim_end(),
-                self.minus_edit.start,
-                self.minus_edit.end,
-                self.plus_line.trim_end(),
-                self.plus_edit.start,
-                self.plus_edit.end,
-                self.distance
-            )
-        }
-
-        /// Align the two strings at their left ends and consider only the bytes up to the length of
-        /// the shorter string. Return the byte offset of the first differing grapheme cluster, or
-        /// the byte length of shorter string if they do not differ.
-        fn prefix_data<'b, I>(s0: I, s1: I) -> (usize, [usize; 2])
-        where
-            I: Iterator<Item = (usize, &'b str)>,
-            I: Itertools,
-        {
-            let mut s0 = s0.peekable();
-            let mut s1 = s1.peekable();
-
-            let w0 = consume_whitespace(&mut s0);
-            let w1 = consume_whitespace(&mut s1);
-
-            (
-                s0.zip(s1)
-                    .peekable()
-                    .peeking_take_while(|((_, c0), (_, c1))| c0 == c1)
-                    .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len()),
-                [w0, w1],
-            )
-        }
-
-        /// Trim trailing whitespace and align the two strings at their right ends. Fix the origin
-        /// at their right ends and, looking left, consider only the bytes up to the length of the
-        /// shorter string. Return the byte offset of the first differing grapheme cluster, or the
-        /// byte length of the shorter string if they do not differ. Also return the number of bytes
-        /// of whitespace trimmed from each string.
-        fn suffix_data<'b, I>(s0: I, s1: I) -> (usize, [usize; 2])
-        where
-            I: DoubleEndedIterator<Item = (usize, &'b str)>,
-            I: Itertools,
-        {
-            let mut s0 = s0.rev().peekable();
-            let mut s1 = s1.rev().peekable();
-
-            let w0 = consume_whitespace(&mut s0);
-            let w1 = consume_whitespace(&mut s1);
-
-            let (suffix_length, [_, _]) = LinePair::prefix_data(s0, s1);
-            (suffix_length, [w0, w1])
-        }
-    }
-
-    fn consume_whitespace<'b, I>(s: &mut I) -> usize
-    where
-        I: Iterator<Item = (usize, &'b str)>,
-        I: Itertools,
-        I: PeekingNext,
-    {
-        let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n";
-        s.peeking_take_while(is_whitespace).count()
-    }
-
-    #[cfg(test)]
-    mod tests {
-        use super::*;
-
-        fn common_prefix_length(s1: &str, s2: &str) -> usize {
-            let (prefix_length, [w0, w1]) =
-                super::LinePair::prefix_data(s1.grapheme_indices(true), s2.grapheme_indices(true));
-            assert_eq!(
-                w0, w1,
-                "Lines with different amounts of leading whitespace are not supported"
-            );
-            w0 + prefix_length
-        }
-
-        fn common_suffix_length(s1: &str, s2: &str) -> usize {
-            super::LinePair::suffix_data(s1.grapheme_indices(true), s2.grapheme_indices(true)).0
-        }
-
-        #[test]
-        fn test_common_prefix_length() {
-            assert_eq!(common_prefix_length("", ""), 0);
-            assert_eq!(common_prefix_length("", "a"), 0);
-            assert_eq!(common_prefix_length("a", ""), 0);
-            assert_eq!(common_prefix_length("a", "b"), 0);
-            assert_eq!(common_prefix_length("a", "a"), 1);
-            assert_eq!(common_prefix_length("a", "ab"), 1);
-            assert_eq!(common_prefix_length("ab", "a"), 1);
-            assert_eq!(common_prefix_length("ab", "aba"), 2);
-            assert_eq!(common_prefix_length("aba", "ab"), 2);
-        }
-
-        #[test]
-        fn test_common_prefix_length_with_leading_whitespace() {
-            // assert_eq!(common_prefix_length(" ", ""), 0);
-            assert_eq!(common_prefix_length(" ", " "), 1);
-            assert_eq!(common_prefix_length(" a", " a"), 2);
-            // assert_eq!(common_prefix_length(" a", "a"), 0);
-        }
-
-        #[test]
-        fn test_common_suffix_length() {
-            assert_eq!(common_suffix_length("", ""), 0);
-            assert_eq!(common_suffix_length("", "a"), 0);
-            assert_eq!(common_suffix_length("a", ""), 0);
-            assert_eq!(common_suffix_length("a", "b"), 0);
-            assert_eq!(common_suffix_length("a", "a"), 1);
-            assert_eq!(common_suffix_length("a", "ab"), 0);
-            assert_eq!(common_suffix_length("ab", "a"), 0);
-            assert_eq!(common_suffix_length("ab", "b"), 1);
-            assert_eq!(common_suffix_length("ab", "aab"), 2);
-            assert_eq!(common_suffix_length("aba", "ba"), 2);
-        }
-
-        #[test]
-        fn test_common_suffix_length_with_trailing_whitespace() {
-            assert_eq!(common_suffix_length("", "  "), 0);
-            assert_eq!(common_suffix_length("  ", "a"), 0);
-            assert_eq!(common_suffix_length("a  ", ""), 0);
-            assert_eq!(common_suffix_length("a", "b  "), 0);
-            assert_eq!(common_suffix_length("a", "a  "), 1);
-            assert_eq!(common_suffix_length("a  ", "ab  "), 0);
-            assert_eq!(common_suffix_length("ab", "a  "), 0);
-            assert_eq!(common_suffix_length("ab  ", "b "), 1);
-            assert_eq!(common_suffix_length("ab ", "aab  "), 2);
-            assert_eq!(common_suffix_length("aba ", "ba"), 2);
-        }
-
-        #[test]
-        fn test_common_suffix_length_with_trailing_whitespace_nonascii() {
-            assert_eq!(common_suffix_length("  ", "á"), 0);
-            assert_eq!(common_suffix_length("á  ", ""), 0);
-            assert_eq!(common_suffix_length("á", "b  "), 0);
-            assert_eq!(common_suffix_length("á", "á  "), "á".len());
-            assert_eq!(common_suffix_length("a  ", "áb  "), 0);
-            assert_eq!(common_suffix_length("ab", "á  "), 0);
-            assert_eq!(common_suffix_length("áb  ", "b "), 1);
-            assert_eq!(common_suffix_length("áb ", "aáb  "), 1 + "á".len());
-            assert_eq!(common_suffix_length("abá ", "bá"), 1 + "á".len());
-            assert_eq!(
-                common_suffix_length("áaáabá ", "ááabá   "),
-                2 + 2 * "á".len()
-            );
-        }
+    if curr_op == last_op {
+        edits.push((last_op, &line[last_offset..]));
     }
+    edits
 }
 
 #[cfg(test)]
@@ -359,13 +180,25 @@ mod tests {
     const DISTANCE_MAX: f64 = 2.0;
 
     #[test]
+    fn test_coalesce_edits_1() {
+        assert_eq!(
+            coalesce_edits(
+                vec![(MinusNoop, (0, "a")), (MinusNoop, (1, "b"))],
+                "ab",
+                Insertion
+            ),
+            vec![(MinusNoop, "ab")]
+        )
+    }
+
+    #[test]
     fn test_infer_edits_1() {
         assert_paired_edits(
             vec!["aaa\n"],
             vec!["aba\n"],
             (
-                vec![vec![(MinusNoop, "a"), (Deletion, "a"), (MinusNoop, "a\n")]],
-                vec![vec![(PlusNoop, "a"), (Insertion, "b"), (PlusNoop, "a\n")]],
+                vec![vec![(MinusNoop, "a"), (Deletion, "a"), (MinusNoop, "a")]],
+                vec![vec![(PlusNoop, "a"), (Insertion, "b"), (PlusNoop, "a")]],
             ),
         )
     }
@@ -376,8 +209,8 @@ mod tests {
             vec!["áaa\n"],
             vec!["ááb\n"],
             (
-                vec![vec![(MinusNoop, "á"), (Deletion, "aa"), (MinusNoop, "\n")]],
-                vec![vec![(PlusNoop, "á"), (Insertion, "áb"), (PlusNoop, "\n")]],
+                vec![vec![(MinusNoop, "á"), (Deletion, "aa")]],
+                vec![vec![(PlusNoop, "á"), (Insertion, "áb")]],
             ),
         )
     }
@@ -391,13 +224,9 @@ mod tests {
                 vec![vec![
                     (MinusNoop, "d."),
                     (Deletion, "iter"),
-                    (MinusNoop, "items()\n"),
-                ]],
-                vec![vec![
-                    (PlusNoop, "d."),
-                    (Insertion, ""),
-                    (PlusNoop, "items()\n"),
+                    (MinusNoop, "items()"),
                 ]],
+                vec![vec![(PlusNoop, "d.items()")]],
             ),
         )
     }
@@ -409,17 +238,17 @@ mod tests {
             vec!["áábáácááb\n"],
             (
                 vec![
-                    vec![(MinusNoop, "áaaáaaáaa\n")],
+                    vec![(MinusNoop, "áaaáaaáaa")],
                     vec![
                         (MinusNoop, "áábáá"),
                         (Deletion, "b"),
-                        (MinusNoop, "ááb\n"),
+                        (MinusNoop, "ááb"),
                     ],
                 ],
                 vec![vec![
                     (PlusNoop, "áábáá"),
                     (Insertion, "c"),
-                    (PlusNoop, "ááb\n"),
+                    (PlusNoop, "ááb"),
                 ]],
             ),
             0.66,
@@ -433,14 +262,14 @@ mod tests {
             vec!["bbbb!bbb\n", "dddddddd\n", "cccc!ccc\n"],
             (
                 vec![
-                    vec![(MinusNoop, "aaaaaaaa\n")],
-                    vec![(MinusNoop, "bbbb"), (Deletion, "b"), (MinusNoop, "bbb\n")],
-                    vec![(MinusNoop, "cccc"), (Deletion, "c"), (MinusNoop, "ccc\n")],
+                    vec![(MinusNoop, "aaaaaaaa")],
+                    vec![(MinusNoop, "bbbb"), (Deletion, "b"), (MinusNoop, "bbb")],
+                    vec![(MinusNoop, "cccc"), (Deletion, "c"), (MinusNoop, "ccc")],
                 ],
                 vec![
-                    vec![(PlusNoop, "bbbb"), (Insertion, "!"), (PlusNoop, "bbb\n")],
-                    vec![(PlusNoop, "dddddddd\n")],
-                    vec![(PlusNoop, "cccc"), (Insertion, "!"), (PlusNoop, "ccc\n")],
+                    vec![(PlusNoop, "bbbb"), (Insertion, "!"), (PlusNoop, "bbb")],
+                    vec![(PlusNoop, "dddddddd")],
+                    vec![(PlusNoop, "cccc"), (Insertion, "!"), (PlusNoop, "ccc")],
                 ],
             ),
             0.66,
diff --git a/src/main.rs b/src/main.rs
index cb13b118..10da054a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 #[macro_use]
 extern crate error_chain;
 
+mod align;
 mod bat;
 mod cli;
 mod config;
diff --git a/src/paint.rs b/src/paint.rs
index 02e2a285..4fcb593b 100644
--- a/src/paint.rs
+++ b/src/paint.rs
@@ -80,12 +80,14 @@ impl<'a> Painter<'a> {
         syntax_style_sections: Vec<Vec<(Style, &str)>>,
         diff_style_sections: Vec<Vec<(StyleModifier, &str)>>,
     ) {
+        use std::fmt::Write;
         for (syntax_sections, diff_sections) in
             syntax_style_sections.iter().zip(diff_style_sections.iter())
         {
             for (style, text) in superimpose_style_sections(syntax_sections, diff_sections) {
                 paint_text(&text, style, output_buffer).unwrap();
             }
+            write!(output_buffer, "\n").unwrap();
         }
     }