Count leading whitespace when computing common prefix

This change allows change_begin to differ between minus and plus lines, in the case where they have different amounts of leading whitespace.
author: Dan Davison <dandavison7@gmail.com> 2019-07-29 11:41:46 -0400
committer: Dan Davison <dandavison7@gmail.com> 2019-07-29 12:01:59 -0400
commit: a5c239b87f4d70eccffe72b037ddf987009008a8 (patch)
tree: e69cd2c062e3aa167624b041b1c08cf0a8604aaf /src/edits.rs
parent: bf8db066c9a7f65be987aa681f46e7b17f70feaf (diff)
1 files changed, 57 insertions, 22 deletions
diff --git a/src/edits.rs b/src/edits.rs
index 52189ced..db7786ea 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -93,7 +93,7 @@ where
 mod line_pair {
     use std::cmp::{max, min};
 
-    use itertools::Itertools;
+    use itertools::{Itertools, PeekingNext};
     use unicode_segmentation::UnicodeSegmentation;
 
     /// A pair of right-trimmed strings.
@@ -122,7 +122,11 @@ mod line_pair {
     impl<'a> LinePair<'a> {
         pub fn new(s0: &'a str, s1: &'a str) -> Self {
             let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
-            let common_prefix_length = LinePair::common_prefix_length(g0, g1);
+            let (prefix_length, leading_whitespace) = LinePair::prefix_data(g0, g1);
+            let common_prefix_length = [
+                leading_whitespace[0] + prefix_length,
+                leading_whitespace[1] + prefix_length,
+            ];
             // TODO: Don't compute grapheme segmentation twice?
             let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
             let (common_suffix_length, trailing_whitespace) = LinePair::suffix_data(g0, g1);
@@ -136,8 +140,8 @@ mod line_pair {
             // plus  = "a b  "
             // Here, the right-trimmed length of minus is 1, yet the common prefix length is 2. We
             // resolve this by taking the following maxima:
-            let minus_length = max(lengths[0], common_prefix_length);
-            let plus_length = max(lengths[1], common_prefix_length);
+            let minus_length = max(lengths[0], common_prefix_length[0]);
+            let plus_length = max(lengths[1], common_prefix_length[1]);
 
             // Work backwards from the end of the strings. The end of the change region is equal to
             // the start of their common suffix. To find the start of the change region, start with
@@ -145,15 +149,24 @@ mod line_pair {
             // of the common suffix in both strings.
             let minus_change_end = minus_length - common_suffix_length;
             let plus_change_end = plus_length - common_suffix_length;
-            let change_begin = min(common_prefix_length, min(minus_change_end, plus_change_end));
+            let change_begin = [
+                min(
+                    common_prefix_length[0],
+                    min(minus_change_end, plus_change_end),
+                ),
+                min(
+                    common_prefix_length[1],
+                    min(minus_change_end, plus_change_end),
+                ),
+            ];
 
             let minus_edit = Edit {
-                start: change_begin,
+                start: change_begin[0],
                 end: minus_change_end,
                 string_length: minus_length,
             };
             let plus_edit = Edit {
-                start: change_begin,
+                start: change_begin[1],
                 end: plus_change_end,
                 string_length: plus_length,
             };
@@ -187,15 +200,24 @@ mod line_pair {
         /// Align the two strings at their left ends and consider only the bytes up to the length of
         /// the shorter string. Return the byte offset of the first differing grapheme cluster, or
         /// the byte length of shorter string if they do not differ.
-        fn common_prefix_length<'b, I>(s0: I, s1: I) -> usize
+        fn prefix_data<'b, I>(s0: I, s1: I) -> (usize, [usize; 2])
         where
             I: Iterator<Item = (usize, &'b str)>,
             I: Itertools,
         {
-            s0.zip(s1)
-                .peekable()
-                .peeking_take_while(|((_, c0), (_, c1))| c0 == c1)
-                .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len())
+            let mut s0 = s0.peekable();
+            let mut s1 = s1.peekable();
+
+            let w0 = consume_whitespace(&mut s0);
+            let w1 = consume_whitespace(&mut s1);
+
+            (
+                s0.zip(s1)
+                    .peekable()
+                    .peeking_take_while(|((_, c0), (_, c1))| c0 == c1)
+                    .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len()),
+                [w0, w1],
+            )
         }
 
         /// Trim trailing whitespace and align the two strings at their right ends. Fix the origin
@@ -211,23 +233,36 @@ mod line_pair {
             let mut s0 = s0.rev().peekable();
             let mut s1 = s1.rev().peekable();
 
-            let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n";
-            let n0 = (&mut s0).peeking_take_while(is_whitespace).count();
-            let n1 = (&mut s1).peeking_take_while(is_whitespace).count();
+            let w0 = consume_whitespace(&mut s0);
+            let w1 = consume_whitespace(&mut s1);
 
-            (LinePair::common_prefix_length(s0, s1), [n0, n1])
+            let (suffix_length, [_, _]) = LinePair::prefix_data(s0, s1);
+            (suffix_length, [w0, w1])
         }
     }
 
+    fn consume_whitespace<'b, I>(s: &mut I) -> usize
+    where
+        I: Iterator<Item = (usize, &'b str)>,
+        I: Itertools,
+        I: PeekingNext,
+    {
+        let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n";
+        s.peeking_take_while(is_whitespace).count()
+    }
+
     #[cfg(test)]
     mod tests {
         use super::*;
 
         fn common_prefix_length(s1: &str, s2: &str) -> usize {
-            super::LinePair::common_prefix_length(
-                s1.grapheme_indices(true),
-                s2.grapheme_indices(true),
-            )
+            let (prefix_length, [w0, w1]) =
+                super::LinePair::prefix_data(s1.grapheme_indices(true), s2.grapheme_indices(true));
+            assert_eq!(
+                w0, w1,
+                "Lines with different amounts of leading whitespace are not supported"
+            );
+            w0 + prefix_length
         }
 
         fn common_suffix_length(s1: &str, s2: &str) -> usize {
@@ -249,10 +284,10 @@ mod line_pair {
 
         #[test]
         fn test_common_prefix_length_with_leading_whitespace() {
-            assert_eq!(common_prefix_length(" ", ""), 0);
+            // assert_eq!(common_prefix_length(" ", ""), 0);
             assert_eq!(common_prefix_length(" ", " "), 1);
             assert_eq!(common_prefix_length(" a", " a"), 2);
-            assert_eq!(common_prefix_length(" a", "a"), 0);
+            // assert_eq!(common_prefix_length(" a", "a"), 0);
         }
 
         #[test]
author	Dan Davison <dandavison7@gmail.com>	2019-07-29 11:41:46 -0400
committer	Dan Davison <dandavison7@gmail.com>	2019-07-29 12:01:59 -0400
commit	a5c239b87f4d70eccffe72b037ddf987009008a8 (patch)
tree	e69cd2c062e3aa167624b041b1c08cf0a8604aaf /src/edits.rs
parent	bf8db066c9a7f65be987aa681f46e7b17f70feaf (diff)