diff options
author | Dan Davison <dandavison7@gmail.com> | 2019-07-29 11:41:46 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2019-07-29 12:01:59 -0400 |
commit | a5c239b87f4d70eccffe72b037ddf987009008a8 (patch) | |
tree | e69cd2c062e3aa167624b041b1c08cf0a8604aaf /src/edits.rs | |
parent | bf8db066c9a7f65be987aa681f46e7b17f70feaf (diff) |
Count leading whitespace when computing common prefix
This change allows change_begin to differ between minus and plus
lines, in the case where they have different amounts of leading whitespace.
Diffstat (limited to 'src/edits.rs')
-rw-r--r-- | src/edits.rs | 79 |
1 files changed, 57 insertions, 22 deletions
diff --git a/src/edits.rs b/src/edits.rs index 52189ced..db7786ea 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -93,7 +93,7 @@ where mod line_pair { use std::cmp::{max, min}; - use itertools::Itertools; + use itertools::{Itertools, PeekingNext}; use unicode_segmentation::UnicodeSegmentation; /// A pair of right-trimmed strings. @@ -122,7 +122,11 @@ mod line_pair { impl<'a> LinePair<'a> { pub fn new(s0: &'a str, s1: &'a str) -> Self { let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true)); - let common_prefix_length = LinePair::common_prefix_length(g0, g1); + let (prefix_length, leading_whitespace) = LinePair::prefix_data(g0, g1); + let common_prefix_length = [ + leading_whitespace[0] + prefix_length, + leading_whitespace[1] + prefix_length, + ]; // TODO: Don't compute grapheme segmentation twice? let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true)); let (common_suffix_length, trailing_whitespace) = LinePair::suffix_data(g0, g1); @@ -136,8 +140,8 @@ mod line_pair { // plus = "a b " // Here, the right-trimmed length of minus is 1, yet the common prefix length is 2. We // resolve this by taking the following maxima: - let minus_length = max(lengths[0], common_prefix_length); - let plus_length = max(lengths[1], common_prefix_length); + let minus_length = max(lengths[0], common_prefix_length[0]); + let plus_length = max(lengths[1], common_prefix_length[1]); // Work backwards from the end of the strings. The end of the change region is equal to // the start of their common suffix. To find the start of the change region, start with @@ -145,15 +149,24 @@ mod line_pair { // of the common suffix in both strings. let minus_change_end = minus_length - common_suffix_length; let plus_change_end = plus_length - common_suffix_length; - let change_begin = min(common_prefix_length, min(minus_change_end, plus_change_end)); + let change_begin = [ + min( + common_prefix_length[0], + min(minus_change_end, plus_change_end), + ), + min( + common_prefix_length[1], + min(minus_change_end, plus_change_end), + ), + ]; let minus_edit = Edit { - start: change_begin, + start: change_begin[0], end: minus_change_end, string_length: minus_length, }; let plus_edit = Edit { - start: change_begin, + start: change_begin[1], end: plus_change_end, string_length: plus_length, }; @@ -187,15 +200,24 @@ mod line_pair { /// Align the two strings at their left ends and consider only the bytes up to the length of /// the shorter string. Return the byte offset of the first differing grapheme cluster, or /// the byte length of shorter string if they do not differ. - fn common_prefix_length<'b, I>(s0: I, s1: I) -> usize + fn prefix_data<'b, I>(s0: I, s1: I) -> (usize, [usize; 2]) where I: Iterator<Item = (usize, &'b str)>, I: Itertools, { - s0.zip(s1) - .peekable() - .peeking_take_while(|((_, c0), (_, c1))| c0 == c1) - .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len()) + let mut s0 = s0.peekable(); + let mut s1 = s1.peekable(); + + let w0 = consume_whitespace(&mut s0); + let w1 = consume_whitespace(&mut s1); + + ( + s0.zip(s1) + .peekable() + .peeking_take_while(|((_, c0), (_, c1))| c0 == c1) + .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len()), + [w0, w1], + ) } /// Trim trailing whitespace and align the two strings at their right ends. Fix the origin @@ -211,23 +233,36 @@ mod line_pair { let mut s0 = s0.rev().peekable(); let mut s1 = s1.rev().peekable(); - let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n"; - let n0 = (&mut s0).peeking_take_while(is_whitespace).count(); - let n1 = (&mut s1).peeking_take_while(is_whitespace).count(); + let w0 = consume_whitespace(&mut s0); + let w1 = consume_whitespace(&mut s1); - (LinePair::common_prefix_length(s0, s1), [n0, n1]) + let (suffix_length, [_, _]) = LinePair::prefix_data(s0, s1); + (suffix_length, [w0, w1]) } } + fn consume_whitespace<'b, I>(s: &mut I) -> usize + where + I: Iterator<Item = (usize, &'b str)>, + I: Itertools, + I: PeekingNext, + { + let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n"; + s.peeking_take_while(is_whitespace).count() + } + #[cfg(test)] mod tests { use super::*; fn common_prefix_length(s1: &str, s2: &str) -> usize { - super::LinePair::common_prefix_length( - s1.grapheme_indices(true), - s2.grapheme_indices(true), - ) + let (prefix_length, [w0, w1]) = + super::LinePair::prefix_data(s1.grapheme_indices(true), s2.grapheme_indices(true)); + assert_eq!( + w0, w1, + "Lines with different amounts of leading whitespace are not supported" + ); + w0 + prefix_length } fn common_suffix_length(s1: &str, s2: &str) -> usize { @@ -249,10 +284,10 @@ mod line_pair { #[test] fn test_common_prefix_length_with_leading_whitespace() { - assert_eq!(common_prefix_length(" ", ""), 0); + // assert_eq!(common_prefix_length(" ", ""), 0); assert_eq!(common_prefix_length(" ", " "), 1); assert_eq!(common_prefix_length(" a", " a"), 2); - assert_eq!(common_prefix_length(" a", "a"), 0); + // assert_eq!(common_prefix_length(" a", "a"), 0); } #[test] |