summaryrefslogtreecommitdiffstats
path: root/src/edits.rs
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2019-07-29 11:41:46 -0400
committerDan Davison <dandavison7@gmail.com>2019-07-29 12:01:59 -0400
commita5c239b87f4d70eccffe72b037ddf987009008a8 (patch)
treee69cd2c062e3aa167624b041b1c08cf0a8604aaf /src/edits.rs
parentbf8db066c9a7f65be987aa681f46e7b17f70feaf (diff)
Count leading whitespace when computing common prefix
This change allows change_begin to differ between minus and plus lines, in the case where they have different amounts of leading whitespace.
Diffstat (limited to 'src/edits.rs')
-rw-r--r--src/edits.rs79
1 files changed, 57 insertions, 22 deletions
diff --git a/src/edits.rs b/src/edits.rs
index 52189ced..db7786ea 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -93,7 +93,7 @@ where
mod line_pair {
use std::cmp::{max, min};
- use itertools::Itertools;
+ use itertools::{Itertools, PeekingNext};
use unicode_segmentation::UnicodeSegmentation;
/// A pair of right-trimmed strings.
@@ -122,7 +122,11 @@ mod line_pair {
impl<'a> LinePair<'a> {
pub fn new(s0: &'a str, s1: &'a str) -> Self {
let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
- let common_prefix_length = LinePair::common_prefix_length(g0, g1);
+ let (prefix_length, leading_whitespace) = LinePair::prefix_data(g0, g1);
+ let common_prefix_length = [
+ leading_whitespace[0] + prefix_length,
+ leading_whitespace[1] + prefix_length,
+ ];
// TODO: Don't compute grapheme segmentation twice?
let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
let (common_suffix_length, trailing_whitespace) = LinePair::suffix_data(g0, g1);
@@ -136,8 +140,8 @@ mod line_pair {
// plus = "a b "
// Here, the right-trimmed length of minus is 1, yet the common prefix length is 2. We
// resolve this by taking the following maxima:
- let minus_length = max(lengths[0], common_prefix_length);
- let plus_length = max(lengths[1], common_prefix_length);
+ let minus_length = max(lengths[0], common_prefix_length[0]);
+ let plus_length = max(lengths[1], common_prefix_length[1]);
// Work backwards from the end of the strings. The end of the change region is equal to
// the start of their common suffix. To find the start of the change region, start with
@@ -145,15 +149,24 @@ mod line_pair {
// of the common suffix in both strings.
let minus_change_end = minus_length - common_suffix_length;
let plus_change_end = plus_length - common_suffix_length;
- let change_begin = min(common_prefix_length, min(minus_change_end, plus_change_end));
+ let change_begin = [
+ min(
+ common_prefix_length[0],
+ min(minus_change_end, plus_change_end),
+ ),
+ min(
+ common_prefix_length[1],
+ min(minus_change_end, plus_change_end),
+ ),
+ ];
let minus_edit = Edit {
- start: change_begin,
+ start: change_begin[0],
end: minus_change_end,
string_length: minus_length,
};
let plus_edit = Edit {
- start: change_begin,
+ start: change_begin[1],
end: plus_change_end,
string_length: plus_length,
};
@@ -187,15 +200,24 @@ mod line_pair {
/// Align the two strings at their left ends and consider only the bytes up to the length of
/// the shorter string. Return the byte offset of the first differing grapheme cluster, or
/// the byte length of shorter string if they do not differ.
- fn common_prefix_length<'b, I>(s0: I, s1: I) -> usize
+ fn prefix_data<'b, I>(s0: I, s1: I) -> (usize, [usize; 2])
where
I: Iterator<Item = (usize, &'b str)>,
I: Itertools,
{
- s0.zip(s1)
- .peekable()
- .peeking_take_while(|((_, c0), (_, c1))| c0 == c1)
- .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len())
+ let mut s0 = s0.peekable();
+ let mut s1 = s1.peekable();
+
+ let w0 = consume_whitespace(&mut s0);
+ let w1 = consume_whitespace(&mut s1);
+
+ (
+ s0.zip(s1)
+ .peekable()
+ .peeking_take_while(|((_, c0), (_, c1))| c0 == c1)
+ .fold(0, |offset, ((_, c0), (_, _))| offset + c0.len()),
+ [w0, w1],
+ )
}
/// Trim trailing whitespace and align the two strings at their right ends. Fix the origin
@@ -211,23 +233,36 @@ mod line_pair {
let mut s0 = s0.rev().peekable();
let mut s1 = s1.rev().peekable();
- let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n";
- let n0 = (&mut s0).peeking_take_while(is_whitespace).count();
- let n1 = (&mut s1).peeking_take_while(is_whitespace).count();
+ let w0 = consume_whitespace(&mut s0);
+ let w1 = consume_whitespace(&mut s1);
- (LinePair::common_prefix_length(s0, s1), [n0, n1])
+ let (suffix_length, [_, _]) = LinePair::prefix_data(s0, s1);
+ (suffix_length, [w0, w1])
}
}
+ fn consume_whitespace<'b, I>(s: &mut I) -> usize
+ where
+ I: Iterator<Item = (usize, &'b str)>,
+ I: Itertools,
+ I: PeekingNext,
+ {
+ let is_whitespace = |(_, c): &(usize, &str)| *c == " " || *c == "\n";
+ s.peeking_take_while(is_whitespace).count()
+ }
+
#[cfg(test)]
mod tests {
use super::*;
fn common_prefix_length(s1: &str, s2: &str) -> usize {
- super::LinePair::common_prefix_length(
- s1.grapheme_indices(true),
- s2.grapheme_indices(true),
- )
+ let (prefix_length, [w0, w1]) =
+ super::LinePair::prefix_data(s1.grapheme_indices(true), s2.grapheme_indices(true));
+ assert_eq!(
+ w0, w1,
+ "Lines with different amounts of leading whitespace are not supported"
+ );
+ w0 + prefix_length
}
fn common_suffix_length(s1: &str, s2: &str) -> usize {
@@ -249,10 +284,10 @@ mod line_pair {
#[test]
fn test_common_prefix_length_with_leading_whitespace() {
- assert_eq!(common_prefix_length(" ", ""), 0);
+ // assert_eq!(common_prefix_length(" ", ""), 0);
assert_eq!(common_prefix_length(" ", " "), 1);
assert_eq!(common_prefix_length(" a", " a"), 2);
- assert_eq!(common_prefix_length(" a", "a"), 0);
+ // assert_eq!(common_prefix_length(" a", "a"), 0);
}
#[test]