From 93d37649ae63312548bb319e5d6d7305834e502f Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Mon, 1 Nov 2021 22:31:33 +0100 Subject: Remove +/- line prefix instead of substituting a space Simplifies line handling and printing by removing a "magical" 1-offset previously required in various locations. Now explicitly prepend "" in `tokenize()`. --- src/edits.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'src/edits.rs') diff --git a/src/edits.rs b/src/edits.rs index 06d934ff..11979aa8 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -30,8 +30,7 @@ pub fn infer_edits<'a, EditOperation>( Vec<(Option<usize>, Option<usize>)>, // line alignment ) where - EditOperation: Copy, - EditOperation: PartialEq, + EditOperation: Copy + PartialEq + std::fmt::Debug, { let mut annotated_minus_lines = Vec::<Vec<(EditOperation, &str)>>::new(); let mut annotated_plus_lines = Vec::<Vec<(EditOperation, &str)>>::new(); @@ -95,7 +94,9 @@ where /// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings; /// not individual characters. fn tokenize<'a>(line: &'a str, regex: &Regex) -> Vec<&'a str> { - let mut tokens = Vec::new(); + // Starting with "", see comment in Alignment::new(). Historical note: Replacing the '+/-' + // prefix with a space implicitly generated this. 
+ let mut tokens = vec![""]; let mut offset = 0; for m in regex.find_iter(line) { if offset == 0 && m.start() > 0 { @@ -136,8 +137,7 @@ fn annotate<'a, Annotation>( plus_line: &'a str, ) -> (Vec<(Annotation, &'a str)>, Vec<(Annotation, &'a str)>, f64) where - Annotation: Copy, - Annotation: PartialEq, + Annotation: Copy + PartialEq + std::fmt::Debug, { let mut annotated_minus_line = Vec::new(); let mut annotated_plus_line = Vec::new(); @@ -467,7 +467,10 @@ mod tests { fn assert_tokenize(text: &str, expected_tokens: &[&str]) { let actual_tokens = tokenize(text, &*DEFAULT_TOKENIZATION_REGEXP); assert_eq!(text, expected_tokens.iter().join("")); - assert_eq!(actual_tokens, expected_tokens); + // tokenize() guarantees that the first element of the token stream is "". + // See comment in Alignment::new() + assert_eq!(actual_tokens[0], ""); + assert_eq!(&actual_tokens[1..], expected_tokens); } #[test] @@ -476,8 +479,8 @@ mod tests { vec!["aaa"], vec!["aba"], ( - vec![vec![(Deletion, "aaa")]], - vec![vec![(Insertion, "aba")]], + vec![vec![(MinusNoop, ""), (Deletion, "aaa")]], + vec![vec![(PlusNoop, ""), (Insertion, "aba")]], ), ) } @@ -488,8 +491,12 @@ mod tests { vec!["aaa ccc"], vec!["aba ccc"], ( - vec![vec![(Deletion, "aaa"), (MinusNoop, " ccc")]], - vec![vec![(Insertion, "aba"), (PlusNoop, " ccc")]], + vec![vec![ + (MinusNoop, ""), + (Deletion, "aaa"), + (MinusNoop, " ccc"), + ]], + vec![vec![(PlusNoop, ""), (Insertion, "aba"), (PlusNoop, " ccc")]], ), ) } @@ -500,8 +507,8 @@ mod tests { vec!["áaa"], vec!["ááb"], ( - vec![vec![(Deletion, "áaa")]], - vec![vec![(Insertion, "ááb")]], + vec![vec![(MinusNoop, ""), (Deletion, "áaa")]], + vec![vec![(PlusNoop, ""), (Insertion, "ááb")]], ), ) } -- cgit v1.2.3