author    Thomas Otto <th1000s@posteo.net>       2021-11-01 22:31:33 +0100
committer Dan Davison <dandavison7@gmail.com>    2021-11-10 20:08:28 -0500
commit    93d37649ae63312548bb319e5d6d7305834e502f (patch)
tree      22f54397bed36d19f3667d80ffeca0e618901286 /src/edits.rs
parent    54bab7051774c82d5af9d6024f03529008254657 (diff)
Remove +/- line prefix instead of substituting a space
This simplifies line handling and printing by removing a "magical" 1-offset that was previously required in various locations. The empty token "" is now prepended explicitly in `tokenize()`.
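As a rough illustration of the change (a hedged sketch; the names below are invented and this is not code from delta itself): previously the '+'/'-' prefix column was kept occupied by substituting a space, so every downstream token position carried a hidden 1-offset; now the prefix is removed outright and the alignment placeholder is supplied by `tokenize()` as an explicit leading "" token.

fn main() {
    let raw = "+some added line";

    // Old approach: substitute a space for the '+'/'-' prefix, keeping the line's
    // length unchanged and forcing a 1-offset onto every later token position.
    let old_style = format!(" {}", &raw[1..]);

    // New approach: drop the prefix entirely; tokenize() later contributes an
    // explicit "" token as the alignment placeholder.
    let new_style = &raw[1..];

    assert_eq!(old_style, " some added line");
    assert_eq!(new_style, "some added line");
}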
Diffstat (limited to 'src/edits.rs')
-rw-r--r--  src/edits.rs  31
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/src/edits.rs b/src/edits.rs
index 06d934ff..11979aa8 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -30,8 +30,7 @@ pub fn infer_edits<'a, EditOperation>(
     Vec<(Option<usize>, Option<usize>)>, // line alignment
 )
 where
-    EditOperation: Copy,
-    EditOperation: PartialEq,
+    EditOperation: Copy + PartialEq + std::fmt::Debug,
 {
     let mut annotated_minus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
     let mut annotated_plus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
@@ -95,7 +94,9 @@
 /// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings;
 /// not individual characters.
 fn tokenize<'a>(line: &'a str, regex: &Regex) -> Vec<&'a str> {
-    let mut tokens = Vec::new();
+    // Starting with "", see comment in Alignment::new(). Historical note: Replacing the '+/-'
+    // prefix with a space implicitly generated this.
+    let mut tokens = vec![""];
     let mut offset = 0;
     for m in regex.find_iter(line) {
         if offset == 0 && m.start() > 0 {
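For illustration, here is a simplified stand-in for the changed function (a hedged sketch: `tokenize_sketch` is a made-up name, and delta's real `DEFAULT_TOKENIZATION_REGEXP` and boundary handling differ). The only point shown is the new initialization: the returned token stream always starts with an explicit "" element.

use regex::Regex;

// Simplified stand-in for tokenize(); only the leading-"" behaviour mirrors the change above.
fn tokenize_sketch<'a>(line: &'a str, regex: &Regex) -> Vec<&'a str> {
    let mut tokens = vec![""]; // explicit leading placeholder (see Alignment::new())
    let mut offset = 0;
    for m in regex.find_iter(line) {
        if m.start() > offset {
            tokens.push(&line[offset..m.start()]); // text between separator matches
        }
        tokens.push(m.as_str()); // the separator match itself
        offset = m.end();
    }
    if offset < line.len() {
        tokens.push(&line[offset..]); // trailing text after the last match
    }
    tokens
}

With a plain whitespace regex, tokenize_sketch("aaa ccc", &Regex::new(r"\s+").unwrap()) returns ["", "aaa", " ", "ccc"]: the non-leading tokens concatenate back to the input line.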
@@ -136,8 +137,7 @@ fn annotate<'a, Annotation>(
     plus_line: &'a str,
 ) -> (Vec<(Annotation, &'a str)>, Vec<(Annotation, &'a str)>, f64)
 where
-    Annotation: Copy,
-    Annotation: PartialEq,
+    Annotation: Copy + PartialEq + std::fmt::Debug,
 {
     let mut annotated_minus_line = Vec::new();
     let mut annotated_plus_line = Vec::new();
@@ -467,7 +467,10 @@ mod tests {
     fn assert_tokenize(text: &str, expected_tokens: &[&str]) {
         let actual_tokens = tokenize(text, &*DEFAULT_TOKENIZATION_REGEXP);
         assert_eq!(text, expected_tokens.iter().join(""));
-        assert_eq!(actual_tokens, expected_tokens);
+        // tokenize() guarantees that the first element of the token stream is "".
+        // See comment in Alignment::new()
+        assert_eq!(actual_tokens[0], "");
+        assert_eq!(&actual_tokens[1..], expected_tokens);
     }
 
     #[test]
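A hedged sketch of the helper's new contract (the token values below are illustrative and are not output of delta's real DEFAULT_TOKENIZATION_REGEXP): whatever the regex yields, the first element is the empty placeholder and the remaining tokens reassemble the input line.

#[test]
fn tokenize_contract_sketch() {
    // Illustrative values only; not produced by delta's actual tokenization regex.
    let actual_tokens = vec!["", "aaa", " ", "ccc"];
    let expected_tokens = ["aaa", " ", "ccc"];

    assert_eq!(actual_tokens[0], "");                       // leading "" placeholder
    assert_eq!(&actual_tokens[1..], &expected_tokens[..]);  // remaining tokens match
    assert_eq!(expected_tokens.concat(), "aaa ccc");        // and reassemble the line
}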
@@ -476,8 +479,8 @@
             vec!["aaa"],
             vec!["aba"],
             (
-                vec![vec![(Deletion, "aaa")]],
-                vec![vec![(Insertion, "aba")]],
+                vec![vec![(MinusNoop, ""), (Deletion, "aaa")]],
+                vec![vec![(PlusNoop, ""), (Insertion, "aba")]],
             ),
         )
     }
@@ -488,8 +491,12 @@
             vec!["aaa ccc"],
             vec!["aba ccc"],
             (
-                vec![vec![(Deletion, "aaa"), (MinusNoop, " ccc")]],
-                vec![vec![(Insertion, "aba"), (PlusNoop, " ccc")]],
+                vec![vec![
+                    (MinusNoop, ""),
+                    (Deletion, "aaa"),
+                    (MinusNoop, " ccc"),
+                ]],
+                vec![vec![(PlusNoop, ""), (Insertion, "aba"), (PlusNoop, " ccc")]],
             ),
         )
     }
@@ -500,8 +507,8 @@
             vec!["áaa"],
             vec!["ááb"],
             (
-                vec![vec![(Deletion, "áaa")]],
-                vec![vec![(Insertion, "ááb")]],
+                vec![vec![(MinusNoop, ""), (Deletion, "áaa")]],
+                vec![vec![(PlusNoop, ""), (Insertion, "ááb")]],
             ),
         )
     }