author    Thomas Otto <th1000s@posteo.net>       2021-11-01 22:31:33 +0100
committer Dan Davison <dandavison7@gmail.com>    2021-11-10 20:08:28 -0500
commit    93d37649ae63312548bb319e5d6d7305834e502f (patch)
tree      22f54397bed36d19f3667d80ffeca0e618901286 /src/edits.rs
parent    54bab7051774c82d5af9d6024f03529008254657 (diff)
Remove +/- line prefix instead of substituting a space
This simplifies line handling and printing by removing a "magical" 1-offset that was previously required in various locations. The empty token "" is now prepended explicitly in `tokenize()`.
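As a rough illustration of the change (a hedged sketch; the names below are invented and this is not code from delta itself): previously the '+'/'-' prefix column was kept occupied by substituting a space, so every downstream token position carried a hidden 1-offset; now the prefix is removed outright and the alignment placeholder is supplied by `tokenize()` as an explicit leading "" token.

fn main() {
    let raw = "+some added line";

    // Old approach: substitute a space for the '+'/'-' prefix, keeping the line's
    // length unchanged and forcing a 1-offset onto every later token position.
    let old_style = format!(" {}", &raw[1..]);

    // New approach: drop the prefix entirely; tokenize() later contributes an
    // explicit "" token as the alignment placeholder.
    let new_style = &raw[1..];

    assert_eq!(old_style, " some added line");
    assert_eq!(new_style, "some added line");
}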
Diffstat (limited to 'src/edits.rs')
-rw-r--r--  src/edits.rs  31
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/src/edits.rs b/src/edits.rs
index 06d934ff..11979aa8 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -30,8 +30,7 @@ pub fn infer_edits<'a, EditOperation>(
     Vec<(Option<usize>, Option<usize>)>, // line alignment
 )
 where
-    EditOperation: Copy,
-    EditOperation: PartialEq,
+    EditOperation: Copy + PartialEq + std::fmt::Debug,
 {
     let mut annotated_minus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
     let mut annotated_plus_lines = Vec::<Vec<(EditOperation, &str)>>::new();
@@ -95,7 +94,9 @@
 /// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings;
 /// not individual characters.
 fn tokenize<'a>(line: &'a str, regex: &Regex) -> Vec<&'a str> {
-    let mut tokens = Vec::new();
+    // Starting with "", see comment in Alignment::new(). Historical note: Replacing the '+/-'
+    // prefix with a space implicitly generated this.
+    let mut tokens = vec![""];
     let mut offset = 0;
     for m in regex.find_iter(line) {
         if offset == 0 && m.start() > 0 {
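For illustration, here is a simplified stand-in for the changed function (a hedged sketch: `tokenize_sketch` is a made-up name, and delta's real `DEFAULT_TOKENIZATION_REGEXP` and boundary handling differ). The only point shown is the new initialization: the returned token stream always starts with an explicit "" element.

use regex::Regex;

// Simplified stand-in for tokenize(); only the leading-"" behaviour mirrors the change above.
fn tokenize_sketch<'a>(line: &'a str, regex: &Regex) -> Vec<&'a str> {
    let mut tokens = vec![""]; // explicit leading placeholder (see Alignment::new())
    let mut offset = 0;
    for m in regex.find_iter(line) {
        if m.start() > offset {
            tokens.push(&line[offset..m.start()]); // text between separator matches
        }
        tokens.push(m.as_str()); // the separator match itself
        offset = m.end();
    }
    if offset < line.len() {
        tokens.push(&line[offset..]); // trailing text after the last match
    }
    tokens
}

With a plain whitespace regex, tokenize_sketch("aaa ccc", &Regex::new(r"\s+").unwrap()) returns ["", "aaa", " ", "ccc"]: the non-leading tokens concatenate back to the input line.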
@@ -136,8 +137,7 @@ fn annotate<'a, Annotation>(
     plus_line: &'a str,
 ) -> (Vec<(Annotation, &'a str)>, Vec<(Annotation, &'a str)>, f64)
 where
-    Annotation: Copy,
-    Annotation: PartialEq,
+    Annotation: Copy + PartialEq + std::fmt::Debug,
 {
     let mut annotated_minus_line = Vec::new();
     let mut annotated_plus_line = Vec::new();
@@ -467,7 +467,10 @@ mod tests {
     fn assert_tokenize(text: &str, expected_tokens: &[&str]) {
         let actual_tokens = tokenize(text, &*DEFAULT_TOKENIZATION_REGEXP);
         assert_eq!(text, expected_tokens.iter().join(""));
-        assert_eq!(actual_tokens, expected_tokens);
+        // tokenize() guarantees that the first element of the token stream is "".
+        // See comment in Alignment::new()
+        assert_eq!(actual_tokens[0], "");
+        assert_eq!(&actual_tokens[1..], expected_tokens);
     }
 
     #[test]
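A hedged sketch of the helper's new contract (the token values below are illustrative and are not output of delta's real DEFAULT_TOKENIZATION_REGEXP): whatever the regex yields, the first element is the empty placeholder and the remaining tokens reassemble the input line.

#[test]
fn tokenize_contract_sketch() {
    // Illustrative values only; not produced by delta's actual tokenization regex.
    let actual_tokens = vec!["", "aaa", " ", "ccc"];
    let expected_tokens = ["aaa", " ", "ccc"];

    assert_eq!(actual_tokens[0], "");                       // leading "" placeholder
    assert_eq!(&actual_tokens[1..], &expected_tokens[..]);  // remaining tokens match
    assert_eq!(expected_tokens.concat(), "aaa ccc");        // and reassemble the line
}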
@@ -476,8 +479,8 @@
             vec!["aaa"],
             vec!["aba"],
             (
-                vec![vec![(Deletion, "aaa")]],
-                vec![vec![(Insertion, "aba")]],
+                vec![vec![(MinusNoop, ""), (Deletion, "aaa")]],
+                vec![vec![(PlusNoop, ""), (Insertion, "aba")]],
             ),
         )
     }
@@ -488,8 +491,12 @@
             vec!["aaa ccc"],
             vec!["aba ccc"],
             (
-                vec![vec![(Deletion, "aaa"), (MinusNoop, " ccc")]],
-                vec![vec![(Insertion, "aba"), (PlusNoop, " ccc")]],
+                vec![vec![
+                    (MinusNoop, ""),
+                    (Deletion, "aaa"),
+                    (MinusNoop, " ccc"),
+                ]],
+                vec![vec![(PlusNoop, ""), (Insertion, "aba"), (PlusNoop, " ccc")]],
             ),
         )
     }
@@ -500,8 +507,8 @@
             vec!["áaa"],
             vec!["ááb"],
             (
-                vec![vec![(Deletion, "áaa")]],
-                vec![vec![(Insertion, "ááb")]],
+                vec![vec![(MinusNoop, ""), (Deletion, "áaa")]],
+                vec![vec![(PlusNoop, ""), (Insertion, "ááb")]],
             ),
         )
     }