WIP don't use StringIgnoringNewline due to #755 (fix_newline_splitting)

author: Wilfred Hughes <me@wilfred.me.uk> 2024-09-29 21:46:01 -0700
committer: Wilfred Hughes <me@wilfred.me.uk> 2024-09-29 21:46:01 -0700
commit: f1c69d3b92593414bb9a8b14183b6f06d6a32966 (patch)
tree: 5dfad0a0e69b491436ebe8ab0407d7af73bcfa88
parent: 393845ddcbabbd35c427ada66f1cec25f15a44c4 (diff)
1 files changed, 24 insertions, 82 deletions
diff --git a/src/line_parser.rs b/src/line_parser.rs
index dfea571f2b..6c08deacf8 100644
--- a/src/line_parser.rs
+++ b/src/line_parser.rs
@@ -1,10 +1,8 @@
 //! A fallback "parser" for plain text.
 
-use lazy_static::lazy_static;
 use line_numbers::{LinePositions, SingleLineSpan};
-use regex::Regex;
-use std::hash::Hash;
 
+use crate::lines::split_on_newlines;
 use crate::words::split_words;
 use crate::{
     diff::myers_diff,
@@ -13,25 +11,6 @@ use crate::{
 
 const MAX_WORDS_IN_LINE: usize = 1000;
 
-fn split_lines_keep_newline(s: &str) -> Vec<&str> {
-    lazy_static! {
-        static ref NEWLINE_RE: Regex = Regex::new("\n").unwrap();
-    }
-
-    let mut offset = 0;
-    let mut lines = vec![];
-    for newline_match in NEWLINE_RE.find_iter(s) {
-        lines.push(s[offset..newline_match.end()].into());
-        offset = newline_match.end();
-    }
-
-    if offset < s.len() {
-        lines.push(s[offset..].into());
-    }
-
-    lines
-}
-
 #[derive(Debug)]
 enum TextChangeKind {
     Novel,
@@ -74,66 +53,24 @@ fn merge_novel<'a>(
     res
 }
 
-#[derive(Debug, Clone)]
-struct StringIgnoringNewline<'a>(&'a str);
-
-impl PartialEq for StringIgnoringNewline<'_> {
-    fn eq(&self, other: &Self) -> bool {
-        let mut s = self.0;
-        if s.ends_with('\n') {
-            s = &s[..s.len() - 1];
-        }
-
-        let mut other_s = other.0;
-        if other_s.ends_with('\n') {
-            other_s = &other_s[..other_s.len() - 1];
-        }
-
-        s == other_s
-    }
-}
-
-impl Eq for StringIgnoringNewline<'_> {}
-
-impl Hash for StringIgnoringNewline<'_> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        let mut s = self.0;
-        if s.ends_with('\n') {
-            s = &s[..s.len() - 1];
-        }
-
-        s.hash(state);
-    }
-}
-
 fn changed_parts<'a>(
     src: &'a str,
     opposite_src: &'a str,
 ) -> Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> {
-    let src_lines = split_lines_keep_newline(src)
-        .into_iter()
-        .map(StringIgnoringNewline)
-        .collect::<Vec<_>>();
-    let opposite_src_lines = split_lines_keep_newline(opposite_src)
-        .into_iter()
-        .map(StringIgnoringNewline)
-        .collect::<Vec<_>>();
+    let src_lines = split_on_newlines(src).collect::<Vec<_>>();
+    let opposite_src_lines = split_on_newlines(opposite_src).collect::<Vec<_>>();
 
     let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
     for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
         match diff_res {
             myers_diff::DiffResult::Left(line) => {
-                res.push((TextChangeKind::Novel, vec![line.0], vec![]));
+                res.push((TextChangeKind::Novel, vec![line], vec![]));
             }
             myers_diff::DiffResult::Both(line, opposite_line) => {
-                res.push((
-                    TextChangeKind::Unchanged,
-                    vec![line.0],
-                    vec![opposite_line.0],
-                ));
+                res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line]));
             }
             myers_diff::DiffResult::Right(opposite_line) => {
-                res.push((TextChangeKind::Novel, vec![], vec![opposite_line.0]));
+                res.push((TextChangeKind::Novel, vec![], vec![opposite_line]));
             }
         }
     }
@@ -168,6 +105,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
             TextChangeKind::Unchanged => {
                 seen_unchanged = true;
                 for (lhs_line, rhs_line) in lhs_lines.iter().zip(rhs_lines) {
+                    // offset crashing from from_region here
                     let lhs_pos =
                         lhs_lp.from_region(lhs_offset, lhs_offset + line_len_in_bytes(lhs_line));
                     let rhs_pos =
@@ -182,13 +120,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                         pos: lhs_pos[0],
                     });
 
-                    lhs_offset += lhs_line.len();
-                    rhs_offset += rhs_line.len();
+                    lhs_offset += lhs_line.len() + "\n".len();
+                    rhs_offset += rhs_line.len() + "\n".len();
                 }
             }
             TextChangeKind::Novel => {
-                let lhs_part = lhs_lines.join("");
-                let rhs_part = rhs_lines.join("");
+                let lhs_part = lhs_lines.join("\n");
+                let rhs_part = rhs_lines.join("\n");
 
                 let lhs_words = split_words(&lhs_part);
                 let rhs_words = split_words(&rhs_part);
@@ -209,8 +147,12 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                         }
                     }
 
-                    lhs_offset += lhs_part.len();
-                    rhs_offset += rhs_part.len();
+                    // if !lhs_lines.is_empty() {
+                    //     lhs_offset += "\n".len();
+                    // }
+                    // if !rhs_lines.is_empty() {
+                    //     rhs_offset += "\n".len();
+                    // }
                     continue;
                 }
 
@@ -254,6 +196,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                         }
                     }
                 }
+
+                if !lhs_lines.is_empty() {
+                    lhs_offset += "\n".len();
+                }
+                if !rhs_lines.is_empty() {
+                    rhs_offset += "\n".len();
+                }
             }
         }
     }
@@ -296,13 +245,6 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_split_newlines() {
-        let s = "foo\nbar\nbaz";
-        let res = split_lines_keep_newline(s);
-        assert_eq!(res, vec!["foo\n", "bar\n", "baz"])
-    }
-
-    #[test]
     fn test_positions_no_changes() {
         let positions = change_positions("foo", "foo");
author	Wilfred Hughes <me@wilfred.me.uk>	2024-09-29 21:46:01 -0700
committer	Wilfred Hughes <me@wilfred.me.uk>	2024-09-29 21:46:01 -0700
commit	f1c69d3b92593414bb9a8b14183b6f06d6a32966 (patch)
tree	5dfad0a0e69b491436ebe8ab0407d7af73bcfa88
parent	393845ddcbabbd35c427ada66f1cec25f15a44c4 (diff)