diff options
author | Wilfred Hughes <me@wilfred.me.uk> | 2024-09-29 21:46:01 -0700 |
---|---|---|
committer | Wilfred Hughes <me@wilfred.me.uk> | 2024-09-29 21:46:01 -0700 |
commit | f1c69d3b92593414bb9a8b14183b6f06d6a32966 (patch) | |
tree | 5dfad0a0e69b491436ebe8ab0407d7af73bcfa88 | |
parent | 393845ddcbabbd35c427ada66f1cec25f15a44c4 (diff) |
WIP don't use StringIgnoringNewline due to #755fix_newline_splitting
-rw-r--r-- | src/line_parser.rs | 106 |
1 files changed, 24 insertions, 82 deletions
diff --git a/src/line_parser.rs b/src/line_parser.rs index dfea571f2b..6c08deacf8 100644 --- a/src/line_parser.rs +++ b/src/line_parser.rs @@ -1,10 +1,8 @@ //! A fallback "parser" for plain text. -use lazy_static::lazy_static; use line_numbers::{LinePositions, SingleLineSpan}; -use regex::Regex; -use std::hash::Hash; +use crate::lines::split_on_newlines; use crate::words::split_words; use crate::{ diff::myers_diff, @@ -13,25 +11,6 @@ use crate::{ const MAX_WORDS_IN_LINE: usize = 1000; -fn split_lines_keep_newline(s: &str) -> Vec<&str> { - lazy_static! { - static ref NEWLINE_RE: Regex = Regex::new("\n").unwrap(); - } - - let mut offset = 0; - let mut lines = vec![]; - for newline_match in NEWLINE_RE.find_iter(s) { - lines.push(s[offset..newline_match.end()].into()); - offset = newline_match.end(); - } - - if offset < s.len() { - lines.push(s[offset..].into()); - } - - lines -} - #[derive(Debug)] enum TextChangeKind { Novel, @@ -74,66 +53,24 @@ fn merge_novel<'a>( res } -#[derive(Debug, Clone)] -struct StringIgnoringNewline<'a>(&'a str); - -impl PartialEq for StringIgnoringNewline<'_> { - fn eq(&self, other: &Self) -> bool { - let mut s = self.0; - if s.ends_with('\n') { - s = &s[..s.len() - 1]; - } - - let mut other_s = other.0; - if other_s.ends_with('\n') { - other_s = &other_s[..other_s.len() - 1]; - } - - s == other_s - } -} - -impl Eq for StringIgnoringNewline<'_> {} - -impl Hash for StringIgnoringNewline<'_> { - fn hash<H: std::hash::Hasher>(&self, state: &mut H) { - let mut s = self.0; - if s.ends_with('\n') { - s = &s[..s.len() - 1]; - } - - s.hash(state); - } -} - fn changed_parts<'a>( src: &'a str, opposite_src: &'a str, ) -> Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> { - let src_lines = split_lines_keep_newline(src) - .into_iter() - .map(StringIgnoringNewline) - .collect::<Vec<_>>(); - let opposite_src_lines = split_lines_keep_newline(opposite_src) - .into_iter() - .map(StringIgnoringNewline) - .collect::<Vec<_>>(); + let src_lines = split_on_newlines(src).collect::<Vec<_>>(); + let opposite_src_lines = split_on_newlines(opposite_src).collect::<Vec<_>>(); let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![]; for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) { match diff_res { myers_diff::DiffResult::Left(line) => { - res.push((TextChangeKind::Novel, vec![line.0], vec![])); + res.push((TextChangeKind::Novel, vec![line], vec![])); } myers_diff::DiffResult::Both(line, opposite_line) => { - res.push(( - TextChangeKind::Unchanged, - vec![line.0], - vec![opposite_line.0], - )); + res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line])); } myers_diff::DiffResult::Right(opposite_line) => { - res.push((TextChangeKind::Novel, vec![], vec![opposite_line.0])); + res.push((TextChangeKind::Novel, vec![], vec![opposite_line])); } } } @@ -168,6 +105,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> TextChangeKind::Unchanged => { seen_unchanged = true; for (lhs_line, rhs_line) in lhs_lines.iter().zip(rhs_lines) { + // offset crashing from from_region here let lhs_pos = lhs_lp.from_region(lhs_offset, lhs_offset + line_len_in_bytes(lhs_line)); let rhs_pos = @@ -182,13 +120,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> pos: lhs_pos[0], }); - lhs_offset += lhs_line.len(); - rhs_offset += rhs_line.len(); + lhs_offset += lhs_line.len() + "\n".len(); + rhs_offset += rhs_line.len() + "\n".len(); } } TextChangeKind::Novel => { - let lhs_part = lhs_lines.join(""); - let rhs_part = rhs_lines.join(""); + let lhs_part = lhs_lines.join("\n"); + let rhs_part = rhs_lines.join("\n"); let lhs_words = split_words(&lhs_part); let rhs_words = split_words(&rhs_part); @@ -209,8 +147,12 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> } } - lhs_offset += lhs_part.len(); - rhs_offset += rhs_part.len(); + // if !lhs_lines.is_empty() { + // lhs_offset += "\n".len(); + // } + // if !rhs_lines.is_empty() { + // rhs_offset += "\n".len(); + // } continue; } @@ -254,6 +196,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> } } } + + if !lhs_lines.is_empty() { + lhs_offset += "\n".len(); + } + if !rhs_lines.is_empty() { + rhs_offset += "\n".len(); + } } } } @@ -296,13 +245,6 @@ mod tests { use super::*; #[test] - fn test_split_newlines() { - let s = "foo\nbar\nbaz"; - let res = split_lines_keep_newline(s); - assert_eq!(res, vec!["foo\n", "bar\n", "baz"]) - } - - #[test] fn test_positions_no_changes() { let positions = change_positions("foo", "foo"); |