summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilfred Hughes <me@wilfred.me.uk>2024-07-20 15:54:27 -0700
committerWilfred Hughes <me@wilfred.me.uk>2024-07-20 16:09:44 -0700
commitffe27c575ee222647cec98ba52f1c03a19055372 (patch)
tree1fba4a51e6ebb12bf5427304532e43c35b50fd71
parentefe1b10e8d4aaddeecd5797d27fe6fc10c05bdf7 (diff)
Ensure line splitting distinguishes "foo" and "foo\n"0.59.0
We rely on being able to split lines and rejoin them to obtain the original string. `str::lines()` in the Rust stdlib does not have this property. This was causing crashes in word-diffing on textual diffing, where code paths differed on the number of lines they thought a string had. This was broken in 8b842387a16e5ae4a79aaf6befb0bac1894882d0. Fixes #688.
-rw-r--r--CHANGELOG.md5
-rw-r--r--sample_files/big_text_hunk_1.txt7
-rw-r--r--sample_files/big_text_hunk_2.txt49
-rw-r--r--sample_files/compare.expected7
-rw-r--r--src/display/inline.rs18
-rw-r--r--src/display/side_by_side.rs39
-rw-r--r--src/display/style.rs3
-rw-r--r--src/lines.rs49
-rw-r--r--src/parse/guess_language.rs8
-rw-r--r--src/parse/syntax.rs6
10 files changed, 166 insertions, 25 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86a6d69191..6b37d8c604 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
## 0.59 (unreleased)
+### Diffing
+
+Fixed crash on some textual files where a single change contained more than
+1,000 words.
+
### Parsing
Added support for device tree and F#.
diff --git a/sample_files/big_text_hunk_1.txt b/sample_files/big_text_hunk_1.txt
new file mode 100644
index 0000000000..9f2293d29f
--- /dev/null
+++ b/sample_files/big_text_hunk_1.txt
@@ -0,0 +1,7 @@
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
+github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
+github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
+github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
+github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
+github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
diff --git a/sample_files/big_text_hunk_2.txt b/sample_files/big_text_hunk_2.txt
new file mode 100644
index 0000000000..dfb2e4a3ee
--- /dev/null
+++ b/sample_files/big_text_hunk_2.txt
@@ -0,0 +1,49 @@
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
+github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
+github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
+github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
+github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
+github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
+go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
+golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20220708220712-1185a9018129/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
+golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/sample_files/compare.expected b/sample_files/compare.expected
index 42e6e6b948..d7b192402c 100644
--- a/sample_files/compare.expected
+++ b/sample_files/compare.expected
@@ -19,6 +19,9 @@ sample_files/b2_math_1.h sample_files/b2_math_2.h
sample_files/bad_combine_1.rs sample_files/bad_combine_2.rs
f5051bf7d2b8afa3a677388cbd458891 -
+sample_files/big_text_hunk_1.txt sample_files/big_text_hunk_2.txt
+fd0c8912c094097f82c6b29ae66fb912 -
+
sample_files/change_outer_1.el sample_files/change_outer_2.el
2b9334a4cc72da63bba28eff958f0038 -
@@ -140,7 +143,7 @@ sample_files/makefile_1.mk sample_files/makefile_2.mk
d0572210b5121ce68ac0ce45e43b922b -
sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt
-615de4b145b7b161e4fb285728280ed1 -
+52ca05855e520876479e6f608c5e7831 -
sample_files/metadata_1.clj sample_files/metadata_2.clj
4b58ce366467c8cca46db53508e81323 -
@@ -293,5 +296,5 @@ sample_files/yaml_1.yaml sample_files/yaml_2.yaml
f068239fc7bade0e6de96d81136c1ac5 -
sample_files/zig_1.zig sample_files/zig_2.zig
-4516796003b81f35bfa57d84bb7c0cbe -
+e36d1ea126b8b68e3344434bb63f205e -
diff --git a/src/display/inline.rs b/src/display/inline.rs
index dc34248d5e..3973dba538 100644
--- a/src/display/inline.rs
+++ b/src/display/inline.rs
@@ -2,10 +2,12 @@
use crate::{
constants::Side,
- display::context::{calculate_after_context, calculate_before_context, opposite_positions},
- display::hunks::Hunk,
- display::style::{self, apply_colors, apply_line_number_color},
- lines::{format_line_num, MaxLine},
+ display::{
+ context::{calculate_after_context, calculate_before_context, opposite_positions},
+ hunks::Hunk,
+ style::{self, apply_colors, apply_line_number_color},
+ },
+ lines::{format_line_num, split_on_newlines, MaxLine},
options::DisplayOptions,
parse::syntax::MatchedPos,
summary::FileFormat,
@@ -43,8 +45,12 @@ pub(crate) fn print(
)
} else {
(
- lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
- rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
+ split_on_newlines(lhs_src)
+ .map(|s| format!("{}\n", s))
+ .collect(),
+ split_on_newlines(rhs_src)
+ .map(|s| format!("{}\n", s))
+ .collect(),
)
};
diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs
index 914bae43b4..afec92d466 100644
--- a/src/display/side_by_side.rs
+++ b/src/display/side_by_side.rs
@@ -11,14 +11,16 @@ use owo_colors::{OwoColorize, Style};
use crate::{
constants::Side,
- display::context::all_matched_lines_filled,
- display::hunks::{matched_lines_indexes_for_hunk, Hunk},
- display::style::{
- self, apply_colors, apply_line_number_color, color_positions, novel_style, replace_tabs,
- split_and_apply, BackgroundColor,
+ display::{
+ context::all_matched_lines_filled,
+ hunks::{matched_lines_indexes_for_hunk, Hunk},
+ style::{
+ self, apply_colors, apply_line_number_color, color_positions, novel_style,
+ replace_tabs, split_and_apply, BackgroundColor,
+ },
},
hash::DftHashMap,
- lines::format_line_num,
+ lines::{format_line_num, split_on_newlines},
options::{DisplayMode, DisplayOptions},
parse::syntax::{zip_pad_shorter, MatchedPos},
summary::FileFormat,
@@ -338,8 +340,12 @@ pub(crate) fn print(
)
} else {
(
- lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
- rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
+ split_on_newlines(lhs_src)
+ .map(|s| format!("{}\n", s))
+ .collect(),
+ split_on_newlines(rhs_src)
+ .map(|s| format!("{}\n", s))
+ .collect(),
)
};
@@ -401,8 +407,21 @@ pub(crate) fn print(
let mut prev_lhs_line_num = None;
let mut prev_rhs_line_num = None;
- let lhs_lines = lhs_src.lines().collect::<Vec<_>>();
- let rhs_lines = rhs_src.lines().collect::<Vec<_>>();
+ let mut lhs_lines = split_on_newlines(lhs_src).collect::<Vec<_>>();
+ let mut rhs_lines = split_on_newlines(rhs_src).collect::<Vec<_>>();
+
+ // If "foo" is one line, is "foo\n" two lines? Generally we want
+ // to care about newlines when deciding whether content differs.
+ //
+ // Ending a file with a trailing newline is extremely common
+ // though. If both files have a trailing newline, consider "foo\n"
+ // to be "foo" so we don't end up displaying a blank line on both
+ // sides.
+ if lhs_lines.last() == Some(&"") && rhs_lines.last() == Some(&"") {
+ lhs_lines.pop();
+ rhs_lines.pop();
+ }
+
let matched_lines = all_matched_lines_filled(lhs_mps, rhs_mps, &lhs_lines, &rhs_lines);
let mut matched_lines_to_print = &matched_lines[..];
diff --git a/src/display/style.rs b/src/display/style.rs
index ff63c4da2b..d65ef22459 100644
--- a/src/display/style.rs
+++ b/src/display/style.rs
@@ -7,6 +7,7 @@ use line_numbers::SingleLineSpan;
use owo_colors::{OwoColorize, Style};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
+use crate::lines::split_on_newlines;
use crate::parse::syntax::StringKind;
use crate::{
constants::Side,
@@ -401,7 +402,7 @@ pub(crate) fn apply_colors(
positions: &[MatchedPos],
) -> Vec<String> {
let styles = color_positions(side, background, syntax_highlight, file_format, positions);
- let lines = s.lines().collect::<Vec<_>>();
+ let lines = split_on_newlines(s).collect::<Vec<_>>();
style_lines(&lines, &styles)
}
diff --git a/src/lines.rs b/src/lines.rs
index f6b1fb8a72..85c5bb119f 100644
--- a/src/lines.rs
+++ b/src/lines.rs
@@ -32,6 +32,21 @@ impl<S: AsRef<str>> MaxLine for S {
}
}
+/// Split `s` on \n or \r\n. Always returns a non-empty vec. Each line
+/// in the vec does not include the trailing newline.
+///
+/// This differs from `str::lines`, which considers `""` to be zero
+/// lines and `"foo\n"` to be one line.
+pub(crate) fn split_on_newlines(s: &str) -> impl Iterator<Item = &str> {
+ s.split('\n').map(|l| {
+ if let Some(l) = l.strip_suffix('\r') {
+ l
+ } else {
+ l
+ }
+ })
+}
+
pub(crate) fn is_all_whitespace(s: &str) -> bool {
s.chars().all(|c| c.is_whitespace())
}
@@ -67,6 +82,40 @@ mod tests {
}
#[test]
+ fn test_split_line_empty() {
+ assert_eq!(split_on_newlines("").collect::<Vec<_>>(), vec![""]);
+ }
+
+ #[test]
+ fn test_split_line_single() {
+ assert_eq!(split_on_newlines("foo").collect::<Vec<_>>(), vec!["foo"]);
+ }
+
+ #[test]
+ fn test_split_line_with_newline() {
+ assert_eq!(
+ split_on_newlines("foo\nbar").collect::<Vec<_>>(),
+ vec!["foo", "bar"]
+ );
+ }
+
+ #[test]
+ fn test_split_line_with_crlf() {
+ assert_eq!(
+ split_on_newlines("foo\r\nbar").collect::<Vec<_>>(),
+ vec!["foo", "bar"]
+ );
+ }
+
+ #[test]
+ fn test_split_line_with_trailing_newline() {
+ assert_eq!(
+ split_on_newlines("foo\nbar\n").collect::<Vec<_>>(),
+ vec!["foo", "bar", ""]
+ );
+ }
+
+ #[test]
fn test_is_all_whitespace() {
assert!(is_all_whitespace(" \n\t"));
}
diff --git a/src/parse/guess_language.rs b/src/parse/guess_language.rs
index eef2a0df41..5f0fb1d348 100644
--- a/src/parse/guess_language.rs
+++ b/src/parse/guess_language.rs
@@ -179,6 +179,8 @@ pub(crate) fn language_name(language: Language) -> &'static str {
use Language::*;
+use crate::lines::split_on_newlines;
+
/// File globs that identify languages based on the file path.
pub(crate) fn language_globs(language: Language) -> Vec<glob::Pattern> {
let glob_strs: &'static [&'static str] = match language {
@@ -420,7 +422,7 @@ fn looks_like_hacklang(path: &Path, src: &str) -> bool {
fn looks_like_objc(path: &Path, src: &str) -> bool {
if let Some(extension) = path.extension() {
if extension == "h" {
- return src.lines().take(100).any(|line| {
+ return split_on_newlines(src).take(100).any(|line| {
["#import", "@interface", "@protocol"]
.iter()
.any(|keyword| line.starts_with(keyword))
@@ -497,7 +499,7 @@ fn from_emacs_mode_header(src: &str) -> Option<Language> {
// Emacs allows the mode header to occur on the second line if the
// first line is a shebang.
- for line in src.lines().take(2) {
+ for line in split_on_newlines(src).take(2) {
let mode_name: String = match (MODE_RE.captures(line), SHORTHAND_RE.captures(line)) {
(Some(cap), _) | (_, Some(cap)) => cap[1].into(),
_ => "".into(),
@@ -559,7 +561,7 @@ fn from_shebang(src: &str) -> Option<Language> {
lazy_static! {
static ref RE: Regex = Regex::new(r"#! *(?:/usr/bin/env )?([^ ]+)").unwrap();
}
- if let Some(first_line) = src.lines().next() {
+ if let Some(first_line) = split_on_newlines(src).next() {
if let Some(cap) = RE.captures(first_line) {
let interpreter_path = Path::new(&cap[1]);
if let Some(name) = interpreter_path.file_name() {
diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs
index bdde5934d5..65167fda32 100644
--- a/src/parse/syntax.rs
+++ b/src/parse/syntax.rs
@@ -9,6 +9,7 @@ use line_numbers::SingleLineSpan;
use typed_arena::Arena;
use self::Syntax::*;
+use crate::lines::split_on_newlines;
use crate::words::split_words_and_numbers;
use crate::{
diff::changes::ChangeKind,
@@ -408,9 +409,8 @@ fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>)
..
} => {
let is_comment = *highlight == AtomKind::Comment;
- let clean_content = if is_comment && content.lines().count() > 1 {
- content
- .lines()
+ let clean_content = if is_comment && split_on_newlines(content).count() > 1 {
+ split_on_newlines(content)
.map(|l| l.trim_start())
.collect::<Vec<_>>()
.join("\n")