summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSheldon Nico <11974052+SheldonNico@users.noreply.github.com>2022-12-03 22:59:06 +0800
committerGitHub <noreply@github.com>2022-12-03 06:59:06 -0800
commit54e1ee79c7cefe573138c4d843fd8001b774422c (patch)
treef09f8fd7dc146383491fea046a9dff87a0436e4f
parent667d44c88552f4cd85de9a2917b41f308ceec6ce (diff)
try fix bad alignment in unicode (#1144) (#1145)
* try fix bad alignment in unicode (#1144) * use width instead of count in wrap_line * fix fmt * 3 tests do not need fail * fix tests Co-authored-by: Thomas Otto <th1000s@posteo.net>
-rw-r--r--src/ansi/console_tests.rs3
-rw-r--r--src/ansi/mod.rs4
-rw-r--r--src/features/line_numbers.rs17
-rw-r--r--src/features/side_by_side.rs64
-rw-r--r--src/wrapping.rs61
5 files changed, 121 insertions, 28 deletions
diff --git a/src/ansi/console_tests.rs b/src/ansi/console_tests.rs
index d6372316..09f5647c 100644
--- a/src/ansi/console_tests.rs
+++ b/src/ansi/console_tests.rs
@@ -54,10 +54,11 @@ mod tests {
&truncate_str(&s, 10, "..."),
&format!("foo {}...", style("bar").red().force_styling(true))
);
+ // `バ`(width = 2) will be truncate to 1, we use space to fill
let s = format!("foo {}", style("バー").red().force_styling(true));
assert_eq!(
&truncate_str(&s, 5, ""),
- &format!("foo {}", style("").red().force_styling(true))
+ &format!("foo {}", style(" ").red().force_styling(true))
);
let s = format!("foo {}", style("バー").red().force_styling(true));
assert_eq!(
diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs
index ae202d31..2b7b2f60 100644
--- a/src/ansi/mod.rs
+++ b/src/ansi/mod.rs
@@ -54,6 +54,10 @@ pub fn truncate_str<'a, 'b>(s: &'a str, display_width: usize, tail: &'b str) ->
for g in t.graphemes(true) {
let w = g.width();
if used + w > display_width {
+ // use space to fill the gap left by truncate. Need another
+ // option to custom this or just use `WrapOption.wrap_right_prefix_symbol`
+ // in the future.
+ result.push_str(&" ".repeat(display_width.saturating_sub(used)));
break;
}
result.push_str(g);
diff --git a/src/features/line_numbers.rs b/src/features/line_numbers.rs
index d9ab7948..bbdeb592 100644
--- a/src/features/line_numbers.rs
+++ b/src/features/line_numbers.rs
@@ -797,6 +797,8 @@ pub mod tests {
"@",
"--wrap-right-symbol",
"@",
+ "--wrap-right-prefix-symbol",
+ ">",
])
.with_input(DIFF_PLUS_MINUS_WITH_1_CONTEXT_DIFF)
.expect_after_header(
@@ -816,6 +818,8 @@ pub mod tests {
"@",
"--wrap-right-symbol",
"@",
+ "--wrap-right-prefix-symbol",
+ ">",
];
DeltaTest::with_args(cfg)
@@ -824,7 +828,7 @@ pub mod tests {
r#"
│ 1 │abc │ 1 │abc
│ 2 │a = one side │ 2 │a = one longer@
- │ │ │ │ side
+ │ │ │ │ > side
│ 3 │xyz │ 3 │xyz"#,
);
@@ -834,7 +838,7 @@ pub mod tests {
r#"
│ 1 │abc │ 1 │abc
│ 2 │a = one longer@│ 2 │a = one side
- │ │ side │ │
+ │ │ > side│ │
│ 3 │xyz │ 3 │xyz"#,
);
@@ -975,4 +979,13 @@ index 223ca50..367a6f6 100644
-a = other one
+a = right side which is longer
xyz";
+
+ pub const TWO_MINUS_LINES_UNICODE_DIFF: &str = "\
+diff --git a/a.py b/a.py
+index 8b0d958..e69de29 100644
+--- a/a.txt
++++ b/b.txt
+@@ -1,1 +0,0 @@
+-一二三
+";
}
diff --git a/src/features/side_by_side.rs b/src/features/side_by_side.rs
index f175fa2a..ac2de08e 100644
--- a/src/features/side_by_side.rs
+++ b/src/features/side_by_side.rs
@@ -1,6 +1,6 @@
use itertools::Itertools;
use syntect::highlighting::Style as SyntectStyle;
-use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
use crate::ansi;
use crate::cli;
@@ -75,12 +75,10 @@ pub fn available_line_width(
}
pub fn line_is_too_long(line: &str, line_width: usize) -> bool {
- let line_sum = line.graphemes(true).count();
-
debug_assert!(line.ends_with('\n'));
- // `line_sum` is too large because a trailing newline is present,
- // so allow one more character.
- line_sum > line_width + 1
+
+ // graphemes will take care of newlines
+ line.width() > line_width
}
/// Return whether any of the input lines is too long, and a data
@@ -715,4 +713,58 @@ pub mod tests {
│ 2 │b = 2 │ 2 │bb = 2 "#,
);
}
+
+ #[test]
+ fn test_two_minus_lines_unicode_truncated() {
+ DeltaTest::with_args(&[
+ "--side-by-side",
+ "--wrap-max-lines",
+ "2",
+ "--width",
+ "16",
+ "--line-fill-method=spaces",
+ ])
+ .set_config(|cfg| cfg.truncation_symbol = ">".into())
+ .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+ .expect_after_header(
+ r#"
+ │ 1 │↵ │ │
+ │ │↵ │ │
+ │ │ >│ │"#,
+ );
+
+ DeltaTest::with_args(&[
+ "--side-by-side",
+ "--wrap-max-lines",
+ "2",
+ "--width",
+ "17",
+ "--line-fill-method=spaces",
+ ])
+ .set_config(|cfg| cfg.truncation_symbol = ">".into())
+ .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+ .expect_after_header(
+ r#"
+ │ 1 │↵ │ │
+ │ │↵ │ │
+ │ │ >│ │"#,
+ );
+
+ DeltaTest::with_args(&[
+ "--side-by-side",
+ "--wrap-max-lines",
+ "2",
+ "--width",
+ "18",
+ "--line-fill-method=spaces",
+ ])
+ .set_config(|cfg| cfg.truncation_symbol = ">".into())
+ .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+ .expect_after_header(
+ r#"
+ │ 1 │一↵│ │
+ │ │二↵│ │
+ │ │三 │ │"#,
+ );
+ }
}
diff --git a/src/wrapping.rs b/src/wrapping.rs
index 771d7f78..d9b0ff89 100644
--- a/src/wrapping.rs
+++ b/src/wrapping.rs
@@ -1,5 +1,6 @@
use syntect::highlighting::Style as SyntectStyle;
use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
use crate::cli;
use crate::config::INLINE_SYMBOL_WIDTH_1;
@@ -202,11 +203,21 @@ where
let (style, text, graphemes) = stack
.pop()
- .map(|(style, text)| (style, text, text.grapheme_indices(true).collect::<Vec<_>>()))
+ .map(|(style, text)| {
+ (
+ style,
+ text,
+ text.graphemes(true)
+ .map(|item| (item.len(), item.width()))
+ .collect::<Vec<_>>(),
+ )
+ })
.unwrap();
- let new_len = curr_line.len + graphemes.len();
+ let graphemes_width: usize = graphemes.iter().map(|(_, w)| w).sum();
+ let new_len = curr_line.len + graphemes_width;
+ #[allow(clippy::comparison_chain)]
let must_split = if new_len < line_width {
curr_line.push_and_set_len((style, text), new_len);
false
@@ -228,15 +239,6 @@ where
}
_ => true,
}
- } else if new_len == line_width + 1 && stack.is_empty() {
- // If the one overhanging char is '\n' then keep it on the current line.
- if text.ends_with('\n') {
- // Do not count the included '\n': - 1
- curr_line.push_and_set_len((style, text), new_len - 1);
- false
- } else {
- true
- }
} else {
true
};
@@ -244,16 +246,29 @@ where
// Text must be split, one part (or just `wrap_symbol`) is added to the
// current line, the other is pushed onto the stack.
if must_split {
- let grapheme_split_pos = graphemes.len() - (new_len - line_width) - 1;
+ let mut width_left = graphemes_width
+ .saturating_sub(new_len - line_width)
+ .saturating_sub(wrap_config.left_symbol.width());
// The length does not matter anymore and `curr_line` will be reset
// at the end, so move the line segments out.
let mut line_segments = curr_line.line_segments;
- let next_line = if grapheme_split_pos == 0 {
+ let next_line = if width_left == 0 {
text
} else {
- let byte_split_pos = graphemes[grapheme_split_pos].0;
+ let mut byte_split_pos = 0;
+ // After loop byte_split_pos may still equal to 0. If width_left
+ // is less than the width of first character, We can't display it.
+ for &(item_len, item_width) in graphemes.iter() {
+ if width_left >= item_width {
+ byte_split_pos += item_len;
+ width_left -= item_width;
+ } else {
+ break;
+ }
+ }
+
let this_line = &text[..byte_split_pos];
line_segments.push((style, this_line));
&text[byte_split_pos..]
@@ -273,7 +288,9 @@ where
if result.len() == 1 && curr_line.has_text() {
let current_permille = (curr_line.text_len() * 1000) / line_width;
- let pad_len = line_width.saturating_sub(curr_line.text_len());
+ // pad line will add a wrap_config.right_prefix_symbol
+ let pad_len = line_width
+ .saturating_sub(curr_line.text_len() + wrap_config.right_prefix_symbol.width());
if wrap_config.use_wrap_right_permille > current_permille && pad_len > 0 {
// The inserted spaces, which align a line to the right, point into this string.
@@ -785,7 +802,7 @@ mod tests {
let lines = wrap_test(&cfg, line, 11);
assert_eq!(lines.len(), 2);
assert_eq!(lines[0].last().unwrap().1, WR);
- assert_eq!(lines[1], [(*SD, " "), (*SD, ">"), (*S1, "ab")]);
+ assert_eq!(lines[1], [(*SD, " "), (*SD, ">"), (*S1, "ab")]);
}
#[test]
@@ -801,7 +818,7 @@ mod tests {
lines,
vec![
vec![(*S1, "012"), (*S2, "34"), (*SD, WR)],
- vec![(*SD, " "), (*SD, RA), (*S2, "56")]
+ vec![(*SD, " "), (*SD, RA), (*S2, "56")]
]
);
}
@@ -940,13 +957,19 @@ mod tests {
);
// Not working: Tailored grapheme clusters: क्षि = क् + षि
- let line = vec![(*S1, "abc"), (*S2, "deநி"), (*S1, "ghij")];
+ //
+ // Difference compare to previous example (even they may look like the
+ // same width in text editor.) :
+ //
+ // width நி: 2
+ // width ö̲: 1
+ let line = vec![(*S1, "abc"), (*S2, "dநி"), (*S1, "ghij")];
let lines = wrap_test(&cfg, line, 4);
assert_eq!(
lines,
vec![
vec![(*S1, "abc"), (*SD, W)],
- vec![(*S2, "deநி"), (*SD, W)],
+ vec![(*S2, "dநி"), (*SD, W)],
vec![(*S1, "ghij")]
]
);