From 54e1ee79c7cefe573138c4d843fd8001b774422c Mon Sep 17 00:00:00 2001 From: Sheldon Nico <11974052+SheldonNico@users.noreply.github.com> Date: Sat, 3 Dec 2022 22:59:06 +0800 Subject: try fix bad alignment in unicode (#1144) (#1145) * try fix bad alignment in unicode (#1144) * use width instead of count in wrap_line * fix fmt * 3 tests do not need fail * fix tests Co-authored-by: Thomas Otto --- src/ansi/console_tests.rs | 3 ++- src/ansi/mod.rs | 4 +++ src/features/line_numbers.rs | 17 ++++++++++-- src/features/side_by_side.rs | 64 +++++++++++++++++++++++++++++++++++++++----- src/wrapping.rs | 61 ++++++++++++++++++++++++++++------------- 5 files changed, 121 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/ansi/console_tests.rs b/src/ansi/console_tests.rs index d6372316..09f5647c 100644 --- a/src/ansi/console_tests.rs +++ b/src/ansi/console_tests.rs @@ -54,10 +54,11 @@ mod tests { &truncate_str(&s, 10, "..."), &format!("foo {}...", style("bar").red().force_styling(true)) ); + // `バ`(width = 2) will be truncate to 1, we use space to fill let s = format!("foo {}", style("バー").red().force_styling(true)); assert_eq!( &truncate_str(&s, 5, ""), - &format!("foo {}", style("").red().force_styling(true)) + &format!("foo {}", style(" ").red().force_styling(true)) ); let s = format!("foo {}", style("バー").red().force_styling(true)); assert_eq!( diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs index ae202d31..2b7b2f60 100644 --- a/src/ansi/mod.rs +++ b/src/ansi/mod.rs @@ -54,6 +54,10 @@ pub fn truncate_str<'a, 'b>(s: &'a str, display_width: usize, tail: &'b str) -> for g in t.graphemes(true) { let w = g.width(); if used + w > display_width { + // use space to fill the gap left by truncate. Need another + // option to custom this or just use `WrapOption.wrap_right_prefix_symbol` + // in the future. + result.push_str(&" ".repeat(display_width.saturating_sub(used))); break; } result.push_str(g); diff --git a/src/features/line_numbers.rs b/src/features/line_numbers.rs index d9ab7948..bbdeb592 100644 --- a/src/features/line_numbers.rs +++ b/src/features/line_numbers.rs @@ -797,6 +797,8 @@ pub mod tests { "@", "--wrap-right-symbol", "@", + "--wrap-right-prefix-symbol", + ">", ]) .with_input(DIFF_PLUS_MINUS_WITH_1_CONTEXT_DIFF) .expect_after_header( @@ -816,6 +818,8 @@ pub mod tests { "@", "--wrap-right-symbol", "@", + "--wrap-right-prefix-symbol", + ">", ]; DeltaTest::with_args(cfg) @@ -824,7 +828,7 @@ pub mod tests { r#" │ 1 │abc │ 1 │abc │ 2 │a = one side │ 2 │a = one longer@ - │ │ │ │ side + │ │ │ │ > side │ 3 │xyz │ 3 │xyz"#, ); @@ -834,7 +838,7 @@ pub mod tests { r#" │ 1 │abc │ 1 │abc │ 2 │a = one longer@│ 2 │a = one side - │ │ side │ │ + │ │ > side│ │ │ 3 │xyz │ 3 │xyz"#, ); @@ -975,4 +979,13 @@ index 223ca50..367a6f6 100644 -a = other one +a = right side which is longer xyz"; + + pub const TWO_MINUS_LINES_UNICODE_DIFF: &str = "\ +diff --git a/a.py b/a.py +index 8b0d958..e69de29 100644 +--- a/a.txt ++++ b/b.txt +@@ -1,1 +0,0 @@ +-一二三 +"; } diff --git a/src/features/side_by_side.rs b/src/features/side_by_side.rs index f175fa2a..ac2de08e 100644 --- a/src/features/side_by_side.rs +++ b/src/features/side_by_side.rs @@ -1,6 +1,6 @@ use itertools::Itertools; use syntect::highlighting::Style as SyntectStyle; -use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; use crate::ansi; use crate::cli; @@ -75,12 +75,10 @@ pub fn available_line_width( } pub fn line_is_too_long(line: &str, line_width: usize) -> bool { - let line_sum = line.graphemes(true).count(); - debug_assert!(line.ends_with('\n')); - // `line_sum` is too large because a trailing newline is present, - // so allow one more character. - line_sum > line_width + 1 + + // graphemes will take care of newlines + line.width() > line_width } /// Return whether any of the input lines is too long, and a data @@ -715,4 +713,58 @@ pub mod tests { │ 2 │b = 2 │ 2 │bb = 2 "#, ); } + + #[test] + fn test_two_minus_lines_unicode_truncated() { + DeltaTest::with_args(&[ + "--side-by-side", + "--wrap-max-lines", + "2", + "--width", + "16", + "--line-fill-method=spaces", + ]) + .set_config(|cfg| cfg.truncation_symbol = ">".into()) + .with_input(TWO_MINUS_LINES_UNICODE_DIFF) + .expect_after_header( + r#" + │ 1 │↵ │ │ + │ │↵ │ │ + │ │ >│ │"#, + ); + + DeltaTest::with_args(&[ + "--side-by-side", + "--wrap-max-lines", + "2", + "--width", + "17", + "--line-fill-method=spaces", + ]) + .set_config(|cfg| cfg.truncation_symbol = ">".into()) + .with_input(TWO_MINUS_LINES_UNICODE_DIFF) + .expect_after_header( + r#" + │ 1 │↵ │ │ + │ │↵ │ │ + │ │ >│ │"#, + ); + + DeltaTest::with_args(&[ + "--side-by-side", + "--wrap-max-lines", + "2", + "--width", + "18", + "--line-fill-method=spaces", + ]) + .set_config(|cfg| cfg.truncation_symbol = ">".into()) + .with_input(TWO_MINUS_LINES_UNICODE_DIFF) + .expect_after_header( + r#" + │ 1 │一↵│ │ + │ │二↵│ │ + │ │三 │ │"#, + ); + } } diff --git a/src/wrapping.rs b/src/wrapping.rs index 771d7f78..d9b0ff89 100644 --- a/src/wrapping.rs +++ b/src/wrapping.rs @@ -1,5 +1,6 @@ use syntect::highlighting::Style as SyntectStyle; use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; use crate::cli; use crate::config::INLINE_SYMBOL_WIDTH_1; @@ -202,11 +203,21 @@ where let (style, text, graphemes) = stack .pop() - .map(|(style, text)| (style, text, text.grapheme_indices(true).collect::>())) + .map(|(style, text)| { + ( + style, + text, + text.graphemes(true) + .map(|item| (item.len(), item.width())) + .collect::>(), + ) + }) .unwrap(); - let new_len = curr_line.len + graphemes.len(); + let graphemes_width: usize = graphemes.iter().map(|(_, w)| w).sum(); + let new_len = curr_line.len + graphemes_width; + #[allow(clippy::comparison_chain)] let must_split = if new_len < line_width { curr_line.push_and_set_len((style, text), new_len); false @@ -228,15 +239,6 @@ where } _ => true, } - } else if new_len == line_width + 1 && stack.is_empty() { - // If the one overhanging char is '\n' then keep it on the current line. - if text.ends_with('\n') { - // Do not count the included '\n': - 1 - curr_line.push_and_set_len((style, text), new_len - 1); - false - } else { - true - } } else { true }; @@ -244,16 +246,29 @@ where // Text must be split, one part (or just `wrap_symbol`) is added to the // current line, the other is pushed onto the stack. if must_split { - let grapheme_split_pos = graphemes.len() - (new_len - line_width) - 1; + let mut width_left = graphemes_width + .saturating_sub(new_len - line_width) + .saturating_sub(wrap_config.left_symbol.width()); // The length does not matter anymore and `curr_line` will be reset // at the end, so move the line segments out. let mut line_segments = curr_line.line_segments; - let next_line = if grapheme_split_pos == 0 { + let next_line = if width_left == 0 { text } else { - let byte_split_pos = graphemes[grapheme_split_pos].0; + let mut byte_split_pos = 0; + // After loop byte_split_pos may still equal to 0. If width_left + // is less than the width of first character, We can't display it. + for &(item_len, item_width) in graphemes.iter() { + if width_left >= item_width { + byte_split_pos += item_len; + width_left -= item_width; + } else { + break; + } + } + let this_line = &text[..byte_split_pos]; line_segments.push((style, this_line)); &text[byte_split_pos..] @@ -273,7 +288,9 @@ where if result.len() == 1 && curr_line.has_text() { let current_permille = (curr_line.text_len() * 1000) / line_width; - let pad_len = line_width.saturating_sub(curr_line.text_len()); + // pad line will add a wrap_config.right_prefix_symbol + let pad_len = line_width + .saturating_sub(curr_line.text_len() + wrap_config.right_prefix_symbol.width()); if wrap_config.use_wrap_right_permille > current_permille && pad_len > 0 { // The inserted spaces, which align a line to the right, point into this string. @@ -785,7 +802,7 @@ mod tests { let lines = wrap_test(&cfg, line, 11); assert_eq!(lines.len(), 2); assert_eq!(lines[0].last().unwrap().1, WR); - assert_eq!(lines[1], [(*SD, " "), (*SD, ">"), (*S1, "ab")]); + assert_eq!(lines[1], [(*SD, " "), (*SD, ">"), (*S1, "ab")]); } #[test] @@ -801,7 +818,7 @@ mod tests { lines, vec![ vec![(*S1, "012"), (*S2, "34"), (*SD, WR)], - vec![(*SD, " "), (*SD, RA), (*S2, "56")] + vec![(*SD, " "), (*SD, RA), (*S2, "56")] ] ); } @@ -940,13 +957,19 @@ mod tests { ); // Not working: Tailored grapheme clusters: क्षि = क् + षि - let line = vec![(*S1, "abc"), (*S2, "deநி"), (*S1, "ghij")]; + // + // Difference compare to previous example (even they may look like the + // same width in text editor.) : + // + // width நி: 2 + // width ö̲: 1 + let line = vec![(*S1, "abc"), (*S2, "dநி"), (*S1, "ghij")]; let lines = wrap_test(&cfg, line, 4); assert_eq!( lines, vec![ vec![(*S1, "abc"), (*SD, W)], - vec![(*S2, "deநி"), (*SD, W)], + vec![(*S2, "dநி"), (*SD, W)], vec![(*S1, "ghij")] ] ); -- cgit v1.2.3