From 54e1ee79c7cefe573138c4d843fd8001b774422c Mon Sep 17 00:00:00 2001
From: Sheldon Nico <11974052+SheldonNico@users.noreply.github.com>
Date: Sat, 3 Dec 2022 22:59:06 +0800
Subject: try fix bad alignment in unicode (#1144) (#1145)

* try fix bad alignment in unicode (#1144)

* use width instead of count in wrap_line

* fix fmt

* 3 tests do not need fail

* fix tests

Co-authored-by: Thomas Otto <th1000s@posteo.net>
---
 src/ansi/console_tests.rs    |  3 ++-
 src/ansi/mod.rs              |  4 +++
 src/features/line_numbers.rs | 17 ++++++++++--
 src/features/side_by_side.rs | 64 +++++++++++++++++++++++++++++++++++++++-----
 src/wrapping.rs              | 61 ++++++++++++++++++++++++++++-------------
 5 files changed, 121 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/ansi/console_tests.rs b/src/ansi/console_tests.rs
index d6372316..09f5647c 100644
--- a/src/ansi/console_tests.rs
+++ b/src/ansi/console_tests.rs
@@ -54,10 +54,11 @@ mod tests {
             &truncate_str(&s, 10, "..."),
             &format!("foo {}...", style("bar").red().force_styling(true))
         );
+        // `バ`(width = 2) will be truncate to 1, we use space to fill
         let s = format!("foo {}", style("バー").red().force_styling(true));
         assert_eq!(
             &truncate_str(&s, 5, ""),
-            &format!("foo {}", style("").red().force_styling(true))
+            &format!("foo {}", style(" ").red().force_styling(true))
         );
         let s = format!("foo {}", style("バー").red().force_styling(true));
         assert_eq!(
diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs
index ae202d31..2b7b2f60 100644
--- a/src/ansi/mod.rs
+++ b/src/ansi/mod.rs
@@ -54,6 +54,10 @@ pub fn truncate_str<'a, 'b>(s: &'a str, display_width: usize, tail: &'b str) ->
             for g in t.graphemes(true) {
                 let w = g.width();
                 if used + w > display_width {
+                    // use space to fill the gap left by truncate. Need another
+                    // option to custom this or just use `WrapOption.wrap_right_prefix_symbol`
+                    // in the future.
+                    result.push_str(&" ".repeat(display_width.saturating_sub(used)));
                     break;
                 }
                 result.push_str(g);
diff --git a/src/features/line_numbers.rs b/src/features/line_numbers.rs
index d9ab7948..bbdeb592 100644
--- a/src/features/line_numbers.rs
+++ b/src/features/line_numbers.rs
@@ -797,6 +797,8 @@ pub mod tests {
             "@",
             "--wrap-right-symbol",
             "@",
+            "--wrap-right-prefix-symbol",
+            ">",
         ])
         .with_input(DIFF_PLUS_MINUS_WITH_1_CONTEXT_DIFF)
         .expect_after_header(
@@ -816,6 +818,8 @@ pub mod tests {
             "@",
             "--wrap-right-symbol",
             "@",
+            "--wrap-right-prefix-symbol",
+            ">",
         ];
 
         DeltaTest::with_args(cfg)
@@ -824,7 +828,7 @@ pub mod tests {
                 r#"
                 │  1 │abc            │  1 │abc
                 │  2 │a = one side   │  2 │a = one longer@
-                │    │               │    │ side
+                │    │               │    │         > side
                 │  3 │xyz            │  3 │xyz"#,
             );
 
@@ -834,7 +838,7 @@ pub mod tests {
                 r#"
                 │  1 │abc            │  1 │abc
                 │  2 │a = one longer@│  2 │a = one side
-                │    │ side          │    │
+                │    │         > side│    │
                 │  3 │xyz            │  3 │xyz"#,
             );
 
@@ -975,4 +979,13 @@ index 223ca50..367a6f6 100644
 -a = other one
 +a = right side which is longer
  xyz";
+
+    pub const TWO_MINUS_LINES_UNICODE_DIFF: &str = "\
+diff --git a/a.py b/a.py
+index 8b0d958..e69de29 100644
+--- a/a.txt
++++ b/b.txt
+@@ -1,1 +0,0 @@
+-一二三
+";
 }
diff --git a/src/features/side_by_side.rs b/src/features/side_by_side.rs
index f175fa2a..ac2de08e 100644
--- a/src/features/side_by_side.rs
+++ b/src/features/side_by_side.rs
@@ -1,6 +1,6 @@
 use itertools::Itertools;
 use syntect::highlighting::Style as SyntectStyle;
-use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
 
 use crate::ansi;
 use crate::cli;
@@ -75,12 +75,10 @@ pub fn available_line_width(
 }
 
 pub fn line_is_too_long(line: &str, line_width: usize) -> bool {
-    let line_sum = line.graphemes(true).count();
-
     debug_assert!(line.ends_with('\n'));
-    // `line_sum` is too large because a trailing newline is present,
-    // so allow one more character.
-    line_sum > line_width + 1
+
+    // graphemes will take care of newlines
+    line.width() > line_width
 }
 
 /// Return whether any of the input lines is too long, and a data
@@ -715,4 +713,58 @@ pub mod tests {
             │  2 │b = 2         │  2 │bb = 2        "#,
         );
     }
+
+    #[test]
+    fn test_two_minus_lines_unicode_truncated() {
+        DeltaTest::with_args(&[
+            "--side-by-side",
+            "--wrap-max-lines",
+            "2",
+            "--width",
+            "16",
+            "--line-fill-method=spaces",
+        ])
+        .set_config(|cfg| cfg.truncation_symbol = ">".into())
+        .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+        .expect_after_header(
+            r#"
+            │  1 │↵ │    │
+            │    │↵ │    │
+            │    │ >│    │"#,
+        );
+
+        DeltaTest::with_args(&[
+            "--side-by-side",
+            "--wrap-max-lines",
+            "2",
+            "--width",
+            "17",
+            "--line-fill-method=spaces",
+        ])
+        .set_config(|cfg| cfg.truncation_symbol = ">".into())
+        .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+        .expect_after_header(
+            r#"
+            │  1 │↵ │    │
+            │    │↵ │    │
+            │    │ >│    │"#,
+        );
+
+        DeltaTest::with_args(&[
+            "--side-by-side",
+            "--wrap-max-lines",
+            "2",
+            "--width",
+            "18",
+            "--line-fill-method=spaces",
+        ])
+        .set_config(|cfg| cfg.truncation_symbol = ">".into())
+        .with_input(TWO_MINUS_LINES_UNICODE_DIFF)
+        .expect_after_header(
+            r#"
+            │  1 │一↵│    │
+            │    │二↵│    │
+            │    │三 │    │"#,
+        );
+    }
 }
diff --git a/src/wrapping.rs b/src/wrapping.rs
index 771d7f78..d9b0ff89 100644
--- a/src/wrapping.rs
+++ b/src/wrapping.rs
@@ -1,5 +1,6 @@
 use syntect::highlighting::Style as SyntectStyle;
 use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
 
 use crate::cli;
 use crate::config::INLINE_SYMBOL_WIDTH_1;
@@ -202,11 +203,21 @@ where
 
         let (style, text, graphemes) = stack
             .pop()
-            .map(|(style, text)| (style, text, text.grapheme_indices(true).collect::<Vec<_>>()))
+            .map(|(style, text)| {
+                (
+                    style,
+                    text,
+                    text.graphemes(true)
+                        .map(|item| (item.len(), item.width()))
+                        .collect::<Vec<_>>(),
+                )
+            })
             .unwrap();
 
-        let new_len = curr_line.len + graphemes.len();
+        let graphemes_width: usize = graphemes.iter().map(|(_, w)| w).sum();
+        let new_len = curr_line.len + graphemes_width;
 
+        #[allow(clippy::comparison_chain)]
         let must_split = if new_len < line_width {
             curr_line.push_and_set_len((style, text), new_len);
             false
@@ -228,15 +239,6 @@ where
                 }
                 _ => true,
             }
-        } else if new_len == line_width + 1 && stack.is_empty() {
-            // If the one overhanging char is '\n' then keep it on the current line.
-            if text.ends_with('\n') {
-                // Do not count the included '\n': - 1
-                curr_line.push_and_set_len((style, text), new_len - 1);
-                false
-            } else {
-                true
-            }
         } else {
             true
         };
@@ -244,16 +246,29 @@ where
         // Text must be split, one part (or just `wrap_symbol`) is added to the
         // current line, the other is pushed onto the stack.
         if must_split {
-            let grapheme_split_pos = graphemes.len() - (new_len - line_width) - 1;
+            let mut width_left = graphemes_width
+                .saturating_sub(new_len - line_width)
+                .saturating_sub(wrap_config.left_symbol.width());
 
             // The length does not matter anymore and `curr_line` will be reset
             // at the end, so move the line segments out.
             let mut line_segments = curr_line.line_segments;
 
-            let next_line = if grapheme_split_pos == 0 {
+            let next_line = if width_left == 0 {
                 text
             } else {
-                let byte_split_pos = graphemes[grapheme_split_pos].0;
+                let mut byte_split_pos = 0;
+                // After loop byte_split_pos may still equal to 0. If width_left
+                // is less than the width of first character, We can't display it.
+                for &(item_len, item_width) in graphemes.iter() {
+                    if width_left >= item_width {
+                        byte_split_pos += item_len;
+                        width_left -= item_width;
+                    } else {
+                        break;
+                    }
+                }
+
                 let this_line = &text[..byte_split_pos];
                 line_segments.push((style, this_line));
                 &text[byte_split_pos..]
@@ -273,7 +288,9 @@ where
     if result.len() == 1 && curr_line.has_text() {
         let current_permille = (curr_line.text_len() * 1000) / line_width;
 
-        let pad_len = line_width.saturating_sub(curr_line.text_len());
+        // pad line will add a wrap_config.right_prefix_symbol
+        let pad_len = line_width
+            .saturating_sub(curr_line.text_len() + wrap_config.right_prefix_symbol.width());
 
         if wrap_config.use_wrap_right_permille > current_permille && pad_len > 0 {
             // The inserted spaces, which align a line to the right, point into this string.
@@ -785,7 +802,7 @@ mod tests {
         let lines = wrap_test(&cfg, line, 11);
         assert_eq!(lines.len(), 2);
         assert_eq!(lines[0].last().unwrap().1, WR);
-        assert_eq!(lines[1], [(*SD, "         "), (*SD, ">"), (*S1, "ab")]);
+        assert_eq!(lines[1], [(*SD, "        "), (*SD, ">"), (*S1, "ab")]);
     }
 
     #[test]
@@ -801,7 +818,7 @@ mod tests {
                 lines,
                 vec![
                     vec![(*S1, "012"), (*S2, "34"), (*SD, WR)],
-                    vec![(*SD, "    "), (*SD, RA), (*S2, "56")]
+                    vec![(*SD, "   "), (*SD, RA), (*S2, "56")]
                 ]
             );
         }
@@ -940,13 +957,19 @@ mod tests {
         );
 
         // Not working: Tailored grapheme clusters: क्षि  = क् + षि
-        let line = vec![(*S1, "abc"), (*S2, "deநி"), (*S1, "ghij")];
+        //
+        // Difference compare to previous example (even they may look like the
+        // same width in text editor.) :
+        //
+        // width நி: 2
+        // width ö̲: 1
+        let line = vec![(*S1, "abc"), (*S2, "dநி"), (*S1, "ghij")];
         let lines = wrap_test(&cfg, line, 4);
         assert_eq!(
             lines,
             vec![
                 vec![(*S1, "abc"), (*SD, W)],
-                vec![(*S2, "deநி"), (*SD, W)],
+                vec![(*S2, "dநி"), (*SD, W)],
                 vec![(*S1, "ghij")]
             ]
         );
-- 
cgit v1.2.3