summary | refs | log | tree | commit | diff | stats
path: root/src/utils
diff options
context:
space:
mode:
author: Clement Tsang <34804052+ClementTsang@users.noreply.github.com>, 2022-12-05 04:21:37 -0500
committer: GitHub <noreply@github.com>, 2022-12-05 04:21:37 -0500
commit: ac5e2ce4a28ccc9689f8ba43fb7dfc3c819a3366 (patch)
tree: 4fcb4b80219de4c642bfecb321e84f545b8d5de8 /src/utils
parent: d3577bd1ab5e90d20d9baf6029b0bd0b7f61911d (diff)
bug: fix incorrect text width calculation (#925)
* bug: fix incorrect text calculation
* actual fix, add tests
* appease clippy
* add link to inspiration
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/gen_util.rs | 233
1 file changed, 204 insertions, 29 deletions
diff --git a/src/utils/gen_util.rs b/src/utils/gen_util.rs
index 60311105..02b8bfd4 100644
--- a/src/utils/gen_util.rs
+++ b/src/utils/gen_util.rs
@@ -2,6 +2,7 @@ use std::cmp::Ordering;
use tui::text::{Span, Spans, Text};
use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
pub const KILO_LIMIT: u64 = 1000;
pub const MEGA_LIMIT: u64 = 1_000_000;
@@ -96,40 +97,59 @@ pub fn get_decimal_prefix(quantity: u64, unit: &str) -> (f64, String) {
}
/// Truncates text if it is too long, and adds an ellipsis at the end if needed.
-pub fn truncate_text<'a, U: Into<usize>>(content: &str, width: U) -> Text<'a> {
+pub fn truncate_to_text<'a, U: Into<usize>>(content: &str, width: U) -> Text<'a> {
+ Text {
+ lines: vec![Spans(vec![Span::raw(truncate_str(content, width))])],
+ }
+}
+
+/// Truncates a string with an ellipsis character.
+///
+/// NB: This probably does not handle EVERY case, but I think it handles most cases
+/// we will use this function for fine... hopefully.
+fn truncate_str<U: Into<usize>>(content: &str, width: U) -> String {
let width = width.into();
- let mut graphemes = UnicodeSegmentation::graphemes(content, true);
- let grapheme_len = {
- let (_, upper) = graphemes.size_hint();
- match upper {
- Some(upper) => upper,
- None => graphemes.clone().count(), // Don't think this ever fires.
- }
- };
+ let mut text = String::with_capacity(width);
- let text = if grapheme_len > width {
- let mut text = String::with_capacity(width);
- // Truncate with ellipsis.
+ if width > 0 {
+ let mut curr_width = 0;
+ let mut early_break = false;
- // Use a hack to reduce the size to size `width`. Think of it like removing
- // The last `grapheme_len - width` graphemes, which reduces the length to
- // `width` long.
- //
- // This is a way to get around the currently experimental`advance_back_by`.
- graphemes.nth_back(grapheme_len - width);
+ // This tracks the length of the last added string - note this does NOT match the grapheme *width*.
+ let mut last_added_str_len = 0;
- text.push_str(graphemes.as_str());
- text.push('…');
+ // Cases to handle:
+ // - Completes adding the entire string.
+ // - Adds a character up to the boundary, then fails.
+ // - Adds a character not up to the boundary, then fails.
+ // Inspired by https://tomdebruijn.com/posts/rust-string-length-width-calculations/
+ for g in UnicodeSegmentation::graphemes(content, true) {
+ let g_width = if g.contains('\u{200d}') {
+ 2
+ } else {
+ UnicodeWidthStr::width(g)
+ };
- text
- } else {
- content.to_string()
- };
+ if curr_width + g_width <= width {
+ curr_width += g_width;
+ last_added_str_len = g.len();
+ text.push_str(g);
+ } else {
+ early_break = true;
+ break;
+ }
+ }
- // TODO: [OPT] maybe add interning here?
- Text {
- lines: vec![Spans(vec![Span::raw(text)])],
+ if early_break {
+ if curr_width == width {
+ // Remove the last grapheme cluster added.
+ text.truncate(text.len() - last_added_str_len);
+ }
+ text.push('…');
+ }
}
+
+ text
}
#[inline]
@@ -180,7 +200,162 @@ mod test {
}
#[test]
- fn test_truncation() {
- // TODO: Add tests for `truncate_text`
+ fn test_truncate() {
+ let cpu_header = "CPU(c)▲";
+
+ assert_eq!(
+ truncate_str(cpu_header, 8_usize),
+ cpu_header,
+ "should match base string as there is enough room"
+ );
+
+ assert_eq!(
+ truncate_str(cpu_header, 7_usize),
+ cpu_header,
+ "should match base string as there is enough room"
+ );
+
+ assert_eq!(truncate_str(cpu_header, 6_usize), "CPU(c…");
+ assert_eq!(truncate_str(cpu_header, 5_usize), "CPU(…");
+ assert_eq!(truncate_str(cpu_header, 4_usize), "CPU…");
+ assert_eq!(truncate_str(cpu_header, 1_usize), "…");
+ assert_eq!(truncate_str(cpu_header, 0_usize), "");
+ }
+
+ #[test]
+ fn test_truncate_cjk() {
+ let cjk = "施氏食獅史";
+
+ assert_eq!(
+ truncate_str(cjk, 11_usize),
+ cjk,
+ "should match base string as there is enough room"
+ );
+
+ assert_eq!(
+ truncate_str(cjk, 10_usize),
+ cjk,
+ "should match base string as there is enough room"
+ );
+
+ assert_eq!(truncate_str(cjk, 9_usize), "施氏食獅…");
+ assert_eq!(truncate_str(cjk, 8_usize), "施氏食…");
+ assert_eq!(truncate_str(cjk, 2_usize), "…");
+ assert_eq!(truncate_str(cjk, 1_usize), "…");
+ assert_eq!(truncate_str(cjk, 0_usize), "");
+ }
+
+ #[test]
+ fn test_truncate_mixed() {
+ let test = "Test (施氏食獅史) Test";
+
+ assert_eq!(
+ truncate_str(test, 30_usize),
+ test,
+ "should match base string as there is enough room"
+ );
+
+ assert_eq!(
+ truncate_str(test, 22_usize),
+ test,
+ "should match base string as there is just enough room"
+ );
+
+ assert_eq!(
+ truncate_str(test, 21_usize),
+ "Test (施氏食獅史) Te…",
+ "should truncate the t and replace the s with ellipsis"
+ );
+
+ assert_eq!(truncate_str(test, 18_usize), "Test (施氏食獅史)…");
+ assert_eq!(truncate_str(test, 17_usize), "Test (施氏食獅史…");
+ assert_eq!(truncate_str(test, 16_usize), "Test (施氏食獅…");
+ assert_eq!(truncate_str(test, 15_usize), "Test (施氏食獅…");
+ assert_eq!(truncate_str(test, 14_usize), "Test (施氏食…");
+ assert_eq!(truncate_str(test, 13_usize), "Test (施氏食…");
+ assert_eq!(truncate_str(test, 8_usize), "Test (…");
+ assert_eq!(truncate_str(test, 7_usize), "Test (…");
+ assert_eq!(truncate_str(test, 6_usize), "Test …");
+ }
+
+ #[test]
+ fn test_truncate_flags() {
+ let flag = "🇨🇦";
+ assert_eq!(truncate_str(flag, 3_usize), flag);
+ assert_eq!(truncate_str(flag, 2_usize), flag);
+ assert_eq!(truncate_str(flag, 1_usize), "…");
+ assert_eq!(truncate_str(flag, 0_usize), "");
+
+ let flag_text = "oh 🇨🇦";
+ assert_eq!(truncate_str(flag_text, 6_usize), flag_text);
+ assert_eq!(truncate_str(flag_text, 5_usize), flag_text);
+ assert_eq!(truncate_str(flag_text, 4_usize), "oh …");
+
+ let flag_text_wrap = "!🇨🇦!";
+ assert_eq!(truncate_str(flag_text_wrap, 6_usize), flag_text_wrap);
+ assert_eq!(truncate_str(flag_text_wrap, 4_usize), flag_text_wrap);
+ assert_eq!(truncate_str(flag_text_wrap, 3_usize), "!…");
+ assert_eq!(truncate_str(flag_text_wrap, 2_usize), "!…");
+ assert_eq!(truncate_str(flag_text_wrap, 1_usize), "…");
+
+ let flag_cjk = "加拿大🇨🇦";
+ assert_eq!(truncate_str(flag_cjk, 9_usize), flag_cjk);
+ assert_eq!(truncate_str(flag_cjk, 8_usize), flag_cjk);
+ assert_eq!(truncate_str(flag_cjk, 7_usize), "加拿大…");
+ assert_eq!(truncate_str(flag_cjk, 6_usize), "加拿…");
+ assert_eq!(truncate_str(flag_cjk, 5_usize), "加拿…");
+ assert_eq!(truncate_str(flag_cjk, 4_usize), "加…");
+
+ let flag_mix = "🇨🇦加gaa拿naa大daai🇨🇦";
+ assert_eq!(truncate_str(flag_mix, 20_usize), flag_mix);
+ assert_eq!(truncate_str(flag_mix, 19_usize), "🇨🇦加gaa拿naa大daai…");
+ assert_eq!(truncate_str(flag_mix, 18_usize), "🇨🇦加gaa拿naa大daa…");
+ assert_eq!(truncate_str(flag_mix, 17_usize), "🇨🇦加gaa拿naa大da…");
+ assert_eq!(truncate_str(flag_mix, 15_usize), "🇨🇦加gaa拿naa大…");
+ assert_eq!(truncate_str(flag_mix, 14_usize), "🇨🇦加gaa拿naa…");
+ assert_eq!(truncate_str(flag_mix, 13_usize), "🇨🇦加gaa拿naa…");
+ assert_eq!(truncate_str(flag_mix, 3_usize), "🇨🇦…");
+ assert_eq!(truncate_str(flag_mix, 2_usize), "…");
+ assert_eq!(truncate_str(flag_mix, 1_usize), "…");
+ assert_eq!(truncate_str(flag_mix, 0_usize), "");
+ }
+
+ /// This might not be the best way to handle it, but this at least tests that it doesn't crash...
+ #[test]
+ fn test_truncate_hindi() {
+ // cSpell:disable
+ let test = "हिन्दी";
+ assert_eq!(truncate_str(test, 10_usize), test);
+ assert_eq!(truncate_str(test, 6_usize), "हिन्दी");
+ assert_eq!(truncate_str(test, 5_usize), "हिन्दी");
+ assert_eq!(truncate_str(test, 4_usize), "हिन्…");
+ assert_eq!(truncate_str(test, 3_usize), "हि…");
+ assert_eq!(truncate_str(test, 2_usize), "…");
+ assert_eq!(truncate_str(test, 1_usize), "…");
+ assert_eq!(truncate_str(test, 0_usize), "");
+ // cSpell:enable
+ }
+
+ #[test]
+ fn test_truncate_emoji() {
+ let heart = "❤️";
+ assert_eq!(truncate_str(heart, 2_usize), heart);
+ assert_eq!(truncate_str(heart, 1_usize), heart);
+ assert_eq!(truncate_str(heart, 0_usize), "");
+
+ let emote = "💎";
+ assert_eq!(truncate_str(emote, 2_usize), emote);
+ assert_eq!(truncate_str(emote, 1_usize), "…");
+ assert_eq!(truncate_str(emote, 0_usize), "");
+
+ let family = "👨‍👨‍👧‍👦";
+ assert_eq!(truncate_str(family, 2_usize), family);
+ assert_eq!(truncate_str(family, 1_usize), "…");
+ assert_eq!(truncate_str(family, 0_usize), "");
+
+ let scientist = "👩‍🔬";
+ assert_eq!(truncate_str(scientist, 2_usize), scientist);
+ assert_eq!(truncate_str(scientist, 1_usize), "…");
+ assert_eq!(truncate_str(scientist, 0_usize), "");
}
}