diff options
author | Dan Davison <dandavison7@gmail.com> | 2020-08-03 09:46:56 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2020-08-14 10:14:54 -0400 |
commit | 0a9c48c75051fb507ec1a801ca9d0cf96fadbc48 (patch) | |
tree | e1e0b136b7254b962d62f19dc9e87658f82400dc /src/ansi/mod.rs | |
parent | 5ff4e13f10b80574f15db6968086f1c45fd1860a (diff) |
New ANSI escape sequence parser based on vte
Reimplement utility functions from `console` crate, but with support
for OSC sequences.
Diffstat (limited to 'src/ansi/mod.rs')
-rw-r--r-- | src/ansi/mod.rs | 184 |
1 file changed, 160 insertions, 24 deletions
diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs index 158b562c..e2fdaf7f 100644 --- a/src/ansi/mod.rs +++ b/src/ansi/mod.rs @@ -1,18 +1,79 @@ -pub mod parse; +mod console_tests; +mod iterator; -use std::cmp::min; +use std::borrow::Cow; -use console; use itertools::Itertools; +use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; + +use iterator::{AnsiElementIterator, Element}; pub const ANSI_CSI_CLEAR_TO_EOL: &str = "\x1b[0K"; pub const ANSI_CSI_CLEAR_TO_BOL: &str = "\x1b[1K"; pub const ANSI_SGR_RESET: &str = "\x1b[0m"; -pub fn string_starts_with_ansi_escape_sequence(s: &str) -> bool { - console::AnsiCodeIterator::new(s) +pub fn strip_ansi_codes(s: &str) -> String { + strip_ansi_codes_from_strings_iterator(ansi_strings_iterator(s)) +} + +pub fn measure_text_width(s: &str) -> usize { + // TODO: how should e.g. '\n' be handled? + strip_ansi_codes(s).width() +} + +/// Truncate string such that `tail` is present as a suffix, preceded by as much of `s` as can be +/// displayed in the requested width. +// Return string constructed as follows: +// 1. `display_width` characters are available. If the string fits, return it. +// +// 2. Contribute graphemes and ANSI escape sequences from `tail` until either (1) `tail` is +// exhausted, or (2) the display width of the result would exceed `display_width`. +// +// 3. If tail was exhausted, then contribute graphemes and ANSI escape sequences from `s` until the +// display_width of the result would exceed `display_width`. 
+pub fn truncate_str<'a, 'b>(s: &'a str, display_width: usize, tail: &'b str) -> Cow<'a, str> { + let items = ansi_strings_iterator(s).collect::<Vec<(&str, bool)>>(); + let width = strip_ansi_codes_from_strings_iterator(items.iter().map(|el| *el)).width(); + if width <= display_width { + return Cow::from(s); + } + let result_tail = if !tail.is_empty() { + truncate_str(tail, display_width, "").to_string() + } else { + String::new() + }; + let mut used = measure_text_width(&result_tail); + let mut result = String::new(); + for (t, is_ansi) in items { + if !is_ansi { + for g in t.graphemes(true) { + let w = g.width(); + if used + w > display_width { + break; + } + result.push_str(g); + used += w; + } + } else { + result.push_str(t); + } + } + + return Cow::from(format!("{}{}", result, result_tail)); +} + +pub fn parse_first_style(s: &str) -> Option<ansi_term::Style> { + AnsiElementIterator::new(s).find_map(|el| match el { + Element::CSI(style, _, _) => Some(style), + _ => None, + }) +} + +pub fn string_starts_with_ansi_style_sequence(s: &str) -> bool { + AnsiElementIterator::new(s) .nth(0) - .map(|(_, is_ansi)| is_ansi) + .map(|el| matches!(el, Element::CSI(_, _, _))) .unwrap_or(false) } @@ -20,36 +81,111 @@ pub fn string_starts_with_ansi_escape_sequence(s: &str) -> bool { /// counts bytes in non-ANSI-escape-sequence content only. All ANSI escape sequences in the /// original string are preserved. pub fn ansi_preserving_slice(s: &str, start: usize) -> String { - console::AnsiCodeIterator::new(s) - .scan(0, |i, (substring, is_ansi)| { - // i is the index in non-ANSI-escape-sequence content. - let substring_slice = if is_ansi || *i > start { - substring - } else { - &substring[min(substring.len(), start - *i)..] - }; - if !is_ansi { - *i += substring.len(); - } - Some(substring_slice) + AnsiElementIterator::new(s) + .scan(0, |index, element| { + // `index` is the index in non-ANSI-escape-sequence content. 
+ Some(match element { + Element::CSI(_, a, b) => &s[a..b], + Element::ESC(a, b) => &s[a..b], + Element::OSC(a, b) => &s[a..b], + Element::Text(a, b) => { + let i = *index; + *index += b - a; + if *index <= start { + // This text segment ends before start, so contributes no bytes. + "" + } else if i > start { + // This section starts after `start`, so contributes all its bytes. + &s[a..b] + } else { + // This section contributes those bytes that are >= start + &s[(a + start - i)..b] + } + } + }) }) .join("") } +fn ansi_strings_iterator(s: &str) -> impl Iterator<Item = (&str, bool)> { + AnsiElementIterator::new(s).map(move |el| match el { + Element::CSI(_, i, j) => (&s[i..j], true), + Element::ESC(i, j) => (&s[i..j], true), + Element::OSC(i, j) => (&s[i..j], true), + Element::Text(i, j) => (&s[i..j], false), + }) +} + +fn strip_ansi_codes_from_strings_iterator<'a>( + strings: impl Iterator<Item = (&'a str, bool)>, +) -> String { + strings + .filter_map(|(el, is_ansi)| if !is_ansi { Some(el) } else { None }) + .join("") +} + #[cfg(test)] mod tests { - use crate::ansi::ansi_preserving_slice; - use crate::ansi::string_starts_with_ansi_escape_sequence; + use super::{ + ansi_preserving_slice, measure_text_width, parse_first_style, + string_starts_with_ansi_style_sequence, strip_ansi_codes, + }; + + #[test] + fn test_strip_ansi_codes() { + for s in &["src/ansi/mod.rs", "バー", "src/ansi/modバー.rs"] { + assert_eq!(strip_ansi_codes(s), *s); + } + assert_eq!(strip_ansi_codes("\x1b[31mバー\x1b[0m"), "バー"); + } + + #[test] + fn test_measure_text_width() { + assert_eq!(measure_text_width("src/ansi/mod.rs"), 15); + assert_eq!(measure_text_width("バー"), 4); + assert_eq!(measure_text_width("src/ansi/modバー.rs"), 19); + assert_eq!(measure_text_width("\x1b[31mバー\x1b[0m"), 4); + assert_eq!(measure_text_width("a\nb\n"), 2); + } + + #[test] + fn test_strip_ansi_codes_osc_hyperlink() { + 
assert_eq!(strip_ansi_codes("\x1b[38;5;4m\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b\\src/ansi/mod.rs\x1b]8;;\x1b\\\x1b[0m\n"), + "src/ansi/mod.rs\n"); + } + + #[test] + fn test_measure_text_width_osc_hyperlink() { + assert_eq!(measure_text_width("\x1b[38;5;4m\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b\\src/ansi/mod.rs\x1b]8;;\x1b\\\x1b[0m"), + measure_text_width("src/ansi/mod.rs")); + } + + #[test] + fn test_measure_text_width_osc_hyperlink_non_ascii() { + assert_eq!(measure_text_width("\x1b[38;5;4m\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b\\src/ansi/modバー.rs\x1b]8;;\x1b\\\x1b[0m"), + measure_text_width("src/ansi/modバー.rs")); + } + + #[test] + fn test_parse_first_style() { + let minus_line_from_unconfigured_git = "\x1b[31m-____\x1b[m\n"; + let style = parse_first_style(minus_line_from_unconfigured_git); + let expected_style = ansi_term::Style { + foreground: Some(ansi_term::Color::Red), + ..ansi_term::Style::default() + }; + assert_eq!(Some(expected_style), style); + } #[test] fn test_string_starts_with_ansi_escape_sequence() { - assert!(!string_starts_with_ansi_escape_sequence("")); - assert!(!string_starts_with_ansi_escape_sequence("-")); - assert!(string_starts_with_ansi_escape_sequence( + assert!(!string_starts_with_ansi_style_sequence("")); + assert!(!string_starts_with_ansi_style_sequence("-")); + assert!(string_starts_with_ansi_style_sequence( "\x1b[31m-XXX\x1b[m\n" )); - assert!(string_starts_with_ansi_escape_sequence("\x1b[32m+XXX")); + assert!(string_starts_with_ansi_style_sequence("\x1b[32m+XXX")); } #[test] |