diff options
author | Dan Davison <dandavison7@gmail.com> | 2019-07-19 17:45:09 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2019-07-20 18:10:27 -0400 |
commit | 2404597c29f1ca4125740891224216d00e75ac6f (patch) | |
tree | 09deb1306a8f7f9911f8af89004a4389d5aca40c /src | |
parent | 003d3c464888f603df63b727adab639b024a6ab3 (diff) |
Use grapheme units for all visible character calculations0.0.4
Diffstat (limited to 'src')
-rw-r--r-- | src/delta.rs | 6 | ||||
-rw-r--r-- | src/draw.rs | 5 | ||||
-rw-r--r-- | src/edits.rs | 60 |
3 files changed, 46 insertions, 25 deletions
diff --git a/src/delta.rs b/src/delta.rs index 425f87b7..57777bfe 100644 --- a/src/delta.rs +++ b/src/delta.rs @@ -3,6 +3,7 @@ use std::io::Write; use ansi_term::Colour::{Blue, Yellow}; use console::strip_ansi_codes; use syntect::easy::HighlightLines; +use unicode_segmentation::UnicodeSegmentation; use crate::bat::assets::HighlightingAssets; use crate::cli; @@ -249,9 +250,10 @@ fn prepare(_line: &str, config: &Config) -> String { line.push_str(" "); line.push_str(&_line[1..]); } + let line_length = line.graphemes(true).count(); match config.width { - Some(width) if width > line.len() => { - format!("{}{}\n", line, " ".repeat(width - line.len())) + Some(width) if width > line_length => { + format!("{}{}\n", line, " ".repeat(width - line_length)) } _ => format!("{}\n", line), } diff --git a/src/draw.rs b/src/draw.rs index 1018614f..7fe54e1d 100644 --- a/src/draw.rs +++ b/src/draw.rs @@ -3,6 +3,7 @@ use std::io::Write; use ansi_term::Style; use box_drawing; use console::strip_ansi_codes; +use unicode_segmentation::UnicodeSegmentation; /// Write text to stream, surrounded by a box, leaving the cursor just /// beyond the bottom right corner. @@ -18,7 +19,7 @@ pub fn write_boxed( } else { box_drawing::light::UP_LEFT }; - let box_width = strip_ansi_codes(text).len() + 1; + let box_width = strip_ansi_codes(text).graphemes(true).count() + 1; write_boxed_partial(writer, text, box_width, line_style, heavy)?; write!(writer, "{}", line_style.paint(up_left))?; Ok(()) @@ -33,7 +34,7 @@ pub fn write_boxed_with_line( line_style: Style, heavy: bool, ) -> std::io::Result<()> { - let box_width = strip_ansi_codes(text).len() + 1; + let box_width = strip_ansi_codes(text).graphemes(true).count() + 1; write_boxed_with_horizontal_whisker(writer, text, box_width, line_style, heavy)?; write_horizontal_line(writer, line_width - box_width - 1, line_style, heavy)?; Ok(()) diff --git a/src/edits.rs b/src/edits.rs index b3eb43fd..20893b4e 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -305,6 +305,7 @@ mod tests { mod string_pair { use std::iter::Peekable; + use unicode_segmentation::UnicodeSegmentation; /// A pair of right-trimmed strings. pub struct StringPair { @@ -315,9 +316,10 @@ mod string_pair { impl StringPair { pub fn new(s0: &str, s1: &str) -> StringPair { - let common_prefix_length = StringPair::common_prefix_length(s0.chars(), s1.chars()); - let (common_suffix_length, trailing_whitespace) = - StringPair::suffix_data(s0.chars(), s1.chars()); + let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true)); + let common_prefix_length = StringPair::common_prefix_length(g0, g1); // TODO: pass references + let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true)); + let (common_suffix_length, trailing_whitespace) = StringPair::suffix_data(g0, g1); StringPair { common_prefix_length, common_suffix_length, @@ -328,26 +330,36 @@ mod string_pair { } } - fn common_prefix_length<I>(s0: I, s1: I) -> usize + /// Align the two strings at their left ends and consider only + /// the bytes up to the length of the shorter string. Return + /// the byte offset of the first differing grapheme cluster, + /// or the byte length of shorter string if they do not + /// differ. + fn common_prefix_length<'a, I>(s0: I, s1: I) -> usize where - I: Iterator, - I::Item: PartialEq, + I: Iterator<Item = (usize, &'a str)>, { let mut i = 0; - for (c0, c1) in s0.zip(s1) { + for ((_, c0), (_, c1)) in s0.zip(s1) { if c0 != c1 { break; } else { - i += 1; + i += c0.len(); } } i } - /// Return common suffix length and number of trailing whitespace characters on each string. - fn suffix_data<I>(s0: I, s1: I) -> (usize, [usize; 2]) + /// Trim trailing whitespace and align the two strings at + /// their right ends. Fix the origin at their right ends and, + /// looking left, consider only the bytes up to the length of + /// the shorter string. Return the byte offset of the first + /// differing grapheme cluster, or the byte length of the + /// shorter string if they do not differ. Also return the + /// number of bytes of whitespace trimmed from each string. + fn suffix_data<'a, I>(s0: I, s1: I) -> (usize, [usize; 2]) where - I: DoubleEndedIterator<Item = char>, + I: DoubleEndedIterator<Item = (usize, &'a str)>, { let mut s0 = s0.rev().peekable(); let mut s1 = s1.rev().peekable(); @@ -358,21 +370,24 @@ mod string_pair { } /// Consume leading whitespace; return number of characters consumed. - fn consume_whitespace<I>(s: &mut Peekable<I>) -> usize + fn consume_whitespace<'a, I>(s: &mut Peekable<I>) -> usize where - I: Iterator<Item = char>, + I: Iterator<Item = (usize, &'a str)>, { - let mut i = 0; + let mut n = 0; loop { match s.peek() { - Some('\n') | Some(' ') => { + // TODO: Use a whitespace unicode character class? + // Allow for whitespace grapheme clusters > 1 + // byte? + Some(&(_, "\n")) | Some(&(_, " ")) => { s.next(); - i += 1; + n += 1; } _ => break, } } - i + n } } @@ -440,13 +455,16 @@ mod string_pair { assert_eq!(common_suffix_length(" ", "á"), 0); assert_eq!(common_suffix_length("á ", ""), 0); assert_eq!(common_suffix_length("á", "b "), 0); - assert_eq!(common_suffix_length("á", "á "), 1); + assert_eq!(common_suffix_length("á", "á "), "á".len()); assert_eq!(common_suffix_length("a ", "áb "), 0); assert_eq!(common_suffix_length("ab", "á "), 0); assert_eq!(common_suffix_length("áb ", "b "), 1); - assert_eq!(common_suffix_length("áb ", "aáb "), 2); - assert_eq!(common_suffix_length("abá ", "bá"), 2); - assert_eq!(common_suffix_length("áaáabá ", "ááabá "), 4); + assert_eq!(common_suffix_length("áb ", "aáb "), 1 + "á".len()); + assert_eq!(common_suffix_length("abá ", "bá"), 1 + "á".len()); + assert_eq!( + common_suffix_length("áaáabá ", "ááabá "), + 2 + 2 * "á".len() + ); } } } |