summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Dan Davison <dandavison7@gmail.com> 2019-07-19 17:45:09 -0400
committer Dan Davison <dandavison7@gmail.com> 2019-07-20 18:10:27 -0400
commit 2404597c29f1ca4125740891224216d00e75ac6f (patch)
tree 09deb1306a8f7f9911f8af89004a4389d5aca40c
parent 003d3c464888f603df63b727adab639b024a6ab3 (diff)
Use grapheme units for all visible character calculations (ref: 0.0.4)
-rw-r--r-- Cargo.toml 1
-rw-r--r-- src/delta.rs 6
-rw-r--r-- src/draw.rs 5
-rw-r--r-- src/edits.rs 60
4 files changed, 47 insertions, 25 deletions
diff --git a/Cargo.toml b/Cargo.toml
index aa04d10d..a9ab9b3b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ console = "0.7.7"
shell-words = "0.1.0"
structopt = "0.2.16"
syntect = "3.2"
+unicode-segmentation = "1.3.0"
[dependencies.error-chain]
version = "0.12"
diff --git a/src/delta.rs b/src/delta.rs
index 425f87b7..57777bfe 100644
--- a/src/delta.rs
+++ b/src/delta.rs
@@ -3,6 +3,7 @@ use std::io::Write;
use ansi_term::Colour::{Blue, Yellow};
use console::strip_ansi_codes;
use syntect::easy::HighlightLines;
+use unicode_segmentation::UnicodeSegmentation;
use crate::bat::assets::HighlightingAssets;
use crate::cli;
@@ -249,9 +250,10 @@ fn prepare(_line: &str, config: &Config) -> String {
line.push_str(" ");
line.push_str(&_line[1..]);
}
+ let line_length = line.graphemes(true).count();
match config.width {
- Some(width) if width > line.len() => {
- format!("{}{}\n", line, " ".repeat(width - line.len()))
+ Some(width) if width > line_length => {
+ format!("{}{}\n", line, " ".repeat(width - line_length))
}
_ => format!("{}\n", line),
}
diff --git a/src/draw.rs b/src/draw.rs
index 1018614f..7fe54e1d 100644
--- a/src/draw.rs
+++ b/src/draw.rs
@@ -3,6 +3,7 @@ use std::io::Write;
use ansi_term::Style;
use box_drawing;
use console::strip_ansi_codes;
+use unicode_segmentation::UnicodeSegmentation;
/// Write text to stream, surrounded by a box, leaving the cursor just
/// beyond the bottom right corner.
@@ -18,7 +19,7 @@ pub fn write_boxed(
} else {
box_drawing::light::UP_LEFT
};
- let box_width = strip_ansi_codes(text).len() + 1;
+ let box_width = strip_ansi_codes(text).graphemes(true).count() + 1;
write_boxed_partial(writer, text, box_width, line_style, heavy)?;
write!(writer, "{}", line_style.paint(up_left))?;
Ok(())
@@ -33,7 +34,7 @@ pub fn write_boxed_with_line(
line_style: Style,
heavy: bool,
) -> std::io::Result<()> {
- let box_width = strip_ansi_codes(text).len() + 1;
+ let box_width = strip_ansi_codes(text).graphemes(true).count() + 1;
write_boxed_with_horizontal_whisker(writer, text, box_width, line_style, heavy)?;
write_horizontal_line(writer, line_width - box_width - 1, line_style, heavy)?;
Ok(())
diff --git a/src/edits.rs b/src/edits.rs
index b3eb43fd..20893b4e 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -305,6 +305,7 @@ mod tests {
mod string_pair {
use std::iter::Peekable;
+ use unicode_segmentation::UnicodeSegmentation;
/// A pair of right-trimmed strings.
pub struct StringPair {
@@ -315,9 +316,10 @@ mod string_pair {
impl StringPair {
pub fn new(s0: &str, s1: &str) -> StringPair {
- let common_prefix_length = StringPair::common_prefix_length(s0.chars(), s1.chars());
- let (common_suffix_length, trailing_whitespace) =
- StringPair::suffix_data(s0.chars(), s1.chars());
+ let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
+ let common_prefix_length = StringPair::common_prefix_length(g0, g1); // TODO: pass references
+ let (g0, g1) = (s0.grapheme_indices(true), s1.grapheme_indices(true));
+ let (common_suffix_length, trailing_whitespace) = StringPair::suffix_data(g0, g1);
StringPair {
common_prefix_length,
common_suffix_length,
@@ -328,26 +330,36 @@ mod string_pair {
}
}
- fn common_prefix_length<I>(s0: I, s1: I) -> usize
+ /// Align the two strings at their left ends and consider only
+ /// the bytes up to the length of the shorter string. Return
+ /// the byte offset of the first differing grapheme cluster,
+ /// or the byte length of the shorter string if they do not
+ /// differ.
+ fn common_prefix_length<'a, I>(s0: I, s1: I) -> usize
where
- I: Iterator,
- I::Item: PartialEq,
+ I: Iterator<Item = (usize, &'a str)>,
{
let mut i = 0;
- for (c0, c1) in s0.zip(s1) {
+ for ((_, c0), (_, c1)) in s0.zip(s1) {
if c0 != c1 {
break;
} else {
- i += 1;
+ i += c0.len();
}
}
i
}
- /// Return common suffix length and number of trailing whitespace characters on each string.
- fn suffix_data<I>(s0: I, s1: I) -> (usize, [usize; 2])
+ /// Trim trailing whitespace and align the two strings at
+ /// their right ends. Fix the origin at their right ends and,
+ /// looking left, consider only the bytes up to the length of
+ /// the shorter string. Return the byte offset of the first
+ /// differing grapheme cluster, or the byte length of the
+ /// shorter string if they do not differ. Also return the
+ /// number of bytes of whitespace trimmed from each string.
+ fn suffix_data<'a, I>(s0: I, s1: I) -> (usize, [usize; 2])
where
- I: DoubleEndedIterator<Item = char>,
+ I: DoubleEndedIterator<Item = (usize, &'a str)>,
{
let mut s0 = s0.rev().peekable();
let mut s1 = s1.rev().peekable();
@@ -358,21 +370,24 @@ mod string_pair {
}
/// Consume leading whitespace; return number of characters consumed.
- fn consume_whitespace<I>(s: &mut Peekable<I>) -> usize
+ fn consume_whitespace<'a, I>(s: &mut Peekable<I>) -> usize
where
- I: Iterator<Item = char>,
+ I: Iterator<Item = (usize, &'a str)>,
{
- let mut i = 0;
+ let mut n = 0;
loop {
match s.peek() {
- Some('\n') | Some(' ') => {
+ // TODO: Use a whitespace unicode character class?
+ // Allow for whitespace grapheme clusters > 1
+ // byte?
+ Some(&(_, "\n")) | Some(&(_, " ")) => {
s.next();
- i += 1;
+ n += 1;
}
_ => break,
}
}
- i
+ n
}
}
@@ -440,13 +455,16 @@ mod string_pair {
assert_eq!(common_suffix_length(" ", "á"), 0);
assert_eq!(common_suffix_length("á ", ""), 0);
assert_eq!(common_suffix_length("á", "b "), 0);
- assert_eq!(common_suffix_length("á", "á "), 1);
+ assert_eq!(common_suffix_length("á", "á "), "á".len());
assert_eq!(common_suffix_length("a ", "áb "), 0);
assert_eq!(common_suffix_length("ab", "á "), 0);
assert_eq!(common_suffix_length("áb ", "b "), 1);
- assert_eq!(common_suffix_length("áb ", "aáb "), 2);
- assert_eq!(common_suffix_length("abá ", "bá"), 2);
- assert_eq!(common_suffix_length("áaáabá ", "ááabá "), 4);
+ assert_eq!(common_suffix_length("áb ", "aáb "), 1 + "á".len());
+ assert_eq!(common_suffix_length("abá ", "bá"), 1 + "á".len());
+ assert_eq!(
+ common_suffix_length("áaáabá ", "ááabá "),
+ 2 + 2 * "á".len()
+ );
}
}
}