src/utils/tabs.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

use unicode_segmentation::UnicodeSegmentation;

/// Tab-expansion settings: stores the text that stands in for each tab character.
#[derive(Debug, Clone)]
pub struct TabCfg {
    /// A run of space characters; empty when the configured width is zero.
    replacement: String,
}

impl TabCfg {
    /// Create a configuration whose replacement text consists of `width` spaces.
    pub fn new(width: usize) -> Self {
        let replacement = " ".repeat(width);
        Self { replacement }
    }

    /// Number of spaces substituted for one tab.
    pub fn width(&self) -> usize {
        // The replacement only ever contains ASCII spaces (see `new`), so its
        // byte length is the same as the number of spaces.
        self.replacement.len()
    }

    /// Whether tab replacement is enabled, i.e. the configured width is non-zero.
    pub fn replace(&self) -> bool {
        self.width() != 0
    }
}

/// Expand tabs as spaces.
pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
    if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
        itertools::join(line.split('\t'), &tab_cfg.replacement)
    } else {
        line.to_string()
    }
}

/// Drop the first `prefix` characters of `line` and pass the remainder to `expand()`.
///
/// When the first `prefix` bytes of `line` are all ASCII they are cut off with a
/// plain byte slice; otherwise the first `prefix` grapheme clusters are skipped.
pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
    // Prefixes are nearly always ASCII markers such as `+`/`-` (or `++`, ` +`, ...
    // for merges), so the byte-slice path avoids grapheme segmentation in the
    // common case.
    match line.as_bytes().get(..prefix) {
        // Every byte before `prefix` is ASCII, hence `prefix` falls on a char
        // boundary and slicing the str there cannot panic.
        Some(head) if head.is_ascii() => expand(&line[prefix..], tab_cfg),
        // Line shorter than `prefix` bytes, or non-ASCII data within the prefix.
        _ => {
            let remainder: String = line.graphemes(true).skip(prefix).collect();
            expand(&remainder, tab_cfg)
        }
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;

    /// Covers the ASCII-prefix path (with and without tab replacement) and the
    /// path taken when the prefix contains a multi-byte character.
    #[test]
    fn test_remove_prefix_and_expand() {
        // ASCII prefix, tabs replaced by three spaces.
        let ascii_line = "+-foo\tbar";
        assert_eq!(
            remove_prefix_and_expand(2, ascii_line, &TabCfg::new(3)),
            "foo   bar"
        );
        // A zero width disables replacement, so the tab survives.
        assert_eq!(
            remove_prefix_and_expand(2, ascii_line, &TabCfg::new(0)),
            "foo\tbar"
        );

        // Prefix containing a multi-byte character.
        let utf8_prefix = "-│-foo\tbar";
        let prefix_len = 3;
        let stripped = remove_prefix_and_expand(prefix_len, utf8_prefix, &TabCfg::new(1));
        assert_eq!(stripped, "foo bar");
        // More bytes than characters were removed, so the non-ASCII char is gone.
        assert!(utf8_prefix.len() - stripped.len() > prefix_len);
    }
}