Misc tab refactoring (#1424)

* Move tabs logic into utils * Re-use buffer returned by tabs::expand * Add TabCfg to configure tabs. Use the String from this config for the tab replacement. This avoids creating a new String for each processed line. * Avoid unicode segmentation for each line just to remove a prefix. In some code paths no prefix is removed, and in almost all other cases the prefix is just ascii. This simplifies a lot of calls. * Set default tab width to 8. Editors like vim, emacs, nano and most terminal emulators set this value as the default tab display width.
author: Thomas Otto <th1000s@posteo.net> 2023-05-31 19:17:18 +0200
committer: GitHub <noreply@github.com> 2023-05-31 13:17:18 -0400
commit: 139cdb9656292edba917fd6addc0a7960cf60342 (patch)
tree: d598b6663e03ebd5f54d2198b00df4d00d25c893 /src/utils
parent: 65418aaa3bc064dc7ce34e6d2f231b96a5c6acb9 (diff)
2 files changed, 65 insertions, 0 deletions
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 257c7b03..fa8427b6 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -5,4 +5,5 @@ pub mod process;
 pub mod regex_replacement;
 pub mod round_char_boundary;
 pub mod syntect;
+pub mod tabs;
 pub mod workarounds;
diff --git a/src/utils/tabs.rs b/src/utils/tabs.rs
new file mode 100644
index 00000000..67eab32b
--- /dev/null
+++ b/src/utils/tabs.rs
@@ -0,0 +1,64 @@
+use unicode_segmentation::UnicodeSegmentation;
+
+#[derive(Debug, Clone)]
+pub struct TabCfg { // Tab-expansion settings; holds the replacement text so it is not rebuilt for every processed line.
+    replacement: String, // Text substituted for each tab: `new` fills it with spaces, empty means tabs are left alone.
+}
+
+impl TabCfg {
+    pub fn new(width: usize) -> Self { // `width`: number of spaces each tab expands to; 0 yields an empty replacement.
+        TabCfg {
+            replacement: " ".repeat(width),
+        }
+    }
+    pub fn width(&self) -> usize { // Returns the `width` that was passed to `new`.
+        self.replacement.len() // Byte length equals the space count because the replacement is only ascii spaces.
+    }
+    pub fn replace(&self) -> bool { // True when tabs should be replaced, i.e. the configured width is non-zero.
+        !self.replacement.is_empty()
+    }
+}
+
+/// Expand tabs as spaces: every `\t` in `line` becomes `tab_cfg`'s fixed replacement (no tab-stop alignment); always returns a new `String`.
+pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
+    if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') { // Only split and join when a tab is present and replacing is enabled.
+        itertools::join(line.split('\t'), &tab_cfg.replacement)
+    } else {
+        line.to_string() // Nothing to replace: return an unchanged copy.
+    }
+}
+
+/// Remove `prefix` chars from `line`, then call `tabs::expand()`; a non-ascii (or too short) prefix is counted in grapheme clusters instead of bytes.
+pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
+    let line_bytes = line.as_bytes();
+    // The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for
+    // which grapheme clusters are not required.
+    if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() {
+        // Slicing the utf-8 line-str at `prefix` cannot panic: up to `prefix` only ascii was present, so it is a char boundary.
+        expand(&line[prefix..], tab_cfg)
+    } else {
+        let cut_line = line.graphemes(true).skip(prefix).collect::<String>(); // Slow path: skip `prefix` extended grapheme clusters.
+        expand(&cut_line, tab_cfg)
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+
+    #[test]
+    fn test_remove_prefix_and_expand() {
+        let line = "+-foo\tbar"; // ascii-only prefix: exercises the byte-slicing branch
+        let result = remove_prefix_and_expand(2, line, &TabCfg::new(3));
+        assert_eq!(result, "foo   bar");
+        let result = remove_prefix_and_expand(2, line, &TabCfg::new(0)); // width 0: the tab must be kept
+        assert_eq!(result, "foo\tbar");
+
+        let utf8_prefix = "-│-foo\tbar"; // `│` is multi-byte: exercises the grapheme branch
+        let n = 3;
+        let result = remove_prefix_and_expand(n, utf8_prefix, &TabCfg::new(1));
+        assert_eq!(result, "foo bar");
+        // ensure non-ascii chars were removed (more bytes than the `n` graphemes were dropped):
+        assert!(utf8_prefix.len() - result.len() > n);
+    }
+}
author	Thomas Otto <th1000s@posteo.net>	2023-05-31 19:17:18 +0200
committer	GitHub <noreply@github.com>	2023-05-31 13:17:18 -0400
commit	139cdb9656292edba917fd6addc0a7960cf60342 (patch)
tree	d598b6663e03ebd5f54d2198b00df4d00d25c893 /src/utils
parent	65418aaa3bc064dc7ce34e6d2f231b96a5c6acb9 (diff)