diff options
author | Thomas Otto <th1000s@posteo.net> | 2023-05-31 19:17:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-31 13:17:18 -0400 |
commit | 139cdb9656292edba917fd6addc0a7960cf60342 (patch) | |
tree | d598b6663e03ebd5f54d2198b00df4d00d25c893 /src/utils | |
parent | 65418aaa3bc064dc7ce34e6d2f231b96a5c6acb9 (diff) |
Misc tab refactoring (#1424)
* Move tabs logic into utils
* Re-use buffer returned by tabs::expand
* Add TabCfg to configure tabs
Use the String from this config for the tab replacement. This avoids
creating a new String for each processed line.
* Avoid unicode segmentation for each line just to remove a prefix
In some code paths no prefix is removed, and in almost all other
cases the prefix is just ascii.
This simplifies a lot of calls.
* Set default tab width to 8
Editors like vim, emacs, nano and most terminal emulators set
this value as the default tab display width.
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/mod.rs | 1 | ||||
-rw-r--r-- | src/utils/tabs.rs | 64 |
2 files changed, 65 insertions, 0 deletions
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 257c7b03..fa8427b6 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -5,4 +5,5 @@ pub mod process;
 pub mod regex_replacement;
 pub mod round_char_boundary;
 pub mod syntect;
+pub mod tabs;
 pub mod workarounds;
diff --git a/src/utils/tabs.rs b/src/utils/tabs.rs
new file mode 100644
index 00000000..67eab32b
--- /dev/null
+++ b/src/utils/tabs.rs
@@ -0,0 +1,64 @@
+use unicode_segmentation::UnicodeSegmentation;
+
+#[derive(Debug, Clone)]
+pub struct TabCfg {
+    replacement: String,
+}
+
+impl TabCfg {
+    pub fn new(width: usize) -> Self {
+        TabCfg {
+            replacement: " ".repeat(width),
+        }
+    }
+    pub fn width(&self) -> usize {
+        self.replacement.len()
+    }
+    pub fn replace(&self) -> bool {
+        !self.replacement.is_empty()
+    }
+}
+
+/// Expand tabs as spaces.
+pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
+    if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
+        itertools::join(line.split('\t'), &tab_cfg.replacement)
+    } else {
+        line.to_string()
+    }
+}
+
+/// Remove `prefix` chars from `line`, then call `tabs::expand()`.
+pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
+    let line_bytes = line.as_bytes();
+    // The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for
+    // which grapheme clusters are not required.
+    if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() {
+        // Safety: slicing into the utf-8 line-str is ok, upto `prefix` only ascii was present.
+        expand(&line[prefix..], tab_cfg)
+    } else {
+        let cut_line = line.graphemes(true).skip(prefix).collect::<String>();
+        expand(&cut_line, tab_cfg)
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+
+    #[test]
+    fn test_remove_prefix_and_expand() {
+        let line = "+-foo\tbar";
+        let result = remove_prefix_and_expand(2, line, &TabCfg::new(3));
+        assert_eq!(result, "foo   bar");
+        let result = remove_prefix_and_expand(2, line, &TabCfg::new(0));
+        assert_eq!(result, "foo\tbar");
+
+        let utf8_prefix = "-│-foo\tbar";
+        let n = 3;
+        let result = remove_prefix_and_expand(n, utf8_prefix, &TabCfg::new(1));
+        assert_eq!(result, "foo bar");
+        // ensure non-ascii chars were removed:
+        assert!(utf8_prefix.len() - result.len() > n);
+    }
+}